]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Add dmu_tx_hold_append() interface
authorBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 9 May 2023 16:03:10 +0000 (09:03 -0700)
committerGitHub <noreply@github.com>
Tue, 9 May 2023 16:03:10 +0000 (09:03 -0700)
Provides an interface which callers can use to declare a write when
the exact starting offset is not yet known.  Since the full range
being updated is not available only the first L0 block at the
provided offset will be prefetched.

Reviewed-by: Olaf Faaland <faaland1@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #14819

include/sys/dmu.h
include/sys/dmu_tx.h
module/zfs/dmu_tx.c

index 1b82ff620f2739047f7921616eebbc7b0f83738c..a5a5c378279aa4587cbe91642969f8f365fefd00 100644 (file)
@@ -782,6 +782,9 @@ dmu_tx_t *dmu_tx_create(objset_t *os);
 void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
 void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
     int len);
+void dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
+void dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
+    int len);
 void dmu_tx_hold_clone_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
     int len);
 void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
index ca8514e5d2d018df8199829678dfd986464de239..aa55da626149eca5f1a76516946fa697ae87234f 100644 (file)
@@ -91,6 +91,7 @@ enum dmu_tx_hold_type {
        THT_SPACE,
        THT_SPILL,
        THT_CLONE,
+       THT_APPEND,
        THT_NUMTYPES
 };
 
index 1c5608c4541b6599ef33c4c4f1e31642fd714850..c4e274bd4c428de2ed663c7902cf5b032b7adb44 100644 (file)
@@ -294,6 +294,53 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
        }
 }
 
+static void
+dmu_tx_count_append(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
+{
+       dnode_t *dn = txh->txh_dnode;
+       int err = 0;
+
+       if (len == 0)
+               return;
+
+       (void) zfs_refcount_add_many(&txh->txh_space_towrite, len, FTAG);
+
+       if (dn == NULL)
+               return;
+
+       /*
+        * For i/o error checking, read only the first level-0 block of the
+        * append, and only when that block is partially written (unaligned
+        * offset or len shorter than a block).  No other blocks are read.
+        */
+       if (dn->dn_maxblkid == 0) {
+               if (off < dn->dn_datablksz &&
+                   (off > 0 || len < dn->dn_datablksz)) {
+                       err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
+                       if (err != 0) {
+                               txh->txh_tx->tx_err = err;
+                       }
+               }
+       } else {
+               zio_t *zio = zio_root(dn->dn_objset->os_spa,
+                   NULL, NULL, ZIO_FLAG_CANFAIL);
+
+               /* first level-0 block */
+               uint64_t start = off >> dn->dn_datablkshift;
+               if (P2PHASE(off, dn->dn_datablksz) || len < dn->dn_datablksz) {
+                       err = dmu_tx_check_ioerr(zio, dn, 0, start);
+                       if (err != 0) {
+                               txh->txh_tx->tx_err = err;
+                       }
+               }
+
+               err = zio_wait(zio);
+               if (err != 0) {
+                       txh->txh_tx->tx_err = err;
+               }
+       }
+}
+
 static void
 dmu_tx_count_dnode(dmu_tx_hold_t *txh)
 {
@@ -334,6 +381,42 @@ dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
        }
 }
 
+/*
+ * Should be used when appending to an object and the exact offset is unknown.
+ * The write must occur at or beyond the specified offset.  Only the L0 block
+ * at the provided offset will be prefetched (likewise for the _by_dnode form).
+ */
+void
+dmu_tx_hold_append(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
+{
+       dmu_tx_hold_t *txh;
+
+       ASSERT0(tx->tx_txg);
+       ASSERT3U(len, <=, DMU_MAX_ACCESS);
+
+       txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+           object, THT_APPEND, off, DMU_OBJECT_END);
+       if (txh != NULL) {
+               dmu_tx_count_append(txh, off, len);
+               dmu_tx_count_dnode(txh);
+       }
+}
+
+void
+dmu_tx_hold_append_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
+{
+       dmu_tx_hold_t *txh;
+
+       ASSERT0(tx->tx_txg);
+       ASSERT3U(len, <=, DMU_MAX_ACCESS);
+
+       txh = dmu_tx_hold_dnode_impl(tx, dn, THT_APPEND, off, DMU_OBJECT_END);
+       if (txh != NULL) {
+               dmu_tx_count_append(txh, off, len);
+               dmu_tx_count_dnode(txh);
+       }
+}
+
 /*
  * This function marks the transaction as being a "net free".  The end
  * result is that refquotas will be disabled for this transaction, and
@@ -668,6 +751,26 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
                                if (blkid == 0)
                                        match_offset = TRUE;
                                break;
+                       case THT_APPEND:
+                               if (blkid >= beginblk && (blkid <= endblk ||
+                                   txh->txh_arg2 == DMU_OBJECT_END))
+                                       match_offset = TRUE;
+
+                               /*
+                                * THT_WRITE used for bonus and spill blocks.
+                                */
+                               ASSERT(blkid != DMU_BONUS_BLKID &&
+                                   blkid != DMU_SPILL_BLKID);
+
+                               /*
+                                * They might have to increase nlevels,
+                                * thus dirtying the new TLIBs.  Or the
+                                * might have to change the block size,
+                                * thus dirying the new lvl=0 blk=0.
+                                */
+                               if (blkid == 0)
+                                       match_offset = TRUE;
+                               break;
                        case THT_FREE:
                                /*
                                 * We will dirty all the level 1 blocks in
@@ -1454,6 +1557,8 @@ dmu_tx_fini(void)
 EXPORT_SYMBOL(dmu_tx_create);
 EXPORT_SYMBOL(dmu_tx_hold_write);
 EXPORT_SYMBOL(dmu_tx_hold_write_by_dnode);
+EXPORT_SYMBOL(dmu_tx_hold_append);
+EXPORT_SYMBOL(dmu_tx_hold_append_by_dnode);
 EXPORT_SYMBOL(dmu_tx_hold_free);
 EXPORT_SYMBOL(dmu_tx_hold_free_by_dnode);
 EXPORT_SYMBOL(dmu_tx_hold_zap);