Fix send/recv lost spill block
diff --git a/module/zfs/dmu_object.c b/module/zfs/dmu_object.c
index 586a04b1653688cb9358c8f65897b02478fc4c98..ec78ebbdcb46efc4367ecc3e820a0ec706759019 100644
--- a/module/zfs/dmu_object.c
+++ b/module/zfs/dmu_object.c
@@ -24,6 +24,7 @@
  * Copyright 2014 HybridCluster. All rights reserved.
  */
 
+#include <sys/dbuf.h>
 #include <sys/dmu.h>
 #include <sys/dmu_objset.h>
 #include <sys/dmu_tx.h>
  */
 int dmu_object_alloc_chunk_shift = 7;
 
-uint64_t
-dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
-    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
-{
-       return dmu_object_alloc_dnsize(os, ot, blocksize, bonustype, bonuslen,
-           0, tx);
-}
-
-uint64_t
-dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
-    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+static uint64_t
+dmu_object_alloc_impl(objset_t *os, dmu_object_type_t ot, int blocksize,
+    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
+    int dnodesize, dnode_t **allocated_dnode, void *tag, dmu_tx_t *tx)
 {
        uint64_t object;
        uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
@@ -87,6 +81,19 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
        if (dnodes_per_chunk > L1_dnode_count)
                dnodes_per_chunk = L1_dnode_count;
 
+       /*
+        * The caller requested the dnode be returned as a performance
+        * optimization in order to avoid releasing the hold only to
+        * immediately reacquire it.  Since the caller is responsible
+        * for releasing the hold, it must provide the tag.
+        */
+       if (allocated_dnode != NULL) {
+               ASSERT3P(tag, !=, NULL);
+       } else {
+               ASSERT3P(tag, ==, NULL);
+               tag = FTAG;
+       }
+
        object = *cpuobj;
        for (;;) {
                /*
@@ -174,7 +181,7 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
                 * to do so.
                 */
                error = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
-                   dn_slots, FTAG, &dn);
+                   dn_slots, tag, &dn);
                if (error == 0) {
                        rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
                        /*
@@ -182,15 +189,25 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
                         * again now that we have the struct lock.
                         */
                        if (dn->dn_type == DMU_OT_NONE) {
-                               dnode_allocate(dn, ot, blocksize, 0,
-                                   bonustype, bonuslen, dn_slots, tx);
+                               dnode_allocate(dn, ot, blocksize,
+                                   indirect_blockshift, bonustype,
+                                   bonuslen, dn_slots, tx);
                                rw_exit(&dn->dn_struct_rwlock);
                                dmu_tx_add_new_object(tx, dn);
-                               dnode_rele(dn, FTAG);
+
+                               /*
+                                * Caller requested the allocated dnode be
+                                * returned and is responsible for the hold.
+                                */
+                               if (allocated_dnode != NULL)
+                                       *allocated_dnode = dn;
+                               else
+                                       dnode_rele(dn, tag);
+
                                return (object);
                        }
                        rw_exit(&dn->dn_struct_rwlock);
-                       dnode_rele(dn, FTAG);
+                       dnode_rele(dn, tag);
                        DNODE_STAT_BUMP(dnode_alloc_race);
                }
 
@@ -206,6 +223,45 @@ dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
        }
 }
 
+uint64_t
+dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
+    dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+{
+       return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
+           bonuslen, 0, NULL, NULL, tx));
+}
+
+uint64_t
+dmu_object_alloc_ibs(objset_t *os, dmu_object_type_t ot, int blocksize,
+    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
+    dmu_tx_t *tx)
+{
+       return (dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift,
+           bonustype, bonuslen, 0, NULL, NULL, tx));
+}
+
+uint64_t
+dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
+    dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+       return (dmu_object_alloc_impl(os, ot, blocksize, 0, bonustype,
+           bonuslen, dnodesize, NULL, NULL, tx));
+}
+
+/*
+ * Allocate a new object and return a pointer to the newly allocated dnode
+ * via the allocated_dnode argument.  The returned dnode will be held and
+ * the caller is responsible for releasing the hold by calling dnode_rele().
+ */
+uint64_t
+dmu_object_alloc_hold(objset_t *os, dmu_object_type_t ot, int blocksize,
+    int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
+    int dnodesize, dnode_t **allocated_dnode, void *tag, dmu_tx_t *tx)
+{
+       return (dmu_object_alloc_impl(os, ot, blocksize, indirect_blockshift,
+           bonustype, bonuslen, dnodesize, allocated_dnode, tag, tx));
+}
+
 int
 dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
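
For reference, below is a minimal sketch of how the dmu_object_alloc_hold() interface added above could be used. It is illustrative only and not part of this change: example_create_held() is a made-up caller, while the dmu_object_alloc_hold() and dnode_rele() calls follow the signatures in this diff.

/*
 * Minimal sketch (not from this commit): allocate an object and keep the
 * dnode held so the caller can continue using it without an immediate
 * dnode_rele()/dnode_hold() round trip.  The caller supplies the tag and
 * owns the hold, as required by the assertions in dmu_object_alloc_impl().
 */
static uint64_t
example_create_held(objset_t *os, dmu_tx_t *tx, void *tag)
{
        dnode_t *dn = NULL;
        uint64_t object;

        object = dmu_object_alloc_hold(os, DMU_OT_PLAIN_FILE_CONTENTS, 0, 0,
            DMU_OT_NONE, 0, DNODE_MIN_SIZE, &dn, tag, tx);

        /* ... operate on the still-held dnode, e.g. via *_by_dnode() ... */

        /* The caller is responsible for dropping its own hold. */
        dnode_rele(dn, tag);

        return (object);
}

The design choice is visible in dmu_object_alloc_impl() above: when allocated_dnode is non-NULL the dnode_rele() is skipped and the hold is handed to the caller, which is why a caller-supplied tag is mandatory.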
@@ -249,13 +305,13 @@ dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
 {
        return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
-           bonuslen, DNODE_MIN_SIZE, tx));
+           bonuslen, DNODE_MIN_SIZE, B_FALSE, tx));
 }
 
 int
 dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
     int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
-    dmu_tx_t *tx)
+    boolean_t keep_spill, dmu_tx_t *tx)
 {
        dnode_t *dn;
        int dn_slots = dnodesize >> DNODE_SHIFT;
@@ -272,7 +328,30 @@ dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
        if (err)
                return (err);
 
-       dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx);
+       dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots,
+           keep_spill, tx);
+
+       dnode_rele(dn, FTAG);
+       return (err);
+}
+
+int
+dmu_object_rm_spill(objset_t *os, uint64_t object, dmu_tx_t *tx)
+{
+       dnode_t *dn;
+       int err;
+
+       err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
+           FTAG, &dn);
+       if (err)
+               return (err);
+
+       rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+       if (dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+               dbuf_rm_spill(dn, tx);
+               dnode_rm_spill(dn, tx);
+       }
+       rw_exit(&dn->dn_struct_rwlock);
 
        dnode_rele(dn, FTAG);
        return (err);
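
As a rough illustration of how the two helpers above can fit together on the receive side of the spill-block fix, consider the sketch below. It is not the actual receive code: stream_has_spill is a hypothetical flag standing in for whatever the object record in the send stream reports, and only the dmu_object_reclaim_dnsize() and dmu_object_rm_spill() signatures come from this diff.

/*
 * Hedged sketch (not from this commit): when receiving an object, keep an
 * existing spill block across the reallocation if the stream says it is
 * still valid, and explicitly remove it otherwise.  stream_has_spill is a
 * hypothetical placeholder.
 */
static int
example_receive_object(objset_t *os, uint64_t object, dmu_object_type_t ot,
    int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
    boolean_t stream_has_spill, dmu_tx_t *tx)
{
        int err;

        /* keep_spill = B_TRUE keeps dnode_reallocate() from dropping it. */
        err = dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
            bonuslen, dnodesize, stream_has_spill, tx);
        if (err != 0)
                return (err);

        /* Drop a stale local spill block the stream no longer carries. */
        if (!stream_has_spill)
                err = dmu_object_rm_spill(os, object, tx);

        return (err);
}

Note that dmu_object_rm_spill() only acts when DNODE_FLAG_SPILL_BLKPTR is set, so calling it on an object that never had a spill block is a harmless no-op.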
@@ -292,6 +371,10 @@ dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
                return (err);
 
        ASSERT(dn->dn_type != DMU_OT_NONE);
+       /*
+        * If we don't create this free range, we'll leak indirect blocks when
+        * we get to freeing the dnode in syncing context.
+        */
        dnode_free_range(dn, 0, DMU_OBJECT_END, tx);
        dnode_free(dn, tx);
        dnode_rele(dn, FTAG);
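
For context when reading this hunk, a typical calling pattern for dmu_object_free() is sketched below. This is a hedged example rather than code from this commit; example_free_object() is a made-up wrapper, while the DMU transaction calls are the standard interfaces.

/*
 * Sketch (not from this diff): free an object after holding the full
 * free range in the transaction, mirroring how dmu_object_free() is
 * normally driven from a dmu_tx.
 */
static int
example_free_object(objset_t *os, uint64_t object)
{
        dmu_tx_t *tx = dmu_tx_create(os);
        int err;

        dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
        err = dmu_tx_assign(tx, TXG_WAIT);
        if (err != 0) {
                dmu_tx_abort(tx);
                return (err);
        }

        err = dmu_object_free(os, object, tx);
        dmu_tx_commit(tx);
        return (err);
}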
@@ -314,7 +397,8 @@ dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
 
        if (*objectp == 0) {
                start_obj = 1;
-       } else if (ds && ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE]) {
+       } else if (ds && dsl_dataset_feature_is_active(ds,
+           SPA_FEATURE_LARGE_DNODE)) {
                uint64_t i = *objectp + 1;
                uint64_t last_obj = *objectp | (DNODES_PER_BLOCK - 1);
                dmu_object_info_t doi;
@@ -390,14 +474,13 @@ dmu_object_zapify(objset_t *mos, uint64_t object, dmu_object_type_t old_type,
         * so that concurrent calls to *_is_zapified() can determine if
         * the object has been completely zapified by checking the type.
         */
-       mzap_create_impl(mos, object, 0, 0, tx);
+       mzap_create_impl(dn, 0, 0, tx);
 
        dn->dn_next_type[tx->tx_txg & TXG_MASK] = dn->dn_type =
            DMU_OTN_ZAP_METADATA;
        dnode_setdirty(dn, tx);
        dnode_rele(dn, FTAG);
 
-
        spa_feature_incr(dmu_objset_spa(mos),
            SPA_FEATURE_EXTENSIBLE_DATASET, tx);
 }
@@ -423,11 +506,14 @@ dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx)
 
 #if defined(_KERNEL)
 EXPORT_SYMBOL(dmu_object_alloc);
+EXPORT_SYMBOL(dmu_object_alloc_ibs);
 EXPORT_SYMBOL(dmu_object_alloc_dnsize);
+EXPORT_SYMBOL(dmu_object_alloc_hold);
 EXPORT_SYMBOL(dmu_object_claim);
 EXPORT_SYMBOL(dmu_object_claim_dnsize);
 EXPORT_SYMBOL(dmu_object_reclaim);
 EXPORT_SYMBOL(dmu_object_reclaim_dnsize);
+EXPORT_SYMBOL(dmu_object_rm_spill);
 EXPORT_SYMBOL(dmu_object_free);
 EXPORT_SYMBOL(dmu_object_next);
 EXPORT_SYMBOL(dmu_object_zapify);