]> git.proxmox.com Git - mirror_zfs.git/commitdiff
Illumos 6267 - dn_bonus evicted too early
authorJustin T. Gibbs <gibbs@FreeBSD.org>
Tue, 13 Oct 2015 21:09:45 +0000 (14:09 -0700)
committerBrian Behlendorf <behlendorf1@llnl.gov>
Tue, 13 Oct 2015 21:12:02 +0000 (14:12 -0700)
6267 dn_bonus evicted too early
Reviewed by: Richard Yao <ryao@gentoo.org>
Reviewed by: Xin LI <delphij@freebsd.org>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Richard Lowe <richlowe@richlowe.net>

References:
  https://www.illumos.org/issues/6267
  https://github.com/illumos/illumos-gate/commit/d205810

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Ported-by: Ned Bass bass6@llnl.gov
Issue #3865
Issue #3443

include/sys/dbuf.h
include/sys/dmu_objset.h
module/zfs/dbuf.c
module/zfs/dmu_objset.c
module/zfs/dnode_sync.c

index 94d326d5716c8b866903e5f8edf5ca169d8df1cd..0d262e87b5bc9fca09073783719b75eab25a83c5 100644 (file)
@@ -230,9 +230,25 @@ typedef struct dmu_buf_impl {
        /* User callback information. */
        dmu_buf_user_t *db_user;
 
-       uint8_t db_immediate_evict;
+       /*
+        * Evict user data as soon as the dirty and reference
+        * counts are equal.
+        */
+       uint8_t db_user_immediate_evict;
+
+       /*
+        * This block was freed while a read or write was
+        * active.
+        */
        uint8_t db_freed_in_flight;
 
+       /*
+        * dnode_evict_dbufs() or dnode_evict_bonus() tried to
+        * evict this dbuf, but couldn't due to outstanding
+        * references.  Evict once the refcount drops to 0.
+        */
+       uint8_t db_pending_evict;
+
        uint8_t db_dirtycnt;
 } dmu_buf_impl_t;
 
index bee4bbcfce03fae99f35b14184b3128b6f26639a..837a0d5107b7f1aa636445a0ec1ebb281bd6167a 100644 (file)
@@ -93,7 +93,6 @@ struct objset {
        uint8_t os_copies;
        enum zio_checksum os_dedup_checksum;
        boolean_t os_dedup_verify;
-       boolean_t os_evicting;
        zfs_logbias_op_t os_logbias;
        zfs_cache_type_t os_primary_cache;
        zfs_cache_type_t os_secondary_cache;
index bab546c28d0cf0b11558cb7a78a35609312d7ae7..d340da821fc50f0e65bf34acea66383b0be42804 100644 (file)
@@ -303,7 +303,7 @@ dbuf_verify_user(dmu_buf_impl_t *db, dbvu_verify_type_t verify_type)
                 */
                ASSERT3U(holds, >=, db->db_dirtycnt);
        } else {
-               if (db->db_immediate_evict == TRUE)
+               if (db->db_user_immediate_evict == TRUE)
                        ASSERT3U(holds, >=, db->db_dirtycnt);
                else
                        ASSERT3U(holds, >, 0);
@@ -1880,8 +1880,9 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
        db->db_blkptr = blkptr;
 
        db->db_user = NULL;
-       db->db_immediate_evict = 0;
-       db->db_freed_in_flight = 0;
+       db->db_user_immediate_evict = FALSE;
+       db->db_freed_in_flight = FALSE;
+       db->db_pending_evict = FALSE;
 
        if (blkid == DMU_BONUS_BLKID) {
                ASSERT3P(parent, ==, dn->dn_dbuf);
@@ -2318,12 +2319,13 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
                arc_buf_freeze(db->db_buf);
 
        if (holds == db->db_dirtycnt &&
-           db->db_level == 0 && db->db_immediate_evict)
+           db->db_level == 0 && db->db_user_immediate_evict)
                dbuf_evict_user(db);
 
        if (holds == 0) {
                if (db->db_blkid == DMU_BONUS_BLKID) {
                        dnode_t *dn;
+                       boolean_t evict_dbuf = db->db_pending_evict;
 
                        /*
                         * If the dnode moves here, we cannot cross this
@@ -2338,7 +2340,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
                         * Decrementing the dbuf count means that the bonus
                         * buffer's dnode hold is no longer discounted in
                         * dnode_move(). The dnode cannot move until after
-                        * the dnode_rele_and_unlock() below.
+                        * the dnode_rele() below.
                         */
                        DB_DNODE_EXIT(db);
 
@@ -2348,35 +2350,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
                         */
                        mutex_exit(&db->db_mtx);
 
-                       /*
-                        * If the dnode has been freed, evict the bonus
-                        * buffer immediately.  The data in the bonus
-                        * buffer is no longer relevant and this prevents
-                        * a stale bonus buffer from being associated
-                        * with this dnode_t should the dnode_t be reused
-                        * prior to being destroyed.
-                        */
-                       mutex_enter(&dn->dn_mtx);
-                       if (dn->dn_type == DMU_OT_NONE ||
-                           dn->dn_free_txg != 0) {
-                               /*
-                                * Drop dn_mtx.  It is a leaf lock and
-                                * cannot be held when dnode_evict_bonus()
-                                * acquires other locks in order to
-                                * perform the eviction.
-                                *
-                                * Freed dnodes cannot be reused until the
-                                * last hold is released.  Since this bonus
-                                * buffer has a hold, the dnode will remain
-                                * in the free state, even without dn_mtx
-                                * held, until the dnode_rele_and_unlock()
-                                * below.
-                                */
-                               mutex_exit(&dn->dn_mtx);
+                       if (evict_dbuf)
                                dnode_evict_bonus(dn);
-                               mutex_enter(&dn->dn_mtx);
-                       }
-                       dnode_rele_and_unlock(dn, db);
+
+                       dnode_rele(dn, db);
                } else if (db->db_buf == NULL) {
                        /*
                         * This is a special case: we never associated this
@@ -2423,7 +2400,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
                                } else {
                                        dbuf_clear(db);
                                }
-                       } else if (db->db_objset->os_evicting ||
+                       } else if (db->db_pending_evict ||
                            arc_buf_eviction_needed(db->db_buf)) {
                                dbuf_clear(db);
                        } else {
@@ -2471,7 +2448,7 @@ dmu_buf_set_user_ie(dmu_buf_t *db_fake, dmu_buf_user_t *user)
 {
        dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
 
-       db->db_immediate_evict = TRUE;
+       db->db_user_immediate_evict = TRUE;
        return (dmu_buf_set_user(db_fake, user));
 }
 
index b3168a8561419ee3f0bda66ad34be3dc312c8ba5..779b3bb789aad6a3476279f6778895a01f0e3782 100644 (file)
@@ -726,7 +726,6 @@ dmu_objset_evict(objset_t *os)
        if (os->os_sa)
                sa_tear_down(os);
 
-       os->os_evicting = B_TRUE;
        dmu_objset_evict_dbufs(os);
 
        mutex_enter(&os->os_lock);
index a8fa9a9527a9e8c25a9c428373bf3fe8a01359ca..df5c8e4ee6c43421ce04d4f2ec83d75059a4fda1 100644 (file)
@@ -432,6 +432,7 @@ dnode_evict_dbufs(dnode_t *dn)
                        db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
                        avl_remove(&dn->dn_dbufs, db_marker);
                } else {
+                       db->db_pending_evict = TRUE;
                        mutex_exit(&db->db_mtx);
                        db_next = AVL_NEXT(&dn->dn_dbufs, db);
                }
@@ -447,10 +448,14 @@ void
 dnode_evict_bonus(dnode_t *dn)
 {
        rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
-       if (dn->dn_bonus && refcount_is_zero(&dn->dn_bonus->db_holds)) {
-               mutex_enter(&dn->dn_bonus->db_mtx);
-               dbuf_evict(dn->dn_bonus);
-               dn->dn_bonus = NULL;
+       if (dn->dn_bonus != NULL) {
+               if (refcount_is_zero(&dn->dn_bonus->db_holds)) {
+                       mutex_enter(&dn->dn_bonus->db_mtx);
+                       dbuf_evict(dn->dn_bonus);
+                       dn->dn_bonus = NULL;
+               } else {
+                       dn->dn_bonus->db_pending_evict = TRUE;
+               }
        }
        rw_exit(&dn->dn_struct_rwlock);
 }
@@ -502,7 +507,6 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
 
        dnode_undirty_dbufs(&dn->dn_dirty_records[txgoff]);
        dnode_evict_dbufs(dn);
-       ASSERT(avl_is_empty(&dn->dn_dbufs));
 
        /*
         * XXX - It would be nice to assert this, but we may still