git.proxmox.com Git - mirror_zfs.git/commitdiff
panic in bpobj_space(): null pointer dereference
author: Matthew Ahrens <mahrens@delphix.com>
Thu, 9 Feb 2017 18:19:12 +0000 (10:19 -0800)
committer: Brian Behlendorf <behlendorf1@llnl.gov>
Thu, 9 Feb 2017 18:19:12 +0000 (10:19 -0800)
This is a race condition in the deadlist code.

A thread executing an administrative command that uses
dsl_deadlist_space_range() holds the lock of the whole dsl_deadlist_t to
protect access to all of the entries that the deadlist keeps in an AVL
tree.

Sync threads trying to insert a new entry in the deadlist (through
dsl_deadlist_insert() -> dle_enqueue()) do not hold the deadlist lock at
that moment.  If the dle_bpobj is the empty bpobj (our sentinel value),
we close and reopen it.  Between these two operations, it is possible
for the dsl_deadlist_space_range() thread to dereference that bpobj,
which is NULL during that window.

Threads should hold a deadlist's dl_lock when they manipulate its
internal data so that scenarios like the one above are avoided.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Closes #5762

module/zfs/bpobj.c
module/zfs/dsl_deadlist.c

index 17d98c36e13479b194834819b0bb5698d8925cc5..5f2aff45349f490146b79a12fde1250344347990 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
  */
 
 #include <sys/bpobj.h>
@@ -395,6 +395,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
                return;
        }
 
+       mutex_enter(&bpo->bpo_lock);
        dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
        if (bpo->bpo_phys->bpo_subobjs == 0) {
                bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
@@ -405,7 +406,6 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
        ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
        ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
 
-       mutex_enter(&bpo->bpo_lock);
        dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
            bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
            sizeof (subobj), &subobj, tx);
index 0b99e9713a7c5d133855602502560d9df396f4b0..e7046b7861d4b814149402a5898f015f6c1f7af3 100644 (file)
@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
  */
 
@@ -66,6 +66,8 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl)
        zap_cursor_t zc;
        zap_attribute_t za;
 
+       ASSERT(MUTEX_HELD(&dl->dl_lock));
+
        ASSERT(!dl->dl_oldfmt);
        if (dl->dl_havetree)
                return;
@@ -178,6 +180,7 @@ static void
 dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
     const blkptr_t *bp, dmu_tx_t *tx)
 {
+       ASSERT(MUTEX_HELD(&dl->dl_lock));
        if (dle->dle_bpobj.bpo_object ==
            dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
                uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
@@ -194,6 +197,7 @@ static void
 dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
     uint64_t obj, dmu_tx_t *tx)
 {
+       ASSERT(MUTEX_HELD(&dl->dl_lock));
        if (dle->dle_bpobj.bpo_object !=
            dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
                bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
@@ -218,15 +222,14 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
                return;
        }
 
+       mutex_enter(&dl->dl_lock);
        dsl_deadlist_load_tree(dl);
 
        dmu_buf_will_dirty(dl->dl_dbuf, tx);
-       mutex_enter(&dl->dl_lock);
        dl->dl_phys->dl_used +=
            bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
        dl->dl_phys->dl_comp += BP_GET_PSIZE(bp);
        dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp);
-       mutex_exit(&dl->dl_lock);
 
        dle_tofind.dle_mintxg = bp->blk_birth;
        dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@@ -243,6 +246,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
 
        ASSERT3P(dle, !=, NULL);
        dle_enqueue(dl, dle, bp, tx);
+       mutex_exit(&dl->dl_lock);
 }
 
 /*
@@ -258,16 +262,19 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
        if (dl->dl_oldfmt)
                return;
 
-       dsl_deadlist_load_tree(dl);
-
        dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
        dle->dle_mintxg = mintxg;
+
+       mutex_enter(&dl->dl_lock);
+       dsl_deadlist_load_tree(dl);
+
        obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
        VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
        avl_add(&dl->dl_tree, dle);
 
        VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
            mintxg, obj, tx));
+       mutex_exit(&dl->dl_lock);
 }
 
 /*
@@ -282,6 +289,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
        if (dl->dl_oldfmt)
                return;
 
+       mutex_enter(&dl->dl_lock);
        dsl_deadlist_load_tree(dl);
 
        dle_tofind.dle_mintxg = mintxg;
@@ -295,6 +303,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
        kmem_free(dle, sizeof (*dle));
 
        VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
+       mutex_exit(&dl->dl_lock);
 }
 
 /*
@@ -338,6 +347,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
                return (newobj);
        }
 
+       mutex_enter(&dl->dl_lock);
        dsl_deadlist_load_tree(dl);
 
        for (dle = avl_first(&dl->dl_tree); dle;
@@ -351,6 +361,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
                VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
                    dle->dle_mintxg, obj, tx));
        }
+       mutex_exit(&dl->dl_lock);
        return (newobj);
 }
 
@@ -428,6 +439,8 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
        uint64_t used, comp, uncomp;
        bpobj_t bpo;
 
+       ASSERT(MUTEX_HELD(&dl->dl_lock));
+
        VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
        VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
        bpobj_close(&bpo);
@@ -435,11 +448,9 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
        dsl_deadlist_load_tree(dl);
 
        dmu_buf_will_dirty(dl->dl_dbuf, tx);
-       mutex_enter(&dl->dl_lock);
        dl->dl_phys->dl_used += used;
        dl->dl_phys->dl_comp += comp;
        dl->dl_phys->dl_uncomp += uncomp;
-       mutex_exit(&dl->dl_lock);
 
        dle_tofind.dle_mintxg = birth;
        dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@@ -479,6 +490,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
                return;
        }
 
+       mutex_enter(&dl->dl_lock);
        for (zap_cursor_init(&zc, dl->dl_os, obj);
            zap_cursor_retrieve(&zc, &za) == 0;
            zap_cursor_advance(&zc)) {
@@ -493,6 +505,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
        dmu_buf_will_dirty(bonus, tx);
        bzero(dlp, sizeof (*dlp));
        dmu_buf_rele(bonus, FTAG);
+       mutex_exit(&dl->dl_lock);
 }
 
 /*
@@ -507,6 +520,8 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
        avl_index_t where;
 
        ASSERT(!dl->dl_oldfmt);
+
+       mutex_enter(&dl->dl_lock);
        dmu_buf_will_dirty(dl->dl_dbuf, tx);
        dsl_deadlist_load_tree(dl);
 
@@ -522,14 +537,12 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
 
                VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
                    &used, &comp, &uncomp));
-               mutex_enter(&dl->dl_lock);
                ASSERT3U(dl->dl_phys->dl_used, >=, used);
                ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
                ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp);
                dl->dl_phys->dl_used -= used;
                dl->dl_phys->dl_comp -= comp;
                dl->dl_phys->dl_uncomp -= uncomp;
-               mutex_exit(&dl->dl_lock);
 
                VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
                    dle->dle_mintxg, tx));
@@ -540,4 +553,5 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
                kmem_free(dle, sizeof (*dle));
                dle = dle_next;
        }
+       mutex_exit(&dl->dl_lock);
 }