]> git.proxmox.com Git - mirror_ubuntu-eoan-kernel.git/commitdiff
btrfs: correct zstd workspace manager lock to use spin_lock_bh()
author Dennis Zhou <dennis@kernel.org>
Fri, 17 May 2019 23:16:26 +0000 (19:16 -0400)
committer David Sterba <dsterba@suse.com>
Tue, 28 May 2019 16:54:09 +0000 (18:54 +0200)
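The btrfs zstd workspace manager uses a background timer to reclaim
workspaces that have not been used recently. The timer callback runs in
softirq context, but I took the wsm lock with plain spin_lock() both
there and in process context. This should have been caught by lockdep,
but was not. The resulting deadlock was reported in bugzilla. The fix is
to switch all users of the zstd wsm lock to spin_lock_bh(), so the timer
softirq cannot interrupt a lock holder on the same CPU.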

This happened quite reliably on ppc64, unlike on other architectures.

  [  313.402874] ================================
  [  313.402875] WARNING: inconsistent lock state
  [  313.402879] 5.1.0-rc7 #1 Not tainted
  [  313.402880] --------------------------------
  [  313.402882] inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
  [  313.402885] swapper/5/0 [HC0[0]:SC1[1]:HE1:SE0] takes:
  [  313.402888] 0000000080d1120c (&(&wsm.lock)->rlock){+.?.}, at: .zstd_reclaim_timer_fn+0x40/0x230
  [  313.402895] {SOFTIRQ-ON-W} state was registered at:
  [  313.402899]   .lock_acquire+0xd0/0x240
  [  313.402903]   ._raw_spin_lock+0x34/0x60
  [  313.402906]   .zstd_get_workspace+0xd0/0x360
  [  313.402908]   .end_compressed_bio_read+0x3b8/0x540
  [  313.402911]   .bio_endio+0x174/0x2c0
  [  313.402914]   .end_workqueue_fn+0x4c/0x70
  [  313.402917]   .normal_work_helper+0x138/0x7e0
  [  313.402920]   .process_one_work+0x324/0x790
  [  313.402922]   .worker_thread+0x68/0x570
  [  313.402925]   .kthread+0x19c/0x1b0
  [  313.402928]   .ret_from_kernel_thread+0x58/0x78
  [  313.402930] irq event stamp: 2629216
  [  313.402933] hardirqs last  enabled at (2629216): [<c0000000009da738>] ._raw_spin_unlock_irq+0x38/0x60
  [  313.402936] hardirqs last disabled at (2629215): [<c0000000009da4c4>] ._raw_spin_lock_irq+0x24/0x70
  [  313.402939] softirqs last  enabled at (2629212): [<c0000000000af9fc>] .irq_enter+0x8c/0xd0
  [  313.402942] softirqs last disabled at (2629213): [<c0000000000afb58>] .irq_exit+0x118/0x170
  [  313.402944]
 other info that might help us debug this:
  [  313.402945]  Possible unsafe locking scenario:

  [  313.402947]        CPU0
  [  313.402948]        ----
  [  313.402949]   lock(&(&wsm.lock)->rlock);
  [  313.402951]   <Interrupt>
  [  313.402952]     lock(&(&wsm.lock)->rlock);
  [  313.402954]
  *** DEADLOCK ***

  [  313.402957] 1 lock held by swapper/5/0:
  [  313.402958]  #0: 000000004b612042 ((&wsm.timer)){+.-.}, at: .call_timer_fn+0x0/0x3c0
  [  313.402963]
 stack backtrace:
  [  313.402967] CPU: 5 PID: 0 Comm: swapper/5 Not tainted 5.1.0-rc7 #1
  [  313.402968] Call Trace:
  [  313.402972] [c0000007fa262e70] [c0000000009b3294] .dump_stack+0xe0/0x15c (unreliable)
  [  313.402975] [c0000007fa262f10] [c000000000125548] .print_usage_bug+0x348/0x390
  [  313.402978] [c0000007fa262fd0] [c000000000125cb4] .mark_lock+0x724/0x930
  [  313.402981] [c0000007fa263080] [c000000000126c20] .__lock_acquire+0xc90/0x16a0
  [  313.402984] [c0000007fa2631b0] [c000000000128040] .lock_acquire+0xd0/0x240
  [  313.402987] [c0000007fa263280] [c0000000009da2b4] ._raw_spin_lock+0x34/0x60
  [  313.402990] [c0000007fa263300] [c00000000054b0b0] .zstd_reclaim_timer_fn+0x40/0x230
  [  313.402993] [c0000007fa2633d0] [c000000000158b38] .call_timer_fn+0xc8/0x3c0
  [  313.402996] [c0000007fa2634a0] [c000000000158f74] .expire_timers+0x144/0x260
  [  313.402999] [c0000007fa263550] [c000000000159178] .run_timer_softirq+0xe8/0x230
  [  313.403002] [c0000007fa263680] [c0000000009db288] .__do_softirq+0x188/0x5d4
  [  313.403004] [c0000007fa263790] [c0000000000afb58] .irq_exit+0x118/0x170
  [  313.403008] [c0000007fa263800] [c000000000028d88] .timer_interrupt+0x158/0x430
  [  313.403012] [c0000007fa2638b0] [c0000000000091d4] decrementer_common+0x134/0x140
  [  313.403017] --- interrupt: 901 at replay_interrupt_return+0x0/0x4
     LR = .arch_local_irq_restore.part.0+0x68/0x80
  [  313.403020] [c0000007fa263bb0] [c00000000001a3ac] .arch_local_irq_restore.part.0+0x2c/0x80 (unreliable)
  [  313.403024] [c0000007fa263c30] [c0000000007bbbcc] .cpuidle_enter_state+0xec/0x670
  [  313.403027] [c0000007fa263d00] [c0000000000f5130] .call_cpuidle+0x40/0x90
  [  313.403031] [c0000007fa263d70] [c0000000000f554c] .do_idle+0x2dc/0x3a0
  [  313.403034] [c0000007fa263e30] [c0000000000f59ac] .cpu_startup_entry+0x2c/0x30
  [  313.403037] [c0000007fa263ea0] [c000000000045674] .start_secondary+0x644/0x650
  [  313.403041] [c0000007fa263f90] [c00000000000ad5c] start_secondary_prolog+0x10/0x14

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=203517
Fixes: 3f93aef535c8 ("btrfs: add zstd compression level support")
CC: stable@vger.kernel.org # 5.1+
Signed-off-by: Dennis Zhou <dennis@kernel.org>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/zstd.c

index a6ff07cf11d51a11089d3290403be0655ed16cce..3837ca180d52d67ec1c59459ed3a5c0e0b40d378 100644 (file)
@@ -105,10 +105,10 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
        unsigned long reclaim_threshold = jiffies - ZSTD_BTRFS_RECLAIM_JIFFIES;
        struct list_head *pos, *next;
 
-       spin_lock(&wsm.lock);
+       spin_lock_bh(&wsm.lock);
 
        if (list_empty(&wsm.lru_list)) {
-               spin_unlock(&wsm.lock);
+               spin_unlock_bh(&wsm.lock);
                return;
        }
 
@@ -137,7 +137,7 @@ static void zstd_reclaim_timer_fn(struct timer_list *timer)
        if (!list_empty(&wsm.lru_list))
                mod_timer(&wsm.timer, jiffies + ZSTD_BTRFS_RECLAIM_JIFFIES);
 
-       spin_unlock(&wsm.lock);
+       spin_unlock_bh(&wsm.lock);
 }
 
 /*
@@ -198,7 +198,7 @@ static void zstd_cleanup_workspace_manager(void)
        struct workspace *workspace;
        int i;
 
-       spin_lock(&wsm.lock);
+       spin_lock_bh(&wsm.lock);
        for (i = 0; i < ZSTD_BTRFS_MAX_LEVEL; i++) {
                while (!list_empty(&wsm.idle_ws[i])) {
                        workspace = container_of(wsm.idle_ws[i].next,
@@ -208,7 +208,7 @@ static void zstd_cleanup_workspace_manager(void)
                        zstd_free_workspace(&workspace->list);
                }
        }
-       spin_unlock(&wsm.lock);
+       spin_unlock_bh(&wsm.lock);
 
        del_timer_sync(&wsm.timer);
 }
@@ -230,7 +230,7 @@ static struct list_head *zstd_find_workspace(unsigned int level)
        struct workspace *workspace;
        int i = level - 1;
 
-       spin_lock(&wsm.lock);
+       spin_lock_bh(&wsm.lock);
        for_each_set_bit_from(i, &wsm.active_map, ZSTD_BTRFS_MAX_LEVEL) {
                if (!list_empty(&wsm.idle_ws[i])) {
                        ws = wsm.idle_ws[i].next;
@@ -242,11 +242,11 @@ static struct list_head *zstd_find_workspace(unsigned int level)
                                list_del(&workspace->lru_list);
                        if (list_empty(&wsm.idle_ws[i]))
                                clear_bit(i, &wsm.active_map);
-                       spin_unlock(&wsm.lock);
+                       spin_unlock_bh(&wsm.lock);
                        return ws;
                }
        }
-       spin_unlock(&wsm.lock);
+       spin_unlock_bh(&wsm.lock);
 
        return NULL;
 }
@@ -305,7 +305,7 @@ static void zstd_put_workspace(struct list_head *ws)
 {
        struct workspace *workspace = list_to_workspace(ws);
 
-       spin_lock(&wsm.lock);
+       spin_lock_bh(&wsm.lock);
 
        /* A node is only taken off the lru if we are the corresponding level */
        if (workspace->req_level == workspace->level) {
@@ -325,7 +325,7 @@ static void zstd_put_workspace(struct list_head *ws)
        list_add(&workspace->list, &wsm.idle_ws[workspace->level - 1]);
        workspace->req_level = 0;
 
-       spin_unlock(&wsm.lock);
+       spin_unlock_bh(&wsm.lock);
 
        if (workspace->level == ZSTD_BTRFS_MAX_LEVEL)
                cond_wake_up(&wsm.wait);