From 89ad2fa3f043a1e8daae193bcb5fe34d5f8caf28 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 14 Nov 2017 17:15:50 -0800 Subject: [PATCH] bpf: fix lockdep splat pcpu_freelist_pop() needs the same lockdep awareness than pcpu_freelist_populate() to avoid a false positive. [ INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected ] switchto-defaul/12508 [HC0[0]:SC0[6]:HE0:SE0] is trying to acquire: (&htab->buckets[i].lock){......}, at: [] __htab_percpu_map_update_elem+0x1cb/0x300 and this task is already holding: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...}, at: [] __dev_queue_xmit+0 x868/0x1240 which would create a new lock dependency: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...} -> (&htab->buckets[i].lock){......} but this new dependency connects a SOFTIRQ-irq-safe lock: (dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2){+.-...} ... which became SOFTIRQ-irq-safe at: [] __lock_acquire+0x42b/0x1f10 [] lock_acquire+0xbc/0x1b0 [] _raw_spin_lock+0x38/0x50 [] __dev_queue_xmit+0x868/0x1240 [] dev_queue_xmit+0x10/0x20 [] ip_finish_output2+0x439/0x590 [] ip_finish_output+0x150/0x2f0 [] ip_output+0x7d/0x260 [] ip_local_out+0x5e/0xe0 [] ip_queue_xmit+0x205/0x620 [] tcp_transmit_skb+0x5a8/0xcb0 [] tcp_write_xmit+0x242/0x1070 [] __tcp_push_pending_frames+0x3c/0xf0 [] tcp_rcv_established+0x312/0x700 [] tcp_v4_do_rcv+0x11c/0x200 [] tcp_v4_rcv+0xaa2/0xc30 [] ip_local_deliver_finish+0xa7/0x240 [] ip_local_deliver+0x66/0x200 [] ip_rcv_finish+0xdd/0x560 [] ip_rcv+0x295/0x510 [] __netif_receive_skb_core+0x988/0x1020 [] __netif_receive_skb+0x21/0x70 [] process_backlog+0x6f/0x230 [] net_rx_action+0x229/0x420 [] __do_softirq+0xd8/0x43d [] do_softirq_own_stack+0x1c/0x30 [] do_softirq+0x55/0x60 [] __local_bh_enable_ip+0xa8/0xb0 [] cpu_startup_entry+0x1c7/0x500 [] start_secondary+0x113/0x140 to a SOFTIRQ-irq-unsafe lock: (&head->lock){+.+...} ... which became SOFTIRQ-irq-unsafe at: ... [] __lock_acquire+0x82f/0x1f10 [] lock_acquire+0xbc/0x1b0 [] _raw_spin_lock+0x38/0x50 [] pcpu_freelist_pop+0x7a/0xb0 [] htab_map_alloc+0x50c/0x5f0 [] SyS_bpf+0x265/0x1200 [] entry_SYSCALL_64_fastpath+0x12/0x17 other info that might help us debug this: Chain exists of: dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2 --> &htab->buckets[i].lock --> &head->lock Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(&head->lock); local_irq_disable(); lock(dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2); lock(&htab->buckets[i].lock); lock(dev_queue->dev->qdisc_class ?: &qdisc_tx_lock#2); *** DEADLOCK *** Fixes: e19494edab82 ("bpf: introduce percpu_freelist") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- kernel/bpf/percpu_freelist.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c index 5c51d1985b51..673fa6fe2d73 100644 --- a/kernel/bpf/percpu_freelist.c +++ b/kernel/bpf/percpu_freelist.c @@ -78,8 +78,10 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) { struct pcpu_freelist_head *head; struct pcpu_freelist_node *node; + unsigned long flags; int orig_cpu, cpu; + local_irq_save(flags); orig_cpu = cpu = raw_smp_processor_id(); while (1) { head = per_cpu_ptr(s->freelist, cpu); @@ -87,14 +89,16 @@ struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s) node = head->first; if (node) { head->first = node->next; - raw_spin_unlock(&head->lock); + raw_spin_unlock_irqrestore(&head->lock, flags); return node; } raw_spin_unlock(&head->lock); cpu = cpumask_next(cpu, cpu_possible_mask); if (cpu >= nr_cpu_ids) cpu = 0; - if (cpu == orig_cpu) + if (cpu == orig_cpu) { + local_irq_restore(flags); return NULL; + } } } -- 2.39.5