git.proxmox.com Git - mirror_ubuntu-hirsute-kernel.git/commitdiff
watchdog/softlockup: Fix cpu_stop_queue_work() double-queue bug
authorPeter Zijlstra <peterz@infradead.org>
Fri, 13 Jul 2018 10:42:08 +0000 (12:42 +0200)
committerIngo Molnar <mingo@kernel.org>
Sun, 15 Jul 2018 21:51:19 +0000 (23:51 +0200)
When scheduling is delayed for longer than the softlockup interrupt
period it is possible to double-queue the cpu_stop_work, causing list
corruption.

Cure this by adding a completion to track the cpu_stop_work's
progress.

Reported-by: kernel test robot <lkp@intel.com>
Tested-by: Rong Chen <rong.a.chen@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Fixes: 9cf57731b63e ("watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work")
Link: http://lkml.kernel.org/r/20180713104208.GW2494@hirez.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
kernel/watchdog.c

index b81f777838d5ab30f20c6a46228803a71251dc27..5470dce212c0dbc9861d5d2108b926a473f89de4 100644 (file)
@@ -330,6 +330,9 @@ static void watchdog_interrupt_count(void)
        __this_cpu_inc(hrtimer_interrupts);
 }
 
+static DEFINE_PER_CPU(struct completion, softlockup_completion);
+static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
+
 /*
  * The watchdog thread function - touches the timestamp.
  *
@@ -343,12 +346,11 @@ static int softlockup_fn(void *data)
        __this_cpu_write(soft_lockup_hrtimer_cnt,
                         __this_cpu_read(hrtimer_interrupts));
        __touch_watchdog();
+       complete(this_cpu_ptr(&softlockup_completion));
 
        return 0;
 }
 
-static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
-
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -364,9 +366,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
        watchdog_interrupt_count();
 
        /* kick the softlockup detector */
-       stop_one_cpu_nowait(smp_processor_id(),
-                       softlockup_fn, NULL,
-                       this_cpu_ptr(&softlockup_stop_work));
+       if (completion_done(this_cpu_ptr(&softlockup_completion))) {
+               reinit_completion(this_cpu_ptr(&softlockup_completion));
+               stop_one_cpu_nowait(smp_processor_id(),
+                               softlockup_fn, NULL,
+                               this_cpu_ptr(&softlockup_stop_work));
+       }
 
        /* .. and repeat */
        hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
@@ -467,9 +472,13 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 static void watchdog_enable(unsigned int cpu)
 {
        struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
+       struct completion *done = this_cpu_ptr(&softlockup_completion);
 
        WARN_ON_ONCE(cpu != smp_processor_id());
 
+       init_completion(done);
+       complete(done);
+
        /*
         * Start the timer first to prevent the NMI watchdog triggering
         * before the timer has a chance to fire.
@@ -499,6 +508,7 @@ static void watchdog_disable(unsigned int cpu)
         */
        watchdog_nmi_disable(cpu);
        hrtimer_cancel(hrtimer);
+       wait_for_completion(this_cpu_ptr(&softlockup_completion));
 }
 
 static int softlockup_stop_fn(void *data)