Merge branch 'for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
author     Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 12 Jan 2016 02:53:13 +0000 (18:53 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 12 Jan 2016 02:53:13 +0000 (18:53 -0800)
Pull workqueue update from Tejun Heo:
 "Workqueue changes for v4.5.  One cleanup patch and three to improve
  the debuggability.

  Workqueue now has a stall detector which dumps workqueue state if any
  worker pool hasn't made forward progress over a certain amount of time
  (30s by default) and also triggers a warning if a workqueue which can
  be used in memory reclaim path tries to wait on something which can't
  be.

  These should make workqueue hangs a lot easier to debug."

* 'for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: simplify the apply_workqueue_attrs_locked()
  workqueue: implement lockup detector
  watchdog: introduce touch_softlockup_watchdog_sched()
  workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue

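The warning added by the last commit above enforces a simple rule: work running on a WQ_MEM_RECLAIM workqueue (which is backed by a rescuer thread and therefore guaranteed to make forward progress during memory reclaim) must not wait on a workqueue that lacks WQ_MEM_RECLAIM, since that queue may itself be stuck waiting for memory. A minimal driver-style sketch of the rule follows; the queue names and module boilerplate are illustrative, not taken from this commit:

/*
 * Sketch: a reclaim-capable workqueue and an ordinary one.  Flushing the
 * ordinary queue from work that runs on the reclaim path is exactly what
 * the new warning catches.
 */
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *io_wq;      /* used on the reclaim path */
static struct workqueue_struct *stats_wq;   /* best-effort housekeeping */

static void io_work_fn(struct work_struct *work)
{
	/*
	 * OK: waiting on another WQ_MEM_RECLAIM workqueue from here.
	 *
	 * Not OK (triggers the new warning):
	 *	flush_workqueue(stats_wq);
	 * because stats_wq has no rescuer and may not make progress
	 * while memory is being reclaimed.
	 */
}
static DECLARE_WORK(io_work, io_work_fn);

static int __init example_init(void)
{
	/* Rescuer-backed queue: forward progress guaranteed under reclaim. */
	io_wq = alloc_workqueue("example_io", WQ_MEM_RECLAIM, 0);
	/* Ordinary queue: no such guarantee. */
	stats_wq = alloc_workqueue("example_stats", 0, 0);
	if (!io_wq || !stats_wq)
		goto err;

	queue_work(io_wq, &io_work);
	return 0;

err:
	if (io_wq)
		destroy_workqueue(io_wq);
	if (stats_wq)
		destroy_workqueue(stats_wq);
	return -ENOMEM;
}

static void __exit example_exit(void)
{
	destroy_workqueue(io_wq);
	destroy_workqueue(stats_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
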
Documentation/kernel-parameters.txt
include/linux/sched.h
kernel/sched/clock.c
kernel/time/tick-sched.c
kernel/watchdog.c

diff --combined Documentation/kernel-parameters.txt
index 1a44a7a7f71d76eea808811a230267b1fd028c55,fb6c93f36e3b1b6a4970b3212580ab5fe71173b9..1a8169ba29e6e0516c6837cf0ea04317b9253afb
@@@ -472,15 -472,6 +472,15 @@@ bytes respectively. Such letter suffixe
                        Change the amount of debugging information output
                        when initialising the APIC and IO-APIC components.
  
 +      apic_extnmi=    [APIC,X86] External NMI delivery setting
 +                      Format: { bsp (default) | all | none }
 +                      bsp:  External NMI is delivered only to CPU 0
 +                      all:  External NMIs are broadcast to all CPUs as a
 +                            backup of CPU 0
 +                      none: External NMI is masked for all CPUs. This is
 +                            useful so that a dump capture kernel won't be
 +                            shot down by NMI
 +
        autoconf=       [IPV6]
                        See Documentation/networking/ipv6.txt.
  
        rcutorture.verbose= [KNL]
                        Enable additional printk() statements.
  
 +      rcupdate.rcu_cpu_stall_suppress= [KNL]
 +                      Suppress RCU CPU stall warning messages.
 +
 +      rcupdate.rcu_cpu_stall_timeout= [KNL]
 +                      Set timeout for RCU CPU stall warning messages.
 +
        rcupdate.rcu_expedited= [KNL]
                        Use expedited grace-period primitives, for
                        example, synchronize_rcu_expedited() instead
                        of synchronize_rcu().  This reduces latency,
                        but can increase CPU utilization, degrade
                        real-time latency, and degrade energy efficiency.
 -
 -      rcupdate.rcu_cpu_stall_suppress= [KNL]
 -                      Suppress RCU CPU stall warning messages.
 -
 -      rcupdate.rcu_cpu_stall_timeout= [KNL]
 -                      Set timeout for RCU CPU stall warning messages.
 +                      No effect on CONFIG_TINY_RCU kernels.
 +
 +      rcupdate.rcu_normal= [KNL]
 +                      Use only normal grace-period primitives,
 +                      for example, synchronize_rcu() instead of
 +                      synchronize_rcu_expedited().  This improves
 +                      real-time latency, CPU utilization, and
 +                      energy efficiency, but can expose users to
 +                      increased grace-period latency.  This parameter
 +                      overrides rcupdate.rcu_expedited.  No effect on
 +                      CONFIG_TINY_RCU kernels.
 +
 +      rcupdate.rcu_normal_after_boot= [KNL]
 +                      Once boot has completed (that is, after
 +                      rcu_end_inkernel_boot() has been invoked), use
 +                      only normal grace-period primitives.  No effect
 +                      on CONFIG_TINY_RCU kernels.
  
        rcupdate.rcu_task_stall_timeout= [KNL]
                        Set timeout in jiffies for RCU task stall warning
                        or other driver-specific files in the
                        Documentation/watchdog/ directory.
  
+       workqueue.watchdog_thresh=
+                       If CONFIG_WQ_WATCHDOG is configured, workqueue can
+                       warn stall conditions and dump internal state to
+                       help debugging.  0 disables workqueue stall
+                       detection; otherwise, it's the stall threshold
+                       duration in seconds.  The default value is 30 and
+                       it can be updated at runtime by writing to the
+                       corresponding sysfs file.
        workqueue.disable_numa
                        By default, all work items queued to unbound
                        workqueues are affine to the NUMA nodes they're
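
As the new workqueue.watchdog_thresh entry notes, the threshold can also be changed after boot.  A small userspace sketch, assuming the standard module-parameter sysfs path /sys/module/workqueue/parameters/watchdog_thresh (writing 0 disables stall detection); the helper name is hypothetical:

/* wq_watchdog.c: read or set the workqueue stall threshold via sysfs. */
#include <stdio.h>

#define WQ_WATCHDOG_PARAM "/sys/module/workqueue/parameters/watchdog_thresh"

int main(int argc, char **argv)
{
	FILE *f = fopen(WQ_WATCHDOG_PARAM, argc > 1 ? "w" : "r");

	if (!f) {
		perror(WQ_WATCHDOG_PARAM);
		return 1;
	}

	if (argc > 1) {
		/* e.g. "./wq_watchdog 60" raises the threshold to 60s */
		fprintf(f, "%s\n", argv[1]);
	} else {
		char buf[32];

		if (fgets(buf, sizeof(buf), f))
			printf("current threshold: %s", buf);
	}

	fclose(f);
	return 0;
}
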
diff --combined include/linux/sched.h
index 0c0e78102850a229cc7d06eb8d508eb680786cb4,d56cdde2f12c574e1ef51a5d1648db465a05bd55..4bae8ab3b89391f7d8d1566f56d3666d5d19058d
@@@ -177,9 -177,9 +177,9 @@@ extern void get_iowait_load(unsigned lo
  extern void calc_global_load(unsigned long ticks);
  
  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 -extern void update_cpu_load_nohz(void);
 +extern void update_cpu_load_nohz(int active);
  #else
 -static inline void update_cpu_load_nohz(void) { }
 +static inline void update_cpu_load_nohz(int active) { }
  #endif
  
  extern unsigned long get_parent_ip(unsigned long addr);
@@@ -377,6 -377,7 +377,7 @@@ extern void scheduler_tick(void)
  extern void sched_show_task(struct task_struct *p);
  
  #ifdef CONFIG_LOCKUP_DETECTOR
+ extern void touch_softlockup_watchdog_sched(void);
  extern void touch_softlockup_watchdog(void);
  extern void touch_softlockup_watchdog_sync(void);
  extern void touch_all_softlockup_watchdogs(void);
@@@ -387,6 -388,9 +388,9 @@@ extern unsigned int  softlockup_panic
  extern unsigned int  hardlockup_panic;
  void lockup_detector_init(void);
  #else
+ static inline void touch_softlockup_watchdog_sched(void)
+ {
+ }
  static inline void touch_softlockup_watchdog(void)
  {
  }
@@@ -1268,13 -1272,8 +1272,13 @@@ struct sched_entity 
  #endif
  
  #ifdef CONFIG_SMP
 -      /* Per entity load average tracking */
 -      struct sched_avg        avg;
 +      /*
 +       * Per entity load average tracking.
 +       *
 +       * Put into separate cache line so it does not
 +       * collide with read-mostly values above.
 +       */
 +      struct sched_avg        avg ____cacheline_aligned_in_smp;
  #endif
  };
  
@@@ -1460,15 -1459,14 +1464,15 @@@ struct task_struct 
        /* Used for emulating ABI behavior of previous Linux versions */
        unsigned int personality;
  
 -      unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
 -                               * execve */
 -      unsigned in_iowait:1;
 -
 -      /* Revert to default priority/policy when forking */
 +      /* scheduler bits, serialized by scheduler locks */
        unsigned sched_reset_on_fork:1;
        unsigned sched_contributes_to_load:1;
        unsigned sched_migrated:1;
 +      unsigned :0; /* force alignment to the next boundary */
 +
 +      /* unserialized, strictly 'current' */
 +      unsigned in_execve:1; /* bit to tell LSMs we're in execve */
 +      unsigned in_iowait:1;
  #ifdef CONFIG_MEMCG
        unsigned memcg_may_oom:1;
  #endif
        cputime_t gtime;
        struct prev_cputime prev_cputime;
  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 -      seqlock_t vtime_seqlock;
 +      seqcount_t vtime_seqcount;
        unsigned long long vtime_snap;
        enum {
 -              VTIME_SLEEPING = 0,
 +              /* Task is sleeping or running in a CPU with VTIME inactive */
 +              VTIME_INACTIVE = 0,
 +              /* Task runs in userspace in a CPU with VTIME active */
                VTIME_USER,
 +              /* Task runs in kernelspace in a CPU with VTIME active */
                VTIME_SYS,
        } vtime_snap_whence;
  #endif
@@@ -2011,8 -2006,7 +2015,8 @@@ static inline int pid_alive(const struc
  }
  
  /**
 - * is_global_init - check if a task structure is init
 + * is_global_init - check if a task structure is init. Since init
 + * is free to have sub-threads we need to check tgid.
   * @tsk: Task structure to be checked.
   *
   * Check if a task structure is the first user space task the kernel created.
   */
  static inline int is_global_init(struct task_struct *tsk)
  {
 -      return tsk->pid == 1;
 +      return task_tgid_nr(tsk) == 1;
  }
  
  extern struct pid *cad_pid;
diff --combined kernel/sched/clock.c
index caf4041f5b0ae6769bc562fccc189852eae77fcf,bf1f37507a497884f3b520260db284d9888b58ce..bc54e84675da0d50bd9a60a82f2da5df8590b080
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * sched_clock for unstable cpu clocks
   *
 - *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 + *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra
   *
   *  Updates and enhancements:
   *    Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
@@@ -354,7 -354,7 +354,7 @@@ void sched_clock_idle_wakeup_event(u64 
                return;
  
        sched_clock_tick();
-       touch_softlockup_watchdog();
+       touch_softlockup_watchdog_sched();
  }
  EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
  
diff --combined kernel/time/tick-sched.c
index 99ef0df1280737f463ea578addc7967da4a2c810,58219f6ff3c6f99bf733313c475fff8a69ce4e4a..9cc20af58c76300111f23a007b9fd5ad0c8bd60b
@@@ -143,7 -143,7 +143,7 @@@ static void tick_sched_handle(struct ti
         * when we go busy again does not account too much ticks.
         */
        if (ts->tick_stopped) {
-               touch_softlockup_watchdog();
+               touch_softlockup_watchdog_sched();
                if (is_idle_task(current))
                        ts->idle_jiffies++;
        }
@@@ -430,7 -430,7 +430,7 @@@ static void tick_nohz_update_jiffies(kt
        tick_do_update_jiffies64(now);
        local_irq_restore(flags);
  
-       touch_softlockup_watchdog();
+       touch_softlockup_watchdog_sched();
  }
  
  /*
@@@ -603,31 -603,15 +603,31 @@@ static ktime_t tick_nohz_stop_sched_tic
  
        /*
         * If the tick is due in the next period, keep it ticking or
 -       * restart it proper.
 +       * force prod the timer.
         */
        delta = next_tick - basemono;
        if (delta <= (u64)TICK_NSEC) {
                tick.tv64 = 0;
 +              /*
 +               * We've not stopped the tick yet, and there's a timer in the
 +               * next period, so no point in stopping it either, bail.
 +               */
                if (!ts->tick_stopped)
                        goto out;
 +
 +              /*
 +               * If, OTOH, we did stop it, but there's a pending (expired)
 +               * timer reprogram the timer hardware to fire now.
 +               *
 +               * We will not restart the tick proper, just prod the timer
 +               * hardware into firing an interrupt to process the pending
 +               * timers. Just like tick_irq_exit() will not restart the tick
 +               * for 'normal' interrupts.
 +               *
 +               * Only once we exit the idle loop will we re-enable the tick,
 +               * see tick_nohz_idle_exit().
 +               */
                if (delta == 0) {
 -                      /* Tick is stopped, but required now. Enforce it */
                        tick_nohz_restart(ts, now);
                        goto out;
                }
@@@ -710,14 -694,14 +710,14 @@@ out
        return tick;
  }
  
 -static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active)
  {
        /* Update jiffies first */
        tick_do_update_jiffies64(now);
 -      update_cpu_load_nohz();
 +      update_cpu_load_nohz(active);
  
        calc_load_exit_idle();
-       touch_softlockup_watchdog();
+       touch_softlockup_watchdog_sched();
        /*
         * Cancel the scheduled timer and restore the tick
         */
@@@ -741,7 -725,7 +741,7 @@@ static void tick_nohz_full_update_tick(
        if (can_stop_full_tick())
                tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
        else if (ts->tick_stopped)
 -              tick_nohz_restart_sched_tick(ts, ktime_get());
 +              tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
  #endif
  }
  
@@@ -891,7 -875,7 +891,7 @@@ static void tick_nohz_account_idle_tick
  #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        unsigned long ticks;
  
 -      if (vtime_accounting_enabled())
 +      if (vtime_accounting_cpu_enabled())
                return;
        /*
         * We stopped the tick in idle. Update process times would miss the
@@@ -932,7 -916,7 +932,7 @@@ void tick_nohz_idle_exit(void
                tick_nohz_stop_idle(ts, now);
  
        if (ts->tick_stopped) {
 -              tick_nohz_restart_sched_tick(ts, now);
 +              tick_nohz_restart_sched_tick(ts, now, 0);
                tick_nohz_account_idle_ticks(ts);
        }
  
diff --combined kernel/watchdog.c
index 84b5035cb6a57099362ec9ab6056d6c936f5b706,b04f680c4735b6a6da5a86f44443cfb9aef83b88..b3ace6ebbba3934ca52b6e5a6e420183da233fdc
@@@ -20,6 -20,7 +20,7 @@@
  #include <linux/smpboot.h>
  #include <linux/sched/rt.h>
  #include <linux/tick.h>
+ #include <linux/workqueue.h>
  
  #include <asm/irq_regs.h>
  #include <linux/kvm_para.h>
@@@ -225,7 -226,15 +226,15 @@@ static void __touch_watchdog(void
        __this_cpu_write(watchdog_touch_ts, get_timestamp());
  }
  
- void touch_softlockup_watchdog(void)
+ /**
+  * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
+  *
+  * Call when the scheduler may have stalled for legitimate reasons
+  * preventing the watchdog task from executing - e.g. the scheduler
+  * entering idle state.  This should only be used for scheduler events.
+  * Use touch_softlockup_watchdog() for everything else.
+  */
+ void touch_softlockup_watchdog_sched(void)
  {
        /*
         * Preemption can be enabled.  It doesn't matter which CPU's timestamp
         */
        raw_cpu_write(watchdog_touch_ts, 0);
  }
+ void touch_softlockup_watchdog(void)
+ {
+       touch_softlockup_watchdog_sched();
+       wq_watchdog_touch(raw_smp_processor_id());
+ }
  EXPORT_SYMBOL(touch_softlockup_watchdog);
  
  void touch_all_softlockup_watchdogs(void)
         */
        for_each_watchdog_cpu(cpu)
                per_cpu(watchdog_touch_ts, cpu) = 0;
+       wq_watchdog_touch(-1);
  }
  
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
@@@ -351,7 -367,7 +367,7 @@@ static void watchdog_overflow_callback(
                        trigger_allbutself_cpu_backtrace();
  
                if (hardlockup_panic)
 -                      panic("Hard LOCKUP");
 +                      nmi_panic(regs, "Hard LOCKUP");
  
                __this_cpu_write(hard_watchdog_warn, true);
                return;