Merge branch 'for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq
author     Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 12 Jan 2016 02:53:13 +0000 (18:53 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Tue, 12 Jan 2016 02:53:13 +0000 (18:53 -0800)
Pull workqueue update from Tejun Heo:
 "Workqueue changes for v4.5.  One cleanup patch and three to improve
  the debuggability.

  Workqueue now has a stall detector which dumps workqueue state if any
  worker pool hasn't made forward progress over a certain amount of time
  (30s by default) and also triggers a warning if a workqueue which can
  be used in memory reclaim path tries to wait on something which can't
  be.

  These should make workqueue hangs a lot easier to debug."

* 'for-4.5' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/wq:
  workqueue: simplify the apply_workqueue_attrs_locked()
  workqueue: implement lockup detector
  watchdog: introduce touch_softlockup_watchdog_sched()
  workqueue: warn if memory reclaim tries to flush !WQ_MEM_RECLAIM workqueue

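The warning added by the last commit above enforces a simple rule: work running on a WQ_MEM_RECLAIM workqueue (which is backed by a rescuer thread and therefore guaranteed to make forward progress during memory reclaim) must not wait on a workqueue that lacks WQ_MEM_RECLAIM, since that queue may itself be stuck waiting for memory. A minimal driver-style sketch of the rule follows; the queue names and module boilerplate are illustrative, not taken from this commit:

/*
 * Sketch: a reclaim-capable workqueue and an ordinary one.  Flushing the
 * ordinary queue from work that runs on the reclaim path is exactly what
 * the new warning catches.
 */
#include <linux/module.h>
#include <linux/workqueue.h>

static struct workqueue_struct *io_wq;      /* used on the reclaim path */
static struct workqueue_struct *stats_wq;   /* best-effort housekeeping */

static void io_work_fn(struct work_struct *work)
{
	/*
	 * OK: waiting on another WQ_MEM_RECLAIM workqueue from here.
	 *
	 * Not OK (triggers the new warning):
	 *	flush_workqueue(stats_wq);
	 * because stats_wq has no rescuer and may not make progress
	 * while memory is being reclaimed.
	 */
}
static DECLARE_WORK(io_work, io_work_fn);

static int __init example_init(void)
{
	/* Rescuer-backed queue: forward progress guaranteed under reclaim. */
	io_wq = alloc_workqueue("example_io", WQ_MEM_RECLAIM, 0);
	/* Ordinary queue: no such guarantee. */
	stats_wq = alloc_workqueue("example_stats", 0, 0);
	if (!io_wq || !stats_wq)
		goto err;

	queue_work(io_wq, &io_work);
	return 0;

err:
	if (io_wq)
		destroy_workqueue(io_wq);
	if (stats_wq)
		destroy_workqueue(stats_wq);
	return -ENOMEM;
}

static void __exit example_exit(void)
{
	destroy_workqueue(io_wq);
	destroy_workqueue(stats_wq);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");
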
Documentation/kernel-parameters.txt
include/linux/sched.h
kernel/sched/clock.c
kernel/time/tick-sched.c
kernel/watchdog.c

diff --combined Documentation/kernel-parameters.txt
index 1a44a7a7f71d76eea808811a230267b1fd028c55,fb6c93f36e3b1b6a4970b3212580ab5fe71173b9..1a8169ba29e6e0516c6837cf0ea04317b9253afb
@@@ -472,15 -472,6 +472,15 @@@ bytes respectively. Such letter suffixe
                        Change the amount of debugging information output
                        when initialising the APIC and IO-APIC components.
  
 +      apic_extnmi=    [APIC,X86] External NMI delivery setting
 +                      Format: { bsp (default) | all | none }
 +                      bsp:  External NMI is delivered only to CPU 0
 +                      all:  External NMIs are broadcast to all CPUs as a
 +                            backup of CPU 0
 +                      none: External NMI is masked for all CPUs. This is
 +                            useful so that a dump capture kernel won't be
 +                            shot down by NMI
 +
        autoconf=       [IPV6]
                        See Documentation/networking/ipv6.txt.
  
        rcutorture.verbose= [KNL]
                        Enable additional printk() statements.
  
 +      rcupdate.rcu_cpu_stall_suppress= [KNL]
 +                      Suppress RCU CPU stall warning messages.
 +
 +      rcupdate.rcu_cpu_stall_timeout= [KNL]
 +                      Set timeout for RCU CPU stall warning messages.
 +
        rcupdate.rcu_expedited= [KNL]
                        Use expedited grace-period primitives, for
                        example, synchronize_rcu_expedited() instead
                        of synchronize_rcu().  This reduces latency,
                        but can increase CPU utilization, degrade
                        real-time latency, and degrade energy efficiency.
 -
 -      rcupdate.rcu_cpu_stall_suppress= [KNL]
 -                      Suppress RCU CPU stall warning messages.
 -
 -      rcupdate.rcu_cpu_stall_timeout= [KNL]
 -                      Set timeout for RCU CPU stall warning messages.
 +                      No effect on CONFIG_TINY_RCU kernels.
 +
 +      rcupdate.rcu_normal= [KNL]
 +                      Use only normal grace-period primitives,
 +                      for example, synchronize_rcu() instead of
 +                      synchronize_rcu_expedited().  This improves
 +                      real-time latency, CPU utilization, and
 +                      energy efficiency, but can expose users to
 +                      increased grace-period latency.  This parameter
 +                      overrides rcupdate.rcu_expedited.  No effect on
 +                      CONFIG_TINY_RCU kernels.
 +
 +      rcupdate.rcu_normal_after_boot= [KNL]
 +                      Once boot has completed (that is, after
 +                      rcu_end_inkernel_boot() has been invoked), use
 +                      only normal grace-period primitives.  No effect
 +                      on CONFIG_TINY_RCU kernels.
  
        rcupdate.rcu_task_stall_timeout= [KNL]
                        Set timeout in jiffies for RCU task stall warning
                        or other driver-specific files in the
                        Documentation/watchdog/ directory.
  
+       workqueue.watchdog_thresh=
+                       If CONFIG_WQ_WATCHDOG is configured, workqueue can
+                       warn stall conditions and dump internal state to
+                       help debugging.  0 disables workqueue stall
+                       detection; otherwise, it's the stall threshold
+                       duration in seconds.  The default value is 30 and
+                       it can be updated at runtime by writing to the
+                       corresponding sysfs file.
        workqueue.disable_numa
                        By default, all work items queued to unbound
                        workqueues are affine to the NUMA nodes they're
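
As the new workqueue.watchdog_thresh entry notes, the threshold can also be changed after boot.  A small userspace sketch, assuming the standard module-parameter sysfs path /sys/module/workqueue/parameters/watchdog_thresh (writing 0 disables stall detection); the helper name is hypothetical:

/* wq_watchdog.c: read or set the workqueue stall threshold via sysfs. */
#include <stdio.h>

#define WQ_WATCHDOG_PARAM "/sys/module/workqueue/parameters/watchdog_thresh"

int main(int argc, char **argv)
{
	FILE *f = fopen(WQ_WATCHDOG_PARAM, argc > 1 ? "w" : "r");

	if (!f) {
		perror(WQ_WATCHDOG_PARAM);
		return 1;
	}

	if (argc > 1) {
		/* e.g. "./wq_watchdog 60" raises the threshold to 60s */
		fprintf(f, "%s\n", argv[1]);
	} else {
		char buf[32];

		if (fgets(buf, sizeof(buf), f))
			printf("current threshold: %s", buf);
	}

	fclose(f);
	return 0;
}
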
diff --combined include/linux/sched.h
index 0c0e78102850a229cc7d06eb8d508eb680786cb4,d56cdde2f12c574e1ef51a5d1648db465a05bd55..4bae8ab3b89391f7d8d1566f56d3666d5d19058d
@@@ -177,9 -177,9 +177,9 @@@ extern void get_iowait_load(unsigned lo
  extern void calc_global_load(unsigned long ticks);
  
  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
 -extern void update_cpu_load_nohz(void);
 +extern void update_cpu_load_nohz(int active);
  #else
 -static inline void update_cpu_load_nohz(void) { }
 +static inline void update_cpu_load_nohz(int active) { }
  #endif
  
  extern unsigned long get_parent_ip(unsigned long addr);
@@@ -377,6 -377,7 +377,7 @@@ extern void scheduler_tick(void)
  extern void sched_show_task(struct task_struct *p);
  
  #ifdef CONFIG_LOCKUP_DETECTOR
+ extern void touch_softlockup_watchdog_sched(void);
  extern void touch_softlockup_watchdog(void);
  extern void touch_softlockup_watchdog_sync(void);
  extern void touch_all_softlockup_watchdogs(void);
@@@ -387,6 -388,9 +388,9 @@@ extern unsigned int  softlockup_panic
  extern unsigned int  hardlockup_panic;
  void lockup_detector_init(void);
  #else
+ static inline void touch_softlockup_watchdog_sched(void)
+ {
+ }
  static inline void touch_softlockup_watchdog(void)
  {
  }
@@@ -1268,13 -1272,8 +1272,13 @@@ struct sched_entity 
  #endif
  
  #ifdef CONFIG_SMP
 -      /* Per entity load average tracking */
 -      struct sched_avg        avg;
 +      /*
 +       * Per entity load average tracking.
 +       *
 +       * Put into separate cache line so it does not
 +       * collide with read-mostly values above.
 +       */
 +      struct sched_avg        avg ____cacheline_aligned_in_smp;
  #endif
  };
  
@@@ -1460,15 -1459,14 +1464,15 @@@ struct task_struct 
        /* Used for emulating ABI behavior of previous Linux versions */
        unsigned int personality;
  
 -      unsigned in_execve:1;   /* Tell the LSMs that the process is doing an
 -                               * execve */
 -      unsigned in_iowait:1;
 -
 -      /* Revert to default priority/policy when forking */
 +      /* scheduler bits, serialized by scheduler locks */
        unsigned sched_reset_on_fork:1;
        unsigned sched_contributes_to_load:1;
        unsigned sched_migrated:1;
 +      unsigned :0; /* force alignment to the next boundary */
 +
 +      /* unserialized, strictly 'current' */
 +      unsigned in_execve:1; /* bit to tell LSMs we're in execve */
 +      unsigned in_iowait:1;
  #ifdef CONFIG_MEMCG
        unsigned memcg_may_oom:1;
  #endif
        cputime_t gtime;
        struct prev_cputime prev_cputime;
  #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 -      seqlock_t vtime_seqlock;
 +      seqcount_t vtime_seqcount;
        unsigned long long vtime_snap;
        enum {
 -              VTIME_SLEEPING = 0,
 +              /* Task is sleeping or running in a CPU with VTIME inactive */
 +              VTIME_INACTIVE = 0,
 +              /* Task runs in userspace in a CPU with VTIME active */
                VTIME_USER,
 +              /* Task runs in kernelspace in a CPU with VTIME active */
                VTIME_SYS,
        } vtime_snap_whence;
  #endif
@@@ -2011,8 -2006,7 +2015,8 @@@ static inline int pid_alive(const struc
  }
  
  /**
 - * is_global_init - check if a task structure is init
 + * is_global_init - check if a task structure is init. Since init
 + * is free to have sub-threads we need to check tgid.
   * @tsk: Task structure to be checked.
   *
   * Check if a task structure is the first user space task the kernel created.
   */
  static inline int is_global_init(struct task_struct *tsk)
  {
 -      return tsk->pid == 1;
 +      return task_tgid_nr(tsk) == 1;
  }
  
  extern struct pid *cad_pid;
diff --combined kernel/sched/clock.c
index caf4041f5b0ae6769bc562fccc189852eae77fcf,bf1f37507a497884f3b520260db284d9888b58ce..bc54e84675da0d50bd9a60a82f2da5df8590b080
@@@ -1,7 -1,7 +1,7 @@@
  /*
   * sched_clock for unstable cpu clocks
   *
 - *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com>
 + *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra
   *
   *  Updates and enhancements:
   *    Copyright (C) 2008 Red Hat, Inc. Steven Rostedt <srostedt@redhat.com>
@@@ -354,7 -354,7 +354,7 @@@ void sched_clock_idle_wakeup_event(u64 
                return;
  
        sched_clock_tick();
-       touch_softlockup_watchdog();
+       touch_softlockup_watchdog_sched();
  }
  EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
  
diff --combined kernel/time/tick-sched.c
index 99ef0df1280737f463ea578addc7967da4a2c810,58219f6ff3c6f99bf733313c475fff8a69ce4e4a..9cc20af58c76300111f23a007b9fd5ad0c8bd60b
@@@ -143,7 -143,7 +143,7 @@@ static void tick_sched_handle(struct ti
         * when we go busy again does not account too much ticks.
         */
        if (ts->tick_stopped) {
-               touch_softlockup_watchdog();
+               touch_softlockup_watchdog_sched();
                if (is_idle_task(current))
                        ts->idle_jiffies++;
        }
@@@ -430,7 -430,7 +430,7 @@@ static void tick_nohz_update_jiffies(kt
        tick_do_update_jiffies64(now);
        local_irq_restore(flags);
  
-       touch_softlockup_watchdog();
+       touch_softlockup_watchdog_sched();
  }
  
  /*
@@@ -603,31 -603,15 +603,31 @@@ static ktime_t tick_nohz_stop_sched_tic
  
        /*
         * If the tick is due in the next period, keep it ticking or
 -       * restart it proper.
 +       * force prod the timer.
         */
        delta = next_tick - basemono;
        if (delta <= (u64)TICK_NSEC) {
                tick.tv64 = 0;
 +              /*
 +               * We've not stopped the tick yet, and there's a timer in the
 +               * next period, so no point in stopping it either, bail.
 +               */
                if (!ts->tick_stopped)
                        goto out;
 +
 +              /*
 +               * If, OTOH, we did stop it, but there's a pending (expired)
 +               * timer reprogram the timer hardware to fire now.
 +               *
 +               * We will not restart the tick proper, just prod the timer
 +               * hardware into firing an interrupt to process the pending
 +               * timers. Just like tick_irq_exit() will not restart the tick
 +               * for 'normal' interrupts.
 +               *
 +               * Only once we exit the idle loop will we re-enable the tick,
 +               * see tick_nohz_idle_exit().
 +               */
                if (delta == 0) {
 -                      /* Tick is stopped, but required now. Enforce it */
                        tick_nohz_restart(ts, now);
                        goto out;
                }
@@@ -710,14 -694,14 +710,14 @@@ out
        return tick;
  }
  
 -static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 +static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now, int active)
  {
        /* Update jiffies first */
        tick_do_update_jiffies64(now);
 -      update_cpu_load_nohz();
 +      update_cpu_load_nohz(active);
  
        calc_load_exit_idle();
-       touch_softlockup_watchdog();
+       touch_softlockup_watchdog_sched();
        /*
         * Cancel the scheduled timer and restore the tick
         */
@@@ -741,7 -725,7 +741,7 @@@ static void tick_nohz_full_update_tick(
        if (can_stop_full_tick())
                tick_nohz_stop_sched_tick(ts, ktime_get(), cpu);
        else if (ts->tick_stopped)
 -              tick_nohz_restart_sched_tick(ts, ktime_get());
 +              tick_nohz_restart_sched_tick(ts, ktime_get(), 1);
  #endif
  }
  
@@@ -891,7 -875,7 +891,7 @@@ static void tick_nohz_account_idle_tick
  #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
        unsigned long ticks;
  
 -      if (vtime_accounting_enabled())
 +      if (vtime_accounting_cpu_enabled())
                return;
        /*
         * We stopped the tick in idle. Update process times would miss the
@@@ -932,7 -916,7 +932,7 @@@ void tick_nohz_idle_exit(void
                tick_nohz_stop_idle(ts, now);
  
        if (ts->tick_stopped) {
 -              tick_nohz_restart_sched_tick(ts, now);
 +              tick_nohz_restart_sched_tick(ts, now, 0);
                tick_nohz_account_idle_ticks(ts);
        }
  
diff --combined kernel/watchdog.c
index 84b5035cb6a57099362ec9ab6056d6c936f5b706,b04f680c4735b6a6da5a86f44443cfb9aef83b88..b3ace6ebbba3934ca52b6e5a6e420183da233fdc
@@@ -20,6 -20,7 +20,7 @@@
  #include <linux/smpboot.h>
  #include <linux/sched/rt.h>
  #include <linux/tick.h>
+ #include <linux/workqueue.h>
  
  #include <asm/irq_regs.h>
  #include <linux/kvm_para.h>
@@@ -225,7 -226,15 +226,15 @@@ static void __touch_watchdog(void
        __this_cpu_write(watchdog_touch_ts, get_timestamp());
  }
  
- void touch_softlockup_watchdog(void)
+ /**
+  * touch_softlockup_watchdog_sched - touch watchdog on scheduler stalls
+  *
+  * Call when the scheduler may have stalled for legitimate reasons
+  * preventing the watchdog task from executing - e.g. the scheduler
+  * entering idle state.  This should only be used for scheduler events.
+  * Use touch_softlockup_watchdog() for everything else.
+  */
+ void touch_softlockup_watchdog_sched(void)
  {
        /*
         * Preemption can be enabled.  It doesn't matter which CPU's timestamp
         */
        raw_cpu_write(watchdog_touch_ts, 0);
  }
+ void touch_softlockup_watchdog(void)
+ {
+       touch_softlockup_watchdog_sched();
+       wq_watchdog_touch(raw_smp_processor_id());
+ }
  EXPORT_SYMBOL(touch_softlockup_watchdog);
  
  void touch_all_softlockup_watchdogs(void)
         */
        for_each_watchdog_cpu(cpu)
                per_cpu(watchdog_touch_ts, cpu) = 0;
+       wq_watchdog_touch(-1);
  }
  
  #ifdef CONFIG_HARDLOCKUP_DETECTOR
@@@ -351,7 -367,7 +367,7 @@@ static void watchdog_overflow_callback(
                        trigger_allbutself_cpu_backtrace();
  
                if (hardlockup_panic)
 -                      panic("Hard LOCKUP");
 +                      nmi_panic(regs, "Hard LOCKUP");
  
                __this_cpu_write(hard_watchdog_warn, true);
                return;