[mirror_ubuntu-bionic-kernel.git] / kernel / hung_task.c

/*
 * Detect Hung Task
 *
 * kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
 *
 */

#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/nmi.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/lockdep.h>
#include <linux/export.h>
#include <linux/sysctl.h>
#include <linux/utsname.h>
#include <trace/events/sched.h>

/*
 * The number of tasks checked:
 *
 * Upper bound on how many threads a single scan in
 * check_hung_uninterruptible_tasks() visits before it stops.
 */
int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;

/*
 * Limit number of tasks checked in a batch.
 *
 * The scan runs inside an RCU read-side critical section. After this many
 * tasks the section is left and re-entered via rcu_lock_break(), with a
 * cond_resched() in between. This bounds how long khungtaskd goes without
 * a reschedule point and how long it can hold up an RCU grace period,
 * so the value needs an upper bound.
 */
#define HUNG_TASK_BATCHING 1024

/*
 * Hung-task timeout in seconds (converted by timeout_jiffies()).
 * Zero means infinite timeout - no checking done:
 */
unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;

/*
 * Remaining number of hung-task reports check_hung_task() may print.
 * It is decremented per report while positive; zero suppresses reports;
 * a negative value is never decremented.
 */
int __read_mostly sysctl_hung_task_warnings = 10;

/* Set by the panic notifier; once set, scans return without checking. */
static int __read_mostly did_panic;

/*
 * The khungtaskd kthread, stored by hung_task_init() and woken by
 * proc_dohung_task_timeout_secs() after the timeout sysctl is written.
 */
static struct task_struct *watchdog_task;

/*
 * Should we panic (and reboot, if panic_timeout= is set) when a
 * hung task is detected:
 */
unsigned int __read_mostly sysctl_hung_task_panic =
				CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;

/*
 * Parse the "hung_task_panic=" boot parameter into sysctl_hung_task_panic.
 *
 * Returns 1 when the value was converted successfully, otherwise the
 * error code reported by kstrtouint().
 */
static int __init hung_task_panic_setup(char *str)
{
	int err;

	err = kstrtouint(str, 0, &sysctl_hung_task_panic);

	return err ? err : 1;
}
__setup("hung_task_panic=", hung_task_panic_setup);

/*
 * Panic notifier callback: record that a panic has started so that
 * check_hung_uninterruptible_tasks() stops reporting hung tasks.
 *
 * None of the arguments are used. Always returns NOTIFY_DONE.
 */
static int
hung_task_panic(struct notifier_block *this, unsigned long event, void *ptr)
{
	did_panic = 1;

	return NOTIFY_DONE;
}

/* Registered on panic_notifier_list by hung_task_init(). */
static struct notifier_block panic_block = {
	.notifier_call = hung_task_panic,
};

/*
 * check_hung_task - report (and optionally panic on) one hung task
 * @t:       task to examine; the caller has already seen it in
 *           TASK_UNINTERRUPTIBLE state
 * @timeout: the hung-task timeout in seconds, used only in the message
 *
 * A task counts as hung when its total context-switch count (voluntary
 * plus involuntary) is unchanged since the previous call for that task.
 * The first call after the count changed only records the new count.
 *
 * A report is printed while sysctl_hung_task_warnings is non-zero.
 * Independently of the remaining warning budget, the system panics when
 * sysctl_hung_task_panic is set.
 */
static void check_hung_task(struct task_struct *t, unsigned long timeout)
{
	unsigned long switch_count = t->nvcsw + t->nivcsw;

	/*
	 * Ensure the task is not frozen.
	 * Also, skip vfork and any other user process that freezer should skip.
	 */
	if (unlikely(t->flags & (PF_FROZEN | PF_FREEZER_SKIP)))
		return;

	/*
	 * When a freshly created task is scheduled once, changes its state to
	 * TASK_UNINTERRUPTIBLE without having ever been switched out once, it
	 * mustn't be checked.
	 */
	if (unlikely(!switch_count))
		return;

	/* The task ran since the last check: remember the count and move on. */
	if (switch_count != t->last_switch_count) {
		t->last_switch_count = switch_count;
		return;
	}

	trace_sched_process_hang(t);

	/*
	 * Only bail out when there is nothing left to do at all. An exhausted
	 * warning budget must not silently disable hung_task_panic.
	 */
	if (!sysctl_hung_task_warnings && !sysctl_hung_task_panic)
		return;

	/*
	 * Ok, the task did not get scheduled for more than the configured
	 * timeout, complain:
	 */
	if (sysctl_hung_task_warnings) {
		/* Negative means "unlimited": only count down a positive budget. */
		if (sysctl_hung_task_warnings > 0)
			sysctl_hung_task_warnings--;

		pr_err("INFO: task %s:%d blocked for more than %lu seconds.\n",
			t->comm, t->pid, timeout);
		pr_err("      %s %s %.*s\n",
			print_tainted(), init_utsname()->release,
			(int)strcspn(init_utsname()->version, " "),
			init_utsname()->version);
		pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
			" disables this message.\n");
		sched_show_task(t);
		debug_show_held_locks(t);
	}

	touch_nmi_watchdog();

	if (sysctl_hung_task_panic) {
		trigger_all_cpu_backtrace();
		panic("hung_task: blocked tasks");
	}
}

/*
 * To avoid extending the RCU grace period for an unbounded amount of time,
 * periodically exit the read-side critical section and enter a new one.
 *
 * For preemptible RCU it is sufficient to call rcu_read_unlock in order
 * to leave the critical section. For classic RCU, a reschedule is required.
 *
 * @g and @t are the caller's current iteration cursors. A reference is
 * held on both while the RCU read lock is dropped, and both are checked
 * with pid_alive() once it has been re-taken.
 *
 * Returns true when both tasks are still alive, false otherwise (the
 * caller then abandons the scan). The RCU read lock is held on return
 * in either case.
 */
static bool rcu_lock_break(struct task_struct *g, struct task_struct *t)
{
	bool can_cont;

	/*
	 * Pin both task_structs - presumably so they cannot be freed while
	 * we are outside the read-side critical section.
	 */
	get_task_struct(g);
	get_task_struct(t);
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
	/* Must be evaluated after re-locking and before the references go. */
	can_cont = pid_alive(g) && pid_alive(t);
	put_task_struct(t);
	put_task_struct(g);

	return can_cont;
}

/*
 * Walk the threads in the system and hand every one that is in
 * TASK_UNINTERRUPTIBLE state to check_hung_task(), which warns about
 * tasks that have not been scheduled since the previous scan.
 *
 * @timeout: hung-task timeout in seconds, passed through to
 *           check_hung_task() for its message.
 *
 * At most sysctl_hung_task_check_count threads are visited per call.
 */
static void check_hung_uninterruptible_tasks(unsigned long timeout)
{
	int max_count = sysctl_hung_task_check_count;
	int batch_count = HUNG_TASK_BATCHING;
	struct task_struct *g, *t;

	/*
	 * If the system crashed already then all bets are off,
	 * do not report extra hung tasks:
	 */
	if (test_taint(TAINT_DIE) || did_panic)
		return;

	rcu_read_lock();
	do_each_thread(g, t) {
		/* Stop once the per-scan budget of threads is used up. */
		if (!max_count--)
			goto unlock;
		/* Once per batch, briefly leave the RCU read-side section. */
		if (!--batch_count) {
			batch_count = HUNG_TASK_BATCHING;
			/* g or t is no longer alive: give up on this scan. */
			if (!rcu_lock_break(g, t))
				goto unlock;
		}
		/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
		if (t->state == TASK_UNINTERRUPTIBLE)
			check_hung_task(t, timeout);
	} while_each_thread(g, t);
 unlock:
	rcu_read_unlock();
}

/*
 * Convert a timeout given in seconds into the number of jiffies to sleep.
 * A timeout of 0 disables the watchdog, so MAX_SCHEDULE_TIMEOUT is returned
 * instead.
 */
static unsigned long timeout_jiffies(unsigned long timeout)
{
	if (!timeout)
		return MAX_SCHEDULE_TIMEOUT;

	return timeout * HZ;
}

/*
 * sysctl handler for the hung-task timeout.
 *
 * The actual read or write is delegated to proc_doulongvec_minmax(). After
 * a successful write the watchdog thread is woken up so that it picks up
 * the new timeout. Returns whatever proc_doulongvec_minmax() returned.
 */
int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
				  void __user *buffer,
				  size_t *lenp, loff_t *ppos)
{
	int ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);

	/* Only a write that succeeded changes the timeout. */
	if (!ret && write)
		wake_up_process(watchdog_task);

	return ret;
}

/*
 * Non-zero when a reset was requested. watchdog() consumes and clears it
 * with atomic_xchg() and then skips one scan.
 */
static atomic_t reset_hung_task = ATOMIC_INIT(0);

/*
 * Request that the watchdog skips its next hung-task scan.
 * Exported (GPL-only) for use outside this file.
 */
void reset_hung_task_detector(void)
{
	atomic_set(&reset_hung_task, 1);
}
EXPORT_SYMBOL_GPL(reset_hung_task_detector);

/*
 * Main loop of the khungtaskd kthread, which checks for tasks stuck in
 * D state.
 *
 * Each iteration sleeps for the configured timeout and then scans for hung
 * tasks, unless a reset was requested through reset_hung_task_detector(),
 * in which case that one scan is skipped. The loop never terminates.
 */
static int watchdog(void *dummy)
{
	unsigned long timeout;

	set_user_nice(current, 0);

	while (1) {
		timeout = sysctl_hung_task_timeout_secs;

		/*
		 * A non-zero return means the sleep did not run to completion
		 * (e.g. the wake-up after a timeout sysctl write): re-read the
		 * timeout and sleep again.
		 */
		while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
			timeout = sysctl_hung_task_timeout_secs;

		/* Consume a pending reset request; it cancels this scan. */
		if (!atomic_xchg(&reset_hung_task, 0))
			check_hung_uninterruptible_tasks(timeout);
	}

	return 0;
}

/*
 * Register the panic notifier and start the "khungtaskd" watchdog thread.
 * Run as a subsys initcall; always returns 0.
 *
 * NOTE(review): the kthread_run() result is stored without an error check,
 * and watchdog_task is later passed to wake_up_process() by
 * proc_dohung_task_timeout_secs() - confirm a failed thread creation cannot
 * happen here or is handled elsewhere.
 */
static int __init hung_task_init(void)
{
	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");

	return 0;
}
subsys_initcall(hung_task_init);
Commit	Line	Data
	1	/*
	2	* Detect Hung Task
	3	*
	4	* kernel/hung_task.c - kernel thread for detecting tasks stuck in D state
	5	*
	6	*/
	7
	8	#include <linux/mm.h>
	9	#include <linux/cpu.h>
	10	#include <linux/nmi.h>
	11	#include <linux/init.h>
	12	#include <linux/delay.h>
	13	#include <linux/freezer.h>
	14	#include <linux/kthread.h>
	15	#include <linux/lockdep.h>
	16	#include <linux/export.h>
	17	#include <linux/sysctl.h>
	18	#include <linux/utsname.h>
	19	#include <trace/events/sched.h>
	20
	21	/*
	22	* The number of tasks checked:
	23	*/
	24	int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT;
	25
	26	/*
	27	* Limit number of tasks checked in a batch.
	28	*
	29	* This value controls the preemptibility of khungtaskd since preemption
	30	* is disabled during the critical section. It also controls the size of
	31	* the RCU grace period. So it needs to be upper-bound.
	32	*/
	33	#define HUNG_TASK_BATCHING 1024
	34
	35	/*
	36	* Zero means infinite timeout - no checking done:
	37	*/
	38	unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT;
	39
	40	int __read_mostly sysctl_hung_task_warnings = 10;
	41
	42	static int __read_mostly did_panic;
	43
	44	static struct task_struct *watchdog_task;
	45
	46	/*
	47	* Should we panic (and reboot, if panic_timeout= is set) when a
	48	* hung task is detected:
	49	*/
	50	unsigned int __read_mostly sysctl_hung_task_panic =
	51	CONFIG_BOOTPARAM_HUNG_TASK_PANIC_VALUE;
	52
	53	static int __init hung_task_panic_setup(char *str)
	54	{
	55	int rc = kstrtouint(str, 0, &sysctl_hung_task_panic);
	56
	57	if (rc)
	58	return rc;
	59	return 1;
	60	}
	61	__setup("hung_task_panic=", hung_task_panic_setup);
	62
	63	static int
	64	hung_task_panic(struct notifier_block this, unsigned long event, void ptr)
	65	{
	66	did_panic = 1;
	67
	68	return NOTIFY_DONE;
	69	}
	70
	71	static struct notifier_block panic_block = {
	72	.notifier_call = hung_task_panic,
	73	};
	74
	75	static void check_hung_task(struct task_struct *t, unsigned long timeout)
	76	{
	77	unsigned long switch_count = t->nvcsw + t->nivcsw;
	78
	79	/*
	80	* Ensure the task is not frozen.
	81	* Also, skip vfork and any other user process that freezer should skip.
	82	*/
	83	if (unlikely(t->flags & (PF_FROZEN \| PF_FREEZER_SKIP)))
	84	return;
	85
	86	/*
	87	* When a freshly created task is scheduled once, changes its state to
	88	* TASK_UNINTERRUPTIBLE without having ever been switched out once, it
	89	* mustn't be checked.
	90	*/
	91	if (unlikely(!switch_count))
	92	return;
	93
	94	if (switch_count != t->last_switch_count) {
	95	t->last_switch_count = switch_count;
	96	return;
	97	}
	98
	99	trace_sched_process_hang(t);
	100
	101	if (!sysctl_hung_task_warnings)
	102	return;
	103
	104	if (sysctl_hung_task_warnings > 0)
	105	sysctl_hung_task_warnings--;
	106
	107	/*
	108	* Ok, the task did not get scheduled for more than 2 minutes,
	109	* complain:
	110	*/
	111	pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
	112	t->comm, t->pid, timeout);
	113	pr_err(" %s %s %.*s\n",
	114	print_tainted(), init_utsname()->release,
	115	(int)strcspn(init_utsname()->version, " "),
	116	init_utsname()->version);
	117	pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\""
	118	" disables this message.\n");
	119	sched_show_task(t);
	120	debug_show_held_locks(t);
	121
	122	touch_nmi_watchdog();
	123
	124	if (sysctl_hung_task_panic) {
	125	trigger_all_cpu_backtrace();
	126	panic("hung_task: blocked tasks");
	127	}
	128	}
	129
	130	/*
	131	* To avoid extending the RCU grace period for an unbounded amount of time,
	132	* periodically exit the critical section and enter a new one.
	133	*
	134	* For preemptible RCU it is sufficient to call rcu_read_unlock in order
	135	* to exit the grace period. For classic RCU, a reschedule is required.
	136	*/
	137	static bool rcu_lock_break(struct task_struct g, struct task_struct t)
	138	{
	139	bool can_cont;
	140
	141	get_task_struct(g);
	142	get_task_struct(t);
	143	rcu_read_unlock();
	144	cond_resched();
	145	rcu_read_lock();
	146	can_cont = pid_alive(g) && pid_alive(t);
	147	put_task_struct(t);
	148	put_task_struct(g);
	149
	150	return can_cont;
	151	}
	152
	153	/*
	154	* Check whether a TASK_UNINTERRUPTIBLE does not get woken up for
	155	* a really long time (120 seconds). If that happens, print out
	156	* a warning.
	157	*/
	158	static void check_hung_uninterruptible_tasks(unsigned long timeout)
	159	{
	160	int max_count = sysctl_hung_task_check_count;
	161	int batch_count = HUNG_TASK_BATCHING;
	162	struct task_struct g, t;
	163
	164	/*
	165	* If the system crashed already then all bets are off,
	166	* do not report extra hung tasks:
	167	*/
	168	if (test_taint(TAINT_DIE) \|\| did_panic)
	169	return;
	170
	171	rcu_read_lock();
	172	do_each_thread(g, t) {
	173	if (!max_count--)
	174	goto unlock;
	175	if (!--batch_count) {
	176	batch_count = HUNG_TASK_BATCHING;
	177	if (!rcu_lock_break(g, t))
	178	goto unlock;
	179	}
	180	/* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */
	181	if (t->state == TASK_UNINTERRUPTIBLE)
	182	check_hung_task(t, timeout);
	183	} while_each_thread(g, t);
	184	unlock:
	185	rcu_read_unlock();
	186	}
	187
	188	static unsigned long timeout_jiffies(unsigned long timeout)
	189	{
	190	/* timeout of 0 will disable the watchdog */
	191	return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT;
	192	}
	193
	194	/*
	195	* Process updating of timeout sysctl
	196	*/
	197	int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
	198	void __user *buffer,
	199	size_t lenp, loff_t ppos)
	200	{
	201	int ret;
	202
	203	ret = proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
	204
	205	if (ret \|\| !write)
	206	goto out;
	207
	208	wake_up_process(watchdog_task);
	209
	210	out:
	211	return ret;
	212	}
	213
	214	static atomic_t reset_hung_task = ATOMIC_INIT(0);
	215
	216	void reset_hung_task_detector(void)
	217	{
	218	atomic_set(&reset_hung_task, 1);
	219	}
	220	EXPORT_SYMBOL_GPL(reset_hung_task_detector);
	221
	222	/*
	223	* kthread which checks for tasks stuck in D state
	224	*/
	225	static int watchdog(void *dummy)
	226	{
	227	set_user_nice(current, 0);
	228
	229	for ( ; ; ) {
	230	unsigned long timeout = sysctl_hung_task_timeout_secs;
	231
	232	while (schedule_timeout_interruptible(timeout_jiffies(timeout)))
	233	timeout = sysctl_hung_task_timeout_secs;
	234
	235	if (atomic_xchg(&reset_hung_task, 0))
	236	continue;
	237
	238	check_hung_uninterruptible_tasks(timeout);
	239	}
	240
	241	return 0;
	242	}
	243
	244	static int __init hung_task_init(void)
	245	{
	246	atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
	247	watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
	248
	249	return 0;
	250	}
	251	subsys_initcall(hung_task_init);