/*
 * Context tracking: Probe on high level context boundaries such as kernel
 * and userspace. This includes syscalls and exceptions entry/exit.
 *
 * This is used by RCU to remove its dependency on the timer tick while a CPU
 * runs in userspace.
 *
 * Started by Frederic Weisbecker:
 *
 * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
 *
 * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton,
 * Steven Rostedt, Peter Zijlstra for suggestions and improvements.
 *
 */

#include <linux/context_tracking.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/hardirq.h>
#include <linux/export.h>
#include <linux/kprobes.h>

#define CREATE_TRACE_POINTS
#include <trace/events/context_tracking.h>

struct static_key context_tracking_enabled = STATIC_KEY_INIT_FALSE;
EXPORT_SYMBOL_GPL(context_tracking_enabled);

DEFINE_PER_CPU(struct context_tracking, context_tracking);
EXPORT_SYMBOL_GPL(context_tracking);

void context_tracking_cpu_set(int cpu)
{
	if (!per_cpu(context_tracking.active, cpu)) {
		per_cpu(context_tracking.active, cpu) = true;
		static_key_slow_inc(&context_tracking_enabled);
	}
}
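
/*
 * For illustration only, not part of this file: a minimal sketch of how a
 * user of the API above, such as the nohz_full boot code, is expected to
 * mark its tickless CPUs. "tick_nohz_full_mask" is assumed here to be a
 * cpumask maintained elsewhere (the nohz subsystem provides one of that
 * name):
 *
 *	int cpu;
 *
 *	for_each_cpu(cpu, tick_nohz_full_mask)
 *		context_tracking_cpu_set(cpu);
 */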

/**
 * context_tracking_user_enter - Inform the context tracking that the CPU is going to
 *                               enter userspace mode.
 *
 * This function must be called right before we switch from the kernel
 * to userspace, when it's guaranteed the remaining kernel instructions
 * to execute won't use any RCU read side critical section because this
 * function sets RCU in extended quiescent state.
 */
void context_tracking_user_enter(void)
{
	unsigned long flags;

	/*
	 * Repeat the user_enter() check here because some archs may be calling
	 * this from asm and if no CPU needs context tracking, they shouldn't
	 * go further. Repeat the check here until they support the inline static
	 * key check.
	 */
	if (!context_tracking_is_enabled())
		return;

	/*
	 * Some contexts may involve an exception occurring in an irq,
	 * leading to that nesting:
	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
	 * helpers are enough to protect RCU uses inside the exception. So
	 * just return immediately if we detect we are in an IRQ.
	 */
	if (in_interrupt())
		return;

	/* Kernel threads aren't supposed to go to userspace */
	WARN_ON_ONCE(!current->mm);

	local_irq_save(flags);
	if (__this_cpu_read(context_tracking.state) != IN_USER) {
		if (__this_cpu_read(context_tracking.active)) {
			trace_user_enter(0);
			/*
			 * At this stage, only low level arch entry code remains and
			 * then we'll run in userspace. We can assume there won't be
			 * any RCU read-side critical section until the next call to
			 * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency
			 * on the tick.
			 */
			vtime_user_enter(current);
			rcu_user_enter();
		}
		/*
		 * Even if context tracking is disabled on this CPU, because it's outside
		 * the full dynticks mask for example, we still have to keep track of the
		 * context transitions and states to prevent inconsistency on those of
		 * other CPUs.
		 * If a task triggers an exception in userspace, sleeps in the exception
		 * handler and then migrates to another CPU, that new CPU must know where
		 * the exception returns to by the time we call exception_exit().
		 * This information can only be provided by the previous CPU when it called
		 * exception_enter().
		 * OTOH we can spare the calls to vtime and RCU when context_tracking.active
		 * is false because we know that CPU is not tickless.
		 */
		__this_cpu_write(context_tracking.state, IN_USER);
	}
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_user_enter);
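
/*
 * For reference: arch code usually reaches the function above through the
 * user_enter() helper in <linux/context_tracking.h> rather than calling it
 * directly. A rough sketch of that wrapper, assuming the static key check
 * stays inline in the header:
 *
 *	static inline void user_enter(void)
 *	{
 *		if (context_tracking_is_enabled())
 *			context_tracking_user_enter();
 *	}
 */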

/**
 * context_tracking_user_exit - Inform the context tracking that the CPU is
 *                              exiting userspace mode and entering the kernel.
 *
 * This function must be called after we entered the kernel from userspace,
 * before any use of an RCU read side critical section. This potentially
 * includes any high level kernel code like syscalls, exceptions, signal
 * handling, etc...
 *
 * This call supports re-entrancy. This way it can be called from any exception
 * handler without needing to know if we came from userspace or not.
 */
void context_tracking_user_exit(void)
{
	unsigned long flags;

	if (!context_tracking_is_enabled())
		return;

	if (in_interrupt())
		return;

	local_irq_save(flags);
	if (__this_cpu_read(context_tracking.state) == IN_USER) {
		if (__this_cpu_read(context_tracking.active)) {
			/*
			 * We are going to run code that may use RCU. Inform
			 * RCU core about that (ie: we may need the tick again).
			 */
			rcu_user_exit();
			vtime_user_exit(current);
			trace_user_exit(0);
		}
		__this_cpu_write(context_tracking.state, IN_KERNEL);
	}
	local_irq_restore(flags);
}
NOKPROBE_SYMBOL(context_tracking_user_exit);
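
/*
 * The re-entrancy above is what lets exception handlers wrap themselves with
 * the exception_enter()/exception_exit() helpers from
 * <linux/context_tracking.h>: the former saves the per-CPU state and calls
 * context_tracking_user_exit(), the latter calls context_tracking_user_enter()
 * again only if the saved state was IN_USER. A sketch of the calling pattern,
 * where do_handle_exception() stands in for a hypothetical handler:
 *
 *	enum ctx_state prev_ctx;
 *
 *	prev_ctx = exception_enter();
 *	do_handle_exception();
 *	exception_exit(prev_ctx);
 */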

/**
 * __context_tracking_task_switch - context switch the syscall callbacks
 * @prev: the task that is being switched out
 * @next: the task that is being switched in
 *
 * Context tracking uses the syscall slow path to implement its user-kernel
 * boundary probes on syscalls. This way it doesn't impact the syscall fast
 * path on CPUs that don't do context tracking.
 *
 * But we need to clear the flag on the previous task because it may later
 * migrate to some CPU that doesn't do the context tracking. As such the TIF
 * flag may not be desired there.
 */
void __context_tracking_task_switch(struct task_struct *prev,
				    struct task_struct *next)
{
	clear_tsk_thread_flag(prev, TIF_NOHZ);
	set_tsk_thread_flag(next, TIF_NOHZ);
}
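
/*
 * For reference: the scheduler reaches the function above through the
 * context_tracking_task_switch() wrapper in <linux/context_tracking.h>,
 * roughly along these lines (a sketch, assuming the static key check
 * lives in the header):
 *
 *	static inline void context_tracking_task_switch(struct task_struct *prev,
 *							struct task_struct *next)
 *	{
 *		if (context_tracking_is_enabled())
 *			__context_tracking_task_switch(prev, next);
 *	}
 */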

#ifdef CONFIG_CONTEXT_TRACKING_FORCE
void __init context_tracking_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu)
		context_tracking_cpu_set(cpu);
}
#endif