[mirror_ubuntu-artful-kernel.git] / kernel / latencytop.c

/*
 * latencytop.c: Latency display infrastructure
 *
 * (C) Copyright 2008 Intel Corporation
 * Author: Arjan van de Ven <arjan@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

/*
 * CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is
 * used by the "latencytop" userspace tool. The latency that is tracked is not
 * the 'traditional' interrupt latency (which is primarily caused by something
 * else consuming CPU), but instead, it is the latency an application encounters
 * because the kernel sleeps on its behalf for various reasons.
 *
 * This code tracks 2 levels of statistics:
 * 1) System level latency
 * 2) Per process latency
 *
 * The latency is stored in fixed sized data structures in an accumulated form;
 * if the "same" latency cause is hit twice, this will be tracked as one entry
 * in the data structure. The count, the total accumulated latency and the
 * maximum latency are all tracked in this data structure. When the fixed size
 * structure is full, no new causes are tracked until the buffer is flushed by
 * writing to the /proc file; the userspace tool does this on a regular basis.
 *
 * A latency cause is identified by a stringified backtrace at the point that
 * the scheduler gets invoked. The userland tool will use this string to
 * identify the cause of the latency in human readable form.
 *
 * The information is exported via /proc/latency_stats and /proc/<pid>/latency.
 * These files look like this:
 *
 * Latency Top version : v0.1
 * 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl
 * |    |    |    |
 * |    |    |    +----> the stringified backtrace
 * |    |    +---------> The maximum latency for this entry in microseconds
 * |    +--------------> The accumulated latency for this entry (microseconds)
 * +-------------------> The number of times this entry is hit
 *
 * (note: the average latency is the accumulated latency divided by the number
 * of times)
 */

#include <linux/latencytop.h>
#include <linux/kallsyms.h>
#include <linux/seq_file.h>
#include <linux/notifier.h>
#include <linux/spinlock.h>
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/stacktrace.h>

/*
 * Taken (with interrupts disabled) around every update of the global table
 * below and of the per-task latency records in this file.
 */
static DEFINE_SPINLOCK(latency_lock);

/* Number of slots in the system-wide latency table */
#define MAXLR 128
/* System-wide table; a slot whose backtrace[0] is zero is treated as empty */
static struct latency_record latency_record[MAXLR];

/*
 * Non-zero when latency accounting is active. Only read in this file;
 * it is not static, so it is expected to be set from elsewhere.
 */
int latencytop_enabled;

/*
 * Wipe all per-task latency records of @p and reset its record counter.
 * Nothing is touched while latencytop is disabled.
 */
void clear_all_latency_tracing(struct task_struct *p)
{
	unsigned long irqflags;

	if (latencytop_enabled) {
		spin_lock_irqsave(&latency_lock, irqflags);
		p->latency_record_count = 0;
		memset(&p->latency_record, 0, sizeof(p->latency_record));
		spin_unlock_irqrestore(&latency_lock, irqflags);
	}
}

/*
 * Zero the whole system-wide latency table under latency_lock, which marks
 * every slot as empty again.
 */
static void clear_global_latency_tracing(void)
{
	unsigned long irqflags;

	spin_lock_irqsave(&latency_lock, irqflags);
	memset(latency_record, 0, sizeof(latency_record));
	spin_unlock_irqrestore(&latency_lock, irqflags);
}

/*
 * Fold one latency sample into the system-wide table.
 *
 * @tsk: task that experienced the latency; tasks without an mm are ignored
 * @lat: sample to account (backtrace, count, time and max already filled in)
 *
 * The caller in this file holds latency_lock. If a slot with the same
 * backtrace already exists its count/time/max are updated; otherwise the
 * sample is copied into the lowest empty slot. When no slot is empty the
 * sample is silently dropped.
 */
static void __sched
account_global_scheduler_latency(struct task_struct *tsk, struct latency_record *lat)
{
	int first_free = MAXLR;	/* MAXLR == "no empty slot seen" */
	int i;

	if (!latencytop_enabled)
		return;

	/* skip kernel threads for now */
	if (!tsk->mm)
		return;

	for (i = 0; i < MAXLR; i++) {
		int q, same = 1;

		/* Nothing stored: remember the lowest empty slot */
		if (!latency_record[i].backtrace[0]) {
			if (first_free > i)
				first_free = i;
			continue;
		}
		for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
			unsigned long record = lat->backtrace[q];

			if (latency_record[i].backtrace[q] != record) {
				same = 0;
				break;
			}

			/* 0 and ULONG_MAX entries mean end of backtrace: */
			if (record == 0 || record == ULONG_MAX)
				break;
		}
		if (same) {
			latency_record[i].count++;
			latency_record[i].time += lat->time;
			if (lat->time > latency_record[i].max)
				latency_record[i].max = lat->time;
			return;
		}
	}

	/*
	 * Table full: drop the sample. Every index below MAXLR is a valid
	 * slot, including the last one.
	 */
	if (first_free >= MAXLR)
		return;

	/* Allocate a new one: */
	memcpy(&latency_record[first_free], lat, sizeof(*lat));
}

/*
 * Iterator to store a backtrace into a latency record entry
 */
static inline void store_stacktrace(struct task_struct *tsk,
					struct latency_record *lat)
{
	struct stack_trace trace;

	memset(&trace, 0, sizeof(trace));
	trace.max_entries = LT_BACKTRACEDEPTH;
	trace.entries = &lat->backtrace[0];
	save_stack_trace_tsk(tsk, &trace);
}

/**
 * __account_scheduler_latency - record an occurred latency
 * @tsk: the task struct of the task hitting the latency
 * @usecs: the duration of the latency in microseconds
 * @inter: 1 if the sleep was interruptible, 0 if uninterruptible
 *
 * This function is the main entry point for recording latency entries
 * as called by the scheduler.
 *
 * This function has a few special cases to deal with normal 'non-latency'
 * sleeps: specifically, interruptible sleep longer than 5 msec is skipped
 * since this usually is caused by waiting for events via select() and co.
 *
 * Negative latencies (caused by time going backwards) and zero-length
 * latencies are also explicitly skipped.
 *
 * The sample is accounted twice under latency_lock: once in the system-wide
 * table and once in the per-task table of @tsk. The per-task table holds at
 * most LT_SAVECOUNT distinct backtraces; tsk->latency_record_count is the
 * number of slots in use. Samples matching a stored backtrace are always
 * accumulated; samples with a new backtrace are dropped once the table is
 * full.
 */
void __sched
__account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
{
	unsigned long flags;
	int i, q;
	struct latency_record lat;

	/* Long interruptible waits are generally user requested... */
	if (inter && usecs > 5000)
		return;

	/* Negative sleeps are time going backwards */
	/* Zero-time sleeps are non-interesting */
	if (usecs <= 0)
		return;

	memset(&lat, 0, sizeof(lat));
	lat.count = 1;
	lat.time = usecs;
	lat.max = usecs;
	store_stacktrace(tsk, &lat);

	spin_lock_irqsave(&latency_lock, flags);

	account_global_scheduler_latency(tsk, &lat);

	/* Look for an already stored record with the same backtrace */
	for (i = 0; i < tsk->latency_record_count; i++) {
		struct latency_record *mylat;
		int same = 1;

		mylat = &tsk->latency_record[i];
		for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
			unsigned long record = lat.backtrace[q];

			if (mylat->backtrace[q] != record) {
				same = 0;
				break;
			}

			/* 0 and ULONG_MAX entries mean end of backtrace: */
			if (record == 0 || record == ULONG_MAX)
				break;
		}
		if (same) {
			mylat->count++;
			mylat->time += lat.time;
			if (lat.time > mylat->max)
				mylat->max = lat.time;
			goto out_unlock;
		}
	}

	/*
	 * short term hack; once LT_SAVECOUNT distinct causes are stored we
	 * stop adding new ones; future we recycle:
	 */
	if (tsk->latency_record_count >= LT_SAVECOUNT)
		goto out_unlock;

	/*
	 * Allocate a new one: the counter only advances when a slot is
	 * actually consumed, so slots are filled contiguously from index 0.
	 */
	i = tsk->latency_record_count++;
	memcpy(&tsk->latency_record[i], &lat, sizeof(lat));

out_unlock:
	spin_unlock_irqrestore(&latency_lock, flags);
}

static int lstats_show(struct seq_file *m, void *v)
{
	int i;

	seq_puts(m, "Latency Top version : v0.1\n");

	for (i = 0; i < MAXLR; i++) {
		if (latency_record[i].backtrace[0]) {
			int q;
			seq_printf(m, "%i %lu %lu ",
				latency_record[i].count,
				latency_record[i].time,
				latency_record[i].max);
			for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
				char sym[KSYM_SYMBOL_LEN];
				char *c;
				if (!latency_record[i].backtrace[q])
					break;
				if (latency_record[i].backtrace[q] == ULONG_MAX)
					break;
				sprint_symbol(sym, latency_record[i].backtrace[q]);
				c = strchr(sym, '+');
				if (c)
					*c = 0;
				seq_printf(m, "%s ", sym);
			}
			seq_printf(m, "\n");
		}
	}
	return 0;
}

/*
 * Write handler for the latency_stats proc file: any write clears the
 * system-wide latency table. The written data (@buf) and @offs are not
 * looked at; the full @count is reported as consumed.
 */
static ssize_t
lstats_write(struct file *file, const char __user *buf, size_t count,
	     loff_t *offs)
{
	clear_global_latency_tracing();

	return count;
}

/*
 * Open handler: set the file up as a single-record seq_file rendered by
 * lstats_show(), with no private data. Returns single_open()'s result.
 */
static int lstats_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, lstats_show, NULL);
}

/*
 * File operations of the latency_stats proc file: reads go through the
 * seq_file helpers, writes flush the system-wide table.
 */
static const struct file_operations lstats_fops = {
	.open		= lstats_open,
	.read		= seq_read,
	.write		= lstats_write,
	.llseek		= seq_lseek,
	.release	= single_release,
};

/*
 * Register the "latency_stats" proc entry (mode 0644) at device_initcall
 * time. The result of proc_create() is not checked; this always returns 0.
 */
static int __init init_lstats_procfs(void)
{
	proc_create("latency_stats", 0644, NULL, &lstats_fops);
	return 0;
}
device_initcall(init_lstats_procfs);
Commit	Line	Data
9745512c AV	1	/*
	2	* latencytop.c: Latency display infrastructure
	3	*
	4	* (C) Copyright 2008 Intel Corporation
	5	* Author: Arjan van de Ven <arjan@linux.intel.com>
	6	*
	7	* This program is free software; you can redistribute it and/or
	8	* modify it under the terms of the GNU General Public License
	9	* as published by the Free Software Foundation; version 2
	10	* of the License.
	11	*/
ad0b0fd5 AV	12
	13	/*
	14	* CONFIG_LATENCYTOP enables a kernel latency tracking infrastructure that is
	15	* used by the "latencytop" userspace tool. The latency that is tracked is not
	16	* the 'traditional' interrupt latency (which is primarily caused by something
	17	* else consuming CPU), but instead, it is the latency an application encounters
	18	* because the kernel sleeps on its behalf for various reasons.
	19	*
	20	* This code tracks 2 levels of statistics:
	21	* 1) System level latency
	22	* 2) Per process latency
	23	*
	24	* The latency is stored in fixed sized data structures in an accumulated form;
	25	* if the "same" latency cause is hit twice, this will be tracked as one entry
	26	* in the data structure. Both the count, total accumulated latency and maximum
	27	* latency are tracked in this data structure. When the fixed size structure is
	28	* full, no new causes are tracked until the buffer is flushed by writing to
	29	* the /proc file; the userspace tool does this on a regular basis.
	30	*
	31	* A latency cause is identified by a stringified backtrace at the point that
	32	* the scheduler gets invoked. The userland tool will use this string to
	33	* identify the cause of the latency in human readable form.
	34	*
	35	* The information is exported via /proc/latency_stats and /proc/<pid>/latency.
	36	* These files look like this:
	37	*
	38	* Latency Top version : v0.1
	39	* 70 59433 4897 i915_irq_wait drm_ioctl vfs_ioctl do_vfs_ioctl sys_ioctl
	40	* \| \| \| \|
	41	* \| \| \| +----> the stringified backtrace
	42	* \| \| +---------> The maximum latency for this entry in microseconds
	43	* \| +--------------> The accumulated latency for this entry (microseconds)
	44	* +-------------------> The number of times this entry is hit
	45	*
	46	* (note: the average latency is the accumulated latency divided by the number
	47	* of times)
	48	*/
	49
9745512c AV	50	#include <linux/latencytop.h>
	51	#include <linux/kallsyms.h>
	52	#include <linux/seq_file.h>
	53	#include <linux/notifier.h>
	54	#include <linux/spinlock.h>
	55	#include <linux/proc_fs.h>
	56	#include <linux/module.h>
	57	#include <linux/sched.h>
	58	#include <linux/list.h>
9745512c AV	59	#include <linux/stacktrace.h>
	60
	61	static DEFINE_SPINLOCK(latency_lock);
	62
	63	#define MAXLR 128
	64	static struct latency_record latency_record[MAXLR];
	65
	66	int latencytop_enabled;
	67
	68	void clear_all_latency_tracing(struct task_struct *p)
	69	{
	70	unsigned long flags;
	71
	72	if (!latencytop_enabled)
	73	return;
	74
	75	spin_lock_irqsave(&latency_lock, flags);
	76	memset(&p->latency_record, 0, sizeof(p->latency_record));
	77	p->latency_record_count = 0;
	78	spin_unlock_irqrestore(&latency_lock, flags);
	79	}
	80
	81	static void clear_global_latency_tracing(void)
	82	{
	83	unsigned long flags;
	84
	85	spin_lock_irqsave(&latency_lock, flags);
	86	memset(&latency_record, 0, sizeof(latency_record));
	87	spin_unlock_irqrestore(&latency_lock, flags);
	88	}
	89
	90	static void __sched
	91	account_global_scheduler_latency(struct task_struct tsk, struct latency_record lat)
	92	{
	93	int firstnonnull = MAXLR + 1;
	94	int i;
	95
	96	if (!latencytop_enabled)
	97	return;
	98
	99	/* skip kernel threads for now */
	100	if (!tsk->mm)
	101	return;
	102
	103	for (i = 0; i < MAXLR; i++) {
19fb518c DA	104	int q, same = 1;
19fb518c DA	105
9745512c AV	106	/* Nothing stored: */
	107	if (!latency_record[i].backtrace[0]) {
	108	if (firstnonnull > i)
	109	firstnonnull = i;
	110	continue;
	111	}
ad0b0fd5	112	for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
19fb518c DA	113	unsigned long record = lat->backtrace[q];
	114
	115	if (latency_record[i].backtrace[q] != record) {
9745512c	116	same = 0;
9745512c	117	break;
19fb518c DA	118	}
	119
	120	/* 0 and ULONG_MAX entries mean end of backtrace: */
	121	if (record == 0 \|\| record == ULONG_MAX)
9745512c AV	122	break;
	123	}
	124	if (same) {
	125	latency_record[i].count++;
	126	latency_record[i].time += lat->time;
	127	if (lat->time > latency_record[i].max)
	128	latency_record[i].max = lat->time;
	129	return;
	130	}
	131	}
	132
	133	i = firstnonnull;
	134	if (i >= MAXLR - 1)
	135	return;
	136
	137	/* Allocted a new one: */
	138	memcpy(&latency_record[i], lat, sizeof(struct latency_record));
	139	}
	140
ad0b0fd5 AV	141	/*
	142	* Iterator to store a backtrace into a latency record entry
	143	*/
	144	static inline void store_stacktrace(struct task_struct *tsk,
	145	struct latency_record *lat)
9745512c AV	146	{
	147	struct stack_trace trace;
	148
	149	memset(&trace, 0, sizeof(trace));
	150	trace.max_entries = LT_BACKTRACEDEPTH;
	151	trace.entries = &lat->backtrace[0];
9745512c AV	152	save_stack_trace_tsk(tsk, &trace);
	153	}
	154
ad0b0fd5 AV	155	/**
	156	* __account_scheduler_latency - record an occured latency
	157	* @tsk - the task struct of the task hitting the latency
	158	* @usecs - the duration of the latency in microseconds
	159	* @inter - 1 if the sleep was interruptible, 0 if uninterruptible
	160	*
	161	* This function is the main entry point for recording latency entries
	162	* as called by the scheduler.
	163	*
	164	* This function has a few special cases to deal with normal 'non-latency'
	165	* sleeps: specifically, interruptible sleep longer than 5 msec is skipped
	166	* since this usually is caused by waiting for events via select() and co.
	167	*
	168	* Negative latencies (caused by time going backwards) are also explicitly
	169	* skipped.
	170	*/
9745512c	171	void __sched
ad0b0fd5	172	__account_scheduler_latency(struct task_struct *tsk, int usecs, int inter)
9745512c AV	173	{
	174	unsigned long flags;
	175	int i, q;
	176	struct latency_record lat;
	177
9745512c AV	178	/* Long interruptible waits are generally user requested... */
	179	if (inter && usecs > 5000)
	180	return;
	181
ad0b0fd5 AV	182	/* Negative sleeps are time going backwards */
	183	/* Zero-time sleeps are non-interesting */
	184	if (usecs <= 0)
	185	return;
	186
9745512c AV	187	memset(&lat, 0, sizeof(lat));
	188	lat.count = 1;
	189	lat.time = usecs;
	190	lat.max = usecs;
	191	store_stacktrace(tsk, &lat);
	192
	193	spin_lock_irqsave(&latency_lock, flags);
	194
	195	account_global_scheduler_latency(tsk, &lat);
	196
	197	/*
	198	* short term hack; if we're > 32 we stop; future we recycle:
	199	*/
	200	tsk->latency_record_count++;
	201	if (tsk->latency_record_count >= LT_SAVECOUNT)
	202	goto out_unlock;
	203
ad0b0fd5	204	for (i = 0; i < LT_SAVECOUNT; i++) {
9745512c AV	205	struct latency_record *mylat;
9745512c AV	206	int same = 1;
19fb518c	207
9745512c	208	mylat = &tsk->latency_record[i];
ad0b0fd5	209	for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
19fb518c DA	210	unsigned long record = lat.backtrace[q];
	211
	212	if (mylat->backtrace[q] != record) {
9745512c	213	same = 0;
9745512c	214	break;
19fb518c DA	215	}
	216
	217	/* 0 and ULONG_MAX entries mean end of backtrace: */
	218	if (record == 0 \|\| record == ULONG_MAX)
9745512c AV	219	break;
	220	}
	221	if (same) {
	222	mylat->count++;
	223	mylat->time += lat.time;
	224	if (lat.time > mylat->max)
	225	mylat->max = lat.time;
	226	goto out_unlock;
	227	}
	228	}
	229
	230	/* Allocated a new one: */
	231	i = tsk->latency_record_count;
	232	memcpy(&tsk->latency_record[i], &lat, sizeof(struct latency_record));
	233
	234	out_unlock:
	235	spin_unlock_irqrestore(&latency_lock, flags);
	236	}
	237
	238	static int lstats_show(struct seq_file m, void v)
	239	{
	240	int i;
	241
	242	seq_puts(m, "Latency Top version : v0.1\n");
	243
	244	for (i = 0; i < MAXLR; i++) {
	245	if (latency_record[i].backtrace[0]) {
	246	int q;
ad0b0fd5	247	seq_printf(m, "%i %lu %lu ",
9745512c AV	248	latency_record[i].count,
	249	latency_record[i].time,
	250	latency_record[i].max);
	251	for (q = 0; q < LT_BACKTRACEDEPTH; q++) {
9c246247	252	char sym[KSYM_SYMBOL_LEN];
9745512c AV	253	char *c;
	254	if (!latency_record[i].backtrace[q])
	255	break;
	256	if (latency_record[i].backtrace[q] == ULONG_MAX)
	257	break;
	258	sprint_symbol(sym, latency_record[i].backtrace[q]);
	259	c = strchr(sym, '+');
	260	if (c)
	261	*c = 0;
	262	seq_printf(m, "%s ", sym);
	263	}
	264	seq_printf(m, "\n");
	265	}
	266	}
	267	return 0;
	268	}
	269
	270	static ssize_t
	271	lstats_write(struct file file, const char __user buf, size_t count,
	272	loff_t *offs)
	273	{
	274	clear_global_latency_tracing();
	275
	276	return count;
	277	}
	278
	279	static int lstats_open(struct inode inode, struct file filp)
	280	{
	281	return single_open(filp, lstats_show, NULL);
	282	}
	283
ad0b0fd5	284	static const struct file_operations lstats_fops = {
9745512c AV	285	.open = lstats_open,
	286	.read = seq_read,
	287	.write = lstats_write,
	288	.llseek = seq_lseek,
	289	.release = single_release,
	290	};
	291
	292	static int __init init_lstats_procfs(void)
	293	{
c33fff0a	294	proc_create("latency_stats", 0644, NULL, &lstats_fops);
9745512c AV	295	return 0;
9745512c AV	296	}
ad0b0fd5	297	device_initcall(init_lstats_procfs);