// SPDX-License-Identifier: GPL-2.0-only
/*
 * Print the CFS rbtree and other debugging details
 *
 * Copyright(C) 2007, Red Hat, Inc., Ingo Molnar
 */

/*
 * This allows printing both to /proc/sched_debug and
 * to the console
 */
#define SEQ_printf(m, x...)			\
 do {						\
	if (m)					\
		seq_printf(m, x);		\
	else					\
		pr_cont(x);			\
 } while (0)

/*
 * Ease the printing of nsec fields:
 */
static long long nsec_high(unsigned long long nsec)
{
	if ((long long)nsec < 0) {
		nsec = -nsec;
		do_div(nsec, 1000000);
		return -nsec;
	}
	do_div(nsec, 1000000);

	return nsec;
}

static unsigned long nsec_low(unsigned long long nsec)
{
	if ((long long)nsec < 0)
		nsec = -nsec;

	return do_div(nsec, 1000000);
}

#define SPLIT_NS(x) nsec_high(x), nsec_low(x)
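
/*
 * Worked example: a value of 3500000 ns prints as "3.500000" when fed to
 * a "%Ld.%06ld" format: nsec_high() yields the millisecond part (3) and
 * nsec_low() the sub-millisecond remainder (500000).
 */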

#define SCHED_FEAT(name, enabled)	\
	#name ,

static const char * const sched_feat_names[] = {
#include "features.h"
};

#undef SCHED_FEAT

static int sched_feat_show(struct seq_file *m, void *v)
{
	int i;

	for (i = 0; i < __SCHED_FEAT_NR; i++) {
		if (!(sysctl_sched_features & (1UL << i)))
			seq_puts(m, "NO_");
		seq_printf(m, "%s ", sched_feat_names[i]);
	}
	seq_puts(m, "\n");

	return 0;
}
#ifdef CONFIG_JUMP_LABEL

#define jump_label_key__true  STATIC_KEY_INIT_TRUE
#define jump_label_key__false STATIC_KEY_INIT_FALSE

#define SCHED_FEAT(name, enabled)	\
	jump_label_key__##enabled ,

struct static_key sched_feat_keys[__SCHED_FEAT_NR] = {
#include "features.h"
};

#undef SCHED_FEAT

static void sched_feat_disable(int i)
{
	static_key_disable_cpuslocked(&sched_feat_keys[i]);
}

static void sched_feat_enable(int i)
{
	static_key_enable_cpuslocked(&sched_feat_keys[i]);
}
#else
static void sched_feat_disable(int i) { };
static void sched_feat_enable(int i) { };
#endif /* CONFIG_JUMP_LABEL */

static int sched_feat_set(char *cmp)
{
	int i;
	int neg = 0;

	if (strncmp(cmp, "NO_", 3) == 0) {
		neg = 1;
		cmp += 3;
	}

	i = match_string(sched_feat_names, __SCHED_FEAT_NR, cmp);
	if (i < 0)
		return i;

	if (neg) {
		sysctl_sched_features &= ~(1UL << i);
		sched_feat_disable(i);
	} else {
		sysctl_sched_features |= (1UL << i);
		sched_feat_enable(i);
	}

	return 0;
}
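
/*
 * A note on the syntax accepted above: writing "FEATURE" enables a
 * feature, writing "NO_FEATURE" disables it. Both flip the bit in
 * sysctl_sched_features and, with CONFIG_JUMP_LABEL, the matching
 * static key, so the two stay in sync.
 */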

static ssize_t
sched_feat_write(struct file *filp, const char __user *ubuf,
		 size_t cnt, loff_t *ppos)
{
	char buf[64];
	char *cmp;
	int ret;
	struct inode *inode;

	if (cnt > 63)
		cnt = 63;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;
	cmp = strstrip(buf);

	/* Ensure the static_key remains in a consistent state */
	inode = file_inode(filp);
	cpus_read_lock();
	inode_lock(inode);
	ret = sched_feat_set(cmp);
	inode_unlock(inode);
	cpus_read_unlock();
	if (ret < 0)
		return ret;

	*ppos += cnt;

	return cnt;
}

static int sched_feat_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_feat_show, NULL);
}

static const struct file_operations sched_feat_fops = {
	.open		= sched_feat_open,
	.write		= sched_feat_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
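
/*
 * Usage sketch (assuming debugfs is mounted at the usual /sys/kernel/debug):
 *
 *	cat /sys/kernel/debug/sched/features
 *	echo NO_GENTLE_FAIR_SLEEPERS > /sys/kernel/debug/sched/features
 *
 * The feature names come from features.h; GENTLE_FAIR_SLEEPERS above is
 * just an illustrative example.
 */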

#ifdef CONFIG_SMP

static ssize_t sched_scaling_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	char buf[16];
	unsigned int scaling;

	if (cnt > 15)
		cnt = 15;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;
	buf[cnt] = '\0';

	if (kstrtouint(buf, 10, &scaling))
		return -EINVAL;

	if (scaling >= SCHED_TUNABLESCALING_END)
		return -EINVAL;

	sysctl_sched_tunable_scaling = scaling;
	if (sched_update_scaling())
		return -EINVAL;

	*ppos += cnt;
	return cnt;
}

static int sched_scaling_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%d\n", sysctl_sched_tunable_scaling);
	return 0;
}

static int sched_scaling_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_scaling_show, NULL);
}

static const struct file_operations sched_scaling_fops = {
	.open		= sched_scaling_open,
	.write		= sched_scaling_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#endif /* CONFIG_SMP */
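
/*
 * Accepted values for tunable_scaling range from 0 to
 * SCHED_TUNABLESCALING_END - 1; sched_debug_header() below reports them
 * symbolically as "none", "logarithmic" and "linear".
 */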

#ifdef CONFIG_PREEMPT_DYNAMIC

static ssize_t sched_dynamic_write(struct file *filp, const char __user *ubuf,
				   size_t cnt, loff_t *ppos)
{
	char buf[16];
	int mode;

	if (cnt > 15)
		cnt = 15;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt] = 0;
	mode = sched_dynamic_mode(strstrip(buf));
	if (mode < 0)
		return mode;

	sched_dynamic_update(mode);

	*ppos += cnt;

	return cnt;
}

static int sched_dynamic_show(struct seq_file *m, void *v)
{
	static const char * preempt_modes[] = {
		"none", "voluntary", "full"
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
		if (preempt_dynamic_mode == i)
			seq_puts(m, "(");
		seq_puts(m, preempt_modes[i]);
		if (preempt_dynamic_mode == i)
			seq_puts(m, ")");

		seq_puts(m, " ");
	}
	seq_puts(m, "\n");

	return 0;
}

static int sched_dynamic_open(struct inode *inode, struct file *filp)
{
	return single_open(filp, sched_dynamic_show, NULL);
}

static const struct file_operations sched_dynamic_fops = {
	.open		= sched_dynamic_open,
	.write		= sched_dynamic_write,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#endif /* CONFIG_PREEMPT_DYNAMIC */
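
/*
 * Reading the "preempt" file prints all modes with the current one in
 * parentheses, e.g. "none (voluntary) full "; writing one of the mode
 * names switches the preemption model at runtime.
 */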

__read_mostly bool sched_debug_verbose;

static const struct seq_operations sched_debug_sops;

static int sched_debug_open(struct inode *inode, struct file *filp)
{
	return seq_open(filp, &sched_debug_sops);
}

static const struct file_operations sched_debug_fops = {
	.open		= sched_debug_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};

static struct dentry *debugfs_sched;

static __init int sched_init_debug(void)
{
	struct dentry __maybe_unused *numa;

	debugfs_sched = debugfs_create_dir("sched", NULL);

	debugfs_create_file("features", 0644, debugfs_sched, NULL, &sched_feat_fops);
	debugfs_create_bool("verbose", 0644, debugfs_sched, &sched_debug_verbose);
#ifdef CONFIG_PREEMPT_DYNAMIC
	debugfs_create_file("preempt", 0644, debugfs_sched, NULL, &sched_dynamic_fops);
#endif

	debugfs_create_u32("latency_ns", 0644, debugfs_sched, &sysctl_sched_latency);
	debugfs_create_u32("min_granularity_ns", 0644, debugfs_sched, &sysctl_sched_min_granularity);
	debugfs_create_u32("wakeup_granularity_ns", 0644, debugfs_sched, &sysctl_sched_wakeup_granularity);

	debugfs_create_u32("latency_warn_ms", 0644, debugfs_sched, &sysctl_resched_latency_warn_ms);
	debugfs_create_u32("latency_warn_once", 0644, debugfs_sched, &sysctl_resched_latency_warn_once);

#ifdef CONFIG_SMP
	debugfs_create_file("tunable_scaling", 0644, debugfs_sched, NULL, &sched_scaling_fops);
	debugfs_create_u32("migration_cost_ns", 0644, debugfs_sched, &sysctl_sched_migration_cost);
	debugfs_create_u32("nr_migrate", 0644, debugfs_sched, &sysctl_sched_nr_migrate);

	mutex_lock(&sched_domains_mutex);
	update_sched_domain_debugfs();
	mutex_unlock(&sched_domains_mutex);
#endif

#ifdef CONFIG_NUMA_BALANCING
	numa = debugfs_create_dir("numa_balancing", debugfs_sched);

	debugfs_create_u32("scan_delay_ms", 0644, numa, &sysctl_numa_balancing_scan_delay);
	debugfs_create_u32("scan_period_min_ms", 0644, numa, &sysctl_numa_balancing_scan_period_min);
	debugfs_create_u32("scan_period_max_ms", 0644, numa, &sysctl_numa_balancing_scan_period_max);
	debugfs_create_u32("scan_size_mb", 0644, numa, &sysctl_numa_balancing_scan_size);
#endif

	debugfs_create_file("debug", 0444, debugfs_sched, NULL, &sched_debug_fops);

	return 0;
}
late_initcall(sched_init_debug);
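
/*
 * The resulting tree, with debugfs mounted at its usual /sys/kernel/debug:
 *
 *	sched/
 *		features, verbose, preempt (PREEMPT_DYNAMIC),
 *		latency_ns, min_granularity_ns, wakeup_granularity_ns,
 *		latency_warn_ms, latency_warn_once,
 *		tunable_scaling, migration_cost_ns, nr_migrate (SMP),
 *		domains/cpuN/domainM/ (SMP, built below),
 *		numa_balancing/ (NUMA_BALANCING),
 *		debug
 */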

#ifdef CONFIG_SMP

static cpumask_var_t		sd_sysctl_cpus;
static struct dentry		*sd_dentry;

static int sd_flags_show(struct seq_file *m, void *v)
{
	unsigned long flags = *(unsigned int *)m->private;
	int idx;

	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
		seq_puts(m, sd_flag_debug[idx].name);
		seq_puts(m, " ");
	}
	seq_puts(m, "\n");

	return 0;
}

static int sd_flags_open(struct inode *inode, struct file *file)
{
	return single_open(file, sd_flags_show, inode->i_private);
}

static const struct file_operations sd_flags_fops = {
	.open		= sd_flags_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

static void register_sd(struct sched_domain *sd, struct dentry *parent)
{
#define SDM(type, mode, member)	\
	debugfs_create_##type(#member, mode, parent, &sd->member)

	SDM(ulong, 0644, min_interval);
	SDM(ulong, 0644, max_interval);
	SDM(u64,   0644, max_newidle_lb_cost);
	SDM(u32,   0644, busy_factor);
	SDM(u32,   0644, imbalance_pct);
	SDM(u32,   0644, cache_nice_tries);
	SDM(str,   0444, name);

#undef SDM

	debugfs_create_file("flags", 0444, parent, &sd->flags, &sd_flags_fops);
}

void update_sched_domain_debugfs(void)
{
	int cpu, i;

	/*
	 * This can unfortunately be invoked before sched_init_debug() creates
	 * the debug directory. Don't touch sd_sysctl_cpus until then.
	 */
	if (!debugfs_sched)
		return;

	if (!cpumask_available(sd_sysctl_cpus)) {
		if (!alloc_cpumask_var(&sd_sysctl_cpus, GFP_KERNEL))
			return;
		cpumask_copy(sd_sysctl_cpus, cpu_possible_mask);
	}

	if (!sd_dentry)
		sd_dentry = debugfs_create_dir("domains", debugfs_sched);

	for_each_cpu(cpu, sd_sysctl_cpus) {
		struct sched_domain *sd;
		struct dentry *d_cpu;
		char buf[32];

		snprintf(buf, sizeof(buf), "cpu%d", cpu);
		debugfs_remove(debugfs_lookup(buf, sd_dentry));
		d_cpu = debugfs_create_dir(buf, sd_dentry);

		i = 0;
		for_each_domain(cpu, sd) {
			struct dentry *d_sd;

			snprintf(buf, sizeof(buf), "domain%d", i);
			d_sd = debugfs_create_dir(buf, d_cpu);

			register_sd(sd, d_sd);
			i++;
		}

		__cpumask_clear_cpu(cpu, sd_sysctl_cpus);
	}
}
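
/*
 * sd_sysctl_cpus doubles as a dirty mask: dirty_sched_domain_sysctl()
 * marks a CPU whose domain hierarchy changed, and the next call to
 * update_sched_domain_debugfs() rebuilds that CPU's directory and
 * clears the bit again.
 */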
void dirty_sched_domain_sysctl(int cpu)
{
	if (cpumask_available(sd_sysctl_cpus))
		__cpumask_set_cpu(cpu, sd_sysctl_cpus);
}

#endif /* CONFIG_SMP */

#ifdef CONFIG_FAIR_GROUP_SCHED
static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group *tg)
{
	struct sched_entity *se = tg->se[cpu];

#define P(F)		SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)F)
#define P_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld\n", #F, (long long)schedstat_val(F))
#define PN(F)		SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)F))
#define PN_SCHEDSTAT(F)	SEQ_printf(m, "  .%-30s: %lld.%06ld\n", #F, SPLIT_NS((long long)schedstat_val(F)))

	if (!se)
		return;

	PN(se->exec_start);
	PN(se->vruntime);
	PN(se->sum_exec_runtime);

	if (schedstat_enabled()) {
		PN_SCHEDSTAT(se->statistics.wait_start);
		PN_SCHEDSTAT(se->statistics.sleep_start);
		PN_SCHEDSTAT(se->statistics.block_start);
		PN_SCHEDSTAT(se->statistics.sleep_max);
		PN_SCHEDSTAT(se->statistics.block_max);
		PN_SCHEDSTAT(se->statistics.exec_max);
		PN_SCHEDSTAT(se->statistics.slice_max);
		PN_SCHEDSTAT(se->statistics.wait_max);
		PN_SCHEDSTAT(se->statistics.wait_sum);
		P_SCHEDSTAT(se->statistics.wait_count);
	}

	P(se->load.weight);
#ifdef CONFIG_SMP
	P(se->avg.load_avg);
	P(se->avg.util_avg);
	P(se->avg.runnable_avg);
#endif

#undef PN_SCHEDSTAT
#undef PN
#undef P_SCHEDSTAT
#undef P
}
#endif

#ifdef CONFIG_CGROUP_SCHED
static DEFINE_SPINLOCK(sched_debug_lock);
static char group_path[PATH_MAX];

static void task_group_path(struct task_group *tg, char *path, int plen)
{
	if (autogroup_path(tg, path, plen))
		return;

	cgroup_path(tg->css.cgroup, path, plen);
}

/*
 * Only one SEQ_printf_task_group_path() caller at a time can use the
 * full-length group_path[] for the cgroup path. Other simultaneous
 * callers fall back to a shorter stack buffer. A "..." suffix is
 * appended to the stack buffer so that it shows up whenever the output
 * fills the buffer, to indicate possible path-name truncation.
 */
#define SEQ_printf_task_group_path(m, tg, fmt...)			\
{									\
	if (spin_trylock(&sched_debug_lock)) {				\
		task_group_path(tg, group_path, sizeof(group_path));	\
		SEQ_printf(m, fmt, group_path);				\
		spin_unlock(&sched_debug_lock);				\
	} else {							\
		char buf[128];						\
		char *bufend = buf + sizeof(buf) - 3;			\
		task_group_path(tg, buf, bufend - buf);			\
		strcpy(bufend - 1, "...");				\
		SEQ_printf(m, fmt, buf);				\
	}								\
}
#endif

static void
print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
{
	if (task_current(rq, p))
		SEQ_printf(m, ">R");
	else
		SEQ_printf(m, " %c", task_state_to_char(p));

	SEQ_printf(m, " %15s %5d %9Ld.%06ld %9Ld %5d ",
		p->comm, task_pid_nr(p),
		SPLIT_NS(p->se.vruntime),
		(long long)(p->nvcsw + p->nivcsw),
		p->prio);

	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.wait_sum)),
		SPLIT_NS(p->se.sum_exec_runtime),
		SPLIT_NS(schedstat_val_or_zero(p->se.statistics.sum_sleep_runtime)));

#ifdef CONFIG_NUMA_BALANCING
	SEQ_printf(m, " %d %d", task_node(p), task_numa_group_id(p));
#endif
#ifdef CONFIG_CGROUP_SCHED
	SEQ_printf_task_group_path(m, task_group(p), " %s")
#endif

	SEQ_printf(m, "\n");
}

static void print_rq(struct seq_file *m, struct rq *rq, int rq_cpu)
{
	struct task_struct *g, *p;

	SEQ_printf(m, "\n");
	SEQ_printf(m, "runnable tasks:\n");
	SEQ_printf(m, " S            task   PID         tree-key  switches  prio"
		   "     wait-time             sum-exec        sum-sleep\n");
	SEQ_printf(m, "-------------------------------------------------------"
		   "------------------------------------------------------\n");

	rcu_read_lock();
	for_each_process_thread(g, p) {
		if (task_cpu(p) != rq_cpu)
			continue;

		print_task(m, rq, p);
	}
	rcu_read_unlock();
}

void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
{
	s64 MIN_vruntime = -1, min_vruntime, max_vruntime = -1,
		spread, rq0_min_vruntime, spread0;
	struct rq *rq = cpu_rq(cpu);
	struct sched_entity *last;
	unsigned long flags;

#ifdef CONFIG_FAIR_GROUP_SCHED
	SEQ_printf(m, "\n");
	SEQ_printf_task_group_path(m, cfs_rq->tg, "cfs_rq[%d]:%s\n", cpu);
#else
	SEQ_printf(m, "\n");
	SEQ_printf(m, "cfs_rq[%d]:\n", cpu);
#endif
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "exec_clock",
			SPLIT_NS(cfs_rq->exec_clock));

	raw_spin_rq_lock_irqsave(rq, flags);
	if (rb_first_cached(&cfs_rq->tasks_timeline))
		MIN_vruntime = (__pick_first_entity(cfs_rq))->vruntime;
	last = __pick_last_entity(cfs_rq);
	if (last)
		max_vruntime = last->vruntime;
	min_vruntime = cfs_rq->min_vruntime;
	rq0_min_vruntime = cpu_rq(0)->cfs.min_vruntime;
	raw_spin_rq_unlock_irqrestore(rq, flags);
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "MIN_vruntime",
			SPLIT_NS(MIN_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "min_vruntime",
			SPLIT_NS(min_vruntime));
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "max_vruntime",
			SPLIT_NS(max_vruntime));
	spread = max_vruntime - MIN_vruntime;
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread",
			SPLIT_NS(spread));
	spread0 = min_vruntime - rq0_min_vruntime;
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", "spread0",
			SPLIT_NS(spread0));
	SEQ_printf(m, "  .%-30s: %d\n", "nr_spread_over",
			cfs_rq->nr_spread_over);
	SEQ_printf(m, "  .%-30s: %d\n", "nr_running", cfs_rq->nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
	SEQ_printf(m, "  .%-30s: %d\n", "idle_h_nr_running",
			cfs_rq->idle_h_nr_running);
	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
#ifdef CONFIG_SMP
	SEQ_printf(m, "  .%-30s: %lu\n", "load_avg",
			cfs_rq->avg.load_avg);
	SEQ_printf(m, "  .%-30s: %lu\n", "runnable_avg",
			cfs_rq->avg.runnable_avg);
	SEQ_printf(m, "  .%-30s: %lu\n", "util_avg",
			cfs_rq->avg.util_avg);
	SEQ_printf(m, "  .%-30s: %u\n", "util_est_enqueued",
			cfs_rq->avg.util_est.enqueued);
	SEQ_printf(m, "  .%-30s: %ld\n", "removed.load_avg",
			cfs_rq->removed.load_avg);
	SEQ_printf(m, "  .%-30s: %ld\n", "removed.util_avg",
			cfs_rq->removed.util_avg);
	SEQ_printf(m, "  .%-30s: %ld\n", "removed.runnable_avg",
			cfs_rq->removed.runnable_avg);
#ifdef CONFIG_FAIR_GROUP_SCHED
	SEQ_printf(m, "  .%-30s: %lu\n", "tg_load_avg_contrib",
			cfs_rq->tg_load_avg_contrib);
	SEQ_printf(m, "  .%-30s: %ld\n", "tg_load_avg",
			atomic_long_read(&cfs_rq->tg->load_avg));
#endif
#endif
#ifdef CONFIG_CFS_BANDWIDTH
	SEQ_printf(m, "  .%-30s: %d\n", "throttled",
			cfs_rq->throttled);
	SEQ_printf(m, "  .%-30s: %d\n", "throttle_count",
			cfs_rq->throttle_count);
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	print_cfs_group_stats(m, cpu, cfs_rq->tg);
#endif
}

void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq)
{
#ifdef CONFIG_RT_GROUP_SCHED
	SEQ_printf(m, "\n");
	SEQ_printf_task_group_path(m, rt_rq->tg, "rt_rq[%d]:%s\n", cpu);
#else
	SEQ_printf(m, "\n");
	SEQ_printf(m, "rt_rq[%d]:\n", cpu);
#endif

#define P(x) \
	SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rt_rq->x))
#define PU(x) \
	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(rt_rq->x))
#define PN(x) \
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rt_rq->x))

	PU(rt_nr_running);
#ifdef CONFIG_SMP
	PU(rt_nr_migratory);
#endif
	P(rt_throttled);
	PN(rt_time);
	PN(rt_runtime);

#undef PN
#undef PU
#undef P
}

void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq)
{
	struct dl_bw *dl_bw;

	SEQ_printf(m, "\n");
	SEQ_printf(m, "dl_rq[%d]:\n", cpu);

#define PU(x) \
	SEQ_printf(m, "  .%-30s: %lu\n", #x, (unsigned long)(dl_rq->x))

	PU(dl_nr_running);
#ifdef CONFIG_SMP
	PU(dl_nr_migratory);
	dl_bw = &cpu_rq(cpu)->rd->dl_bw;
#else
	dl_bw = &dl_rq->dl_bw;
#endif
	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->bw", dl_bw->bw);
	SEQ_printf(m, "  .%-30s: %lld\n", "dl_bw->total_bw", dl_bw->total_bw);

#undef PU
}

static void print_cpu(struct seq_file *m, int cpu)
{
	struct rq *rq = cpu_rq(cpu);

#ifdef CONFIG_X86
	{
		unsigned int freq = cpu_khz ? : 1;

		SEQ_printf(m, "cpu#%d, %u.%03u MHz\n",
			   cpu, freq / 1000, (freq % 1000));
	}
#else
	SEQ_printf(m, "cpu#%d\n", cpu);
#endif

#define P(x)								\
do {									\
	if (sizeof(rq->x) == 4)						\
		SEQ_printf(m, "  .%-30s: %ld\n", #x, (long)(rq->x));	\
	else								\
		SEQ_printf(m, "  .%-30s: %Ld\n", #x, (long long)(rq->x));\
} while (0)

#define PN(x) \
	SEQ_printf(m, "  .%-30s: %Ld.%06ld\n", #x, SPLIT_NS(rq->x))

	P(nr_running);
	P(nr_switches);
	P(nr_uninterruptible);
	PN(next_balance);
	SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
	PN(clock);
	PN(clock_task);
#undef P
#undef PN

#ifdef CONFIG_SMP
#define P64(n) SEQ_printf(m, "  .%-30s: %Ld\n", #n, rq->n);
	P64(avg_idle);
	P64(max_idle_balance_cost);
#undef P64
#endif

#define P(n) SEQ_printf(m, "  .%-30s: %d\n", #n, schedstat_val(rq->n));
	if (schedstat_enabled()) {
		P(yld_count);
		P(sched_count);
		P(sched_goidle);
		P(ttwu_count);
		P(ttwu_local);
	}
#undef P

	print_cfs_stats(m, cpu);
	print_rt_stats(m, cpu);
	print_dl_stats(m, cpu);

	print_rq(m, rq, cpu);
	SEQ_printf(m, "\n");
}

static const char *sched_tunable_scaling_names[] = {
	"none",
	"logarithmic",
	"linear"
};

static void sched_debug_header(struct seq_file *m)
{
	u64 ktime, sched_clk, cpu_clk;
	unsigned long flags;

	local_irq_save(flags);
	ktime = ktime_to_ns(ktime_get());
	sched_clk = sched_clock();
	cpu_clk = local_clock();
	local_irq_restore(flags);

	SEQ_printf(m, "Sched Debug Version: v0.11, %s %.*s\n",
		init_utsname()->release,
		(int)strcspn(init_utsname()->version, " "),
		init_utsname()->version);

#define P(x) \
	SEQ_printf(m, "%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(ktime);
	PN(sched_clk);
	PN(cpu_clk);
	P(jiffies);
#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
	P(sched_clock_stable());
#endif
#undef PN
#undef P

	SEQ_printf(m, "\n");
	SEQ_printf(m, "sysctl_sched\n");

#define P(x) \
	SEQ_printf(m, "  .%-40s: %Ld\n", #x, (long long)(x))
#define PN(x) \
	SEQ_printf(m, "  .%-40s: %Ld.%06ld\n", #x, SPLIT_NS(x))
	PN(sysctl_sched_latency);
	PN(sysctl_sched_min_granularity);
	PN(sysctl_sched_wakeup_granularity);
	P(sysctl_sched_child_runs_first);
	P(sysctl_sched_features);
#undef PN
#undef P

	SEQ_printf(m, "  .%-40s: %d (%s)\n",
		"sysctl_sched_tunable_scaling",
		sysctl_sched_tunable_scaling,
		sched_tunable_scaling_names[sysctl_sched_tunable_scaling]);
	SEQ_printf(m, "\n");
}

static int sched_debug_show(struct seq_file *m, void *v)
{
	int cpu = (unsigned long)(v - 2);

	if (cpu != -1)
		print_cpu(m, cpu);
	else
		sched_debug_header(m);

	return 0;
}

void sysrq_sched_debug_show(void)
{
	int cpu;

	sched_debug_header(NULL);
	for_each_online_cpu(cpu) {
		/*
		 * Need to reset softlockup watchdogs on all CPUs, because
		 * another CPU might be blocked waiting for us to process
		 * an IPI or stop_machine.
		 */
		touch_nmi_watchdog();
		touch_all_softlockup_watchdogs();
		print_cpu(NULL, cpu);
	}
}

/*
 * This iterator needs some explanation.
 * It returns 1 for the header position.
 * This means 2 is CPU 0.
 * In a hotplugged system some CPUs, including CPU 0, may be missing so we have
 * to use cpumask_* to iterate over the CPUs.
 */
static void *sched_debug_start(struct seq_file *file, loff_t *offset)
{
	unsigned long n = *offset;

	if (n == 0)
		return (void *) 1;

	n--;

	if (n > 0)
		n = cpumask_next(n - 1, cpu_online_mask);
	else
		n = cpumask_first(cpu_online_mask);

	*offset = n + 1;

	if (n < nr_cpu_ids)
		return (void *)(unsigned long)(n + 2);

	return NULL;
}
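
/*
 * Concretely: *offset == 0 maps to the header token (1); *offset == k for
 * k >= 1 maps to the first online CPU numbered >= k - 1 (skipping holes),
 * returned as (cpu + 2) so that sched_debug_show() can recover the CPU
 * number via (v - 2), where the header token then decodes to -1.
 */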

static void *sched_debug_next(struct seq_file *file, void *data, loff_t *offset)
{
	(*offset)++;
	return sched_debug_start(file, offset);
}

static void sched_debug_stop(struct seq_file *file, void *data)
{
}

static const struct seq_operations sched_debug_sops = {
	.start		= sched_debug_start,
	.next		= sched_debug_next,
	.stop		= sched_debug_stop,
	.show		= sched_debug_show,
};

#define __PS(S, F) SEQ_printf(m, "%-45s:%21Ld\n", S, (long long)(F))
#define __P(F) __PS(#F, F)
#define   P(F) __PS(#F, p->F)
#define   PM(F, M) __PS(#F, p->F & (M))
#define __PSN(S, F) SEQ_printf(m, "%-45s:%14Ld.%06ld\n", S, SPLIT_NS((long long)(F)))
#define __PN(F) __PSN(#F, F)
#define   PN(F) __PSN(#F, p->F)

#ifdef CONFIG_NUMA_BALANCING
void print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
		unsigned long tpf, unsigned long gsf, unsigned long gpf)
{
	SEQ_printf(m, "numa_faults node=%d ", node);
	SEQ_printf(m, "task_private=%lu task_shared=%lu ", tpf, tsf);
	SEQ_printf(m, "group_private=%lu group_shared=%lu\n", gpf, gsf);
}
#endif

static void sched_show_numa(struct task_struct *p, struct seq_file *m)
{
#ifdef CONFIG_NUMA_BALANCING
	struct mempolicy *pol;

	if (p->mm)
		P(mm->numa_scan_seq);

	task_lock(p);
	pol = p->mempolicy;
	if (pol && !(pol->flags & MPOL_F_MORON))
		pol = NULL;
	mpol_get(pol);
	task_unlock(p);

	P(numa_pages_migrated);
	P(numa_preferred_nid);
	P(total_numa_faults);
	SEQ_printf(m, "current_node=%d, numa_group_id=%d\n",
			task_node(p), task_numa_group_id(p));
	show_numa_stats(p, m);
	mpol_put(pol);
#endif
}

void proc_sched_show_task(struct task_struct *p, struct pid_namespace *ns,
						  struct seq_file *m)
{
	unsigned long nr_switches;

	SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr_ns(p, ns),
						get_nr_threads(p));
	SEQ_printf(m,
		"---------------------------------------------------------"
		"----------\n");

#define P_SCHEDSTAT(F)  __PS(#F, schedstat_val(p->F))
#define PN_SCHEDSTAT(F) __PSN(#F, schedstat_val(p->F))

	PN(se.exec_start);
	PN(se.vruntime);
	PN(se.sum_exec_runtime);

	nr_switches = p->nvcsw + p->nivcsw;

	P(se.nr_migrations);

	if (schedstat_enabled()) {
		u64 avg_atom, avg_per_cpu;

		PN_SCHEDSTAT(se.statistics.sum_sleep_runtime);
		PN_SCHEDSTAT(se.statistics.wait_start);
		PN_SCHEDSTAT(se.statistics.sleep_start);
		PN_SCHEDSTAT(se.statistics.block_start);
		PN_SCHEDSTAT(se.statistics.sleep_max);
		PN_SCHEDSTAT(se.statistics.block_max);
		PN_SCHEDSTAT(se.statistics.exec_max);
		PN_SCHEDSTAT(se.statistics.slice_max);
		PN_SCHEDSTAT(se.statistics.wait_max);
		PN_SCHEDSTAT(se.statistics.wait_sum);
		P_SCHEDSTAT(se.statistics.wait_count);
		PN_SCHEDSTAT(se.statistics.iowait_sum);
		P_SCHEDSTAT(se.statistics.iowait_count);
		P_SCHEDSTAT(se.statistics.nr_migrations_cold);
		P_SCHEDSTAT(se.statistics.nr_failed_migrations_affine);
		P_SCHEDSTAT(se.statistics.nr_failed_migrations_running);
		P_SCHEDSTAT(se.statistics.nr_failed_migrations_hot);
		P_SCHEDSTAT(se.statistics.nr_forced_migrations);
		P_SCHEDSTAT(se.statistics.nr_wakeups);
		P_SCHEDSTAT(se.statistics.nr_wakeups_sync);
		P_SCHEDSTAT(se.statistics.nr_wakeups_migrate);
		P_SCHEDSTAT(se.statistics.nr_wakeups_local);
		P_SCHEDSTAT(se.statistics.nr_wakeups_remote);
		P_SCHEDSTAT(se.statistics.nr_wakeups_affine);
		P_SCHEDSTAT(se.statistics.nr_wakeups_affine_attempts);
		P_SCHEDSTAT(se.statistics.nr_wakeups_passive);
		P_SCHEDSTAT(se.statistics.nr_wakeups_idle);
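
		/*
		 * Derived averages: avg_atom is runtime per context
		 * switch, avg_per_cpu is runtime per migration (i.e. how
		 * long the task runs on a CPU, on average, before being
		 * moved); -1 means "no switches/migrations yet".
		 */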
		avg_atom = p->se.sum_exec_runtime;
		if (nr_switches)
			avg_atom = div64_ul(avg_atom, nr_switches);
		else
			avg_atom = -1LL;

		avg_per_cpu = p->se.sum_exec_runtime;
		if (p->se.nr_migrations) {
			avg_per_cpu = div64_u64(avg_per_cpu,
						p->se.nr_migrations);
		} else {
			avg_per_cpu = -1;
		}

		__PN(avg_atom);
		__PN(avg_per_cpu);
	}

	__P(nr_switches);
	__PS("nr_voluntary_switches", p->nvcsw);
	__PS("nr_involuntary_switches", p->nivcsw);

	P(se.load.weight);
#ifdef CONFIG_SMP
	P(se.avg.load_sum);
	P(se.avg.runnable_sum);
	P(se.avg.util_sum);
	P(se.avg.load_avg);
	P(se.avg.runnable_avg);
	P(se.avg.util_avg);
	P(se.avg.last_update_time);
	P(se.avg.util_est.ewma);
	PM(se.avg.util_est.enqueued, ~UTIL_AVG_UNCHANGED);
#endif
#ifdef CONFIG_UCLAMP_TASK
	__PS("uclamp.min", p->uclamp_req[UCLAMP_MIN].value);
	__PS("uclamp.max", p->uclamp_req[UCLAMP_MAX].value);
	__PS("effective uclamp.min", uclamp_eff_value(p, UCLAMP_MIN));
	__PS("effective uclamp.max", uclamp_eff_value(p, UCLAMP_MAX));
#endif
	P(policy);
	P(prio);
	if (task_has_dl_policy(p)) {
		P(dl.runtime);
		P(dl.deadline);
	}
#undef PN_SCHEDSTAT
#undef P_SCHEDSTAT

	{
		unsigned int this_cpu = raw_smp_processor_id();
		u64 t0, t1;

		t0 = cpu_clock(this_cpu);
		t1 = cpu_clock(this_cpu);
		__PS("clock-delta", t1-t0);
	}

	sched_show_numa(p, m);
}

void proc_sched_set_task(struct task_struct *p)
{
#ifdef CONFIG_SCHEDSTATS
	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
#endif
}
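
/*
 * Rate-limited (at most one report per hour) complaint that need_resched
 * has been set for longer than the warning threshold without the CPU
 * scheduling; @latency is in nanoseconds.
 */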
void resched_latency_warn(int cpu, u64 latency)
{
	static DEFINE_RATELIMIT_STATE(latency_check_ratelimit, 60 * 60 * HZ, 1);

	WARN(__ratelimit(&latency_check_ratelimit),
	     "sched: CPU %d need_resched set for > %llu ns (%d ticks) "
	     "without schedule\n",
	     cpu, latency, cpu_rq(cpu)->ticks_without_resched);
}