1#ifndef _LINUX_SCHED_H
2#define _LINUX_SCHED_H
3
4#include <uapi/linux/sched.h>
5
6#include <linux/sched/prio.h>
7
8#include <linux/capability.h>
9#include <linux/mutex.h>
10#include <linux/plist.h>
11#include <linux/mm_types.h>
12#include <asm/ptrace.h>
13
14#include <linux/sem.h>
15#include <linux/shm.h>
16#include <linux/signal.h>
17#include <linux/signal_types.h>
18#include <linux/pid.h>
19#include <linux/seccomp.h>
20#include <linux/rculist.h>
21#include <linux/rtmutex.h>
22
23#include <linux/resource.h>
24#include <linux/hrtimer.h>
25#include <linux/kcov.h>
26#include <linux/task_io_accounting.h>
27#include <linux/latencytop.h>
28#include <linux/cred.h>
29#include <linux/gfp.h>
30#include <linux/topology.h>
31#include <linux/magic.h>
32#include <linux/cgroup-defs.h>
33
34#include <asm/current.h>
35
36struct sched_attr;
37struct sched_param;
38
39struct futex_pi_state;
40struct robust_list_head;
41struct bio_list;
42struct fs_struct;
43struct perf_event_context;
44struct blk_plug;
45struct filename;
46struct nameidata;
47
48struct signal_struct;
49struct sighand_struct;
50
51struct seq_file;
52struct cfs_rq;
53struct task_group;
54
55/*
56 * Task state bitmask. NOTE! These bits are also
57 * encoded in fs/proc/array.c: get_task_state().
58 *
59 * We have two separate sets of flags: task->state
 60 * is about runnability, while task->exit_state is
61 * about the task exiting. Confusing, but this way
62 * modifying one set can't modify the other one by
63 * mistake.
64 */
65#define TASK_RUNNING 0
66#define TASK_INTERRUPTIBLE 1
67#define TASK_UNINTERRUPTIBLE 2
68#define __TASK_STOPPED 4
69#define __TASK_TRACED 8
70/* in tsk->exit_state */
71#define EXIT_DEAD 16
72#define EXIT_ZOMBIE 32
73#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD)
74/* in tsk->state again */
75#define TASK_DEAD 64
76#define TASK_WAKEKILL 128
77#define TASK_WAKING 256
78#define TASK_PARKED 512
79#define TASK_NOLOAD 1024
80#define TASK_NEW 2048
81#define TASK_STATE_MAX 4096
82
83#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"
84
85/* Convenience macros for the sake of set_current_state */
86#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
87#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
88#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
89
90#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)
91
92/* Convenience macros for the sake of wake_up */
93#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
94#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
95
96/* get_task_state() */
97#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
98 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
99 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)
100
101#define task_is_traced(task) ((task->state & __TASK_TRACED) != 0)
102#define task_is_stopped(task) ((task->state & __TASK_STOPPED) != 0)
103#define task_is_stopped_or_traced(task) \
104 ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
105#define task_contributes_to_load(task) \
106 ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
107 (task->flags & PF_FROZEN) == 0 && \
108 (task->state & TASK_NOLOAD) == 0)
109
110#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
111
112#define __set_current_state(state_value) \
113 do { \
114 current->task_state_change = _THIS_IP_; \
115 current->state = (state_value); \
116 } while (0)
117#define set_current_state(state_value) \
118 do { \
119 current->task_state_change = _THIS_IP_; \
120 smp_store_mb(current->state, (state_value)); \
121 } while (0)
122
123#else
124/*
125 * set_current_state() includes a barrier so that the write of current->state
126 * is correctly serialised wrt the caller's subsequent test of whether to
127 * actually sleep:
128 *
129 * for (;;) {
130 * set_current_state(TASK_UNINTERRUPTIBLE);
131 * if (!need_sleep)
132 * break;
133 *
134 * schedule();
135 * }
136 * __set_current_state(TASK_RUNNING);
137 *
138 * If the caller does not need such serialisation (because, for instance, the
139 * condition test and condition change and wakeup are under the same lock) then
140 * use __set_current_state().
141 *
142 * The above is typically ordered against the wakeup, which does:
143 *
144 * need_sleep = false;
145 * wake_up_state(p, TASK_UNINTERRUPTIBLE);
146 *
147 * Where wake_up_state() (and all other wakeup primitives) imply enough
148 * barriers to order the store of the variable against wakeup.
149 *
150 * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
151 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
152 * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
153 *
154 * This is obviously fine, since they both store the exact same value.
155 *
156 * Also see the comments of try_to_wake_up().
157 */
158#define __set_current_state(state_value) \
159 do { current->state = (state_value); } while (0)
160#define set_current_state(state_value) \
161 smp_store_mb(current->state, (state_value))
162
163#endif
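/*
 * Illustrative sketch, not part of the original header: a minimal wait
 * loop built on the primitives above. @done stands for a caller-provided
 * condition flag, with a matching wake_up_process() on the writer side:
 *
 *	for (;;) {
 *		set_current_state(TASK_INTERRUPTIBLE);
 *		if (READ_ONCE(done))
 *			break;
 *		schedule();
 *	}
 *	__set_current_state(TASK_RUNNING);
 *
 * Real code should normally use the wait_event*() helpers from
 * <linux/wait.h>, which wrap exactly this pattern.
 */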
164
165/* Task command name length */
166#define TASK_COMM_LEN 16
167
168struct task_struct;
169
170extern void sched_init(void);
171extern void sched_init_smp(void);
172
173extern cpumask_var_t cpu_isolated_map;
174
175extern int runqueue_is_locked(int cpu);
176
177extern void cpu_init (void);
178extern void trap_init(void);
179extern void update_process_times(int user);
180extern void scheduler_tick(void);
181
182#define MAX_SCHEDULE_TIMEOUT LONG_MAX
183extern signed long schedule_timeout(signed long timeout);
184extern signed long schedule_timeout_interruptible(signed long timeout);
185extern signed long schedule_timeout_killable(signed long timeout);
186extern signed long schedule_timeout_uninterruptible(signed long timeout);
187extern signed long schedule_timeout_idle(signed long timeout);
188asmlinkage void schedule(void);
189extern void schedule_preempt_disabled(void);
190
191extern int __must_check io_schedule_prepare(void);
192extern void io_schedule_finish(int token);
193extern long io_schedule_timeout(long timeout);
194extern void io_schedule(void);
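/*
 * Illustrative sketch, not from the original header: sleeping for roughly
 * 100ms with the helpers above. msecs_to_jiffies() comes from
 * <linux/jiffies.h>; a non-zero return value means the sleep was cut
 * short by a signal and reports the jiffies that were left:
 *
 *	long left = schedule_timeout_interruptible(msecs_to_jiffies(100));
 */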
195
196struct nsproxy;
197
198/**
 199 * struct prev_cputime - snapshot of system and user cputime
200 * @utime: time spent in user mode
201 * @stime: time spent in system mode
202 * @lock: protects the above two fields
203 *
204 * Stores previous user/system time values such that we can guarantee
205 * monotonicity.
206 */
207struct prev_cputime {
208#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
209 u64 utime;
210 u64 stime;
211 raw_spinlock_t lock;
212#endif
213};
214
215static inline void prev_cputime_init(struct prev_cputime *prev)
216{
217#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
218 prev->utime = prev->stime = 0;
219 raw_spin_lock_init(&prev->lock);
220#endif
221}
222
223/**
224 * struct task_cputime - collected CPU time counts
225 * @utime: time spent in user mode, in nanoseconds
226 * @stime: time spent in kernel mode, in nanoseconds
227 * @sum_exec_runtime: total time spent on the CPU, in nanoseconds
228 *
229 * This structure groups together three kinds of CPU time that are tracked for
230 * threads and thread groups. Most things considering CPU time want to group
231 * these counts together and treat all three of them in parallel.
232 */
233struct task_cputime {
234 u64 utime;
235 u64 stime;
236 unsigned long long sum_exec_runtime;
237};
238
239/* Alternate field names when used to cache expirations. */
240#define virt_exp utime
241#define prof_exp stime
242#define sched_exp sum_exec_runtime
243
244/*
245 * This is the atomic variant of task_cputime, which can be used for
246 * storing and updating task_cputime statistics without locking.
247 */
248struct task_cputime_atomic {
249 atomic64_t utime;
250 atomic64_t stime;
251 atomic64_t sum_exec_runtime;
252};
253
254#define INIT_CPUTIME_ATOMIC \
255 (struct task_cputime_atomic) { \
256 .utime = ATOMIC64_INIT(0), \
257 .stime = ATOMIC64_INIT(0), \
258 .sum_exec_runtime = ATOMIC64_INIT(0), \
259 }
260
261#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
262
263/*
264 * Disable preemption until the scheduler is running -- use an unconditional
265 * value so that it also works on !PREEMPT_COUNT kernels.
266 *
267 * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count().
268 */
269#define INIT_PREEMPT_COUNT PREEMPT_OFFSET
270
271/*
272 * Initial preempt_count value; reflects the preempt_count schedule invariant
273 * which states that during context switches:
274 *
275 * preempt_count() == 2*PREEMPT_DISABLE_OFFSET
276 *
277 * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels.
278 * Note: See finish_task_switch().
279 */
280#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED)
281
282/**
283 * struct thread_group_cputimer - thread group interval timer counts
284 * @cputime_atomic: atomic thread group interval timers.
285 * @running: true when there are timers running and
286 * @cputime_atomic receives updates.
287 * @checking_timer: true when a thread in the group is in the
288 * process of checking for thread group timers.
289 *
290 * This structure contains the version of task_cputime, above, that is
291 * used for thread group CPU timer calculations.
292 */
293struct thread_group_cputimer {
294 struct task_cputime_atomic cputime_atomic;
295 bool running;
296 bool checking_timer;
297};
298
299#include <linux/rwsem.h>
300struct autogroup;
301
302struct backing_dev_info;
303struct reclaim_state;
304
305#ifdef CONFIG_SCHED_INFO
306struct sched_info {
307 /* cumulative counters */
308 unsigned long pcount; /* # of times run on this cpu */
309 unsigned long long run_delay; /* time spent waiting on a runqueue */
310
311 /* timestamps */
312 unsigned long long last_arrival,/* when we last ran on a cpu */
313 last_queued; /* when we were last queued to run */
314};
315#endif /* CONFIG_SCHED_INFO */
316
317struct task_delay_info;
318
319static inline int sched_info_on(void)
320{
321#ifdef CONFIG_SCHEDSTATS
322 return 1;
323#elif defined(CONFIG_TASK_DELAY_ACCT)
324 extern int delayacct_on;
325 return delayacct_on;
326#else
327 return 0;
328#endif
329}
330
331#ifdef CONFIG_SCHEDSTATS
332void force_schedstat_enabled(void);
333#endif
334
335/*
336 * Integer metrics need fixed point arithmetic, e.g., sched/fair
337 * has a few: load, load_avg, util_avg, freq, and capacity.
338 *
339 * We define a basic fixed point arithmetic range, and then formalize
340 * all these metrics based on that basic range.
341 */
342# define SCHED_FIXEDPOINT_SHIFT 10
343# define SCHED_FIXEDPOINT_SCALE (1L << SCHED_FIXEDPOINT_SHIFT)
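/*
 * Worked example (illustrative, not from the original header): with
 * SCHED_FIXEDPOINT_SHIFT == 10, a ratio of 25% is stored as
 * 25 * SCHED_FIXEDPOINT_SCALE / 100 == 256, and converting a scaled
 * value back is a shift: (256 * 100) >> SCHED_FIXEDPOINT_SHIFT == 25.
 */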
344
345struct io_context; /* See blkdev.h */
346
347
348#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
349extern void prefetch_stack(struct task_struct *t);
350#else
351static inline void prefetch_stack(struct task_struct *t) { }
352#endif
353
354struct audit_context; /* See audit.c */
355struct mempolicy;
356struct pipe_inode_info;
357struct uts_namespace;
358
359struct load_weight {
360 unsigned long weight;
361 u32 inv_weight;
362};
363
364/*
365 * The load_avg/util_avg accumulates an infinite geometric series
366 * (see __update_load_avg() in kernel/sched/fair.c).
367 *
368 * [load_avg definition]
369 *
370 * load_avg = runnable% * scale_load_down(load)
371 *
372 * where runnable% is the time ratio that a sched_entity is runnable.
373 * For cfs_rq, it is the aggregated load_avg of all runnable and
374 * blocked sched_entities.
375 *
376 * load_avg may also take frequency scaling into account:
377 *
378 * load_avg = runnable% * scale_load_down(load) * freq%
379 *
380 * where freq% is the CPU frequency normalized to the highest frequency.
381 *
382 * [util_avg definition]
383 *
384 * util_avg = running% * SCHED_CAPACITY_SCALE
385 *
386 * where running% is the time ratio that a sched_entity is running on
387 * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
388 * and blocked sched_entities.
389 *
390 * util_avg may also factor frequency scaling and CPU capacity scaling:
391 *
392 * util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
393 *
394 * where freq% is the same as above, and capacity% is the CPU capacity
395 * normalized to the greatest capacity (due to uarch differences, etc).
396 *
397 * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
 398 * themselves are in the range of [0, 1]. To do fixed point arithmetic,
399 * we therefore scale them to as large a range as necessary. This is for
400 * example reflected by util_avg's SCHED_CAPACITY_SCALE.
401 *
402 * [Overflow issue]
403 *
404 * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
405 * with the highest load (=88761), always runnable on a single cfs_rq,
406 * and should not overflow as the number already hits PID_MAX_LIMIT.
407 *
408 * For all other cases (including 32-bit kernels), struct load_weight's
409 * weight will overflow first before we do, because:
410 *
411 * Max(load_avg) <= Max(load.weight)
412 *
413 * Then it is the load_weight's responsibility to consider overflow
414 * issues.
415 */
416struct sched_avg {
417 u64 last_update_time, load_sum;
418 u32 util_sum, period_contrib;
419 unsigned long load_avg, util_avg;
420};
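/*
 * Illustrative example, not from the original header: per the definitions
 * above, a sched_entity running 50% of the time on a full-capacity,
 * full-frequency CPU converges to util_avg ~= 0.5 * SCHED_CAPACITY_SCALE,
 * i.e. roughly 512 with the default capacity scale of 1024.
 */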
421
422#ifdef CONFIG_SCHEDSTATS
423struct sched_statistics {
424 u64 wait_start;
425 u64 wait_max;
426 u64 wait_count;
427 u64 wait_sum;
428 u64 iowait_count;
429 u64 iowait_sum;
430
431 u64 sleep_start;
432 u64 sleep_max;
433 s64 sum_sleep_runtime;
434
435 u64 block_start;
436 u64 block_max;
437 u64 exec_max;
438 u64 slice_max;
439
440 u64 nr_migrations_cold;
441 u64 nr_failed_migrations_affine;
442 u64 nr_failed_migrations_running;
443 u64 nr_failed_migrations_hot;
444 u64 nr_forced_migrations;
445
446 u64 nr_wakeups;
447 u64 nr_wakeups_sync;
448 u64 nr_wakeups_migrate;
449 u64 nr_wakeups_local;
450 u64 nr_wakeups_remote;
451 u64 nr_wakeups_affine;
452 u64 nr_wakeups_affine_attempts;
453 u64 nr_wakeups_passive;
454 u64 nr_wakeups_idle;
455};
456#endif
457
458struct sched_entity {
459 struct load_weight load; /* for load-balancing */
460 struct rb_node run_node;
461 struct list_head group_node;
462 unsigned int on_rq;
463
464 u64 exec_start;
465 u64 sum_exec_runtime;
466 u64 vruntime;
467 u64 prev_sum_exec_runtime;
468
469 u64 nr_migrations;
470
471#ifdef CONFIG_SCHEDSTATS
472 struct sched_statistics statistics;
473#endif
474
475#ifdef CONFIG_FAIR_GROUP_SCHED
476 int depth;
477 struct sched_entity *parent;
478 /* rq on which this entity is (to be) queued: */
479 struct cfs_rq *cfs_rq;
480 /* rq "owned" by this entity/group: */
481 struct cfs_rq *my_q;
482#endif
483
484#ifdef CONFIG_SMP
485 /*
486 * Per entity load average tracking.
487 *
488 * Put into separate cache line so it does not
489 * collide with read-mostly values above.
490 */
491 struct sched_avg avg ____cacheline_aligned_in_smp;
492#endif
493};
494
495struct sched_rt_entity {
496 struct list_head run_list;
497 unsigned long timeout;
498 unsigned long watchdog_stamp;
499 unsigned int time_slice;
500 unsigned short on_rq;
501 unsigned short on_list;
502
503 struct sched_rt_entity *back;
504#ifdef CONFIG_RT_GROUP_SCHED
505 struct sched_rt_entity *parent;
506 /* rq on which this entity is (to be) queued: */
507 struct rt_rq *rt_rq;
508 /* rq "owned" by this entity/group: */
509 struct rt_rq *my_q;
510#endif
511};
512
513struct sched_dl_entity {
514 struct rb_node rb_node;
515
516 /*
517 * Original scheduling parameters. Copied here from sched_attr
518 * during sched_setattr(), they will remain the same until
519 * the next sched_setattr().
520 */
521 u64 dl_runtime; /* maximum runtime for each instance */
522 u64 dl_deadline; /* relative deadline of each instance */
523 u64 dl_period; /* separation of two instances (period) */
524 u64 dl_bw; /* dl_runtime / dl_deadline */
525
526 /*
527 * Actual scheduling parameters. Initialized with the values above,
 528 * they are continuously updated during task execution. Note that
529 * the remaining runtime could be < 0 in case we are in overrun.
530 */
531 s64 runtime; /* remaining runtime for this instance */
532 u64 deadline; /* absolute deadline for this instance */
533 unsigned int flags; /* specifying the scheduler behaviour */
534
535 /*
536 * Some bool flags:
537 *
538 * @dl_throttled tells if we exhausted the runtime. If so, the
539 * task has to wait for a replenishment to be performed at the
540 * next firing of dl_timer.
541 *
542 * @dl_boosted tells if we are boosted due to DI. If so we are
543 * outside bandwidth enforcement mechanism (but only until we
544 * exit the critical section);
545 *
546 * @dl_yielded tells if task gave up the cpu before consuming
547 * all its available runtime during the last job.
548 */
549 int dl_throttled, dl_boosted, dl_yielded;
550
551 /*
552 * Bandwidth enforcement timer. Each -deadline task has its
553 * own bandwidth to be enforced, thus we need one timer per task.
554 */
555 struct hrtimer dl_timer;
556};
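/*
 * Illustrative sketch, not from the original header: the dl_* parameters
 * above originate from a struct sched_attr passed to sched_setattr(),
 * e.g. for a task needing 10ms of runtime every 100ms (values in ns):
 *
 *	struct sched_attr attr = {
 *		.size		= sizeof(attr),
 *		.sched_policy	= SCHED_DEADLINE,
 *		.sched_runtime	= 10 * 1000 * 1000,
 *		.sched_deadline	= 100 * 1000 * 1000,
 *		.sched_period	= 100 * 1000 * 1000,
 *	};
 *
 *	ret = sched_setattr(p, &attr);
 */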
557
558union rcu_special {
559 struct {
560 u8 blocked;
561 u8 need_qs;
562 u8 exp_need_qs;
563 u8 pad; /* Otherwise the compiler can store garbage here. */
564 } b; /* Bits. */
565 u32 s; /* Set of bits. */
566};
567struct rcu_node;
568
569enum perf_event_task_context {
570 perf_invalid_context = -1,
571 perf_hw_context = 0,
572 perf_sw_context,
573 perf_nr_task_contexts,
574};
575
576struct wake_q_node {
577 struct wake_q_node *next;
578};
579
580/* Track pages that require TLB flushes */
581struct tlbflush_unmap_batch {
582 /*
583 * Each bit set is a CPU that potentially has a TLB entry for one of
584 * the PFNs being flushed. See set_tlb_ubc_flush_pending().
585 */
586 struct cpumask cpumask;
587
588 /* True if any bit in cpumask is set */
589 bool flush_required;
590
591 /*
592 * If true then the PTE was dirty when unmapped. The entry must be
593 * flushed before IO is initiated or a stale TLB entry potentially
594 * allows an update without redirtying the page.
595 */
596 bool writable;
597};
598
599struct task_struct {
600#ifdef CONFIG_THREAD_INFO_IN_TASK
601 /*
602 * For reasons of header soup (see current_thread_info()), this
603 * must be the first element of task_struct.
604 */
605 struct thread_info thread_info;
606#endif
607 volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
608 void *stack;
609 atomic_t usage;
610 unsigned int flags; /* per process flags, defined below */
611 unsigned int ptrace;
612
613#ifdef CONFIG_SMP
614 struct llist_node wake_entry;
615 int on_cpu;
616#ifdef CONFIG_THREAD_INFO_IN_TASK
617 unsigned int cpu; /* current CPU */
618#endif
619 unsigned int wakee_flips;
620 unsigned long wakee_flip_decay_ts;
621 struct task_struct *last_wakee;
622
623 int wake_cpu;
624#endif
625 int on_rq;
626
627 int prio, static_prio, normal_prio;
628 unsigned int rt_priority;
629 const struct sched_class *sched_class;
630 struct sched_entity se;
631 struct sched_rt_entity rt;
632#ifdef CONFIG_CGROUP_SCHED
633 struct task_group *sched_task_group;
634#endif
635 struct sched_dl_entity dl;
636
637#ifdef CONFIG_PREEMPT_NOTIFIERS
638 /* list of struct preempt_notifier: */
639 struct hlist_head preempt_notifiers;
640#endif
641
642#ifdef CONFIG_BLK_DEV_IO_TRACE
643 unsigned int btrace_seq;
644#endif
645
646 unsigned int policy;
647 int nr_cpus_allowed;
648 cpumask_t cpus_allowed;
649
650#ifdef CONFIG_PREEMPT_RCU
651 int rcu_read_lock_nesting;
652 union rcu_special rcu_read_unlock_special;
653 struct list_head rcu_node_entry;
654 struct rcu_node *rcu_blocked_node;
655#endif /* #ifdef CONFIG_PREEMPT_RCU */
656#ifdef CONFIG_TASKS_RCU
657 unsigned long rcu_tasks_nvcsw;
658 bool rcu_tasks_holdout;
659 struct list_head rcu_tasks_holdout_list;
660 int rcu_tasks_idle_cpu;
661#endif /* #ifdef CONFIG_TASKS_RCU */
662
663#ifdef CONFIG_SCHED_INFO
664 struct sched_info sched_info;
665#endif
666
667 struct list_head tasks;
668#ifdef CONFIG_SMP
669 struct plist_node pushable_tasks;
670 struct rb_node pushable_dl_tasks;
671#endif
672
673 struct mm_struct *mm, *active_mm;
674
675 /* Per-thread vma caching: */
676 struct vmacache vmacache;
677
678#if defined(SPLIT_RSS_COUNTING)
679 struct task_rss_stat rss_stat;
680#endif
681/* task state */
682 int exit_state;
683 int exit_code, exit_signal;
684 int pdeath_signal; /* The signal sent when the parent dies */
685 unsigned long jobctl; /* JOBCTL_*, siglock protected */
686
687 /* Used for emulating ABI behavior of previous Linux versions */
688 unsigned int personality;
689
690 /* scheduler bits, serialized by scheduler locks */
691 unsigned sched_reset_on_fork:1;
692 unsigned sched_contributes_to_load:1;
693 unsigned sched_migrated:1;
694 unsigned sched_remote_wakeup:1;
695 unsigned :0; /* force alignment to the next boundary */
696
697 /* unserialized, strictly 'current' */
698 unsigned in_execve:1; /* bit to tell LSMs we're in execve */
699 unsigned in_iowait:1;
700#if !defined(TIF_RESTORE_SIGMASK)
701 unsigned restore_sigmask:1;
702#endif
703#ifdef CONFIG_MEMCG
704 unsigned memcg_may_oom:1;
705#ifndef CONFIG_SLOB
706 unsigned memcg_kmem_skip_account:1;
707#endif
708#endif
709#ifdef CONFIG_COMPAT_BRK
710 unsigned brk_randomized:1;
711#endif
712
713 unsigned long atomic_flags; /* Flags needing atomic access. */
714
715 struct restart_block restart_block;
716
717 pid_t pid;
718 pid_t tgid;
719
720#ifdef CONFIG_CC_STACKPROTECTOR
721 /* Canary value for the -fstack-protector gcc feature */
722 unsigned long stack_canary;
723#endif
724 /*
725 * pointers to (original) parent process, youngest child, younger sibling,
726 * older sibling, respectively. (p->father can be replaced with
727 * p->real_parent->pid)
728 */
729 struct task_struct __rcu *real_parent; /* real parent process */
730 struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
731 /*
732 * children/sibling forms the list of my natural children
733 */
734 struct list_head children; /* list of my children */
735 struct list_head sibling; /* linkage in my parent's children list */
736 struct task_struct *group_leader; /* threadgroup leader */
737
738 /*
739 * ptraced is the list of tasks this task is using ptrace on.
740 * This includes both natural children and PTRACE_ATTACH targets.
741 * p->ptrace_entry is p's link on the p->parent->ptraced list.
742 */
743 struct list_head ptraced;
744 struct list_head ptrace_entry;
745
746 /* PID/PID hash table linkage. */
747 struct pid_link pids[PIDTYPE_MAX];
748 struct list_head thread_group;
749 struct list_head thread_node;
750
751 struct completion *vfork_done; /* for vfork() */
752 int __user *set_child_tid; /* CLONE_CHILD_SETTID */
753 int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
754
755 u64 utime, stime;
756#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
757 u64 utimescaled, stimescaled;
758#endif
759 u64 gtime;
760 struct prev_cputime prev_cputime;
761#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
762 seqcount_t vtime_seqcount;
763 unsigned long long vtime_snap;
764 enum {
765 /* Task is sleeping or running in a CPU with VTIME inactive */
766 VTIME_INACTIVE = 0,
767 /* Task runs in userspace in a CPU with VTIME active */
768 VTIME_USER,
769 /* Task runs in kernelspace in a CPU with VTIME active */
770 VTIME_SYS,
771 } vtime_snap_whence;
772#endif
773
774#ifdef CONFIG_NO_HZ_FULL
775 atomic_t tick_dep_mask;
776#endif
777 unsigned long nvcsw, nivcsw; /* context switch counts */
778 u64 start_time; /* monotonic time in nsec */
779 u64 real_start_time; /* boot based time in nsec */
780/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
781 unsigned long min_flt, maj_flt;
782
783#ifdef CONFIG_POSIX_TIMERS
784 struct task_cputime cputime_expires;
785 struct list_head cpu_timers[3];
786#endif
787
788/* process credentials */
789 const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
790 const struct cred __rcu *real_cred; /* objective and real subjective task
791 * credentials (COW) */
792 const struct cred __rcu *cred; /* effective (overridable) subjective task
793 * credentials (COW) */
794 char comm[TASK_COMM_LEN]; /* executable name excluding path
795 - access with [gs]et_task_comm (which lock
796 it with task_lock())
797 - initialized normally by setup_new_exec */
798/* file system info */
799 struct nameidata *nameidata;
800#ifdef CONFIG_SYSVIPC
801/* ipc stuff */
802 struct sysv_sem sysvsem;
803 struct sysv_shm sysvshm;
804#endif
805#ifdef CONFIG_DETECT_HUNG_TASK
806/* hung task detection */
807 unsigned long last_switch_count;
808#endif
809/* filesystem information */
810 struct fs_struct *fs;
811/* open file information */
812 struct files_struct *files;
813/* namespaces */
814 struct nsproxy *nsproxy;
815/* signal handlers */
816 struct signal_struct *signal;
817 struct sighand_struct *sighand;
818
819 sigset_t blocked, real_blocked;
820 sigset_t saved_sigmask; /* restored if set_restore_sigmask() was used */
821 struct sigpending pending;
822
823 unsigned long sas_ss_sp;
824 size_t sas_ss_size;
825 unsigned sas_ss_flags;
826
827 struct callback_head *task_works;
828
829 struct audit_context *audit_context;
830#ifdef CONFIG_AUDITSYSCALL
831 kuid_t loginuid;
832 unsigned int sessionid;
833#endif
834 struct seccomp seccomp;
835
836/* Thread group tracking */
837 u32 parent_exec_id;
838 u32 self_exec_id;
839/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
840 * mempolicy */
841 spinlock_t alloc_lock;
842
843 /* Protection of the PI data structures: */
844 raw_spinlock_t pi_lock;
845
846 struct wake_q_node wake_q;
847
848#ifdef CONFIG_RT_MUTEXES
849 /* PI waiters blocked on a rt_mutex held by this task */
850 struct rb_root pi_waiters;
851 struct rb_node *pi_waiters_leftmost;
852 /* Deadlock detection and priority inheritance handling */
853 struct rt_mutex_waiter *pi_blocked_on;
854#endif
855
856#ifdef CONFIG_DEBUG_MUTEXES
857 /* mutex deadlock detection */
858 struct mutex_waiter *blocked_on;
859#endif
860#ifdef CONFIG_TRACE_IRQFLAGS
861 unsigned int irq_events;
862 unsigned long hardirq_enable_ip;
863 unsigned long hardirq_disable_ip;
864 unsigned int hardirq_enable_event;
865 unsigned int hardirq_disable_event;
866 int hardirqs_enabled;
867 int hardirq_context;
868 unsigned long softirq_disable_ip;
869 unsigned long softirq_enable_ip;
870 unsigned int softirq_disable_event;
871 unsigned int softirq_enable_event;
872 int softirqs_enabled;
873 int softirq_context;
874#endif
875#ifdef CONFIG_LOCKDEP
876# define MAX_LOCK_DEPTH 48UL
877 u64 curr_chain_key;
878 int lockdep_depth;
879 unsigned int lockdep_recursion;
880 struct held_lock held_locks[MAX_LOCK_DEPTH];
881 gfp_t lockdep_reclaim_gfp;
882#endif
883#ifdef CONFIG_UBSAN
884 unsigned int in_ubsan;
885#endif
886
887/* journalling filesystem info */
888 void *journal_info;
889
890/* stacked block device info */
891 struct bio_list *bio_list;
892
893#ifdef CONFIG_BLOCK
894/* stack plugging */
895 struct blk_plug *plug;
896#endif
897
898/* VM state */
899 struct reclaim_state *reclaim_state;
900
901 struct backing_dev_info *backing_dev_info;
902
903 struct io_context *io_context;
904
905 unsigned long ptrace_message;
906 siginfo_t *last_siginfo; /* For ptrace use. */
907 struct task_io_accounting ioac;
908#if defined(CONFIG_TASK_XACCT)
909 u64 acct_rss_mem1; /* accumulated rss usage */
910 u64 acct_vm_mem1; /* accumulated virtual memory usage */
911 u64 acct_timexpd; /* stime + utime since last update */
912#endif
913#ifdef CONFIG_CPUSETS
914 nodemask_t mems_allowed; /* Protected by alloc_lock */
 915 seqcount_t mems_allowed_seq; /* Sequence no to catch updates */
916 int cpuset_mem_spread_rotor;
917 int cpuset_slab_spread_rotor;
918#endif
919#ifdef CONFIG_CGROUPS
920 /* Control Group info protected by css_set_lock */
921 struct css_set __rcu *cgroups;
922 /* cg_list protected by css_set_lock and tsk->alloc_lock */
923 struct list_head cg_list;
924#endif
925#ifdef CONFIG_INTEL_RDT_A
926 int closid;
927#endif
928#ifdef CONFIG_FUTEX
929 struct robust_list_head __user *robust_list;
930#ifdef CONFIG_COMPAT
931 struct compat_robust_list_head __user *compat_robust_list;
932#endif
933 struct list_head pi_state_list;
934 struct futex_pi_state *pi_state_cache;
935#endif
936#ifdef CONFIG_PERF_EVENTS
937 struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
938 struct mutex perf_event_mutex;
939 struct list_head perf_event_list;
940#endif
941#ifdef CONFIG_DEBUG_PREEMPT
942 unsigned long preempt_disable_ip;
943#endif
944#ifdef CONFIG_NUMA
945 struct mempolicy *mempolicy; /* Protected by alloc_lock */
946 short il_next;
947 short pref_node_fork;
948#endif
949#ifdef CONFIG_NUMA_BALANCING
950 int numa_scan_seq;
951 unsigned int numa_scan_period;
952 unsigned int numa_scan_period_max;
953 int numa_preferred_nid;
954 unsigned long numa_migrate_retry;
955 u64 node_stamp; /* migration stamp */
956 u64 last_task_numa_placement;
957 u64 last_sum_exec_runtime;
958 struct callback_head numa_work;
959
960 struct list_head numa_entry;
961 struct numa_group *numa_group;
962
963 /*
964 * numa_faults is an array split into four regions:
965 * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
966 * in this precise order.
967 *
968 * faults_memory: Exponential decaying average of faults on a per-node
969 * basis. Scheduling placement decisions are made based on these
970 * counts. The values remain static for the duration of a PTE scan.
971 * faults_cpu: Track the nodes the process was running on when a NUMA
972 * hinting fault was incurred.
973 * faults_memory_buffer and faults_cpu_buffer: Record faults per node
974 * during the current scan window. When the scan completes, the counts
975 * in faults_memory and faults_cpu decay and these values are copied.
976 */
977 unsigned long *numa_faults;
978 unsigned long total_numa_faults;
979
980 /*
981 * numa_faults_locality tracks if faults recorded during the last
982 * scan window were remote/local or failed to migrate. The task scan
983 * period is adapted based on the locality of the faults with different
984 * weights depending on whether they were shared or private faults
985 */
986 unsigned long numa_faults_locality[3];
987
988 unsigned long numa_pages_migrated;
989#endif /* CONFIG_NUMA_BALANCING */
990
991#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
992 struct tlbflush_unmap_batch tlb_ubc;
993#endif
994
995 struct rcu_head rcu;
996
997 /*
998 * cache last used pipe for splice
999 */
1000 struct pipe_inode_info *splice_pipe;
1001
1002 struct page_frag task_frag;
1003
1004#ifdef CONFIG_TASK_DELAY_ACCT
1005 struct task_delay_info *delays;
1006#endif
1007
1008#ifdef CONFIG_FAULT_INJECTION
1009 int make_it_fail;
1010#endif
1011 /*
1012 * when (nr_dirtied >= nr_dirtied_pause), it's time to call
1013 * balance_dirty_pages() for some dirty throttling pause
1014 */
1015 int nr_dirtied;
1016 int nr_dirtied_pause;
1017 unsigned long dirty_paused_when; /* start of a write-and-pause period */
1018
1019#ifdef CONFIG_LATENCYTOP
1020 int latency_record_count;
1021 struct latency_record latency_record[LT_SAVECOUNT];
1022#endif
1023 /*
1024 * time slack values; these are used to round up poll() and
1025 * select() etc timeout values. These are in nanoseconds.
1026 */
1027 u64 timer_slack_ns;
1028 u64 default_timer_slack_ns;
1029
1030#ifdef CONFIG_KASAN
1031 unsigned int kasan_depth;
1032#endif
1033#ifdef CONFIG_FUNCTION_GRAPH_TRACER
1034 /* Index of current stored address in ret_stack */
1035 int curr_ret_stack;
1036 /* Stack of return addresses for return function tracing */
1037 struct ftrace_ret_stack *ret_stack;
1038 /* time stamp for last schedule */
1039 unsigned long long ftrace_timestamp;
1040 /*
1041 * Number of functions that haven't been traced
1042 * because of depth overrun.
1043 */
1044 atomic_t trace_overrun;
1045 /* Pause for the tracing */
1046 atomic_t tracing_graph_pause;
1047#endif
1048#ifdef CONFIG_TRACING
1049 /* state flags for use by tracers */
1050 unsigned long trace;
1051 /* bitmask and counter of trace recursion */
1052 unsigned long trace_recursion;
1053#endif /* CONFIG_TRACING */
1054#ifdef CONFIG_KCOV
1055 /* Coverage collection mode enabled for this task (0 if disabled). */
1056 enum kcov_mode kcov_mode;
1057 /* Size of the kcov_area. */
1058 unsigned kcov_size;
1059 /* Buffer for coverage collection. */
1060 void *kcov_area;
1061 /* kcov descriptor wired with this task or NULL. */
1062 struct kcov *kcov;
1063#endif
1064#ifdef CONFIG_MEMCG
1065 struct mem_cgroup *memcg_in_oom;
1066 gfp_t memcg_oom_gfp_mask;
1067 int memcg_oom_order;
1068
1069 /* number of pages to reclaim on returning to userland */
1070 unsigned int memcg_nr_pages_over_high;
1071#endif
1072#ifdef CONFIG_UPROBES
1073 struct uprobe_task *utask;
1074#endif
1075#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
1076 unsigned int sequential_io;
1077 unsigned int sequential_io_avg;
1078#endif
1079#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
1080 unsigned long task_state_change;
1081#endif
1082 int pagefault_disabled;
1083#ifdef CONFIG_MMU
1084 struct task_struct *oom_reaper_list;
1085#endif
1086#ifdef CONFIG_VMAP_STACK
1087 struct vm_struct *stack_vm_area;
1088#endif
1089#ifdef CONFIG_THREAD_INFO_IN_TASK
1090 /* A live task holds one reference. */
1091 atomic_t stack_refcount;
1092#endif
1093/* CPU-specific state of this task */
1094 struct thread_struct thread;
1095/*
1096 * WARNING: on x86, 'thread_struct' contains a variable-sized
1097 * structure. It *MUST* be at the end of 'task_struct'.
1098 *
1099 * Do not put anything below here!
1100 */
1101};
1102
1103static inline struct pid *task_pid(struct task_struct *task)
1104{
1105 return task->pids[PIDTYPE_PID].pid;
1106}
1107
1108static inline struct pid *task_tgid(struct task_struct *task)
1109{
1110 return task->group_leader->pids[PIDTYPE_PID].pid;
1111}
1112
1113/*
1114 * Without tasklist or rcu lock it is not safe to dereference
1115 * the result of task_pgrp/task_session even if task == current,
1116 * we can race with another thread doing sys_setsid/sys_setpgid.
1117 */
1118static inline struct pid *task_pgrp(struct task_struct *task)
1119{
1120 return task->group_leader->pids[PIDTYPE_PGID].pid;
1121}
1122
1123static inline struct pid *task_session(struct task_struct *task)
1124{
1125 return task->group_leader->pids[PIDTYPE_SID].pid;
1126}
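/*
 * Illustrative sketch, not from the original header: turning these
 * struct pid pointers into numbers uses the pid_nr*() helpers from
 * <linux/pid.h>, under RCU protection as noted above:
 *
 *	pid_t pgrp;
 *
 *	rcu_read_lock();
 *	pgrp = pid_nr(task_pgrp(p));
 *	rcu_read_unlock();
 */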
1127
1128struct pid_namespace;
1129
1130/*
1131 * the helpers to get the task's different pids as they are seen
1132 * from various namespaces
1133 *
1134 * task_xid_nr() : global id, i.e. the id seen from the init namespace;
1135 * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of
1136 * current.
1137 * task_xid_nr_ns() : id seen from the ns specified;
1138 *
1139 * set_task_vxid() : assigns a virtual id to a task;
1140 *
1141 * see also pid_nr() etc in include/linux/pid.h
1142 */
1143pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
1144 struct pid_namespace *ns);
1145
1146static inline pid_t task_pid_nr(struct task_struct *tsk)
1147{
1148 return tsk->pid;
1149}
1150
1151static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
1152 struct pid_namespace *ns)
1153{
1154 return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
1155}
1156
1157static inline pid_t task_pid_vnr(struct task_struct *tsk)
1158{
1159 return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
1160}
1161
1162
1163static inline pid_t task_tgid_nr(struct task_struct *tsk)
1164{
1165 return tsk->tgid;
1166}
1167
1168pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
1169
1170static inline pid_t task_tgid_vnr(struct task_struct *tsk)
1171{
1172 return pid_vnr(task_tgid(tsk));
1173}
1174
1175
1176static inline int pid_alive(const struct task_struct *p);
1177static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
1178{
1179 pid_t pid = 0;
1180
1181 rcu_read_lock();
1182 if (pid_alive(tsk))
1183 pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns);
1184 rcu_read_unlock();
1185
1186 return pid;
1187}
1188
1189static inline pid_t task_ppid_nr(const struct task_struct *tsk)
1190{
1191 return task_ppid_nr_ns(tsk, &init_pid_ns);
1192}
1193
1194static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
1195 struct pid_namespace *ns)
1196{
1197 return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
1198}
1199
1200static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
1201{
1202 return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
1203}
1204
1205
1206static inline pid_t task_session_nr_ns(struct task_struct *tsk,
1207 struct pid_namespace *ns)
1208{
1209 return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
1210}
1211
1212static inline pid_t task_session_vnr(struct task_struct *tsk)
1213{
1214 return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
1215}
1216
1217/* obsolete, do not use */
1218static inline pid_t task_pgrp_nr(struct task_struct *tsk)
1219{
1220 return task_pgrp_nr_ns(tsk, &init_pid_ns);
1221}
1222
1223/**
1224 * pid_alive - check that a task structure is not stale
1225 * @p: Task structure to be checked.
1226 *
1227 * Test if a process is not yet dead (at most zombie state).
1228 * If pid_alive fails, then pointers within the task structure
1229 * can be stale and must not be dereferenced.
1230 *
1231 * Return: 1 if the process is alive. 0 otherwise.
1232 */
1233static inline int pid_alive(const struct task_struct *p)
1234{
1235 return p->pids[PIDTYPE_PID].pid != NULL;
1236}
1237
1238/**
1239 * is_global_init - check if a task structure is init. Since init
1240 * is free to have sub-threads we need to check tgid.
1241 * @tsk: Task structure to be checked.
1242 *
1243 * Check if a task structure is the first user space task the kernel created.
1244 *
1245 * Return: 1 if the task structure is init. 0 otherwise.
1246 */
1247static inline int is_global_init(struct task_struct *tsk)
1248{
1249 return task_tgid_nr(tsk) == 1;
1250}
1251
1252extern struct pid *cad_pid;
1253
1254extern void free_task(struct task_struct *tsk);
1255#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0)
1256
1257extern void __put_task_struct(struct task_struct *t);
1258
1259static inline void put_task_struct(struct task_struct *t)
1260{
1261 if (atomic_dec_and_test(&t->usage))
1262 __put_task_struct(t);
1263}
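/*
 * Illustrative usage sketch, not from the original header: take a
 * reference before using a task_struct outside a context that already
 * pins it, and drop the reference when done:
 *
 *	get_task_struct(p);
 *	...	use p without holding tasklist_lock or RCU ...
 *	put_task_struct(p);
 */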
1264
1265struct task_struct *task_rcu_dereference(struct task_struct **ptask);
1266struct task_struct *try_get_task_struct(struct task_struct **ptask);
1267
1268#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
1269extern void task_cputime(struct task_struct *t,
1270 u64 *utime, u64 *stime);
1271extern u64 task_gtime(struct task_struct *t);
1272#else
1273static inline void task_cputime(struct task_struct *t,
1274 u64 *utime, u64 *stime)
1275{
1276 *utime = t->utime;
1277 *stime = t->stime;
1278}
1279
1280static inline u64 task_gtime(struct task_struct *t)
1281{
1282 return t->gtime;
1283}
1284#endif
1285
1286#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
1287static inline void task_cputime_scaled(struct task_struct *t,
1288 u64 *utimescaled,
1289 u64 *stimescaled)
1290{
1291 *utimescaled = t->utimescaled;
1292 *stimescaled = t->stimescaled;
1293}
1294#else
1295static inline void task_cputime_scaled(struct task_struct *t,
1296 u64 *utimescaled,
1297 u64 *stimescaled)
1298{
1299 task_cputime(t, utimescaled, stimescaled);
1300}
1301#endif
1302
1303extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
1304extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st);
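/*
 * Illustrative sketch, not from the original header: sampling a task's
 * adjusted CPU time with the accessor above:
 *
 *	u64 utime, stime;
 *
 *	task_cputime_adjusted(p, &utime, &stime);
 */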
1305
1306/*
1307 * Per process flags
1308 */
1309#define PF_IDLE 0x00000002 /* I am an IDLE thread */
1310#define PF_EXITING 0x00000004 /* getting shut down */
1311#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
1312#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
1313#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
1314#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
1315#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
1316#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
1317#define PF_DUMPCORE 0x00000200 /* dumped core */
1318#define PF_SIGNALED 0x00000400 /* killed by a signal */
1319#define PF_MEMALLOC 0x00000800 /* Allocating memory */
1320#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */
1321#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */
1322#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */
1323#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */
1324#define PF_FROZEN 0x00010000 /* frozen for system suspend */
1325#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */
1326#define PF_KSWAPD 0x00040000 /* I am kswapd */
1327#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */
1328#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */
1329#define PF_KTHREAD 0x00200000 /* I am a kernel thread */
1330#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */
1331#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
1332#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */
1333#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
1334#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
1335#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
1336#define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */
1337
1338/*
1339 * Only the _current_ task can read/write to tsk->flags, but other
1340 * tasks can access tsk->flags in readonly mode for example
1341 * with tsk_used_math (like during threaded core dumping).
1342 * There is however an exception to this rule during ptrace
1343 * or during fork: the ptracer task is allowed to write to the
1344 * child->flags of its traced child (same goes for fork, the parent
1345 * can write to the child->flags), because we're guaranteed the
1346 * child is not running and in turn not changing child->flags
1347 * at the same time the parent does it.
1348 */
1349#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0)
1350#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0)
1351#define clear_used_math() clear_stopped_child_used_math(current)
1352#define set_used_math() set_stopped_child_used_math(current)
1353#define conditional_stopped_child_used_math(condition, child) \
1354 do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)
1355#define conditional_used_math(condition) \
1356 conditional_stopped_child_used_math(condition, current)
1357#define copy_to_stopped_child_used_math(child) \
1358 do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)
1359/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
1360#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
1361#define used_math() tsk_used_math(current)
1362
1363/* Per-process atomic flags. */
1364#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
1365#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
1366#define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */
1367#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */
1368
1369
1370#define TASK_PFA_TEST(name, func) \
1371 static inline bool task_##func(struct task_struct *p) \
1372 { return test_bit(PFA_##name, &p->atomic_flags); }
1373#define TASK_PFA_SET(name, func) \
1374 static inline void task_set_##func(struct task_struct *p) \
1375 { set_bit(PFA_##name, &p->atomic_flags); }
1376#define TASK_PFA_CLEAR(name, func) \
1377 static inline void task_clear_##func(struct task_struct *p) \
1378 { clear_bit(PFA_##name, &p->atomic_flags); }
1379
1380TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
1381TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
1382
1383TASK_PFA_TEST(SPREAD_PAGE, spread_page)
1384TASK_PFA_SET(SPREAD_PAGE, spread_page)
1385TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
1386
1387TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
1388TASK_PFA_SET(SPREAD_SLAB, spread_slab)
1389TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
1390
1391TASK_PFA_TEST(LMK_WAITING, lmk_waiting)
1392TASK_PFA_SET(LMK_WAITING, lmk_waiting)
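/*
 * Illustrative note, not from the original header: the TASK_PFA_* macros
 * above expand into small helpers, e.g. for PFA_NO_NEW_PRIVS:
 *
 *	if (task_no_new_privs(current))
 *		return -EPERM;
 *	task_set_no_new_privs(current);
 */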
1393
1394static inline void tsk_restore_flags(struct task_struct *task,
1395 unsigned long orig_flags, unsigned long flags)
1396{
1397 task->flags &= ~flags;
1398 task->flags |= orig_flags & flags;
1399}
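/*
 * Illustrative sketch, not from the original header: the usual pattern is
 * to save the flag bits of interest, set them temporarily and restore:
 *
 *	unsigned long pflags = current->flags;
 *
 *	current->flags |= PF_MEMALLOC_NOIO;
 *	...	allocate without initiating I/O ...
 *	tsk_restore_flags(current, pflags, PF_MEMALLOC_NOIO);
 */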
1400
1401extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
1402 const struct cpumask *trial);
1403extern int task_can_attach(struct task_struct *p,
1404 const struct cpumask *cs_cpus_allowed);
1405#ifdef CONFIG_SMP
1406extern void do_set_cpus_allowed(struct task_struct *p,
1407 const struct cpumask *new_mask);
1408
1409extern int set_cpus_allowed_ptr(struct task_struct *p,
1410 const struct cpumask *new_mask);
1411#else
1412static inline void do_set_cpus_allowed(struct task_struct *p,
1413 const struct cpumask *new_mask)
1414{
1415}
1416static inline int set_cpus_allowed_ptr(struct task_struct *p,
1417 const struct cpumask *new_mask)
1418{
1419 if (!cpumask_test_cpu(0, new_mask))
1420 return -EINVAL;
1421 return 0;
1422}
1423#endif
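/*
 * Illustrative sketch, not from the original header: pinning a kernel
 * thread @p to a single CPU with the helper above; cpumask_of() is from
 * <linux/cpumask.h>:
 *
 *	if (set_cpus_allowed_ptr(p, cpumask_of(cpu)))
 *		pr_warn("could not pin task to CPU %u\n", cpu);
 */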
1424
1425#ifndef cpu_relax_yield
1426#define cpu_relax_yield() cpu_relax()
1427#endif
1428
1429extern unsigned long long
1430task_sched_runtime(struct task_struct *task);
1431
1432/* sched_exec is called by processes performing an exec */
1433#ifdef CONFIG_SMP
1434extern void sched_exec(void);
1435#else
1436#define sched_exec() {}
1437#endif
1438
1439extern int yield_to(struct task_struct *p, bool preempt);
1440extern void set_user_nice(struct task_struct *p, long nice);
1441extern int task_prio(const struct task_struct *p);
1442/**
1443 * task_nice - return the nice value of a given task.
1444 * @p: the task in question.
1445 *
1446 * Return: The nice value [ -20 ... 0 ... 19 ].
1447 */
1448static inline int task_nice(const struct task_struct *p)
1449{
1450 return PRIO_TO_NICE((p)->static_prio);
1451}
1452extern int can_nice(const struct task_struct *p, const int nice);
1453extern int task_curr(const struct task_struct *p);
1454extern int idle_cpu(int cpu);
1455extern int sched_setscheduler(struct task_struct *, int,
1456 const struct sched_param *);
1457extern int sched_setscheduler_nocheck(struct task_struct *, int,
1458 const struct sched_param *);
1459extern int sched_setattr(struct task_struct *,
1460 const struct sched_attr *);
1461extern struct task_struct *idle_task(int cpu);
1462/**
1463 * is_idle_task - is the specified task an idle task?
1464 * @p: the task in question.
1465 *
1466 * Return: 1 if @p is an idle task. 0 otherwise.
1467 */
1468static inline bool is_idle_task(const struct task_struct *p)
1469{
1470 return !!(p->flags & PF_IDLE);
1471}
1472extern struct task_struct *curr_task(int cpu);
1473extern void ia64_set_curr_task(int cpu, struct task_struct *p);
1474
1475void yield(void);
1476
1477union thread_union {
1478#ifndef CONFIG_THREAD_INFO_IN_TASK
1479 struct thread_info thread_info;
1480#endif
1481 unsigned long stack[THREAD_SIZE/sizeof(long)];
1482};
1483
1484#ifndef __HAVE_ARCH_KSTACK_END
1485static inline int kstack_end(void *addr)
1486{
1487 /* Reliable end of stack detection:
1488 * Some APM BIOS versions misalign the stack
1489 */
1490 return !(((unsigned long)addr+sizeof(void*)-1) & (THREAD_SIZE-sizeof(void*)));
1491}
1492#endif
1493
1494extern union thread_union init_thread_union;
1495extern struct task_struct init_task;
1496
1497extern struct pid_namespace init_pid_ns;
1498
1499/*
1500 * find a task by one of its numerical ids
1501 *
1502 * find_task_by_pid_ns():
1503 * finds a task by its pid in the specified namespace
1504 * find_task_by_vpid():
1505 * finds a task by its virtual pid
1506 *
1507 * see also find_vpid() etc in include/linux/pid.h
1508 */
1509
1510extern struct task_struct *find_task_by_vpid(pid_t nr);
1511extern struct task_struct *find_task_by_pid_ns(pid_t nr,
1512 struct pid_namespace *ns);
1513
1514extern int wake_up_state(struct task_struct *tsk, unsigned int state);
1515extern int wake_up_process(struct task_struct *tsk);
1516extern void wake_up_new_task(struct task_struct *tsk);
1517#ifdef CONFIG_SMP
1518 extern void kick_process(struct task_struct *tsk);
1519#else
1520 static inline void kick_process(struct task_struct *tsk) { }
1521#endif
1522
1523extern void exit_files(struct task_struct *);
1524
1525extern void exit_itimers(struct signal_struct *);
1526
1527extern int do_execve(struct filename *,
1528 const char __user * const __user *,
1529 const char __user * const __user *);
1530extern int do_execveat(int, struct filename *,
1531 const char __user * const __user *,
1532 const char __user * const __user *,
1533 int);
1534
1535extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
1536static inline void set_task_comm(struct task_struct *tsk, const char *from)
1537{
1538 __set_task_comm(tsk, from, false);
1539}
1540extern char *get_task_comm(char *to, struct task_struct *tsk);
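/*
 * Illustrative sketch, not from the original header: copying out a task's
 * name; get_task_comm() takes task_lock() internally, as noted at the
 * comm[] field above:
 *
 *	char comm[TASK_COMM_LEN];
 *
 *	get_task_comm(comm, p);
 */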
1541
1542#ifdef CONFIG_SMP
1543void scheduler_ipi(void);
1544extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
1545#else
1546static inline void scheduler_ipi(void) { }
1547static inline unsigned long wait_task_inactive(struct task_struct *p,
1548 long match_state)
1549{
1550 return 1;
1551}
1552#endif
1553
1554/*
1555 * Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
1556 * subscriptions and synchronises with wait4(). Also used in procfs. Also
1557 * pins the final release of task.io_context. Also protects ->cpuset and
1558 * ->cgroup.subsys[]. And ->vfork_done.
1559 *
1560 * Nests both inside and outside of read_lock(&tasklist_lock).
1561 * It must not be nested with write_lock_irq(&tasklist_lock),
1562 * neither inside nor outside.
1563 */
1564static inline void task_lock(struct task_struct *p)
1565{
1566 spin_lock(&p->alloc_lock);
1567}
1568
1569static inline void task_unlock(struct task_struct *p)
1570{
1571 spin_unlock(&p->alloc_lock);
1572}
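/*
 * Illustrative sketch, not from the original header: the pointers listed
 * above (->fs, ->files, ->mm, ...) are stable while alloc_lock is held:
 *
 *	task_lock(p);
 *	...	inspect or update p->fs, p->files, etc. ...
 *	task_unlock(p);
 */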
1573
1574#ifdef CONFIG_THREAD_INFO_IN_TASK
1575
1576static inline struct thread_info *task_thread_info(struct task_struct *task)
1577{
1578 return &task->thread_info;
1579}
1580
1581/*
1582 * When accessing the stack of a non-current task that might exit, use
1583 * try_get_task_stack() instead. task_stack_page will return a pointer
1584 * that could get freed out from under you.
1585 */
1586static inline void *task_stack_page(const struct task_struct *task)
1587{
1588 return task->stack;
1589}
1590
1591#define setup_thread_stack(new,old) do { } while(0)
1592
1593static inline unsigned long *end_of_stack(const struct task_struct *task)
1594{
1595 return task->stack;
1596}
1597
1598#elif !defined(__HAVE_THREAD_FUNCTIONS)
1599
1600#define task_thread_info(task) ((struct thread_info *)(task)->stack)
1601#define task_stack_page(task) ((void *)(task)->stack)
1602
1603static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
1604{
1605 *task_thread_info(p) = *task_thread_info(org);
1606 task_thread_info(p)->task = p;
1607}
1608
1609/*
1610 * Return the address of the last usable long on the stack.
1611 *
1612 * When the stack grows down, this is just above the thread
1613 * info struct. Going any lower will corrupt the threadinfo.
1614 *
1615 * When the stack grows up, this is the highest address.
1616 * Beyond that position, we corrupt data on the next page.
1617 */
1618static inline unsigned long *end_of_stack(struct task_struct *p)
1619{
1620#ifdef CONFIG_STACK_GROWSUP
1621 return (unsigned long *)((unsigned long)task_thread_info(p) + THREAD_SIZE) - 1;
1622#else
1623 return (unsigned long *)(task_thread_info(p) + 1);
1624#endif
1625}
1626
1627#endif
1628
1629#ifdef CONFIG_THREAD_INFO_IN_TASK
1630static inline void *try_get_task_stack(struct task_struct *tsk)
1631{
1632 return atomic_inc_not_zero(&tsk->stack_refcount) ?
1633 task_stack_page(tsk) : NULL;
1634}
1635
1636extern void put_task_stack(struct task_struct *tsk);
1637#else
1638static inline void *try_get_task_stack(struct task_struct *tsk)
1639{
1640 return task_stack_page(tsk);
1641}
1642
1643static inline void put_task_stack(struct task_struct *tsk) {}
1644#endif
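/*
 * Illustrative sketch, not from the original header: the safe pattern for
 * looking at another task's stack:
 *
 *	void *stack = try_get_task_stack(p);
 *
 *	if (stack) {
 *		...	walk or dump the stack ...
 *		put_task_stack(p);
 *	}
 */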
1645
1646#define task_stack_end_corrupted(task) \
1647 (*(end_of_stack(task)) != STACK_END_MAGIC)
1648
1649static inline int object_is_on_stack(void *obj)
1650{
1651 void *stack = task_stack_page(current);
1652
1653 return (obj >= stack) && (obj < (stack + THREAD_SIZE));
1654}
1655
1656extern void thread_stack_cache_init(void);
1657
1658#ifdef CONFIG_DEBUG_STACK_USAGE
1659static inline unsigned long stack_not_used(struct task_struct *p)
1660{
1661 unsigned long *n = end_of_stack(p);
1662
1663 do { /* Skip over canary */
1664# ifdef CONFIG_STACK_GROWSUP
1665 n--;
1666# else
1667 n++;
1668# endif
1669 } while (!*n);
1670
1671# ifdef CONFIG_STACK_GROWSUP
1672 return (unsigned long)end_of_stack(p) - (unsigned long)n;
1673# else
1674 return (unsigned long)n - (unsigned long)end_of_stack(p);
1675# endif
1676}
1677#endif
1678extern void set_task_stack_end_magic(struct task_struct *tsk);
1679
1680/* set thread flags in other task's structures
1681 * - see asm/thread_info.h for TIF_xxxx flags available
1682 */
1683static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
1684{
1685 set_ti_thread_flag(task_thread_info(tsk), flag);
1686}
1687
1688static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
1689{
1690 clear_ti_thread_flag(task_thread_info(tsk), flag);
1691}
1692
1693static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
1694{
1695 return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
1696}
1697
1698static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
1699{
1700 return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
1701}
1702
1703static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
1704{
1705 return test_ti_thread_flag(task_thread_info(tsk), flag);
1706}
1707
1708static inline void set_tsk_need_resched(struct task_struct *tsk)
1709{
1710 set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1711}
1712
1713static inline void clear_tsk_need_resched(struct task_struct *tsk)
1714{
1715 clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
1716}
1717
1718static inline int test_tsk_need_resched(struct task_struct *tsk)
1719{
1720 return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
1721}
1722
1723/*
1724 * cond_resched() and cond_resched_lock(): latency reduction via
1725 * explicit rescheduling in places that are safe. The return
1726 * value indicates whether a reschedule was done in fact.
1727 * cond_resched_lock() will drop the spinlock before scheduling,
1728 * cond_resched_softirq() will enable bhs before scheduling.
1729 */
1730#ifndef CONFIG_PREEMPT
1731extern int _cond_resched(void);
1732#else
1733static inline int _cond_resched(void) { return 0; }
1734#endif
1735
1736#define cond_resched() ({ \
1737 ___might_sleep(__FILE__, __LINE__, 0); \
1738 _cond_resched(); \
1739})
1740
1741extern int __cond_resched_lock(spinlock_t *lock);
1742
1743#define cond_resched_lock(lock) ({ \
1744 ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
1745 __cond_resched_lock(lock); \
1746})
1747
1748extern int __cond_resched_softirq(void);
1749
1750#define cond_resched_softirq() ({ \
1751 ___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
1752 __cond_resched_softirq(); \
1753})
1754
1755static inline void cond_resched_rcu(void)
1756{
1757#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
1758 rcu_read_unlock();
1759 cond_resched();
1760 rcu_read_lock();
1761#endif
1762}
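/*
 * Illustrative sketch, not from the original header: breaking up a long
 * running loop in process context (obj, list and process() are
 * placeholders):
 *
 *	list_for_each_entry(obj, &list, node) {
 *		process(obj);
 *		cond_resched();
 *	}
 */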
1763
1764/*
1765 * Does a critical section need to be broken due to another
1766 * task waiting?: (technically does not depend on CONFIG_PREEMPT,
1767 * but a general need for low latency)
1768 */
1769static inline int spin_needbreak(spinlock_t *lock)
1770{
1771#ifdef CONFIG_PREEMPT
1772 return spin_is_contended(lock);
1773#else
1774 return 0;
1775#endif
1776}
1777
1778static __always_inline bool need_resched(void)
1779{
1780 return unlikely(tif_need_resched());
1781}
1782
1783/*
1784 * Thread group CPU time accounting.
1785 */
1786void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times);
1787void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times);
1788
1789/*
1790 * Wrappers for p->thread_info->cpu access. No-op on UP.
1791 */
1792#ifdef CONFIG_SMP
1793
1794static inline unsigned int task_cpu(const struct task_struct *p)
1795{
1796#ifdef CONFIG_THREAD_INFO_IN_TASK
1797 return p->cpu;
1798#else
1799 return task_thread_info(p)->cpu;
1800#endif
1801}
1802
1803static inline int task_node(const struct task_struct *p)
1804{
1805 return cpu_to_node(task_cpu(p));
1806}
1807
1808extern void set_task_cpu(struct task_struct *p, unsigned int cpu);
1809
1810#else
1811
1812static inline unsigned int task_cpu(const struct task_struct *p)
1813{
1814 return 0;
1815}
1816
1817static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
1818{
1819}
1820
1821#endif /* CONFIG_SMP */
1822
1823/*
1824 * In order to reduce various lock holder preemption latencies provide an
1825 * interface to see if a vCPU is currently running or not.
1826 *
1827 * This allows us to terminate optimistic spin loops and block, analogous to
1828 * the native optimistic spin heuristic of testing if the lock owner task is
1829 * running or not.
1830 */
1831#ifndef vcpu_is_preempted
1832# define vcpu_is_preempted(cpu) false
1833#endif
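/*
 * Illustrative sketch, not from the original header: an optimistic spin
 * loop can bail out once the lock owner's vCPU is preempted (lock->owner
 * is a placeholder for whatever tracks the current owner):
 *
 *	while (READ_ONCE(lock->owner) == owner) {
 *		if (need_resched() || vcpu_is_preempted(task_cpu(owner)))
 *			break;
 *		cpu_relax();
 *	}
 */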
1834
1835extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
1836extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
1837
1838#ifdef CONFIG_CGROUP_SCHED
1839extern struct task_group root_task_group;
1840#endif /* CONFIG_CGROUP_SCHED */
1841
1842extern int task_can_switch_user(struct user_struct *up,
1843 struct task_struct *tsk);
1844
1845#ifndef TASK_SIZE_OF
1846#define TASK_SIZE_OF(tsk) TASK_SIZE
1847#endif
1848
1849#endif