/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Scheduler internal types and methods:
 */
#include <linux/sched.h>

#include <linux/sched/autogroup.h>
#include <linux/sched/clock.h>
#include <linux/sched/coredump.h>
#include <linux/sched/cpufreq.h>
#include <linux/sched/cputime.h>
#include <linux/sched/deadline.h>
#include <linux/sched/debug.h>
#include <linux/sched/hotplug.h>
#include <linux/sched/idle.h>
#include <linux/sched/init.h>
#include <linux/sched/isolation.h>
#include <linux/sched/jobctl.h>
#include <linux/sched/loadavg.h>
#include <linux/sched/mm.h>
#include <linux/sched/nohz.h>
#include <linux/sched/numa_balancing.h>
#include <linux/sched/prio.h>
#include <linux/sched/rt.h>
#include <linux/sched/signal.h>
#include <linux/sched/smt.h>
#include <linux/sched/stat.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/task.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/topology.h>
#include <linux/sched/user.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/xacct.h>

#include <uapi/linux/sched/types.h>

#include <linux/binfmts.h>
#include <linux/blkdev.h>
#include <linux/compat.h>
#include <linux/context_tracking.h>
#include <linux/cpufreq.h>
#include <linux/cpuidle.h>
#include <linux/cpuset.h>
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/delayacct.h>
#include <linux/energy_model.h>
#include <linux/init_task.h>
#include <linux/kprobes.h>
#include <linux/kthread.h>
#include <linux/membarrier.h>
#include <linux/migrate.h>
#include <linux/mmu_context.h>
#include <linux/nmi.h>
#include <linux/proc_fs.h>
#include <linux/prefetch.h>
#include <linux/profile.h>
#include <linux/psi.h>
#include <linux/rcupdate_wait.h>
#include <linux/security.h>
#include <linux/stop_machine.h>
#include <linux/suspend.h>
#include <linux/swait.h>
#include <linux/syscalls.h>
#include <linux/task_work.h>
#include <linux/tsacct_kern.h>

#include <asm/tlb.h>

#ifdef CONFIG_PARAVIRT
# include <asm/paravirt.h>
#endif

#include "cpupri.h"
#include "cpudeadline.h"

#ifdef CONFIG_SCHED_DEBUG
# define SCHED_WARN_ON(x)	WARN_ONCE(x, #x)
#else
# define SCHED_WARN_ON(x)	({ (void)(x), 0; })
#endif

struct rq;
struct cpuidle_state;

/* task_struct::on_rq states: */
#define TASK_ON_RQ_QUEUED	1
#define TASK_ON_RQ_MIGRATING	2

extern __read_mostly int scheduler_running;

extern unsigned long calc_load_update;
extern atomic_long_t calc_load_tasks;

extern void calc_global_load_tick(struct rq *this_rq);
extern long calc_load_fold_active(struct rq *this_rq, long adjust);

/*
 * Helpers for converting nanosecond timing to jiffy resolution
 */
#define NS_TO_JIFFIES(TIME)	((unsigned long)(TIME) / (NSEC_PER_SEC / HZ))

/*
 * Increase resolution of nice-level calculations for 64-bit architectures.
 * The extra resolution improves shares distribution and load balancing of
 * low-weight task groups (eg. nice +19 on an autogroup), deeper taskgroup
 * hierarchies, especially on larger systems. This is not a user-visible change
 * and does not change the user-interface for setting shares/weights.
 *
 * We increase resolution only if we have enough bits to allow this increased
 * resolution (i.e. 64-bit). The costs for increasing resolution when 32-bit
 * are pretty high and the returns do not justify the increased costs.
 *
 * Really only required when CONFIG_FAIR_GROUP_SCHED=y is also set, but to
 * increase coverage and consistency always enable it on 64-bit platforms.
 */
#ifdef CONFIG_64BIT
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		((w) << SCHED_FIXEDPOINT_SHIFT)
# define scale_load_down(w)	((w) >> SCHED_FIXEDPOINT_SHIFT)
#else
# define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT)
# define scale_load(w)		(w)
# define scale_load_down(w)	(w)
#endif

/*
 * Task weight (visible to users) and its load (invisible to users) have
 * independent resolution, but they should be well calibrated. We use
 * scale_load() and scale_load_down(w) to convert between them. The
 * following must be true:
 *
 *  scale_load(sched_prio_to_weight[USER_PRIO(NICE_TO_PRIO(0))]) == NICE_0_LOAD
 *
 */
#define NICE_0_LOAD		(1L << NICE_0_LOAD_SHIFT)

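/*
 * Illustrative sketch (not part of the original header): shows the
 * fixed-point conversion performed by the macros above.  On 64-bit,
 * scale_load(1024) == 1024 << SCHED_FIXEDPOINT_SHIFT == NICE_0_LOAD and
 * scale_load_down() undoes the shift; on 32-bit both macros are identity
 * operations.  The helper name is hypothetical.
 */
static inline unsigned long scale_load_roundtrip_example(void)
{
	unsigned long w  = 1024;		/* the nice-0 task weight */
	unsigned long hi = scale_load(w);	/* == NICE_0_LOAD on 64-bit */

	return scale_load_down(hi);		/* back to 1024 */
}
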
/*
 * Single value that decides SCHED_DEADLINE internal math precision.
 * 10 -> just above 1us
 * 9  -> just above 0.5us
 */
#define DL_SCALE		10

/*
 * Single value that denotes runtime == period, ie unlimited time.
 */
#define RUNTIME_INF		((u64)~0ULL)

static inline int idle_policy(int policy)
{
	return policy == SCHED_IDLE;
}
static inline int fair_policy(int policy)
{
	return policy == SCHED_NORMAL || policy == SCHED_BATCH;
}

static inline int rt_policy(int policy)
{
	return policy == SCHED_FIFO || policy == SCHED_RR;
}

static inline int dl_policy(int policy)
{
	return policy == SCHED_DEADLINE;
}
static inline bool valid_policy(int policy)
{
	return idle_policy(policy) || fair_policy(policy) ||
		rt_policy(policy) || dl_policy(policy);
}

static inline int task_has_idle_policy(struct task_struct *p)
{
	return idle_policy(p->policy);
}

static inline int task_has_rt_policy(struct task_struct *p)
{
	return rt_policy(p->policy);
}

static inline int task_has_dl_policy(struct task_struct *p)
{
	return dl_policy(p->policy);
}

#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)

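/*
 * Illustrative sketch (not part of the original header): cap_scale()
 * scales a value by a capacity expressed on the SCHED_CAPACITY_SCALE
 * (1024) fixed-point scale.  The helper and the numbers below are
 * examples only.
 */
static inline unsigned long cap_scale_example(void)
{
	unsigned long util = 512;	/* half of SCHED_CAPACITY_SCALE */
	unsigned long cap  = 768;	/* capacity of a hypothetical little CPU */

	return cap_scale(util, cap);	/* 512 * 768 >> 10 == 384 */
}
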
/*
 * !! For sched_setattr_nocheck() (kernel) only !!
 *
 * This is actually gross. :(
 *
 * It is used to make schedutil kworker(s) higher priority than SCHED_DEADLINE
 * tasks, but still be able to sleep. We need this on platforms that cannot
 * atomically change clock frequency. Remove once fast switching is available
 * on such platforms.
 *
 * SUGOV stands for SchedUtil GOVernor.
 */
#define SCHED_FLAG_SUGOV	0x10000000

static inline bool dl_entity_is_special(struct sched_dl_entity *dl_se)
{
#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
	return unlikely(dl_se->flags & SCHED_FLAG_SUGOV);
#else
	return false;
#endif
}

/*
 * Tells if entity @a should preempt entity @b.
 */
static inline bool
dl_entity_preempt(struct sched_dl_entity *a, struct sched_dl_entity *b)
{
	return dl_entity_is_special(a) ||
	       dl_time_before(a->deadline, b->deadline);
}

/*
 * This is the priority-queue data structure of the RT scheduling class:
 */
struct rt_prio_array {
	DECLARE_BITMAP(bitmap, MAX_RT_PRIO+1); /* include 1 bit for delimiter */
	struct list_head queue[MAX_RT_PRIO];
};

struct rt_bandwidth {
	/* nests inside the rq lock: */
	raw_spinlock_t		rt_runtime_lock;
	ktime_t			rt_period;
	u64			rt_runtime;
	struct hrtimer		rt_period_timer;
	unsigned int		rt_period_active;
};

void __dl_clear_params(struct task_struct *p);

/*
 * To keep the bandwidth of -deadline tasks and groups under control
 * we need some place where we can:
 *  - store the maximum -deadline bandwidth of the system (the group);
 *  - cache the fraction of that bandwidth that is currently allocated.
 *
 * This is all done in the data structure below. It is similar to the
 * one used for RT-throttling (rt_bandwidth), with the main difference
 * that, since here we are only interested in admission control, we
 * neither decrease any runtime while the group "executes" nor need a
 * timer to replenish it.
 *
 * With respect to SMP, the bandwidth is given on a per-CPU basis,
 * meaning that:
 *  - dl_bw (< 100%) is the bandwidth of the system (group) on each CPU;
 *  - the dl_total_bw array contains, in its i-th element, the bandwidth
 *    currently allocated on the i-th CPU.
 * Moreover, groups consume bandwidth on each CPU, while tasks only
 * consume bandwidth on the CPU they're running on.
 * Finally, dl_total_bw_cpu caches the index of dl_total_bw that will be
 * shown the next time the proc or cgroup controls are read; it can in
 * turn be changed by writing to its own control.
 */
struct dl_bandwidth {
	raw_spinlock_t		dl_runtime_lock;
	u64			dl_runtime;
	u64			dl_period;
};

static inline int dl_bandwidth_enabled(void)
{
	return sysctl_sched_rt_runtime >= 0;
}

struct dl_bw {
	raw_spinlock_t		lock;
	u64			bw;
	u64			total_bw;
};

static inline void __dl_update(struct dl_bw *dl_b, s64 bw);

static inline
void __dl_sub(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
	dl_b->total_bw -= tsk_bw;
	__dl_update(dl_b, (s32)tsk_bw / cpus);
}

static inline
void __dl_add(struct dl_bw *dl_b, u64 tsk_bw, int cpus)
{
	dl_b->total_bw += tsk_bw;
	__dl_update(dl_b, -((s32)tsk_bw / cpus));
}

static inline
bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
{
	return dl_b->bw != -1 &&
	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
}

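/*
 * Illustrative sketch (not part of the original header): how the helpers
 * above combine for deadline admission control.  A bandwidth request is
 * admitted only if __dl_overflow() says the root domain can still fit it,
 * after which the accounting is updated with __dl_add().  Locking and the
 * surrounding task-state handling are omitted; the helper name is
 * hypothetical.
 */
static inline int dl_admit_example(struct dl_bw *dl_b, int cpus, u64 new_bw)
{
	if (__dl_overflow(dl_b, cpus, 0, new_bw))
		return -EBUSY;			/* would exceed dl_b->bw * cpus */

	__dl_add(dl_b, new_bw, cpus);		/* account the accepted bandwidth */
	return 0;
}
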
extern void dl_change_utilization(struct task_struct *p, u64 new_bw);
extern void init_dl_bw(struct dl_bw *dl_b);
extern int  sched_dl_global_validate(void);
extern void sched_dl_do_global(void);
extern int  sched_dl_overflow(struct task_struct *p, int policy, const struct sched_attr *attr);
extern void __setparam_dl(struct task_struct *p, const struct sched_attr *attr);
extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr);
extern bool __checkparam_dl(const struct sched_attr *attr);
extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr);
extern int  dl_task_can_attach(struct task_struct *p, const struct cpumask *cs_cpus_allowed);
extern int  dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial);
extern bool dl_cpu_busy(unsigned int cpu);

#ifdef CONFIG_CGROUP_SCHED

#include <linux/cgroup.h>
#include <linux/psi.h>

struct cfs_rq;
struct rt_rq;

35cf4e50 329extern struct list_head task_groups;
029632fb
PZ
330
331struct cfs_bandwidth {
332#ifdef CONFIG_CFS_BANDWIDTH
97fb7a0a
IM
333 raw_spinlock_t lock;
334 ktime_t period;
335 u64 quota;
336 u64 runtime;
337 s64 hierarchical_quota;
97fb7a0a 338
66567fcb 339 u8 idle;
340 u8 period_active;
341 u8 distribute_running;
342 u8 slack_started;
97fb7a0a
IM
343 struct hrtimer period_timer;
344 struct hrtimer slack_timer;
345 struct list_head throttled_cfs_rq;
346
347 /* Statistics: */
348 int nr_periods;
349 int nr_throttled;
350 u64 throttled_time;
029632fb
PZ
351#endif
352};
353
97fb7a0a 354/* Task group related information */
029632fb
PZ
355struct task_group {
356 struct cgroup_subsys_state css;
357
358#ifdef CONFIG_FAIR_GROUP_SCHED
97fb7a0a
IM
359 /* schedulable entities of this group on each CPU */
360 struct sched_entity **se;
361 /* runqueue "owned" by this group on each CPU */
362 struct cfs_rq **cfs_rq;
363 unsigned long shares;
029632fb 364
fa6bddeb 365#ifdef CONFIG_SMP
b0367629
WL
366 /*
367 * load_avg can be heavily contended at clock tick time, so put
368 * it in its own cacheline separated from the fields above which
369 * will also be accessed at each tick.
370 */
97fb7a0a 371 atomic_long_t load_avg ____cacheline_aligned;
029632fb 372#endif
fa6bddeb 373#endif
029632fb
PZ
374
375#ifdef CONFIG_RT_GROUP_SCHED
97fb7a0a
IM
376 struct sched_rt_entity **rt_se;
377 struct rt_rq **rt_rq;
029632fb 378
97fb7a0a 379 struct rt_bandwidth rt_bandwidth;
029632fb
PZ
380#endif
381
97fb7a0a
IM
382 struct rcu_head rcu;
383 struct list_head list;
029632fb 384
97fb7a0a
IM
385 struct task_group *parent;
386 struct list_head siblings;
387 struct list_head children;
029632fb
PZ
388
389#ifdef CONFIG_SCHED_AUTOGROUP
97fb7a0a 390 struct autogroup *autogroup;
029632fb
PZ
391#endif
392
97fb7a0a 393 struct cfs_bandwidth cfs_bandwidth;
029632fb
PZ
394};
395
396#ifdef CONFIG_FAIR_GROUP_SCHED
397#define ROOT_TASK_GROUP_LOAD NICE_0_LOAD
398
/*
 * A weight of 0 or 1 can cause arithmetic problems.
 * The weight of a cfs_rq is the sum of the weights of the entities
 * queued on that cfs_rq, so the weight of an entity should not be too
 * large; the same applies to the shares value of a task group.
 * (The default weight is 1024 - so there's no practical
 *  limitation from this.)
 */
#define MIN_SHARES		(1UL <<  1)
#define MAX_SHARES		(1UL << 18)
#endif
410
029632fb
PZ
411typedef int (*tg_visitor)(struct task_group *, void *);
412
413extern int walk_tg_tree_from(struct task_group *from,
414 tg_visitor down, tg_visitor up, void *data);
415
416/*
417 * Iterate the full tree, calling @down when first entering a node and @up when
418 * leaving it for the final time.
419 *
420 * Caller must hold rcu_lock or sufficient equivalent.
421 */
422static inline int walk_tg_tree(tg_visitor down, tg_visitor up, void *data)
423{
424 return walk_tg_tree_from(&root_task_group, down, up, data);
425}
426
427extern int tg_nop(struct task_group *tg, void *data);
428
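/*
 * Illustrative sketch (not part of the original header): using
 * walk_tg_tree() with a custom visitor.  @down runs when a group is first
 * entered, @up when it is left for the last time; tg_nop() covers the
 * direction that needs no work.  Must be called under rcu_read_lock().
 * The helpers below are hypothetical.
 */
static inline int tg_count_one_example(struct task_group *tg, void *data)
{
	(*(int *)data)++;	/* count every task group visited */
	return 0;		/* a non-zero return would abort the walk */
}

static inline int count_task_groups_example(void)
{
	int nr = 0;

	rcu_read_lock();
	walk_tg_tree(tg_count_one_example, tg_nop, &nr);
	rcu_read_unlock();

	return nr;
}
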
429extern void free_fair_sched_group(struct task_group *tg);
430extern int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent);
8663e24d 431extern void online_fair_sched_group(struct task_group *tg);
6fe1f348 432extern void unregister_fair_sched_group(struct task_group *tg);
029632fb
PZ
433extern void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
434 struct sched_entity *se, int cpu,
435 struct sched_entity *parent);
436extern void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
029632fb
PZ
437
438extern void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b);
77a4d1a1 439extern void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b);
029632fb
PZ
440extern void unthrottle_cfs_rq(struct cfs_rq *cfs_rq);
441
442extern void free_rt_sched_group(struct task_group *tg);
443extern int alloc_rt_sched_group(struct task_group *tg, struct task_group *parent);
444extern void init_tg_rt_entry(struct task_group *tg, struct rt_rq *rt_rq,
445 struct sched_rt_entity *rt_se, int cpu,
446 struct sched_rt_entity *parent);
8887cd99
NP
447extern int sched_group_set_rt_runtime(struct task_group *tg, long rt_runtime_us);
448extern int sched_group_set_rt_period(struct task_group *tg, u64 rt_period_us);
449extern long sched_group_rt_runtime(struct task_group *tg);
450extern long sched_group_rt_period(struct task_group *tg);
451extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk);
029632fb 452
25cc7da7
LZ
453extern struct task_group *sched_create_group(struct task_group *parent);
454extern void sched_online_group(struct task_group *tg,
455 struct task_group *parent);
456extern void sched_destroy_group(struct task_group *tg);
457extern void sched_offline_group(struct task_group *tg);
458
459extern void sched_move_task(struct task_struct *tsk);
460
461#ifdef CONFIG_FAIR_GROUP_SCHED
462extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
ad936d86
BP
463
464#ifdef CONFIG_SMP
465extern void set_task_rq_fair(struct sched_entity *se,
466 struct cfs_rq *prev, struct cfs_rq *next);
467#else /* !CONFIG_SMP */
468static inline void set_task_rq_fair(struct sched_entity *se,
469 struct cfs_rq *prev, struct cfs_rq *next) { }
470#endif /* CONFIG_SMP */
471#endif /* CONFIG_FAIR_GROUP_SCHED */
25cc7da7 472
029632fb
PZ
473#else /* CONFIG_CGROUP_SCHED */
474
475struct cfs_bandwidth { };
476
477#endif /* CONFIG_CGROUP_SCHED */
478
479/* CFS-related fields in a runqueue */
480struct cfs_rq {
97fb7a0a
IM
481 struct load_weight load;
482 unsigned long runnable_weight;
483 unsigned int nr_running;
43e9f7f2
VK
484 unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */
485 unsigned int idle_h_nr_running; /* SCHED_IDLE */
029632fb 486
97fb7a0a
IM
487 u64 exec_clock;
488 u64 min_vruntime;
029632fb 489#ifndef CONFIG_64BIT
97fb7a0a 490 u64 min_vruntime_copy;
029632fb
PZ
491#endif
492
97fb7a0a 493 struct rb_root_cached tasks_timeline;
029632fb 494
029632fb
PZ
495 /*
496 * 'curr' points to currently running entity on this cfs_rq.
497 * It is set to NULL otherwise (i.e when none are currently running).
498 */
97fb7a0a
IM
499 struct sched_entity *curr;
500 struct sched_entity *next;
501 struct sched_entity *last;
502 struct sched_entity *skip;
029632fb
PZ
503
504#ifdef CONFIG_SCHED_DEBUG
97fb7a0a 505 unsigned int nr_spread_over;
029632fb
PZ
506#endif
507
2dac754e
PT
508#ifdef CONFIG_SMP
509 /*
9d89c257 510 * CFS load tracking
2dac754e 511 */
97fb7a0a 512 struct sched_avg avg;
2a2f5d4e 513#ifndef CONFIG_64BIT
97fb7a0a 514 u64 load_last_update_time_copy;
9d89c257 515#endif
2a2f5d4e
PZ
516 struct {
517 raw_spinlock_t lock ____cacheline_aligned;
518 int nr;
519 unsigned long load_avg;
520 unsigned long util_avg;
0e2d2aaa 521 unsigned long runnable_sum;
2a2f5d4e 522 } removed;
82958366 523
9d89c257 524#ifdef CONFIG_FAIR_GROUP_SCHED
97fb7a0a
IM
525 unsigned long tg_load_avg_contrib;
526 long propagate;
527 long prop_runnable_sum;
0e2d2aaa 528
82958366
PT
529 /*
530 * h_load = weight * f(tg)
531 *
532 * Where f(tg) is the recursive weight fraction assigned to
533 * this group.
534 */
97fb7a0a
IM
535 unsigned long h_load;
536 u64 last_h_load_update;
537 struct sched_entity *h_load_next;
68520796 538#endif /* CONFIG_FAIR_GROUP_SCHED */
82958366
PT
539#endif /* CONFIG_SMP */
540
029632fb 541#ifdef CONFIG_FAIR_GROUP_SCHED
97fb7a0a 542 struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */
029632fb
PZ
543
544 /*
545 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
546 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
547 * (like users, containers etc.)
548 *
97fb7a0a
IM
549 * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU.
550 * This list is used during load balance.
029632fb 551 */
97fb7a0a
IM
552 int on_list;
553 struct list_head leaf_cfs_rq_list;
554 struct task_group *tg; /* group that "owns" this runqueue */
029632fb 555
029632fb 556#ifdef CONFIG_CFS_BANDWIDTH
97fb7a0a 557 int runtime_enabled;
97fb7a0a
IM
558 s64 runtime_remaining;
559
560 u64 throttled_clock;
561 u64 throttled_clock_task;
562 u64 throttled_clock_task_time;
563 int throttled;
564 int throttle_count;
565 struct list_head throttled_list;
029632fb
PZ
566#endif /* CONFIG_CFS_BANDWIDTH */
567#endif /* CONFIG_FAIR_GROUP_SCHED */
568};
569
570static inline int rt_bandwidth_enabled(void)
571{
572 return sysctl_sched_rt_runtime >= 0;
573}
574
b6366f04 575/* RT IPI pull logic requires IRQ_WORK */
4bdced5c 576#if defined(CONFIG_IRQ_WORK) && defined(CONFIG_SMP)
b6366f04
SR
577# define HAVE_RT_PUSH_IPI
578#endif
579
029632fb
PZ
580/* Real-Time classes' related field in a runqueue: */
581struct rt_rq {
97fb7a0a
IM
582 struct rt_prio_array active;
583 unsigned int rt_nr_running;
584 unsigned int rr_nr_running;
029632fb
PZ
585#if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
586 struct {
97fb7a0a 587 int curr; /* highest queued rt task prio */
029632fb 588#ifdef CONFIG_SMP
97fb7a0a 589 int next; /* next highest */
029632fb
PZ
590#endif
591 } highest_prio;
592#endif
593#ifdef CONFIG_SMP
97fb7a0a
IM
594 unsigned long rt_nr_migratory;
595 unsigned long rt_nr_total;
596 int overloaded;
597 struct plist_head pushable_tasks;
371bf427 598
b6366f04 599#endif /* CONFIG_SMP */
97fb7a0a 600 int rt_queued;
f4ebcbc0 601
97fb7a0a
IM
602 int rt_throttled;
603 u64 rt_time;
604 u64 rt_runtime;
029632fb 605 /* Nests inside the rq lock: */
97fb7a0a 606 raw_spinlock_t rt_runtime_lock;
029632fb
PZ
607
608#ifdef CONFIG_RT_GROUP_SCHED
97fb7a0a 609 unsigned long rt_nr_boosted;
029632fb 610
97fb7a0a
IM
611 struct rq *rq;
612 struct task_group *tg;
029632fb
PZ
613#endif
614};
615
296b2ffe
VG
616static inline bool rt_rq_is_runnable(struct rt_rq *rt_rq)
617{
618 return rt_rq->rt_queued && rt_rq->rt_nr_running;
619}
620
aab03e05
DF
621/* Deadline class' related fields in a runqueue */
622struct dl_rq {
623 /* runqueue is an rbtree, ordered by deadline */
97fb7a0a 624 struct rb_root_cached root;
aab03e05 625
97fb7a0a 626 unsigned long dl_nr_running;
1baca4ce
JL
627
628#ifdef CONFIG_SMP
629 /*
630 * Deadline values of the currently executing and the
631 * earliest ready task on this rq. Caching these facilitates
dfcb245e 632 * the decision whether or not a ready but not running task
1baca4ce
JL
633 * should migrate somewhere else.
634 */
635 struct {
97fb7a0a
IM
636 u64 curr;
637 u64 next;
1baca4ce
JL
638 } earliest_dl;
639
97fb7a0a
IM
640 unsigned long dl_nr_migratory;
641 int overloaded;
1baca4ce
JL
642
643 /*
644 * Tasks on this rq that can be pushed away. They are kept in
645 * an rb-tree, ordered by tasks' deadlines, with caching
646 * of the leftmost (earliest deadline) element.
647 */
97fb7a0a 648 struct rb_root_cached pushable_dl_tasks_root;
332ac17e 649#else
97fb7a0a 650 struct dl_bw dl_bw;
1baca4ce 651#endif
e36d8677
LA
652 /*
653 * "Active utilization" for this runqueue: increased when a
654 * task wakes up (becomes TASK_RUNNING) and decreased when a
655 * task blocks
656 */
97fb7a0a 657 u64 running_bw;
4da3abce 658
8fd27231
LA
659 /*
660 * Utilization of the tasks "assigned" to this runqueue (including
661 * the tasks that are in runqueue and the tasks that executed on this
662 * CPU and blocked). Increased when a task moves to this runqueue, and
663 * decreased when the task moves away (migrates, changes scheduling
664 * policy, or terminates).
665 * This is needed to compute the "inactive utilization" for the
666 * runqueue (inactive utilization = this_bw - running_bw).
667 */
97fb7a0a
IM
668 u64 this_bw;
669 u64 extra_bw;
8fd27231 670
4da3abce
LA
671 /*
672 * Inverse of the fraction of CPU utilization that can be reclaimed
673 * by the GRUB algorithm.
674 */
97fb7a0a 675 u64 bw_ratio;
aab03e05
DF
676};
677
c0796298
VG
678#ifdef CONFIG_FAIR_GROUP_SCHED
679/* An entity is a task if it doesn't "own" a runqueue */
680#define entity_is_task(se) (!se->my_q)
681#else
682#define entity_is_task(se) 1
683#endif
684
029632fb 685#ifdef CONFIG_SMP
c0796298
VG
686/*
687 * XXX we want to get rid of these helpers and use the full load resolution.
688 */
689static inline long se_weight(struct sched_entity *se)
690{
691 return scale_load_down(se->load.weight);
692}
693
694static inline long se_runnable(struct sched_entity *se)
695{
696 return scale_load_down(se->runnable_weight);
697}
029632fb 698
afe06efd
TC
699static inline bool sched_asym_prefer(int a, int b)
700{
701 return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
702}
703
6aa140fa
QP
704struct perf_domain {
705 struct em_perf_domain *em_pd;
706 struct perf_domain *next;
707 struct rcu_head rcu;
708};
709
630246a0
QP
710/* Scheduling group status flags */
711#define SG_OVERLOAD 0x1 /* More than one runnable task on a CPU. */
2802bf3c 712#define SG_OVERUTILIZED 0x2 /* One or more CPUs are over-utilized. */
630246a0 713
029632fb
PZ
714/*
715 * We add the notion of a root-domain which will be used to define per-domain
716 * variables. Each exclusive cpuset essentially defines an island domain by
97fb7a0a 717 * fully partitioning the member CPUs from any other cpuset. Whenever a new
029632fb
PZ
718 * exclusive cpuset is created, we also create and attach a new root-domain
719 * object.
720 *
721 */
722struct root_domain {
97fb7a0a
IM
723 atomic_t refcount;
724 atomic_t rto_count;
725 struct rcu_head rcu;
726 cpumask_var_t span;
727 cpumask_var_t online;
029632fb 728
757ffdd7
VS
729 /*
730 * Indicate pullable load on at least one CPU, e.g:
731 * - More than one runnable task
732 * - Running task is misfit
733 */
575638d1 734 int overload;
4486edd1 735
2802bf3c
MR
736 /* Indicate one or more cpus over-utilized (tipping point) */
737 int overutilized;
738
1baca4ce
JL
739 /*
740 * The bit corresponding to a CPU gets set here if such CPU has more
741 * than one runnable -deadline task (as it is below for RT tasks).
742 */
97fb7a0a
IM
743 cpumask_var_t dlo_mask;
744 atomic_t dlo_count;
745 struct dl_bw dl_bw;
746 struct cpudl cpudl;
1baca4ce 747
4bdced5c
SRRH
748#ifdef HAVE_RT_PUSH_IPI
749 /*
750 * For IPI pull requests, loop across the rto_mask.
751 */
97fb7a0a
IM
752 struct irq_work rto_push_work;
753 raw_spinlock_t rto_lock;
4bdced5c 754 /* These are only updated and read within rto_lock */
97fb7a0a
IM
755 int rto_loop;
756 int rto_cpu;
4bdced5c 757 /* These atomics are updated outside of a lock */
97fb7a0a
IM
758 atomic_t rto_loop_next;
759 atomic_t rto_loop_start;
4bdced5c 760#endif
029632fb
PZ
761 /*
762 * The "RT overload" flag: it gets set if a CPU has more than
763 * one runnable RT task.
764 */
97fb7a0a
IM
765 cpumask_var_t rto_mask;
766 struct cpupri cpupri;
cd92bfd3 767
97fb7a0a 768 unsigned long max_cpu_capacity;
6aa140fa
QP
769
770 /*
771 * NULL-terminated list of performance domains intersecting with the
772 * CPUs of the rd. Protected by RCU.
773 */
7ba7319f 774 struct perf_domain __rcu *pd;
029632fb
PZ
775};
776
f2cb1360 777extern void init_defrootdomain(void);
8d5dc512 778extern int sched_init_domains(const struct cpumask *cpu_map);
f2cb1360 779extern void rq_attach_root(struct rq *rq, struct root_domain *rd);
364f5665
SRV
780extern void sched_get_rd(struct root_domain *rd);
781extern void sched_put_rd(struct root_domain *rd);
029632fb 782
4bdced5c
SRRH
783#ifdef HAVE_RT_PUSH_IPI
784extern void rto_push_irq_work_func(struct irq_work *work);
785#endif
029632fb
PZ
786#endif /* CONFIG_SMP */
787
69842cba
PB
788#ifdef CONFIG_UCLAMP_TASK
789/*
790 * struct uclamp_bucket - Utilization clamp bucket
791 * @value: utilization clamp value for tasks on this clamp bucket
792 * @tasks: number of RUNNABLE tasks on this clamp bucket
793 *
794 * Keep track of how many tasks are RUNNABLE for a given utilization
795 * clamp value.
796 */
797struct uclamp_bucket {
798 unsigned long value : bits_per(SCHED_CAPACITY_SCALE);
799 unsigned long tasks : BITS_PER_LONG - bits_per(SCHED_CAPACITY_SCALE);
800};
801
802/*
803 * struct uclamp_rq - rq's utilization clamp
804 * @value: currently active clamp values for a rq
805 * @bucket: utilization clamp buckets affecting a rq
806 *
807 * Keep track of RUNNABLE tasks on a rq to aggregate their clamp values.
808 * A clamp value is affecting a rq when there is at least one task RUNNABLE
809 * (or actually running) with that value.
810 *
811 * There are up to UCLAMP_CNT possible different clamp values, currently there
812 * are only two: minimum utilization and maximum utilization.
813 *
814 * All utilization clamping values are MAX aggregated, since:
815 * - for util_min: we want to run the CPU at least at the max of the minimum
816 * utilization required by its currently RUNNABLE tasks.
817 * - for util_max: we want to allow the CPU to run up to the max of the
818 * maximum utilization allowed by its currently RUNNABLE tasks.
819 *
820 * Since on each system we expect only a limited number of different
821 * utilization clamp values (UCLAMP_BUCKETS), use a simple array to track
822 * the metrics required to compute all the per-rq utilization clamp values.
823 */
824struct uclamp_rq {
825 unsigned int value;
826 struct uclamp_bucket bucket[UCLAMP_BUCKETS];
827};
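/*
 * Illustrative sketch (not part of the original header): the MAX
 * aggregation described above.  The effective clamp of a rq is the value
 * of the highest bucket that still has RUNNABLE tasks; the real code also
 * handles the "no runnable tasks" case with a default value, which this
 * hypothetical helper simply reports as zero.
 */
static inline unsigned int uclamp_rq_max_value_example(struct uclamp_rq *uc_rq)
{
	unsigned int max_value = 0;
	int bucket_id;

	for (bucket_id = 0; bucket_id < UCLAMP_BUCKETS; bucket_id++) {
		if (!uc_rq->bucket[bucket_id].tasks)
			continue;				/* no RUNNABLE tasks here */
		max_value = uc_rq->bucket[bucket_id].value;	/* buckets are ordered by value */
	}

	return max_value;
}
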
828#endif /* CONFIG_UCLAMP_TASK */
829
/*
 * This is the main, per-CPU runqueue data structure.
 *
 * Locking rule: code paths that need to lock multiple runqueues (such as
 * the load balancer or the thread migration code) must acquire the locks
 * in ascending &runqueue order.
 */
837struct rq {
838 /* runqueue lock: */
97fb7a0a 839 raw_spinlock_t lock;
029632fb
PZ
840
841 /*
842 * nr_running and cpu_load should be in the same cacheline because
843 * remote CPUs use both these fields when doing load calculation.
844 */
97fb7a0a 845 unsigned int nr_running;
0ec8aa00 846#ifdef CONFIG_NUMA_BALANCING
97fb7a0a
IM
847 unsigned int nr_numa_running;
848 unsigned int nr_preferred_running;
a4739eca 849 unsigned int numa_migrate_on;
0ec8aa00 850#endif
3451d024 851#ifdef CONFIG_NO_HZ_COMMON
9fd81dd5 852#ifdef CONFIG_SMP
97fb7a0a 853 unsigned long last_load_update_tick;
e022e0d3 854 unsigned long last_blocked_load_update_tick;
f643ea22 855 unsigned int has_blocked_load;
9fd81dd5 856#endif /* CONFIG_SMP */
00357f5e 857 unsigned int nohz_tick_stopped;
a22e47a4 858 atomic_t nohz_flags;
9fd81dd5 859#endif /* CONFIG_NO_HZ_COMMON */
dcdedb24 860
97fb7a0a
IM
861 unsigned long nr_load_updates;
862 u64 nr_switches;
029632fb 863
69842cba
PB
864#ifdef CONFIG_UCLAMP_TASK
865 /* Utilization clamp values based on CPU's RUNNABLE tasks */
866 struct uclamp_rq uclamp[UCLAMP_CNT] ____cacheline_aligned;
e496187d
PB
867 unsigned int uclamp_flags;
868#define UCLAMP_FLAG_IDLE 0x01
69842cba
PB
869#endif
870
97fb7a0a
IM
871 struct cfs_rq cfs;
872 struct rt_rq rt;
873 struct dl_rq dl;
029632fb
PZ
874
875#ifdef CONFIG_FAIR_GROUP_SCHED
97fb7a0a
IM
876 /* list of leaf cfs_rq on this CPU: */
877 struct list_head leaf_cfs_rq_list;
878 struct list_head *tmp_alone_branch;
a35b6466
PZ
879#endif /* CONFIG_FAIR_GROUP_SCHED */
880
029632fb
PZ
881 /*
882 * This is part of a global counter where only the total sum
883 * over all CPUs matters. A task can increase this counter on
884 * one CPU and if it got migrated afterwards it may decrease
885 * it on another CPU. Always updated under the runqueue lock:
886 */
97fb7a0a 887 unsigned long nr_uninterruptible;
029632fb 888
97fb7a0a
IM
889 struct task_struct *curr;
890 struct task_struct *idle;
891 struct task_struct *stop;
892 unsigned long next_balance;
893 struct mm_struct *prev_mm;
029632fb 894
97fb7a0a
IM
895 unsigned int clock_update_flags;
896 u64 clock;
23127296
VG
897 /* Ensure that all clocks are in the same cache line */
898 u64 clock_task ____cacheline_aligned;
899 u64 clock_pelt;
900 unsigned long lost_idle_time;
029632fb 901
97fb7a0a 902 atomic_t nr_iowait;
029632fb
PZ
903
904#ifdef CONFIG_SMP
994aeb7a
JFG
905 struct root_domain *rd;
906 struct sched_domain __rcu *sd;
97fb7a0a
IM
907
908 unsigned long cpu_capacity;
909 unsigned long cpu_capacity_orig;
029632fb 910
97fb7a0a 911 struct callback_head *balance_callback;
029632fb 912
97fb7a0a 913 unsigned char idle_balance;
e3fca9e7 914
3b1baa64
MR
915 unsigned long misfit_task_load;
916
029632fb 917 /* For active balancing */
97fb7a0a
IM
918 int active_balance;
919 int push_cpu;
920 struct cpu_stop_work active_balance_work;
921
922 /* CPU of this runqueue: */
923 int cpu;
924 int online;
029632fb 925
367456c7
PZ
926 struct list_head cfs_tasks;
927
371bf427 928 struct sched_avg avg_rt;
3727e0e1 929 struct sched_avg avg_dl;
11d4afd4 930#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
91c27493
VG
931 struct sched_avg avg_irq;
932#endif
97fb7a0a
IM
933 u64 idle_stamp;
934 u64 avg_idle;
9bd721c5
JL
935
936 /* This is used to determine avg_idle's max value */
97fb7a0a 937 u64 max_idle_balance_cost;
029632fb
PZ
938#endif
939
940#ifdef CONFIG_IRQ_TIME_ACCOUNTING
97fb7a0a 941 u64 prev_irq_time;
029632fb
PZ
942#endif
943#ifdef CONFIG_PARAVIRT
97fb7a0a 944 u64 prev_steal_time;
029632fb
PZ
945#endif
946#ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
97fb7a0a 947 u64 prev_steal_time_rq;
029632fb
PZ
948#endif
949
950 /* calc_load related fields */
97fb7a0a
IM
951 unsigned long calc_load_update;
952 long calc_load_active;
029632fb
PZ
953
954#ifdef CONFIG_SCHED_HRTICK
955#ifdef CONFIG_SMP
97fb7a0a
IM
956 int hrtick_csd_pending;
957 call_single_data_t hrtick_csd;
029632fb 958#endif
97fb7a0a 959 struct hrtimer hrtick_timer;
029632fb
PZ
960#endif
961
962#ifdef CONFIG_SCHEDSTATS
963 /* latency stats */
97fb7a0a
IM
964 struct sched_info rq_sched_info;
965 unsigned long long rq_cpu_time;
029632fb
PZ
966 /* could above be rq->cfs_rq.exec_clock + rq->rt_rq.rt_runtime ? */
967
968 /* sys_sched_yield() stats */
97fb7a0a 969 unsigned int yld_count;
029632fb
PZ
970
971 /* schedule() stats */
97fb7a0a
IM
972 unsigned int sched_count;
973 unsigned int sched_goidle;
029632fb
PZ
974
975 /* try_to_wake_up() stats */
97fb7a0a
IM
976 unsigned int ttwu_count;
977 unsigned int ttwu_local;
029632fb
PZ
978#endif
979
980#ifdef CONFIG_SMP
97fb7a0a 981 struct llist_head wake_list;
029632fb 982#endif
442bf3aa
DL
983
984#ifdef CONFIG_CPU_IDLE
985 /* Must be inspected within a rcu lock section */
97fb7a0a 986 struct cpuidle_state *idle_state;
442bf3aa 987#endif
029632fb
PZ
988};
989
62478d99
VG
990#ifdef CONFIG_FAIR_GROUP_SCHED
991
992/* CPU runqueue to which this cfs_rq is attached */
993static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
994{
995 return cfs_rq->rq;
996}
997
998#else
999
1000static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
1001{
1002 return container_of(cfs_rq, struct rq, cfs);
1003}
1004#endif
1005
029632fb
PZ
1006static inline int cpu_of(struct rq *rq)
1007{
1008#ifdef CONFIG_SMP
1009 return rq->cpu;
1010#else
1011 return 0;
1012#endif
1013}
1014
1b568f0a
PZ
1015
1016#ifdef CONFIG_SCHED_SMT
1b568f0a
PZ
1017extern void __update_idle_core(struct rq *rq);
1018
1019static inline void update_idle_core(struct rq *rq)
1020{
1021 if (static_branch_unlikely(&sched_smt_present))
1022 __update_idle_core(rq);
1023}
1024
1025#else
1026static inline void update_idle_core(struct rq *rq) { }
1027#endif
1028
8b06c55b 1029DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
029632fb 1030
518cd623 1031#define cpu_rq(cpu) (&per_cpu(runqueues, (cpu)))
4a32fea9 1032#define this_rq() this_cpu_ptr(&runqueues)
518cd623
PZ
1033#define task_rq(p) cpu_rq(task_cpu(p))
1034#define cpu_curr(cpu) (cpu_rq(cpu)->curr)
4a32fea9 1035#define raw_rq() raw_cpu_ptr(&runqueues)
518cd623 1036
1f351d7f
JW
1037extern void update_rq_clock(struct rq *rq);
1038
cebde6d6
PZ
1039static inline u64 __rq_clock_broken(struct rq *rq)
1040{
316c1608 1041 return READ_ONCE(rq->clock);
cebde6d6
PZ
1042}
1043
/*
 * rq::clock_update_flags bits
 *
 * %RQCF_REQ_SKIP - will request skipping of clock update on the next
 *   call to __schedule(). This is an optimisation to avoid
 *   neighbouring rq clock updates.
 *
 * %RQCF_ACT_SKIP - is set from inside of __schedule() when skipping is
 *   in effect and calls to update_rq_clock() are being ignored.
 *
 * %RQCF_UPDATED - is a debug flag that indicates whether a call has been
 *   made to update_rq_clock() since the last time rq::lock was pinned.
 *
 * If inside of __schedule(), clock_update_flags will have been
 * shifted left (a left shift is a cheap operation for the fast path
 * to promote %RQCF_REQ_SKIP to %RQCF_ACT_SKIP), so you must use,
 *
 *	if (rq->clock_update_flags >= RQCF_UPDATED)
 *
 * to check if %RQCF_UPDATED is set. It'll never be shifted more than
 * one position though, because the next rq_unpin_lock() will shift it
 * back.
 */
#define RQCF_REQ_SKIP		0x01
#define RQCF_ACT_SKIP		0x02
#define RQCF_UPDATED		0x04

1071static inline void assert_clock_updated(struct rq *rq)
1072{
1073 /*
1074 * The only reason for not seeing a clock update since the
1075 * last rq_pin_lock() is if we're currently skipping updates.
1076 */
1077 SCHED_WARN_ON(rq->clock_update_flags < RQCF_ACT_SKIP);
1078}
1079
78becc27
FW
1080static inline u64 rq_clock(struct rq *rq)
1081{
cebde6d6 1082 lockdep_assert_held(&rq->lock);
cb42c9a3
MF
1083 assert_clock_updated(rq);
1084
78becc27
FW
1085 return rq->clock;
1086}
1087
1088static inline u64 rq_clock_task(struct rq *rq)
1089{
cebde6d6 1090 lockdep_assert_held(&rq->lock);
cb42c9a3
MF
1091 assert_clock_updated(rq);
1092
78becc27
FW
1093 return rq->clock_task;
1094}
1095
adcc8da8 1096static inline void rq_clock_skip_update(struct rq *rq)
9edfbfed
PZ
1097{
1098 lockdep_assert_held(&rq->lock);
adcc8da8
DB
1099 rq->clock_update_flags |= RQCF_REQ_SKIP;
1100}
1101
1102/*
595058b6 1103 * See rt task throttling, which is the only time a skip
adcc8da8
DB
1104 * request is cancelled.
1105 */
1106static inline void rq_clock_cancel_skipupdate(struct rq *rq)
1107{
1108 lockdep_assert_held(&rq->lock);
1109 rq->clock_update_flags &= ~RQCF_REQ_SKIP;
9edfbfed
PZ
1110}
1111
d8ac8971
MF
1112struct rq_flags {
1113 unsigned long flags;
1114 struct pin_cookie cookie;
cb42c9a3
MF
1115#ifdef CONFIG_SCHED_DEBUG
1116 /*
1117 * A copy of (rq::clock_update_flags & RQCF_UPDATED) for the
1118 * current pin context is stashed here in case it needs to be
1119 * restored in rq_repin_lock().
1120 */
1121 unsigned int clock_update_flags;
1122#endif
d8ac8971
MF
1123};
1124
1125static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf)
1126{
1127 rf->cookie = lockdep_pin_lock(&rq->lock);
cb42c9a3
MF
1128
1129#ifdef CONFIG_SCHED_DEBUG
1130 rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
1131 rf->clock_update_flags = 0;
1132#endif
d8ac8971
MF
1133}
1134
1135static inline void rq_unpin_lock(struct rq *rq, struct rq_flags *rf)
1136{
cb42c9a3
MF
1137#ifdef CONFIG_SCHED_DEBUG
1138 if (rq->clock_update_flags > RQCF_ACT_SKIP)
1139 rf->clock_update_flags = RQCF_UPDATED;
1140#endif
1141
d8ac8971
MF
1142 lockdep_unpin_lock(&rq->lock, rf->cookie);
1143}
1144
1145static inline void rq_repin_lock(struct rq *rq, struct rq_flags *rf)
1146{
1147 lockdep_repin_lock(&rq->lock, rf->cookie);
cb42c9a3
MF
1148
1149#ifdef CONFIG_SCHED_DEBUG
1150 /*
1151 * Restore the value we stashed in @rf for this pin context.
1152 */
1153 rq->clock_update_flags |= rf->clock_update_flags;
1154#endif
d8ac8971
MF
1155}
1156
1f351d7f
JW
1157struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf)
1158 __acquires(rq->lock);
1159
1160struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf)
1161 __acquires(p->pi_lock)
1162 __acquires(rq->lock);
1163
1164static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf)
1165 __releases(rq->lock)
1166{
1167 rq_unpin_lock(rq, rf);
1168 raw_spin_unlock(&rq->lock);
1169}
1170
1171static inline void
1172task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
1173 __releases(rq->lock)
1174 __releases(p->pi_lock)
1175{
1176 rq_unpin_lock(rq, rf);
1177 raw_spin_unlock(&rq->lock);
1178 raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags);
1179}
1180
1181static inline void
1182rq_lock_irqsave(struct rq *rq, struct rq_flags *rf)
1183 __acquires(rq->lock)
1184{
1185 raw_spin_lock_irqsave(&rq->lock, rf->flags);
1186 rq_pin_lock(rq, rf);
1187}
1188
1189static inline void
1190rq_lock_irq(struct rq *rq, struct rq_flags *rf)
1191 __acquires(rq->lock)
1192{
1193 raw_spin_lock_irq(&rq->lock);
1194 rq_pin_lock(rq, rf);
1195}
1196
1197static inline void
1198rq_lock(struct rq *rq, struct rq_flags *rf)
1199 __acquires(rq->lock)
1200{
1201 raw_spin_lock(&rq->lock);
1202 rq_pin_lock(rq, rf);
1203}
1204
1205static inline void
1206rq_relock(struct rq *rq, struct rq_flags *rf)
1207 __acquires(rq->lock)
1208{
1209 raw_spin_lock(&rq->lock);
1210 rq_repin_lock(rq, rf);
1211}
1212
1213static inline void
1214rq_unlock_irqrestore(struct rq *rq, struct rq_flags *rf)
1215 __releases(rq->lock)
1216{
1217 rq_unpin_lock(rq, rf);
1218 raw_spin_unlock_irqrestore(&rq->lock, rf->flags);
1219}
1220
1221static inline void
1222rq_unlock_irq(struct rq *rq, struct rq_flags *rf)
1223 __releases(rq->lock)
1224{
1225 rq_unpin_lock(rq, rf);
1226 raw_spin_unlock_irq(&rq->lock);
1227}
1228
1229static inline void
1230rq_unlock(struct rq *rq, struct rq_flags *rf)
1231 __releases(rq->lock)
1232{
1233 rq_unpin_lock(rq, rf);
1234 raw_spin_unlock(&rq->lock);
1235}
1236
246b3b33
JW
1237static inline struct rq *
1238this_rq_lock_irq(struct rq_flags *rf)
1239 __acquires(rq->lock)
1240{
1241 struct rq *rq;
1242
1243 local_irq_disable();
1244 rq = this_rq();
1245 rq_lock(rq, rf);
1246 return rq;
1247}
1248
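/*
 * Illustrative sketch (not part of the original header): the canonical
 * usage pattern for the rq locking helpers above.  rq_lock_irqsave() pins
 * rq->lock through struct rq_flags, the clock is refreshed and read under
 * the lock, and rq_unlock_irqrestore() unpins and releases it.  The helper
 * name is hypothetical.
 */
static inline u64 read_rq_clock_example(struct rq *rq)
{
	struct rq_flags rf;
	u64 now;

	rq_lock_irqsave(rq, &rf);
	update_rq_clock(rq);			/* make sure rq->clock is current */
	now = rq_clock(rq);
	rq_unlock_irqrestore(rq, &rf);

	return now;
}
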
9942f79b 1249#ifdef CONFIG_NUMA
e3fe70b1
RR
1250enum numa_topology_type {
1251 NUMA_DIRECT,
1252 NUMA_GLUELESS_MESH,
1253 NUMA_BACKPLANE,
1254};
1255extern enum numa_topology_type sched_numa_topology_type;
9942f79b
RR
1256extern int sched_max_numa_distance;
1257extern bool find_numa_distance(int distance);
f2cb1360
IM
1258extern void sched_init_numa(void);
1259extern void sched_domains_numa_masks_set(unsigned int cpu);
1260extern void sched_domains_numa_masks_clear(unsigned int cpu);
e0e8d491 1261extern int sched_numa_find_closest(const struct cpumask *cpus, int cpu);
f2cb1360
IM
1262#else
1263static inline void sched_init_numa(void) { }
1264static inline void sched_domains_numa_masks_set(unsigned int cpu) { }
1265static inline void sched_domains_numa_masks_clear(unsigned int cpu) { }
e0e8d491
WL
1266static inline int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
1267{
1268 return nr_cpu_ids;
1269}
f2cb1360
IM
1270#endif
1271
f809ca9a 1272#ifdef CONFIG_NUMA_BALANCING
44dba3d5
IM
1273/* The regions in numa_faults array from task_struct */
1274enum numa_faults_stats {
1275 NUMA_MEM = 0,
1276 NUMA_CPU,
1277 NUMA_MEMBUF,
1278 NUMA_CPUBUF
1279};
0ec8aa00 1280extern void sched_setnuma(struct task_struct *p, int node);
e6628d5b 1281extern int migrate_task_to(struct task_struct *p, int cpu);
0ad4e3df
SD
1282extern int migrate_swap(struct task_struct *p, struct task_struct *t,
1283 int cpu, int scpu);
13784475
MG
1284extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
1285#else
1286static inline void
1287init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
1288{
1289}
f809ca9a
MG
1290#endif /* CONFIG_NUMA_BALANCING */
1291
518cd623
PZ
1292#ifdef CONFIG_SMP
1293
e3fca9e7
PZ
1294static inline void
1295queue_balance_callback(struct rq *rq,
1296 struct callback_head *head,
1297 void (*func)(struct rq *rq))
1298{
1299 lockdep_assert_held(&rq->lock);
1300
1301 if (unlikely(head->next))
1302 return;
1303
1304 head->func = (void (*)(struct callback_head *))func;
1305 head->next = rq->balance_callback;
1306 rq->balance_callback = head;
1307}
1308
e3baac47
PZ
1309extern void sched_ttwu_pending(void);
1310
029632fb
PZ
1311#define rcu_dereference_check_sched_domain(p) \
1312 rcu_dereference_check((p), \
1313 lockdep_is_held(&sched_domains_mutex))
1314
1315/*
1316 * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
337e9b07 1317 * See destroy_sched_domains: call_rcu for details.
029632fb
PZ
1318 *
1319 * The domain tree of any CPU may only be accessed from within
1320 * preempt-disabled sections.
1321 */
1322#define for_each_domain(cpu, __sd) \
518cd623
PZ
1323 for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
1324 __sd; __sd = __sd->parent)
029632fb 1325
77e81365
SS
1326#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
1327
518cd623
PZ
1328/**
1329 * highest_flag_domain - Return highest sched_domain containing flag.
97fb7a0a 1330 * @cpu: The CPU whose highest level of sched domain is to
518cd623
PZ
1331 * be returned.
1332 * @flag: The flag to check for the highest sched_domain
97fb7a0a 1333 * for the given CPU.
518cd623 1334 *
97fb7a0a 1335 * Returns the highest sched_domain of a CPU which contains the given flag.
518cd623
PZ
1336 */
1337static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
1338{
1339 struct sched_domain *sd, *hsd = NULL;
1340
1341 for_each_domain(cpu, sd) {
1342 if (!(sd->flags & flag))
1343 break;
1344 hsd = sd;
1345 }
1346
1347 return hsd;
1348}
1349
fb13c7ee
MG
1350static inline struct sched_domain *lowest_flag_domain(int cpu, int flag)
1351{
1352 struct sched_domain *sd;
1353
1354 for_each_domain(cpu, sd) {
1355 if (sd->flags & flag)
1356 break;
1357 }
1358
1359 return sd;
1360}
1361
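/*
 * Illustrative sketch (not part of the original header): querying the
 * domain hierarchy with the helpers above.  SD_SHARE_PKG_RESOURCES marks
 * domains whose CPUs share cache, so the highest domain carrying it spans
 * the last-level cache (this is how sd_llc below is derived).  Must run
 * with preemption disabled, see for_each_domain().  The helper name is
 * hypothetical.
 */
static inline int llc_span_weight_example(int cpu)
{
	struct sched_domain *sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);

	return sd ? cpumask_weight(sched_domain_span(sd)) : 1;
}
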
994aeb7a 1362DECLARE_PER_CPU(struct sched_domain __rcu *, sd_llc);
7d9ffa89 1363DECLARE_PER_CPU(int, sd_llc_size);
518cd623 1364DECLARE_PER_CPU(int, sd_llc_id);
994aeb7a
JFG
1365DECLARE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
1366DECLARE_PER_CPU(struct sched_domain __rcu *, sd_numa);
1367DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
1368DECLARE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
df054e84 1369extern struct static_key_false sched_asym_cpucapacity;
518cd623 1370
63b2ca30 1371struct sched_group_capacity {
97fb7a0a 1372 atomic_t ref;
5e6521ea 1373 /*
172895e6 1374 * CPU capacity of this group, SCHED_CAPACITY_SCALE being max capacity
63b2ca30 1375 * for a single CPU.
5e6521ea 1376 */
97fb7a0a
IM
1377 unsigned long capacity;
1378 unsigned long min_capacity; /* Min per-CPU capacity in group */
e3d6d0cb 1379 unsigned long max_capacity; /* Max per-CPU capacity in group */
97fb7a0a
IM
1380 unsigned long next_update;
1381 int imbalance; /* XXX unrelated to capacity but shared group state */
5e6521ea 1382
005f874d 1383#ifdef CONFIG_SCHED_DEBUG
97fb7a0a 1384 int id;
005f874d
PZ
1385#endif
1386
97fb7a0a 1387 unsigned long cpumask[0]; /* Balance mask */
5e6521ea
LZ
1388};
1389
1390struct sched_group {
97fb7a0a
IM
1391 struct sched_group *next; /* Must be a circular list */
1392 atomic_t ref;
5e6521ea 1393
97fb7a0a 1394 unsigned int group_weight;
63b2ca30 1395 struct sched_group_capacity *sgc;
97fb7a0a 1396 int asym_prefer_cpu; /* CPU of highest priority in group */
5e6521ea
LZ
1397
1398 /*
1399 * The CPUs this group covers.
1400 *
1401 * NOTE: this field is variable length. (Allocated dynamically
1402 * by attaching extra space to the end of the structure,
1403 * depending on how many CPUs the kernel has booted up with)
1404 */
97fb7a0a 1405 unsigned long cpumask[0];
5e6521ea
LZ
1406};
1407
ae4df9d6 1408static inline struct cpumask *sched_group_span(struct sched_group *sg)
5e6521ea
LZ
1409{
1410 return to_cpumask(sg->cpumask);
1411}
1412
1413/*
e5c14b1f 1414 * See build_balance_mask().
5e6521ea 1415 */
e5c14b1f 1416static inline struct cpumask *group_balance_mask(struct sched_group *sg)
5e6521ea 1417{
63b2ca30 1418 return to_cpumask(sg->sgc->cpumask);
5e6521ea
LZ
1419}
1420
1421/**
97fb7a0a
IM
1422 * group_first_cpu - Returns the first CPU in the cpumask of a sched_group.
1423 * @group: The group whose first CPU is to be returned.
5e6521ea
LZ
1424 */
1425static inline unsigned int group_first_cpu(struct sched_group *group)
1426{
ae4df9d6 1427 return cpumask_first(sched_group_span(group));
5e6521ea
LZ
1428}
1429
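/*
 * Illustrative sketch (not part of the original header): iterating the
 * CPUs covered by a sched_group via sched_group_span().  The helper name
 * is hypothetical; once the topology is built the result matches
 * sg->group_weight.
 */
static inline unsigned int sched_group_weight_example(struct sched_group *sg)
{
	unsigned int nr = 0;
	int cpu;

	for_each_cpu(cpu, sched_group_span(sg))
		nr++;

	return nr;
}
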
c1174876
PZ
1430extern int group_balance_cpu(struct sched_group *sg);
1431
3866e845
SRRH
1432#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_SYSCTL)
1433void register_sched_domain_sysctl(void);
bbdacdfe 1434void dirty_sched_domain_sysctl(int cpu);
3866e845
SRRH
1435void unregister_sched_domain_sysctl(void);
1436#else
1437static inline void register_sched_domain_sysctl(void)
1438{
1439}
bbdacdfe
PZ
1440static inline void dirty_sched_domain_sysctl(int cpu)
1441{
1442}
3866e845
SRRH
1443static inline void unregister_sched_domain_sysctl(void)
1444{
1445}
1446#endif
1447
e3baac47
PZ
1448#else
1449
1450static inline void sched_ttwu_pending(void) { }
1451
518cd623 1452#endif /* CONFIG_SMP */
029632fb 1453
391e43da 1454#include "stats.h"
1051408f 1455#include "autogroup.h"
029632fb
PZ
1456
1457#ifdef CONFIG_CGROUP_SCHED
1458
/*
 * Return the group to which this task belongs.
 *
 * We cannot use task_css() and friends because the cgroup subsystem
 * changes that value before the cgroup_subsys::attach() method is called,
 * therefore we cannot pin it and might observe the wrong value.
 *
 * The same is true for autogroup's p->signal->autogroup->tg, the autogroup
 * core changes this before calling sched_move_task().
 *
 * Instead we use a 'copy' which is updated from sched_move_task() while
 * holding both task_struct::pi_lock and rq::lock.
 */
1472static inline struct task_group *task_group(struct task_struct *p)
1473{
8323f26c 1474 return p->sched_task_group;
029632fb
PZ
1475}
1476
1477/* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
1478static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
1479{
1480#if defined(CONFIG_FAIR_GROUP_SCHED) || defined(CONFIG_RT_GROUP_SCHED)
1481 struct task_group *tg = task_group(p);
1482#endif
1483
1484#ifdef CONFIG_FAIR_GROUP_SCHED
ad936d86 1485 set_task_rq_fair(&p->se, p->se.cfs_rq, tg->cfs_rq[cpu]);
029632fb
PZ
1486 p->se.cfs_rq = tg->cfs_rq[cpu];
1487 p->se.parent = tg->se[cpu];
1488#endif
1489
1490#ifdef CONFIG_RT_GROUP_SCHED
1491 p->rt.rt_rq = tg->rt_rq[cpu];
1492 p->rt.parent = tg->rt_se[cpu];
1493#endif
1494}
1495
1496#else /* CONFIG_CGROUP_SCHED */
1497
1498static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
1499static inline struct task_group *task_group(struct task_struct *p)
1500{
1501 return NULL;
1502}
1503
1504#endif /* CONFIG_CGROUP_SCHED */
1505
1506static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
1507{
1508 set_task_rq(p, cpu);
1509#ifdef CONFIG_SMP
1510 /*
1511 * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
dfcb245e 1512 * successfully executed on another CPU. We must ensure that updates of
029632fb
PZ
1513 * per-task data have been completed by this moment.
1514 */
1515 smp_wmb();
c65eacbe 1516#ifdef CONFIG_THREAD_INFO_IN_TASK
c546951d 1517 WRITE_ONCE(p->cpu, cpu);
c65eacbe 1518#else
c546951d 1519 WRITE_ONCE(task_thread_info(p)->cpu, cpu);
c65eacbe 1520#endif
ac66f547 1521 p->wake_cpu = cpu;
029632fb
PZ
1522#endif
1523}
1524
1525/*
1526 * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
1527 */
1528#ifdef CONFIG_SCHED_DEBUG
c5905afb 1529# include <linux/static_key.h>
029632fb
PZ
1530# define const_debug __read_mostly
1531#else
1532# define const_debug const
1533#endif
1534
029632fb
PZ
1535#define SCHED_FEAT(name, enabled) \
1536 __SCHED_FEAT_##name ,
1537
1538enum {
391e43da 1539#include "features.h"
f8b6d1cc 1540 __SCHED_FEAT_NR,
029632fb
PZ
1541};
1542
1543#undef SCHED_FEAT
1544
e9666d10 1545#if defined(CONFIG_SCHED_DEBUG) && defined(CONFIG_JUMP_LABEL)
765cc3a4
PB
1546
1547/*
1548 * To support run-time toggling of sched features, all the translation units
1549 * (but core.c) reference the sysctl_sched_features defined in core.c.
1550 */
1551extern const_debug unsigned int sysctl_sched_features;
1552
f8b6d1cc 1553#define SCHED_FEAT(name, enabled) \
c5905afb 1554static __always_inline bool static_branch_##name(struct static_key *key) \
f8b6d1cc 1555{ \
6e76ea8a 1556 return static_key_##enabled(key); \
f8b6d1cc
PZ
1557}
1558
1559#include "features.h"
f8b6d1cc
PZ
1560#undef SCHED_FEAT
1561
c5905afb 1562extern struct static_key sched_feat_keys[__SCHED_FEAT_NR];
f8b6d1cc 1563#define sched_feat(x) (static_branch_##x(&sched_feat_keys[__SCHED_FEAT_##x]))
765cc3a4 1564
e9666d10 1565#else /* !(SCHED_DEBUG && CONFIG_JUMP_LABEL) */
765cc3a4
PB
1566
1567/*
1568 * Each translation unit has its own copy of sysctl_sched_features to allow
1569 * constants propagation at compile time and compiler optimization based on
1570 * features default.
1571 */
1572#define SCHED_FEAT(name, enabled) \
1573 (1UL << __SCHED_FEAT_##name) * enabled |
1574static const_debug __maybe_unused unsigned int sysctl_sched_features =
1575#include "features.h"
1576 0;
1577#undef SCHED_FEAT
1578
7e6f4c5d 1579#define sched_feat(x) !!(sysctl_sched_features & (1UL << __SCHED_FEAT_##x))
765cc3a4 1580
e9666d10 1581#endif /* SCHED_DEBUG && CONFIG_JUMP_LABEL */
029632fb 1582
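/*
 * Illustrative sketch (not part of the original header): sched_feat()
 * usage.  Feature names come from features.h; with SCHED_DEBUG and jump
 * labels the test compiles down to a static branch, otherwise it folds
 * into a compile-time constant.  GENTLE_FAIR_SLEEPERS is one of the
 * features defined in features.h; the helper name is hypothetical.
 */
static inline bool gentle_fair_sleepers_example(void)
{
	return sched_feat(GENTLE_FAIR_SLEEPERS);
}
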
2a595721 1583extern struct static_key_false sched_numa_balancing;
cb251765 1584extern struct static_key_false sched_schedstats;
cbee9f88 1585
029632fb
PZ
1586static inline u64 global_rt_period(void)
1587{
1588 return (u64)sysctl_sched_rt_period * NSEC_PER_USEC;
1589}
1590
1591static inline u64 global_rt_runtime(void)
1592{
1593 if (sysctl_sched_rt_runtime < 0)
1594 return RUNTIME_INF;
1595
1596 return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
1597}
1598
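/*
 * Illustrative sketch (not part of the original header): with the default
 * sysctls (sched_rt_period_us = 1000000, sched_rt_runtime_us = 950000),
 * RT tasks may consume at most 95% of each period.  global_rt_runtime()
 * returns RUNTIME_INF when throttling is disabled; the helper name is
 * hypothetical.
 */
static inline bool rt_throttling_enabled_example(void)
{
	return global_rt_runtime() != RUNTIME_INF;
}
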
029632fb
PZ
1599static inline int task_current(struct rq *rq, struct task_struct *p)
1600{
1601 return rq->curr == p;
1602}
1603
1604static inline int task_running(struct rq *rq, struct task_struct *p)
1605{
1606#ifdef CONFIG_SMP
1607 return p->on_cpu;
1608#else
1609 return task_current(rq, p);
1610#endif
1611}
1612
da0c1e65
KT
1613static inline int task_on_rq_queued(struct task_struct *p)
1614{
1615 return p->on_rq == TASK_ON_RQ_QUEUED;
1616}
029632fb 1617
cca26e80
KT
1618static inline int task_on_rq_migrating(struct task_struct *p)
1619{
c546951d 1620 return READ_ONCE(p->on_rq) == TASK_ON_RQ_MIGRATING;
cca26e80
KT
1621}
1622
b13095f0
LZ
1623/*
1624 * wake flags
1625 */
97fb7a0a
IM
1626#define WF_SYNC 0x01 /* Waker goes to sleep after wakeup */
1627#define WF_FORK 0x02 /* Child wakeup after fork */
1628#define WF_MIGRATED 0x4 /* Internal use, task got migrated */
b13095f0 1629
029632fb
PZ
1630/*
1631 * To aid in avoiding the subversion of "niceness" due to uneven distribution
1632 * of tasks with abnormal "nice" values across CPUs the contribution that
1633 * each task makes to its run queue's load is weighted according to its
1634 * scheduling class and "nice" value. For SCHED_NORMAL tasks this is just a
1635 * scaled version of the new time slice allocation that they receive on time
1636 * slice expiry etc.
1637 */
1638
97fb7a0a
IM
1639#define WEIGHT_IDLEPRIO 3
1640#define WMULT_IDLEPRIO 1431655765
029632fb 1641
97fb7a0a
IM
1642extern const int sched_prio_to_weight[40];
1643extern const u32 sched_prio_to_wmult[40];
029632fb 1644
ff77e468
PZ
1645/*
1646 * {de,en}queue flags:
1647 *
1648 * DEQUEUE_SLEEP - task is no longer runnable
1649 * ENQUEUE_WAKEUP - task just became runnable
1650 *
1651 * SAVE/RESTORE - an otherwise spurious dequeue/enqueue, done to ensure tasks
1652 * are in a known state which allows modification. Such pairs
1653 * should preserve as much state as possible.
1654 *
1655 * MOVE - paired with SAVE/RESTORE, explicitly does not preserve the location
1656 * in the runqueue.
1657 *
1658 * ENQUEUE_HEAD - place at front of runqueue (tail if not specified)
1659 * ENQUEUE_REPLENISH - CBS (replenish runtime and postpone deadline)
59efa0ba 1660 * ENQUEUE_MIGRATED - the task was migrated during wakeup
ff77e468
PZ
1661 *
1662 */
1663
1664#define DEQUEUE_SLEEP 0x01
97fb7a0a
IM
1665#define DEQUEUE_SAVE 0x02 /* Matches ENQUEUE_RESTORE */
1666#define DEQUEUE_MOVE 0x04 /* Matches ENQUEUE_MOVE */
1667#define DEQUEUE_NOCLOCK 0x08 /* Matches ENQUEUE_NOCLOCK */
ff77e468 1668
1de64443 1669#define ENQUEUE_WAKEUP 0x01
ff77e468
PZ
1670#define ENQUEUE_RESTORE 0x02
1671#define ENQUEUE_MOVE 0x04
0a67d1ee 1672#define ENQUEUE_NOCLOCK 0x08
ff77e468 1673
0a67d1ee
PZ
1674#define ENQUEUE_HEAD 0x10
1675#define ENQUEUE_REPLENISH 0x20
c82ba9fa 1676#ifdef CONFIG_SMP
0a67d1ee 1677#define ENQUEUE_MIGRATED 0x40
c82ba9fa 1678#else
59efa0ba 1679#define ENQUEUE_MIGRATED 0x00
c82ba9fa 1680#endif
c82ba9fa 1681
37e117c0
PZ
1682#define RETRY_TASK ((void *)-1UL)
1683
c82ba9fa
LZ
1684struct sched_class {
1685 const struct sched_class *next;
1686
69842cba
PB
1687#ifdef CONFIG_UCLAMP_TASK
1688 int uclamp_enabled;
1689#endif
1690
c82ba9fa
LZ
1691 void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
1692 void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
97fb7a0a
IM
1693 void (*yield_task) (struct rq *rq);
1694 bool (*yield_to_task)(struct rq *rq, struct task_struct *p, bool preempt);
c82ba9fa 1695
97fb7a0a 1696 void (*check_preempt_curr)(struct rq *rq, struct task_struct *p, int flags);
c82ba9fa 1697
606dba2e
PZ
1698 /*
1699 * It is the responsibility of the pick_next_task() method to call
1700 * put_prev_task() on the @prev task (or do something equivalent) before
1701 * returning the next task.
37e117c0
PZ
1702 *
1703 * May return RETRY_TASK when it finds a higher prio class has runnable
1704 * tasks.
606dba2e 1705 */
97fb7a0a
IM
1706 struct task_struct * (*pick_next_task)(struct rq *rq,
1707 struct task_struct *prev,
1708 struct rq_flags *rf);
1709 void (*put_prev_task)(struct rq *rq, struct task_struct *p);
c82ba9fa
LZ
1710
1711#ifdef CONFIG_SMP
ac66f547 1712 int (*select_task_rq)(struct task_struct *p, int task_cpu, int sd_flag, int flags);
1327237a 1713 void (*migrate_task_rq)(struct task_struct *p, int new_cpu);
c82ba9fa 1714
97fb7a0a 1715 void (*task_woken)(struct rq *this_rq, struct task_struct *task);
c82ba9fa
LZ
1716
1717 void (*set_cpus_allowed)(struct task_struct *p,
1718 const struct cpumask *newmask);
1719
1720 void (*rq_online)(struct rq *rq);
1721 void (*rq_offline)(struct rq *rq);
1722#endif
1723
97fb7a0a
IM
1724 void (*set_curr_task)(struct rq *rq);
1725 void (*task_tick)(struct rq *rq, struct task_struct *p, int queued);
1726 void (*task_fork)(struct task_struct *p);
1727 void (*task_dead)(struct task_struct *p);
c82ba9fa 1728
67dfa1b7
KT
1729 /*
1730 * The switched_from() call is allowed to drop rq->lock, therefore we
1731 * cannot assume the switched_from/switched_to pair is serialized by
1732 * rq->lock. They are however serialized by p->pi_lock.
1733 */
97fb7a0a
IM
1734 void (*switched_from)(struct rq *this_rq, struct task_struct *task);
1735 void (*switched_to) (struct rq *this_rq, struct task_struct *task);
c82ba9fa 1736 void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
97fb7a0a 1737 int oldprio);
c82ba9fa 1738
97fb7a0a
IM
1739 unsigned int (*get_rr_interval)(struct rq *rq,
1740 struct task_struct *task);
c82ba9fa 1741
97fb7a0a 1742 void (*update_curr)(struct rq *rq);
6e998916 1743
97fb7a0a
IM
1744#define TASK_SET_GROUP 0
1745#define TASK_MOVE_GROUP 1
ea86cb4b 1746
c82ba9fa 1747#ifdef CONFIG_FAIR_GROUP_SCHED
97fb7a0a 1748 void (*task_change_group)(struct task_struct *p, int type);
c82ba9fa
LZ
1749#endif
1750};
029632fb 1751
3f1d2a31
PZ
1752static inline void put_prev_task(struct rq *rq, struct task_struct *prev)
1753{
10e7071b 1754 WARN_ON_ONCE(rq->curr != prev);
3f1d2a31
PZ
1755 prev->sched_class->put_prev_task(rq, prev);
1756}
1757
b2bf6c31
PZ
1758static inline void set_curr_task(struct rq *rq, struct task_struct *curr)
1759{
1760 curr->sched_class->set_curr_task(rq);
1761}
1762
f5832c19 1763#ifdef CONFIG_SMP
029632fb 1764#define sched_class_highest (&stop_sched_class)
f5832c19
NP
1765#else
1766#define sched_class_highest (&dl_sched_class)
1767#endif
029632fb
PZ
1768#define for_each_class(class) \
1769 for (class = sched_class_highest; class; class = class->next)
1770
1771extern const struct sched_class stop_sched_class;
aab03e05 1772extern const struct sched_class dl_sched_class;
029632fb
PZ
1773extern const struct sched_class rt_sched_class;
1774extern const struct sched_class fair_sched_class;
1775extern const struct sched_class idle_sched_class;
1776
1777
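The classes above are chained in priority order (stop or dl first, idle last) and for_each_class() walks that chain; a sketch of the core pick loop that honours the RETRY_TASK contract documented in struct sched_class (rq, prev and rf are the caller's runqueue, previous task and rq_flags):

	const struct sched_class *class;
	struct task_struct *p;

again:
	for_each_class(class) {
		p = class->pick_next_task(rq, prev, rf);
		if (p) {
			if (unlikely(p == RETRY_TASK))
				goto again;
			return p;
		}
	}

	BUG();	/* the idle class should always have a runnable task */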
1778#ifdef CONFIG_SMP
1779
63b2ca30 1780extern void update_group_capacity(struct sched_domain *sd, int cpu);
b719203b 1781
7caff66f 1782extern void trigger_load_balance(struct rq *rq);
029632fb 1783
c5b28038
PZ
1784extern void set_cpus_allowed_common(struct task_struct *p, const struct cpumask *new_mask);
1785
029632fb
PZ
1786#endif
1787
442bf3aa
DL
1788#ifdef CONFIG_CPU_IDLE
1789static inline void idle_set_state(struct rq *rq,
1790 struct cpuidle_state *idle_state)
1791{
1792 rq->idle_state = idle_state;
1793}
1794
1795static inline struct cpuidle_state *idle_get_state(struct rq *rq)
1796{
9148a3a1 1797 SCHED_WARN_ON(!rcu_read_lock_held());
97fb7a0a 1798
442bf3aa
DL
1799 return rq->idle_state;
1800}
1801#else
1802static inline void idle_set_state(struct rq *rq,
1803 struct cpuidle_state *idle_state)
1804{
1805}
1806
1807static inline struct cpuidle_state *idle_get_state(struct rq *rq)
1808{
1809 return NULL;
1810}
1811#endif
1812
8663effb
SRV
1813extern void schedule_idle(void);
1814
029632fb
PZ
1815extern void sysrq_sched_debug_show(void);
1816extern void sched_init_granularity(void);
1817extern void update_max_interval(void);
1baca4ce
JL
1818
1819extern void init_sched_dl_class(void);
029632fb
PZ
1820extern void init_sched_rt_class(void);
1821extern void init_sched_fair_class(void);
1822
9059393e
VG
1823extern void reweight_task(struct task_struct *p, int prio);
1824
8875125e 1825extern void resched_curr(struct rq *rq);
029632fb
PZ
1826extern void resched_cpu(int cpu);
1827
1828extern struct rt_bandwidth def_rt_bandwidth;
1829extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
1830
332ac17e
DF
1831extern struct dl_bandwidth def_dl_bandwidth;
1832extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
aab03e05 1833extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
209a0cbd 1834extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
4da3abce 1835extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
aab03e05 1836
97fb7a0a
IM
1837#define BW_SHIFT 20
1838#define BW_UNIT (1 << BW_SHIFT)
1839#define RATIO_SHIFT 8
332ac17e
DF
1840unsigned long to_ratio(u64 period, u64 runtime);
1841
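BW_SHIFT/BW_UNIT above define a 20-bit fixed-point format for bandwidth fractions; to_ratio() (defined in core.c) essentially returns (runtime << BW_SHIFT) / period. A stand-alone sketch of that arithmetic under those assumptions:

/* Stand-alone sketch of the BW_SHIFT fixed-point ratio used for DL bandwidth. */
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT	20
#define BW_UNIT		(1 << BW_SHIFT)

static uint64_t to_ratio_sketch(uint64_t period, uint64_t runtime)
{
	if (period == 0)
		return 0;
	return (runtime << BW_SHIFT) / period;
}

int main(void)
{
	/* 25ms runtime every 100ms -> 0.25 of a CPU -> BW_UNIT / 4 */
	uint64_t bw = to_ratio_sketch(100 * 1000 * 1000ULL, 25 * 1000 * 1000ULL);

	printf("bw = %llu (BW_UNIT/4 = %d)\n",
	       (unsigned long long)bw, BW_UNIT / 4);
	return 0;
}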
540247fb 1842extern void init_entity_runnable_average(struct sched_entity *se);
d0fe0b9c 1843extern void post_init_entity_util_avg(struct task_struct *p);
a75cdaa9 1844
76d92ac3
FW
1845#ifdef CONFIG_NO_HZ_FULL
1846extern bool sched_can_stop_tick(struct rq *rq);
d84b3131 1847extern int __init sched_tick_offload_init(void);
76d92ac3
FW
1848
1849/*
1850 * The tick may be needed by tasks in the runqueue depending on their policy
1851 * and requirements. If the tick is needed, send the target CPU an IPI to kick
1852 * it out of nohz mode if necessary.
1853 */
1854static inline void sched_update_tick_dependency(struct rq *rq)
1855{
1856 int cpu;
1857
1858 if (!tick_nohz_full_enabled())
1859 return;
1860
1861 cpu = cpu_of(rq);
1862
1863 if (!tick_nohz_full_cpu(cpu))
1864 return;
1865
1866 if (sched_can_stop_tick(rq))
1867 tick_nohz_dep_clear_cpu(cpu, TICK_DEP_BIT_SCHED);
1868 else
1869 tick_nohz_dep_set_cpu(cpu, TICK_DEP_BIT_SCHED);
1870}
1871#else
d84b3131 1872static inline int sched_tick_offload_init(void) { return 0; }
76d92ac3
FW
1873static inline void sched_update_tick_dependency(struct rq *rq) { }
1874#endif
1875
72465447 1876static inline void add_nr_running(struct rq *rq, unsigned count)
029632fb 1877{
72465447
KT
1878 unsigned prev_nr = rq->nr_running;
1879
1880 rq->nr_running = prev_nr + count;
9f3660c2 1881
4486edd1 1882#ifdef CONFIG_SMP
3e184501 1883 if (prev_nr < 2 && rq->nr_running >= 2) {
e90c8fe1
VS
1884 if (!READ_ONCE(rq->rd->overload))
1885 WRITE_ONCE(rq->rd->overload, 1);
4486edd1 1886 }
3e184501 1887#endif
76d92ac3
FW
1888
1889 sched_update_tick_dependency(rq);
029632fb
PZ
1890}
1891
72465447 1892static inline void sub_nr_running(struct rq *rq, unsigned count)
029632fb 1893{
72465447 1894 rq->nr_running -= count;
76d92ac3
FW
1895 /* Check if we still need the tick */
1896 sched_update_tick_dependency(rq);
029632fb
PZ
1897}
1898
029632fb
PZ
1899extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
1900extern void deactivate_task(struct rq *rq, struct task_struct *p, int flags);
1901
1902extern void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags);
1903
029632fb
PZ
1904extern const_debug unsigned int sysctl_sched_nr_migrate;
1905extern const_debug unsigned int sysctl_sched_migration_cost;
1906
029632fb
PZ
1907#ifdef CONFIG_SCHED_HRTICK
1908
1909/*
1910 * Use hrtick when:
1911 * - enabled by features
1912 * - hrtimer is actually high res
1913 */
1914static inline int hrtick_enabled(struct rq *rq)
1915{
1916 if (!sched_feat(HRTICK))
1917 return 0;
1918 if (!cpu_active(cpu_of(rq)))
1919 return 0;
1920 return hrtimer_is_hres_active(&rq->hrtick_timer);
1921}
1922
1923void hrtick_start(struct rq *rq, u64 delay);
1924
b39e66ea
MG
1925#else
1926
1927static inline int hrtick_enabled(struct rq *rq)
1928{
1929 return 0;
1930}
1931
029632fb
PZ
1932#endif /* CONFIG_SCHED_HRTICK */
1933
dfbca41f
PZ
1934#ifndef arch_scale_freq_capacity
1935static __always_inline
7673c8a4 1936unsigned long arch_scale_freq_capacity(int cpu)
dfbca41f
PZ
1937{
1938 return SCHED_CAPACITY_SCALE;
1939}
1940#endif
b5b4860d 1941
029632fb
PZ
1942#ifdef CONFIG_SMP
1943#ifdef CONFIG_PREEMPT
1944
1945static inline void double_rq_lock(struct rq *rq1, struct rq *rq2);
1946
1947/*
1948 * fair double_lock_balance: Safely acquires both rq->locks in a fair
1949 * way at the expense of forcing extra atomic operations in all
1950 * invocations. This assures that the double_lock is acquired using the
1951 * same underlying policy as the spinlock_t on this architecture, which
1952 * reduces latency compared to the unfair variant below. However, it
1953 * also adds more overhead and therefore may reduce throughput.
1954 */
1955static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
1956 __releases(this_rq->lock)
1957 __acquires(busiest->lock)
1958 __acquires(this_rq->lock)
1959{
1960 raw_spin_unlock(&this_rq->lock);
1961 double_rq_lock(this_rq, busiest);
1962
1963 return 1;
1964}
1965
1966#else
1967/*
1968 * Unfair double_lock_balance: Optimizes throughput at the expense of
1969 * latency by eliminating extra atomic operations when the locks are
97fb7a0a
IM
1970 * already in proper order on entry. This favors lower CPU-ids and will
1971 * grant the double lock to lower CPUs over higher ids under contention,
029632fb
PZ
1972 * regardless of entry order into the function.
1973 */
1974static inline int _double_lock_balance(struct rq *this_rq, struct rq *busiest)
1975 __releases(this_rq->lock)
1976 __acquires(busiest->lock)
1977 __acquires(this_rq->lock)
1978{
1979 int ret = 0;
1980
1981 if (unlikely(!raw_spin_trylock(&busiest->lock))) {
1982 if (busiest < this_rq) {
1983 raw_spin_unlock(&this_rq->lock);
1984 raw_spin_lock(&busiest->lock);
1985 raw_spin_lock_nested(&this_rq->lock,
1986 SINGLE_DEPTH_NESTING);
1987 ret = 1;
1988 } else
1989 raw_spin_lock_nested(&busiest->lock,
1990 SINGLE_DEPTH_NESTING);
1991 }
1992 return ret;
1993}
1994
1995#endif /* CONFIG_PREEMPT */
1996
1997/*
1998 * double_lock_balance - lock the busiest runqueue, this_rq is locked already.
1999 */
2000static inline int double_lock_balance(struct rq *this_rq, struct rq *busiest)
2001{
2002 if (unlikely(!irqs_disabled())) {
97fb7a0a 2003 /* printk() doesn't work well under rq->lock */
029632fb
PZ
2004 raw_spin_unlock(&this_rq->lock);
2005 BUG_ON(1);
2006 }
2007
2008 return _double_lock_balance(this_rq, busiest);
2009}
2010
2011static inline void double_unlock_balance(struct rq *this_rq, struct rq *busiest)
2012 __releases(busiest->lock)
2013{
2014 raw_spin_unlock(&busiest->lock);
2015 lock_set_subclass(&this_rq->lock.dep_map, 0, _RET_IP_);
2016}
2017
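A short sketch of how the balance helpers above are used in the push/pull paths (this_rq->lock is held on entry; a non-zero return from double_lock_balance() means this_rq->lock was dropped and re-taken, so previously observed state must be re-validated):

	if (double_lock_balance(this_rq, busiest)) {
		/*
		 * this_rq->lock was released while taking busiest->lock:
		 * the task/runqueue state inspected earlier may be stale.
		 */
	}

	/* ... both rq->locks held: move tasks between the runqueues ... */

	double_unlock_balance(this_rq, busiest);
	/* this_rq->lock is still held here. */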
74602315
PZ
2018static inline void double_lock(spinlock_t *l1, spinlock_t *l2)
2019{
2020 if (l1 > l2)
2021 swap(l1, l2);
2022
2023 spin_lock(l1);
2024 spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2025}
2026
60e69eed
MG
2027static inline void double_lock_irq(spinlock_t *l1, spinlock_t *l2)
2028{
2029 if (l1 > l2)
2030 swap(l1, l2);
2031
2032 spin_lock_irq(l1);
2033 spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2034}
2035
74602315
PZ
2036static inline void double_raw_lock(raw_spinlock_t *l1, raw_spinlock_t *l2)
2037{
2038 if (l1 > l2)
2039 swap(l1, l2);
2040
2041 raw_spin_lock(l1);
2042 raw_spin_lock_nested(l2, SINGLE_DEPTH_NESTING);
2043}
2044
029632fb
PZ
2045/*
2046 * double_rq_lock - safely lock two runqueues
2047 *
2048 * Note this does not disable interrupts like task_rq_lock,
2049 * you need to do so manually before calling.
2050 */
2051static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
2052 __acquires(rq1->lock)
2053 __acquires(rq2->lock)
2054{
2055 BUG_ON(!irqs_disabled());
2056 if (rq1 == rq2) {
2057 raw_spin_lock(&rq1->lock);
2058 __acquire(rq2->lock); /* Fake it out ;) */
2059 } else {
2060 if (rq1 < rq2) {
2061 raw_spin_lock(&rq1->lock);
2062 raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
2063 } else {
2064 raw_spin_lock(&rq2->lock);
2065 raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
2066 }
2067 }
2068}
2069
2070/*
2071 * double_rq_unlock - safely unlock two runqueues
2072 *
2073 * Note this does not restore interrupts like task_rq_unlock,
2074 * you need to do so manually after calling.
2075 */
2076static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
2077 __releases(rq1->lock)
2078 __releases(rq2->lock)
2079{
2080 raw_spin_unlock(&rq1->lock);
2081 if (rq1 != rq2)
2082 raw_spin_unlock(&rq2->lock);
2083 else
2084 __release(rq2->lock);
2085}
2086
f2cb1360
IM
2087extern void set_rq_online (struct rq *rq);
2088extern void set_rq_offline(struct rq *rq);
2089extern bool sched_smp_initialized;
2090
029632fb
PZ
2091#else /* CONFIG_SMP */
2092
2093/*
2094 * double_rq_lock - safely lock two runqueues
2095 *
2096 * Note this does not disable interrupts like task_rq_lock,
2097 * you need to do so manually before calling.
2098 */
2099static inline void double_rq_lock(struct rq *rq1, struct rq *rq2)
2100 __acquires(rq1->lock)
2101 __acquires(rq2->lock)
2102{
2103 BUG_ON(!irqs_disabled());
2104 BUG_ON(rq1 != rq2);
2105 raw_spin_lock(&rq1->lock);
2106 __acquire(rq2->lock); /* Fake it out ;) */
2107}
2108
2109/*
2110 * double_rq_unlock - safely unlock two runqueues
2111 *
2112 * Note this does not restore interrupts like task_rq_unlock,
2113 * you need to do so manually after calling.
2114 */
2115static inline void double_rq_unlock(struct rq *rq1, struct rq *rq2)
2116 __releases(rq1->lock)
2117 __releases(rq2->lock)
2118{
2119 BUG_ON(rq1 != rq2);
2120 raw_spin_unlock(&rq1->lock);
2121 __release(rq2->lock);
2122}
2123
2124#endif
2125
2126extern struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq);
2127extern struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq);
6b55c965
SD
2128
2129#ifdef CONFIG_SCHED_DEBUG
9469eb01
PZ
2130extern bool sched_debug_enabled;
2131
029632fb
PZ
2132extern void print_cfs_stats(struct seq_file *m, int cpu);
2133extern void print_rt_stats(struct seq_file *m, int cpu);
acb32132 2134extern void print_dl_stats(struct seq_file *m, int cpu);
f6a34630
MM
2135extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq);
2136extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
2137extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq);
397f2378
SD
2138#ifdef CONFIG_NUMA_BALANCING
2139extern void
2140show_numa_stats(struct task_struct *p, struct seq_file *m);
2141extern void
2142print_numa_stats(struct seq_file *m, int node, unsigned long tsf,
2143 unsigned long tpf, unsigned long gsf, unsigned long gpf);
2144#endif /* CONFIG_NUMA_BALANCING */
2145#endif /* CONFIG_SCHED_DEBUG */
029632fb
PZ
2146
2147extern void init_cfs_rq(struct cfs_rq *cfs_rq);
07c54f7a
AV
2148extern void init_rt_rq(struct rt_rq *rt_rq);
2149extern void init_dl_rq(struct dl_rq *dl_rq);
029632fb 2150
1ee14e6c
BS
2151extern void cfs_bandwidth_usage_inc(void);
2152extern void cfs_bandwidth_usage_dec(void);
1c792db7 2153
3451d024 2154#ifdef CONFIG_NO_HZ_COMMON
00357f5e
PZ
2155#define NOHZ_BALANCE_KICK_BIT 0
2156#define NOHZ_STATS_KICK_BIT 1
a22e47a4 2157
a22e47a4 2158#define NOHZ_BALANCE_KICK BIT(NOHZ_BALANCE_KICK_BIT)
b7031a02
PZ
2159#define NOHZ_STATS_KICK BIT(NOHZ_STATS_KICK_BIT)
2160
2161#define NOHZ_KICK_MASK (NOHZ_BALANCE_KICK | NOHZ_STATS_KICK)
1c792db7
SS
2162
2163#define nohz_flags(cpu) (&cpu_rq(cpu)->nohz_flags)
20a5c8cc 2164
00357f5e 2165extern void nohz_balance_exit_idle(struct rq *rq);
20a5c8cc 2166#else
00357f5e 2167static inline void nohz_balance_exit_idle(struct rq *rq) { }
1c792db7 2168#endif
73fbec60 2169
daec5798
LA
2170
2171#ifdef CONFIG_SMP
2172static inline
2173void __dl_update(struct dl_bw *dl_b, s64 bw)
2174{
2175 struct root_domain *rd = container_of(dl_b, struct root_domain, dl_bw);
2176 int i;
2177
2178 RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held(),
2179 "sched RCU must be held");
2180 for_each_cpu_and(i, rd->span, cpu_active_mask) {
2181 struct rq *rq = cpu_rq(i);
2182
2183 rq->dl.extra_bw += bw;
2184 }
2185}
2186#else
2187static inline
2188void __dl_update(struct dl_bw *dl_b, s64 bw)
2189{
2190 struct dl_rq *dl = container_of(dl_b, struct dl_rq, dl_bw);
2191
2192 dl->extra_bw += bw;
2193}
2194#endif
2195
2196
73fbec60 2197#ifdef CONFIG_IRQ_TIME_ACCOUNTING
19d23dbf 2198struct irqtime {
25e2d8c1 2199 u64 total;
a499a5a1 2200 u64 tick_delta;
19d23dbf
FW
2201 u64 irq_start_time;
2202 struct u64_stats_sync sync;
2203};
73fbec60 2204
19d23dbf 2205DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
73fbec60 2206
25e2d8c1
FW
2207/*
2208 * Returns the irqtime minus the softirq time computed by ksoftirqd.
2209 * Otherwise ksoftirqd's sum_exec_runtime would have its own runtime
2210 * subtracted from it and never move forward.
2211 */
73fbec60
FW
2212static inline u64 irq_time_read(int cpu)
2213{
19d23dbf
FW
2214 struct irqtime *irqtime = &per_cpu(cpu_irqtime, cpu);
2215 unsigned int seq;
2216 u64 total;
73fbec60
FW
2217
2218 do {
19d23dbf 2219 seq = __u64_stats_fetch_begin(&irqtime->sync);
25e2d8c1 2220 total = irqtime->total;
19d23dbf 2221 } while (__u64_stats_fetch_retry(&irqtime->sync, seq));
73fbec60 2222
19d23dbf 2223 return total;
73fbec60 2224}
73fbec60 2225#endif /* CONFIG_IRQ_TIME_ACCOUNTING */
adaf9fcd
RW
2226
2227#ifdef CONFIG_CPU_FREQ
b10abd0a 2228DECLARE_PER_CPU(struct update_util_data __rcu *, cpufreq_update_util_data);
adaf9fcd
RW
2229
2230/**
2231 * cpufreq_update_util - Take a note about CPU utilization changes.
12bde33d 2232 * @rq: Runqueue to carry out the update for.
58919e83 2233 * @flags: Update reason flags.
adaf9fcd 2234 *
58919e83
RW
2235 * This function is called by the scheduler on the CPU whose utilization is
2236 * being updated.
adaf9fcd
RW
2237 *
2238 * It can only be called from RCU-sched read-side critical sections.
adaf9fcd
RW
2239 *
2240 * The way cpufreq is currently arranged requires it to evaluate the CPU
2241 * performance state (frequency/voltage) on a regular basis to prevent it from
2242 * being stuck in a completely inadequate performance level for too long.
e0367b12
JL
2243 * That is not guaranteed to happen if the updates are only triggered from CFS
2244 * and DL, though, because they may not be coming in if only RT tasks are
2245 * active all the time.
adaf9fcd 2246 *
e0367b12
JL
2247 * As a workaround for that issue, this function is called periodically by the
2248 * RT sched class to trigger extra cpufreq updates to prevent it from stalling,
adaf9fcd 2249 * but that really is a band-aid. Going forward it should be replaced with
e0367b12 2250 * solutions targeted more specifically at RT tasks.
adaf9fcd 2251 */
12bde33d 2252static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
adaf9fcd 2253{
58919e83
RW
2254 struct update_util_data *data;
2255
674e7541
VK
2256 data = rcu_dereference_sched(*per_cpu_ptr(&cpufreq_update_util_data,
2257 cpu_of(rq)));
58919e83 2258 if (data)
12bde33d
RW
2259 data->func(data, rq_clock(rq), flags);
2260}
adaf9fcd 2261#else
12bde33d 2262static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
adaf9fcd 2263#endif /* CONFIG_CPU_FREQ */
be53f58f 2264
982d9cdc 2265#ifdef CONFIG_UCLAMP_TASK
9d20ad7d
PB
2266unsigned int uclamp_eff_value(struct task_struct *p, unsigned int clamp_id);
2267
2268static __always_inline
2269unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
2270 struct task_struct *p)
982d9cdc
PB
2271{
2272 unsigned int min_util = READ_ONCE(rq->uclamp[UCLAMP_MIN].value);
2273 unsigned int max_util = READ_ONCE(rq->uclamp[UCLAMP_MAX].value);
2274
9d20ad7d
PB
2275 if (p) {
2276 min_util = max(min_util, uclamp_eff_value(p, UCLAMP_MIN));
2277 max_util = max(max_util, uclamp_eff_value(p, UCLAMP_MAX));
2278 }
2279
982d9cdc
PB
2280 /*
2281 * Since CPU's {min,max}_util clamps are MAX aggregated considering
2282 * RUNNABLE tasks with _different_ clamps, we can end up with an
2283 * inversion. Fix it now when the clamps are applied.
2284 */
2285 if (unlikely(min_util >= max_util))
2286 return min_util;
2287
2288 return clamp(util, min_util, max_util);
2289}
9d20ad7d
PB
2290
2291static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
2292{
2293 return uclamp_util_with(rq, util, NULL);
2294}
982d9cdc 2295#else /* CONFIG_UCLAMP_TASK */
9d20ad7d
PB
2296static inline unsigned int uclamp_util_with(struct rq *rq, unsigned int util,
2297 struct task_struct *p)
2298{
2299 return util;
2300}
982d9cdc
PB
2301static inline unsigned int uclamp_util(struct rq *rq, unsigned int util)
2302{
2303 return util;
2304}
2305#endif /* CONFIG_UCLAMP_TASK */
2306
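A stand-alone sketch of the clamping performed by uclamp_util_with() above, including the inversion case described in the comment (when the MAX-aggregated minimum exceeds the maximum, the minimum wins):

/* Stand-alone sketch of the uclamp_util_with() clamping logic. */
#include <stdio.h>

static unsigned int clamp_util(unsigned int util,
			       unsigned int min_util, unsigned int max_util)
{
	if (min_util >= max_util)	/* clamp inversion: min wins */
		return min_util;
	if (util < min_util)
		return min_util;
	if (util > max_util)
		return max_util;
	return util;
}

int main(void)
{
	printf("%u\n", clamp_util(200, 300, 700));	/* boosted to 300 */
	printf("%u\n", clamp_util(900, 300, 700));	/* capped to 700 */
	printf("%u\n", clamp_util(512, 800, 600));	/* inversion -> 800 */
	return 0;
}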
9bdcb44e 2307#ifdef arch_scale_freq_capacity
97fb7a0a
IM
2308# ifndef arch_scale_freq_invariant
2309# define arch_scale_freq_invariant() true
2310# endif
2311#else
2312# define arch_scale_freq_invariant() false
9bdcb44e 2313#endif
d4edd662 2314
10a35e68
VG
2315#ifdef CONFIG_SMP
2316static inline unsigned long capacity_orig_of(int cpu)
2317{
2318 return cpu_rq(cpu)->cpu_capacity_orig;
2319}
2320#endif
2321
938e5e4b
QP
2322/**
2323 * enum schedutil_type - CPU utilization type
2324 * @FREQUENCY_UTIL: Utilization used to select frequency
2325 * @ENERGY_UTIL: Utilization used during energy calculation
2326 *
2327 * The utilization signals of all scheduling classes (CFS/RT/DL) and IRQ time
2328 * need to be aggregated differently depending on the usage made of them. This
2329 * enum is used within schedutil_freq_util() to differentiate the types of
2330 * utilization expected by the callers, and adjust the aggregation accordingly.
2331 */
2332enum schedutil_type {
2333 FREQUENCY_UTIL,
2334 ENERGY_UTIL,
2335};
2336
af24bde8 2337#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL
938e5e4b 2338
af24bde8
PB
2339unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
2340 unsigned long max, enum schedutil_type type,
2341 struct task_struct *p);
938e5e4b 2342
8cc90515 2343static inline unsigned long cpu_bw_dl(struct rq *rq)
d4edd662
JL
2344{
2345 return (rq->dl.running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;
2346}
2347
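cpu_bw_dl() above converts the deadline class's reserved bandwidth, a BW_SHIFT fixed-point fraction, into SCHED_CAPACITY_SCALE units; a stand-alone worked example of that conversion (assuming SCHED_CAPACITY_SCALE = 1024):

/* Stand-alone sketch of the cpu_bw_dl() conversion. */
#include <stdint.h>
#include <stdio.h>

#define BW_SHIFT		20
#define SCHED_CAPACITY_SCALE	1024UL

int main(void)
{
	/* running_bw of 0.25 CPU in BW_SHIFT fixed point: */
	uint64_t running_bw = 1ULL << (BW_SHIFT - 2);	/* 262144 */
	unsigned long bw = (running_bw * SCHED_CAPACITY_SCALE) >> BW_SHIFT;

	printf("dl bandwidth = %lu of %lu capacity units\n",
	       bw, SCHED_CAPACITY_SCALE);	/* 256 of 1024 */
	return 0;
}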
8cc90515
VG
2348static inline unsigned long cpu_util_dl(struct rq *rq)
2349{
2350 return READ_ONCE(rq->avg_dl.util_avg);
2351}
2352
d4edd662
JL
2353static inline unsigned long cpu_util_cfs(struct rq *rq)
2354{
a07630b8
PB
2355 unsigned long util = READ_ONCE(rq->cfs.avg.util_avg);
2356
2357 if (sched_feat(UTIL_EST)) {
2358 util = max_t(unsigned long, util,
2359 READ_ONCE(rq->cfs.avg.util_est.enqueued));
2360 }
2361
2362 return util;
d4edd662 2363}
371bf427
VG
2364
2365static inline unsigned long cpu_util_rt(struct rq *rq)
2366{
dfa444dc 2367 return READ_ONCE(rq->avg_rt.util_avg);
371bf427 2368}
938e5e4b 2369#else /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
af24bde8
PB
2370static inline unsigned long schedutil_cpu_util(int cpu, unsigned long util_cfs,
2371 unsigned long max, enum schedutil_type type,
2372 struct task_struct *p)
938e5e4b 2373{
af24bde8 2374 return 0;
938e5e4b 2375}
af24bde8 2376#endif /* CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
9033ea11 2377
11d4afd4 2378#ifdef CONFIG_HAVE_SCHED_AVG_IRQ
9033ea11
VG
2379static inline unsigned long cpu_util_irq(struct rq *rq)
2380{
2381 return rq->avg_irq.util_avg;
2382}
2e62c474
VG
2383
2384static inline
2385unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
2386{
2387 util *= (max - irq);
2388 util /= max;
2389
2390 return util;
2391
2392}
9033ea11
VG
2393#else
2394static inline unsigned long cpu_util_irq(struct rq *rq)
2395{
2396 return 0;
2397}
2398
2e62c474
VG
2399static inline
2400unsigned long scale_irq_capacity(unsigned long util, unsigned long irq, unsigned long max)
2401{
2402 return util;
2403}
794a56eb 2404#endif
6aa140fa 2405
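scale_irq_capacity() above discounts the fraction of capacity consumed in IRQ (and steal) time from a utilization value; a stand-alone worked example, assuming max = SCHED_CAPACITY_SCALE = 1024:

/* Stand-alone sketch of the scale_irq_capacity() arithmetic. */
#include <stdio.h>

static unsigned long scale_irq(unsigned long util, unsigned long irq,
			       unsigned long max)
{
	util *= (max - irq);
	util /= max;
	return util;
}

int main(void)
{
	/* 25% of the CPU spent in IRQ context leaves 75% for everything else: */
	printf("%lu\n", scale_irq(512, 256, 1024));	/* 384 */
	return 0;
}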
531b5c9f 2406#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
f8a696f2 2407
6aa140fa 2408#define perf_domain_span(pd) (to_cpumask(((pd)->em_pd->cpus)))
f8a696f2
PZ
2409
2410DECLARE_STATIC_KEY_FALSE(sched_energy_present);
2411
2412static inline bool sched_energy_enabled(void)
2413{
2414 return static_branch_unlikely(&sched_energy_present);
2415}
2416
2417#else /* ! (CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL) */
2418
6aa140fa 2419#define perf_domain_span(pd) NULL
f8a696f2 2420static inline bool sched_energy_enabled(void) { return false; }
1f74de87 2421
f8a696f2 2422#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */