unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif
+static inline void update_load_add(struct load_weight *lw, unsigned long inc)
+{ /* Add inc to lw->weight and clear lw->inv_weight. */
+ lw->weight += inc;
+ lw->inv_weight = 0; /* presumably a cached inverse of weight that must be rebuilt - confirm */
+}
+
+static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
+{ /* Subtract dec from lw->weight and clear lw->inv_weight. */
+ lw->weight -= dec; /* unsigned arithmetic: no check here that dec <= weight */
+ lw->inv_weight = 0; /* presumably a cached inverse of weight that must be rebuilt - confirm */
+}
+
+static inline void update_load_set(struct load_weight *lw, unsigned long w)
+{ /* Overwrite lw->weight with w and clear lw->inv_weight. */
+ lw->weight = w;
+ lw->inv_weight = 0; /* presumably a cached inverse of weight that must be rebuilt - confirm */
+}
+
/*
* Increase the granularity value when there are more CPUs,
* because with more CPUs the 'effective latency' as visible
return calc_delta_fair(sched_slice(cfs_rq, se), se);
}
+#ifdef CONFIG_SMP
+static inline void __update_task_entity_contrib(struct sched_entity *se);
+
+/*
+ * Give a new task initial runnable-average values so that its load is
+ * weighted heavily while the task is still in its infancy.
+ *
+ * runnable_avg_sum and runnable_avg_period are both seeded with the same
+ * value, derived from the task's sched_slice(), and the task's load
+ * contribution is then computed from those seeded values.
+ */
+void init_task_runnable_average(struct task_struct *p)
+{
+ u32 slice;
+
+ p->se.avg.decay_count = 0; /* enqueue_entity_load_avg() handles a zero decay_count as a migration */
+ slice = sched_slice(task_cfs_rq(p), &p->se) >> 10; /* slice scaled down by 2^10 */
+ p->se.avg.runnable_avg_sum = slice; /* sum == period from the very start */
+ p->se.avg.runnable_avg_period = slice;
+ __update_task_entity_contrib(&p->se); /* forward-declared above; defined elsewhere in the file */
+}
+#else
+void init_task_runnable_average(struct task_struct *p)
+{ /* Intentionally empty: stub used when CONFIG_SMP is not defined. */
+}
+#endif
+
/*
* Update the current task's runtime statistics. Skip current tasks that
* are not in our scheduling class.
static void update_curr(struct cfs_rq *cfs_rq)
{
struct sched_entity *curr = cfs_rq->curr;
- u64 now = rq_of(cfs_rq)->clock_task;
+ u64 now = rq_clock_task(rq_of(cfs_rq));
unsigned long delta_exec;
if (unlikely(!curr))
static inline void
update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
- schedstat_set(se->statistics.wait_start, rq_of(cfs_rq)->clock);
+ schedstat_set(se->statistics.wait_start, rq_clock(rq_of(cfs_rq)));
}
/*
update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
- rq_of(cfs_rq)->clock - se->statistics.wait_start));
+ rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start));
schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
- rq_of(cfs_rq)->clock - se->statistics.wait_start);
+ rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
#ifdef CONFIG_SCHEDSTATS
if (entity_is_task(se)) {
trace_sched_stat_wait(task_of(se),
- rq_of(cfs_rq)->clock - se->statistics.wait_start);
+ rq_clock(rq_of(cfs_rq)) - se->statistics.wait_start);
}
#endif
schedstat_set(se->statistics.wait_start, 0);
/*
* We are starting a new run period:
*/
- se->exec_start = rq_of(cfs_rq)->clock_task;
+ se->exec_start = rq_clock_task(rq_of(cfs_rq));
}
/**************************************************
}
#endif /* CONFIG_FAIR_GROUP_SCHED */
-/* Only depends on SMP, FAIR_GROUP_SCHED may be removed when useful in lb */
-#if defined(CONFIG_SMP) && defined(CONFIG_FAIR_GROUP_SCHED)
+#ifdef CONFIG_SMP
/*
* We choose a half-life close to 1 scheduling period.
* Note: The tables below are dependent on this value.
static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
{
- __update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
+ __update_entity_runnable_avg(rq_clock_task(rq), &rq->avg, runnable);
__update_tg_runnable_avg(&rq->avg, &rq->cfs);
}
* We track migrations using entity decay_count <= 0, on a wake-up
* migration we use a negative decay count to track the remote decays
* accumulated while sleeping.
+ *
+ * Newly forked tasks are enqueued with se->avg.decay_count == 0, they
+ * are seen by enqueue_entity_load_avg() as a migration with an already
+ * constructed load_avg_contrib.
*/
if (unlikely(se->avg.decay_count <= 0)) {
- se->avg.last_runnable_update = rq_of(cfs_rq)->clock_task;
+ se->avg.last_runnable_update = rq_clock_task(rq_of(cfs_rq));
if (se->avg.decay_count) {
/*
* In a wake-up migration we have to approximate the
}
wakeup = 0;
} else {
- __synchronize_entity_decay(se);
+ /*
+ * Task re-woke on same cpu (or else migrate_task_rq_fair()
+ * would have made count negative); we must be careful to avoid
+ * double-accounting blocked time after synchronizing decays.
+ */
+ se->avg.last_runnable_update += __synchronize_entity_decay(se)
+ << 20;
}
/* migrated tasks did not contribute to our blocked load */
tsk = task_of(se);
if (se->statistics.sleep_start) {
- u64 delta = rq_of(cfs_rq)->clock - se->statistics.sleep_start;
+ u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.sleep_start;
if ((s64)delta < 0)
delta = 0;
}
}
if (se->statistics.block_start) {
- u64 delta = rq_of(cfs_rq)->clock - se->statistics.block_start;
+ u64 delta = rq_clock(rq_of(cfs_rq)) - se->statistics.block_start;
if ((s64)delta < 0)
delta = 0;
struct task_struct *tsk = task_of(se);
if (tsk->state & TASK_INTERRUPTIBLE)
- se->statistics.sleep_start = rq_of(cfs_rq)->clock;
+ se->statistics.sleep_start = rq_clock(rq_of(cfs_rq));
if (tsk->state & TASK_UNINTERRUPTIBLE)
- se->statistics.block_start = rq_of(cfs_rq)->clock;
+ se->statistics.block_start = rq_clock(rq_of(cfs_rq));
}
#endif
}
if (unlikely(cfs_rq->throttle_count))
return cfs_rq->throttled_clock_task;
- return rq_of(cfs_rq)->clock_task - cfs_rq->throttled_clock_task_time;
+ return rq_clock_task(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
}
/* returns 0 on failure to allocate runtime */
static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
{
struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
- struct rq *rq = rq_of(cfs_rq);
/* if the deadline is ahead of our clock, nothing to do */
- if (likely((s64)(rq->clock - cfs_rq->runtime_expires) < 0))
+ if (likely((s64)(rq_clock(rq_of(cfs_rq)) - cfs_rq->runtime_expires) < 0))
return;
if (cfs_rq->runtime_remaining < 0)
#ifdef CONFIG_SMP
if (!cfs_rq->throttle_count) {
/* adjust cfs_rq_clock_task() */
- cfs_rq->throttled_clock_task_time += rq->clock_task -
+ cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
cfs_rq->throttled_clock_task;
}
#endif
/* group is entering throttled state, stop time */
if (!cfs_rq->throttle_count)
- cfs_rq->throttled_clock_task = rq->clock_task;
+ cfs_rq->throttled_clock_task = rq_clock_task(rq);
cfs_rq->throttle_count++;
return 0;
rq->nr_running -= task_delta;
cfs_rq->throttled = 1;
- cfs_rq->throttled_clock = rq->clock;
+ cfs_rq->throttled_clock = rq_clock(rq);
raw_spin_lock(&cfs_b->lock);
list_add_tail_rcu(&cfs_rq->throttled_list, &cfs_b->throttled_cfs_rq);
raw_spin_unlock(&cfs_b->lock);
int enqueue = 1;
long task_delta;
- se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
+ se = cfs_rq->tg->se[cpu_of(rq)];
cfs_rq->throttled = 0;
+
+ update_rq_clock(rq);
+
raw_spin_lock(&cfs_b->lock);
- cfs_b->throttled_time += rq->clock - cfs_rq->throttled_clock;
+ cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
list_del_rcu(&cfs_rq->throttled_list);
raw_spin_unlock(&cfs_b->lock);
- update_rq_clock(rq);
/* update hierarchical throttle state */
walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
throttle_cfs_rq(cfs_rq);
}
-static inline u64 default_cfs_period(void);
-static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun);
-static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b);
-
static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
{
struct cfs_bandwidth *cfs_b =
#else /* CONFIG_CFS_BANDWIDTH */
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
{
- return rq_of(cfs_rq)->clock_task;
+ return rq_clock_task(rq_of(cfs_rq));
}
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
return new_cpu;
}
-/*
- * Load-tracking only depends on SMP, FAIR_GROUP_SCHED dependency below may be
- * removed when useful for applications beyond shares distribution (e.g.
- * load-balance).
- */
-#ifdef CONFIG_FAIR_GROUP_SCHED
/*
* Called immediately before a task is migrated to a new cpu; task_cpu(p) and
* cfs_rq_of(p) references at time of call are still valid and identify the
atomic64_add(se->avg.load_avg_contrib, &cfs_rq->removed_load);
}
}
-#endif
#endif /* CONFIG_SMP */
static unsigned long
* 2) too many balance attempts have failed.
*/
- tsk_cache_hot = task_hot(p, env->src_rq->clock_task, env->sd);
+ tsk_cache_hot = task_hot(p, rq_clock_task(env->src_rq), env->sd);
if (!tsk_cache_hot ||
env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
age_stamp = ACCESS_ONCE(rq->age_stamp);
avg = ACCESS_ONCE(rq->rt_avg);
- total = sched_avg_period() + (rq->clock - age_stamp);
+ total = sched_avg_period() + (rq_clock(rq) - age_stamp);
if (unlikely(total < avg)) {
/* Ensures that power won't end up being negative */
int pulled_task = 0;
unsigned long next_balance = jiffies + HZ;
- this_rq->idle_stamp = this_rq->clock;
+ this_rq->idle_stamp = rq_clock(this_rq);
if (this_rq->avg_idle < sysctl_sched_migration_cost)
return;
static inline void set_cpu_sd_state_busy(void)
{
struct sched_domain *sd;
- int cpu = smp_processor_id();
rcu_read_lock();
- sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
+ sd = rcu_dereference_check_sched_domain(this_rq()->sd);
if (!sd || !sd->nohz_idle)
goto unlock;
void set_cpu_sd_state_idle(void)
{
struct sched_domain *sd;
- int cpu = smp_processor_id();
rcu_read_lock();
- sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd);
+ sd = rcu_dereference_check_sched_domain(this_rq()->sd);
if (!sd || sd->nohz_idle)
goto unlock;
se->vruntime -= cfs_rq->min_vruntime;
}
-#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
/*
* Remove our load from contribution when we leave sched_fair
* and ensure we don't carry in an old decay_count if we
#ifndef CONFIG_64BIT
cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
#endif
-#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
+#ifdef CONFIG_SMP
atomic64_set(&cfs_rq->decay_counter, 1);
atomic64_set(&cfs_rq->removed_load, 0);
#endif
se = tg->se[i];
/* Propagate contribution to hierarchy */
raw_spin_lock_irqsave(&rq->lock, flags);
+
+ /* Possible calls to update_curr() need rq clock */
+ update_rq_clock(rq);
for_each_sched_entity(se)
update_cfs_shares(group_cfs_rq(se));
raw_spin_unlock_irqrestore(&rq->lock, flags);
#ifdef CONFIG_SMP
.select_task_rq = select_task_rq_fair,
-#ifdef CONFIG_FAIR_GROUP_SCHED
.migrate_task_rq = migrate_task_rq_fair,
-#endif
+
.rq_online = rq_online_fair,
.rq_offline = rq_offline_fair,