sched/vtime: Bring up complete kcpustat accessor
author     Frederic Weisbecker <frederic@kernel.org>
           Thu, 21 Nov 2019 02:44:26 +0000 (03:44 +0100)
committer  Ingo Molnar <mingo@kernel.org>
           Thu, 21 Nov 2019 06:33:24 +0000 (07:33 +0100)
Many callsites want to fetch the values of the system, user, user_nice,
guest or guest_nice kcpustat fields all at once, or at least a pair of
them.

In that case, calling kcpustat_field() once per requested field brings
unnecessary overhead when all of them could be fetched in one go.
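
For illustration, reading both user and system time today takes two
independent vtime-safe lookups, each paying the RCU + seqcount cost
(hypothetical consumer code, not part of this patch):

	u64 user, sys;

	user = kcpustat_field(&kcpustat_cpu(cpu), CPUTIME_USER, cpu);
	sys  = kcpustat_field(&kcpustat_cpu(cpu), CPUTIME_SYSTEM, cpu);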

So provide kcpustat_cpu_fetch(), which fetches the whole kcpustat array
in a vtime-safe way, under the same RCU and seqcount block.
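
A minimal usage sketch (again a hypothetical consumer; "cpu" is assumed
to be a valid CPU number): snapshot the whole array once, then read any
number of fields from the consistent copy:

	struct kernel_cpustat snap;

	kcpustat_cpu_fetch(&snap, cpu);
	user = snap.cpustat[CPUTIME_USER];
	sys  = snap.cpustat[CPUTIME_SYSTEM];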

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Wanpeng Li <wanpengli@tencent.com>
Cc: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Link: https://lkml.kernel.org/r/20191121024430.19938-3-frederic@kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
include/linux/kernel_stat.h
kernel/sched/cputime.c

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index 79781196eb2542057a7e3be7f4fe610b98a9e53b..89f0745c096d4b090cec7e21df46e9d435ad5a78 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h
@@ -81,12 +81,19 @@ static inline unsigned int kstat_cpu_irqs_sum(unsigned int cpu)
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
 extern u64 kcpustat_field(struct kernel_cpustat *kcpustat,
                          enum cpu_usage_stat usage, int cpu);
+extern void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu);
 #else
 static inline u64 kcpustat_field(struct kernel_cpustat *kcpustat,
                                 enum cpu_usage_stat usage, int cpu)
 {
        return kcpustat->cpustat[usage];
 }
+
+static inline void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
+{
+       *dst = kcpustat_cpu(cpu);
+}
+
 #endif
 
 extern void account_user_time(struct task_struct *, u64);
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 27b5406222fcf9a0dc2cc48502ac74a555e20b5e..d43318a489f245d6356094b4b80f96c01e26ceb9 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -912,6 +912,30 @@ void task_cputime(struct task_struct *t, u64 *utime, u64 *stime)
        } while (read_seqcount_retry(&vtime->seqcount, seq));
 }
 
+static int vtime_state_check(struct vtime *vtime, int cpu)
+{
+       /*
+        * We raced against a context switch, fetch the
+        * kcpustat task again.
+        */
+       if (vtime->cpu != cpu && vtime->cpu != -1)
+               return -EAGAIN;
+
+       /*
+        * Two possible things here:
+        * 1) We are seeing the scheduling out task (prev) or any past one.
+        * 2) We are seeing the scheduling in task (next) but it hasn't
+        *    passed through vtime_task_switch() yet so the pending
+        *    cputime of the prev task may not be flushed yet.
+        *
+        * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
+        */
+       if (vtime->state == VTIME_INACTIVE)
+               return -EAGAIN;
+
+       return 0;
+}
+
 static u64 kcpustat_user_vtime(struct vtime *vtime)
 {
        if (vtime->state == VTIME_USER)
@@ -933,26 +957,9 @@ static int kcpustat_field_vtime(u64 *cpustat,
        do {
                seq = read_seqcount_begin(&vtime->seqcount);
 
-               /*
-                * We raced against context switch, fetch the
-                * kcpustat task again.
-                */
-               if (vtime->cpu != cpu && vtime->cpu != -1)
-                       return -EAGAIN;
-
-               /*
-                * Two possible things here:
-                * 1) We are seeing the scheduling out task (prev) or any past one.
-                * 2) We are seeing the scheduling in task (next) but it hasn't
-                *    passed though vtime_task_switch() yet so the pending
-                *    cputime of the prev task may not be flushed yet.
-                *
-                * Case 1) is ok but 2) is not. So wait for a safe VTIME state.
-                */
-               if (vtime->state == VTIME_INACTIVE)
-                       return -EAGAIN;
-
-               err = 0;
+               err = vtime_state_check(vtime, cpu);
+               if (err < 0)
+                       return err;
 
                *val = cpustat[usage];
 
@@ -1025,4 +1032,93 @@ u64 kcpustat_field(struct kernel_cpustat *kcpustat,
        }
 }
 EXPORT_SYMBOL_GPL(kcpustat_field);
+
+static int kcpustat_cpu_fetch_vtime(struct kernel_cpustat *dst,
+                                   const struct kernel_cpustat *src,
+                                   struct task_struct *tsk, int cpu)
+{
+       struct vtime *vtime = &tsk->vtime;
+       unsigned int seq;
+       int err;
+
+       do {
+               u64 *cpustat;
+               u64 delta;
+
+               seq = read_seqcount_begin(&vtime->seqcount);
+
+               err = vtime_state_check(vtime, cpu);
+               if (err < 0)
+                       return err;
+
+               *dst = *src;
+               cpustat = dst->cpustat;
+
+               /* Task is sleeping, dead or idle, nothing to add */
+               if (vtime->state < VTIME_SYS)
+                       continue;
+
+               delta = vtime_delta(vtime);
+
+               /*
+                * Task runs either in user (including guest) or kernel space,
+                * add pending nohz time to the right place.
+                */
+               if (vtime->state == VTIME_SYS) {
+                       cpustat[CPUTIME_SYSTEM] += vtime->stime + delta;
+               } else if (vtime->state == VTIME_USER) {
+                       if (task_nice(tsk) > 0)
+                               cpustat[CPUTIME_NICE] += vtime->utime + delta;
+                       else
+                               cpustat[CPUTIME_USER] += vtime->utime + delta;
+               } else {
+                       WARN_ON_ONCE(vtime->state != VTIME_GUEST);
+                       if (task_nice(tsk) > 0) {
+                               cpustat[CPUTIME_GUEST_NICE] += vtime->gtime + delta;
+                               cpustat[CPUTIME_NICE] += vtime->gtime + delta;
+                       } else {
+                               cpustat[CPUTIME_GUEST] += vtime->gtime + delta;
+                               cpustat[CPUTIME_USER] += vtime->gtime + delta;
+                       }
+               }
+       } while (read_seqcount_retry(&vtime->seqcount, seq));
+
+       return err;
+}
+
+void kcpustat_cpu_fetch(struct kernel_cpustat *dst, int cpu)
+{
+       const struct kernel_cpustat *src = &kcpustat_cpu(cpu);
+       struct rq *rq;
+       int err;
+
+       if (!vtime_accounting_enabled_cpu(cpu)) {
+               *dst = *src;
+               return;
+       }
+
+       rq = cpu_rq(cpu);
+
+       for (;;) {
+               struct task_struct *curr;
+
+               rcu_read_lock();
+               curr = rcu_dereference(rq->curr);
+               if (WARN_ON_ONCE(!curr)) {
+                       rcu_read_unlock();
+                       *dst = *src;
+                       return;
+               }
+
+               err = kcpustat_cpu_fetch_vtime(dst, src, curr, cpu);
+               rcu_read_unlock();
+
+               if (!err)
+                       return;
+
+               cpu_relax();
+       }
+}
+EXPORT_SYMBOL_GPL(kcpustat_cpu_fetch);
+
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */