kernel/locking/qspinlock_stat.h

   1 /*
   2  * This program is free software; you can redistribute it and/or modify
   3  * it under the terms of the GNU General Public License as published by
   4  * the Free Software Foundation; either version 2 of the License, or
   5  * (at your option) any later version.
   6  *
   7  * This program is distributed in the hope that it will be useful,
   8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  10  * GNU General Public License for more details.
  11  *
  12  * Authors: Waiman Long <waiman.long@hpe.com>
  13  */
  14
  15 /*
  16  * When queued spinlock statistical counters are enabled, the following
  17  * debugfs files will be created for reporting the counter values:
  18  *
  19  * <debugfs>/qlockstat/
  20  *   pv_hash_hops       - average # of hops per hashing operation
  21  *   pv_kick_unlock     - # of vCPU kicks issued at unlock time
  22  *   pv_kick_wake       - # of vCPU kicks used for computing pv_latency_wake
  23  *   pv_latency_kick    - average latency (ns) of vCPU kick operation
  24  *   pv_latency_wake    - average latency (ns) from vCPU kick to wakeup
  25  *   pv_lock_stealing   - # of lock stealing operations
  26  *   pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs
  27  *   pv_wait_again      - # of wait's after a queue head vCPU kick
  28  *   pv_wait_early      - # of early vCPU wait's
  29  *   pv_wait_head       - # of vCPU wait's at the queue head
  30  *   pv_wait_node       - # of vCPU wait's at a non-head queue node
  31  *   lock_pending       - # of locking operations via pending code
  32  *   lock_slowpath      - # of locking operations via MCS lock queue
  33  *   lock_use_node2     - # of locking operations that use 2nd per-CPU node
  34  *   lock_use_node3     - # of locking operations that use 3rd per-CPU node
  35  *   lock_use_node4     - # of locking operations that use 4th per-CPU node
  36  *   lock_no_node       - # of locking operations without using per-CPU node
  37  *
  38  * Subtracting lock_use_node[234] from lock_slowpath will give you
  39  * lock_use_node1.
  40  *
  41  * Writing to the "reset_counters" file will reset all the above counter
  42  * values.
  43  *
  44  * These statistical counters are implemented as per-cpu variables which are
  45  * summed and computed whenever the corresponding debugfs files are read. This
  46  * minimizes added overhead making the counters usable even in a production
  47  * environment.
  48  *
 * There may be a slight difference between pv_kick_wake and pv_kick_unlock.
  50  */
  51 enum qlock_stats {
  52         qstat_pv_hash_hops,
  53         qstat_pv_kick_unlock,
  54         qstat_pv_kick_wake,
  55         qstat_pv_latency_kick,
  56         qstat_pv_latency_wake,
  57         qstat_pv_lock_stealing,
  58         qstat_pv_spurious_wakeup,
  59         qstat_pv_wait_again,
  60         qstat_pv_wait_early,
  61         qstat_pv_wait_head,
  62         qstat_pv_wait_node,
  63         qstat_lock_pending,
  64         qstat_lock_slowpath,
  65         qstat_lock_use_node2,
  66         qstat_lock_use_node3,
  67         qstat_lock_use_node4,
  68         qstat_lock_no_node,
  69         qstat_num,      /* Total number of statistical counters */
  70         qstat_reset_cnts = qstat_num,
  71 };
  72
  73 #ifdef CONFIG_QUEUED_LOCK_STAT
  74 /*
  75  * Collect pvqspinlock statistics
  76  */
#include <linux/debugfs.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/math64.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
  81
  82 static const char * const qstat_names[qstat_num + 1] = {
  83         [qstat_pv_hash_hops]       = "pv_hash_hops",
  84         [qstat_pv_kick_unlock]     = "pv_kick_unlock",
  85         [qstat_pv_kick_wake]       = "pv_kick_wake",
  86         [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
  87         [qstat_pv_latency_kick]    = "pv_latency_kick",
  88         [qstat_pv_latency_wake]    = "pv_latency_wake",
  89         [qstat_pv_lock_stealing]   = "pv_lock_stealing",
  90         [qstat_pv_wait_again]      = "pv_wait_again",
  91         [qstat_pv_wait_early]      = "pv_wait_early",
  92         [qstat_pv_wait_head]       = "pv_wait_head",
  93         [qstat_pv_wait_node]       = "pv_wait_node",
  94         [qstat_lock_pending]       = "lock_pending",
  95         [qstat_lock_slowpath]      = "lock_slowpath",
  96         [qstat_lock_use_node2]     = "lock_use_node2",
  97         [qstat_lock_use_node3]     = "lock_use_node3",
  98         [qstat_lock_use_node4]     = "lock_use_node4",
  99         [qstat_lock_no_node]       = "lock_no_node",
 100         [qstat_reset_cnts]         = "reset_counters",
 101 };
 102
 103 /*
 104  * Per-cpu counters
 105  */
/* Per-cpu array of raw counter values, indexed by enum qlock_stats. */
static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]);
/*
 * Per-cpu sched_clock() timestamp. __pv_kick() stores it into the slot of
 * the CPU being kicked; __pv_wait() zeroes its own slot before waiting and
 * reads it back afterward.
 */
static DEFINE_PER_CPU(u64, pv_kick_time);
 108
 109 /*
 110  * Function to read and return the qlock statistical counter values
 111  *
 112  * The following counters are handled specially:
 113  * 1. qstat_pv_latency_kick
 114  *    Average kick latency (ns) = pv_latency_kick/pv_kick_unlock
 115  * 2. qstat_pv_latency_wake
 116  *    Average wake latency (ns) = pv_latency_wake/pv_kick_wake
 117  * 3. qstat_pv_hash_hops
 118  *    Average hops/hash = pv_hash_hops/pv_kick_unlock
 119  */
 120 static ssize_t qstat_read(struct file *file, char __user *user_buf,
 121                           size_t count, loff_t *ppos)
 122 {
 123         char buf[64];
 124         int cpu, counter, len;
 125         u64 stat = 0, kicks = 0;
 126
 127         /*
 128          * Get the counter ID stored in file->f_inode->i_private
 129          */
 130         counter = (long)file_inode(file)->i_private;
 131
 132         if (counter >= qstat_num)
 133                 return -EBADF;
 134
 135         for_each_possible_cpu(cpu) {
 136                 stat += per_cpu(qstats[counter], cpu);
 137                 /*
 138                  * Need to sum additional counter for some of them
 139                  */
 140                 switch (counter) {
 141
 142                 case qstat_pv_latency_kick:
 143                 case qstat_pv_hash_hops:
 144                         kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu);
 145                         break;
 146
 147                 case qstat_pv_latency_wake:
 148                         kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu);
 149                         break;
 150                 }
 151         }
 152
 153         if (counter == qstat_pv_hash_hops) {
 154                 u64 frac = 0;
 155
 156                 if (kicks) {
 157                         frac = 100ULL * do_div(stat, kicks);
 158                         frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
 159                 }
 160
 161                 /*
 162                  * Return a X.XX decimal number
 163                  */
 164                 len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac);
 165         } else {
 166                 /*
 167                  * Round to the nearest ns
 168                  */
 169                 if ((counter == qstat_pv_latency_kick) ||
 170                     (counter == qstat_pv_latency_wake)) {
 171                         if (kicks)
 172                                 stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
 173                 }
 174                 len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat);
 175         }
 176
 177         return simple_read_from_buffer(user_buf, count, ppos, buf, len);
 178 }
 179
 180 /*
 181  * Function to handle write request
 182  *
 * When counter = reset_cnts, reset all the counter values.
 * Every per-cpu counter is cleared in a single pass. Since the counter
 * updates aren't atomic with respect to this reset, an update racing with
 * it may leave a counter not fully cleared.
 186  */
 187 static ssize_t qstat_write(struct file *file, const char __user *user_buf,
 188                            size_t count, loff_t *ppos)
 189 {
 190         int cpu;
 191
 192         /*
 193          * Get the counter ID stored in file->f_inode->i_private
 194          */
 195         if ((long)file_inode(file)->i_private != qstat_reset_cnts)
 196                 return count;
 197
 198         for_each_possible_cpu(cpu) {
 199                 int i;
 200                 unsigned long *ptr = per_cpu_ptr(qstats, cpu);
 201
 202                 for (i = 0 ; i < qstat_num; i++)
 203                         WRITE_ONCE(ptr[i], 0);
 204         }
 205         return count;
 206 }
 207
 208 /*
 209  * Debugfs data structures
 210  */
 211 static const struct file_operations fops_qstat = {
 212         .read = qstat_read,
 213         .write = qstat_write,
 214         .llseek = default_llseek,
 215 };
 216
 217 /*
 218  * Initialize debugfs for the qspinlock statistical counters
 219  */
 220 static int __init init_qspinlock_stat(void)
 221 {
 222         struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
 223         int i;
 224
 225         if (!d_qstat)
 226                 goto out;
 227
 228         /*
 229          * Create the debugfs files
 230          *
 231          * As reading from and writing to the stat files can be slow, only
 232          * root is allowed to do the read/write to limit impact to system
 233          * performance.
 234          */
 235         for (i = 0; i < qstat_num; i++)
 236                 if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,
 237                                          (void *)(long)i, &fops_qstat))
 238                         goto fail_undo;
 239
 240         if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
 241                                  (void *)(long)qstat_reset_cnts, &fops_qstat))
 242                 goto fail_undo;
 243
 244         return 0;
 245 fail_undo:
 246         debugfs_remove_recursive(d_qstat);
 247 out:
 248         pr_warn("Could not create 'qlockstat' debugfs entries\n");
 249         return -ENOMEM;
 250 }
 251 fs_initcall(init_qspinlock_stat);
 252
 253 /*
 254  * Increment the PV qspinlock statistical counters
 255  */
 256 static inline void qstat_inc(enum qlock_stats stat, bool cond)
 257 {
 258         if (cond)
 259                 this_cpu_inc(qstats[stat]);
 260 }
 261
 262 /*
 263  * PV hash hop count
 264  */
/* Add hopcnt to the current CPU's running pv_hash_hops total. */
static inline void qstat_hop(int hopcnt)
{
        this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt);
}
 269
 270 /*
 271  * Replacement function for pv_kick()
 272  */
 273 static inline void __pv_kick(int cpu)
 274 {
 275         u64 start = sched_clock();
 276
 277         per_cpu(pv_kick_time, cpu) = start;
 278         pv_kick(cpu);
 279         this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start);
 280 }
 281
 282 /*
 283  * Replacement function for pv_wait()
 284  */
 285 static inline void __pv_wait(u8 *ptr, u8 val)
 286 {
 287         u64 *pkick_time = this_cpu_ptr(&pv_kick_time);
 288
 289         *pkick_time = 0;
 290         pv_wait(ptr, val);
 291         if (*pkick_time) {
 292                 this_cpu_add(qstats[qstat_pv_latency_wake],
 293                              sched_clock() - *pkick_time);
 294                 qstat_inc(qstat_pv_kick_wake, true);
 295         }
 296 }
 297
/*
 * From this point on, pv_kick()/pv_wait() calls go through the instrumented
 * wrappers above. The defines must come after the wrapper bodies so that the
 * pv_kick()/pv_wait() calls inside the wrappers are not redirected as well.
 */
#define pv_kick(c)      __pv_kick(c)
#define pv_wait(p, v)   __pv_wait(p, v)
 300
 301 #else /* CONFIG_QUEUED_LOCK_STAT */
 302
 303 static inline void qstat_inc(enum qlock_stats stat, bool cond)  { }
 304 static inline void qstat_hop(int hopcnt)                        { }
 305
 306 #endif /* CONFIG_QUEUED_LOCK_STAT */