/*
 * Copyright (c) 2017 Ericsson AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef DPIF_NETDEV_PERF_H
#define DPIF_NETDEV_PERF_H 1

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <math.h>

#ifdef DPDK_NETDEV
#include <rte_config.h>
#include <rte_cycles.h>
#endif

#include "openvswitch/vlog.h"
#include "ovs-atomic.h"
#include "timeval.h"
#include "unixctl.h"
#include "util.h"

#ifdef __cplusplus
extern "C" {
#endif

/* This module encapsulates data structures and functions to maintain basic PMD
 * performance metrics such as packet counters and execution cycles, as well as
 * histograms and time series recording for more detailed PMD metrics.
 *
 * It provides a clean API for dpif-netdev to initialize, update, read and
 * reset these metrics.
 *
 * The basic set of PMD counters is implemented as atomic_uint64_t variables
 * to guarantee correct reads also on 32-bit systems.
 *
 * The detailed PMD performance metrics are only supported on 64-bit systems
 * with atomic 64-bit read and store semantics for plain uint64_t counters.
 */

/* Set of counter types maintained in pmd_perf_stats. */

enum pmd_stat_type {
    PMD_STAT_EXACT_HIT,     /* Packets that had an exact match (emc). */
    PMD_STAT_SMC_HIT,       /* Packets that had a sig match hit (SMC). */
    PMD_STAT_MASKED_HIT,    /* Packets that matched in the flow table. */
    PMD_STAT_MISS,          /* Packets that did not match and upcall was ok. */
    PMD_STAT_LOST,          /* Packets that did not match and upcall failed. */
                            /* The above statistics account for the total
                             * number of packet passes through the datapath
                             * pipeline and should not be overlapping with
                             * each other. */
    PMD_STAT_MASKED_LOOKUP, /* Number of subtable lookups for flow table
                               hits. Each MASKED_HIT will have >= 1
                               MASKED_LOOKUP(s). */
    PMD_STAT_RECV,          /* Packets entering the datapath pipeline from an
                             * interface. */
    PMD_STAT_RECIRC,        /* Packets reentering the datapath pipeline due to
                             * recirculation. */
    PMD_STAT_SENT_PKTS,     /* Packets that have been sent. */
    PMD_STAT_SENT_BATCHES,  /* Number of batches sent. */
    PMD_CYCLES_ITER_IDLE,   /* Cycles spent in idle iterations. */
    PMD_CYCLES_ITER_BUSY,   /* Cycles spent in busy iterations. */
    PMD_CYCLES_UPCALL,      /* Cycles spent processing upcalls. */
    PMD_N_STATS
};

/* Array of PMD counters indexed by enum pmd_stat_type.
 * The n[] array contains the actual counter values since initialization
 * of the PMD. Counters are atomically updated from the PMD but are
 * read and cleared also from other threads. To clear the counters at
 * PMD run-time, the current counter values are copied over to the zero[]
 * array. To read a counter, we subtract its zero[] value from n[]. */

struct pmd_counters {
    atomic_uint64_t n[PMD_N_STATS];     /* Value since _init(). */
    uint64_t zero[PMD_N_STATS];         /* Value at last _clear(). */
};
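
/* For illustration, a minimal sketch (not part of this API) of how the
 * effective value of a counter is derived from the two arrays:
 *
 *     uint64_t n;
 *
 *     atomic_read_relaxed(&s->counters.n[PMD_STAT_RECV], &n);
 *     uint64_t value = n - s->counters.zero[PMD_STAT_RECV];
 *
 * pmd_perf_read_counters() below performs this subtraction for every
 * counter. */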

/* Data structure to collect statistical distribution of an integer measurement
 * type in form of a histogram. The wall[] array contains the inclusive
 * upper boundaries of the bins, while the bin[] array contains the actual
 * counters per bin. The histogram walls are typically set automatically
 * using the functions provided below. */

#define NUM_BINS 32             /* Number of histogram bins. */

struct histogram {
    uint32_t wall[NUM_BINS];
    uint64_t bin[NUM_BINS];
};
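
/* For illustration, with hypothetical walls 10, 20, 30, ..., a sample is
 * counted in the bin whose wall is the first one >= the value, so
 * histogram_add_sample(hist, 15) below increments bin[1] (wall 20).
 * Samples larger than the highest wall fall into the last bin. */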

/* Data structure to record detailed PMD execution metrics per iteration for
 * a history period of up to HISTORY_LEN iterations in a circular buffer.
 * Also used to record up to HISTORY_LEN millisecond averages/totals of these
 * metrics. */

struct iter_stats {
    uint64_t timestamp;         /* Iteration no. or millisecond. */
    uint64_t cycles;            /* Number of TSC cycles spent in it. or ms. */
    uint64_t busy_cycles;       /* Cycles spent in busy iterations or ms. */
    uint32_t iterations;        /* Iterations in ms. */
    uint32_t pkts;              /* Packets processed in iteration or ms. */
    uint32_t upcalls;           /* Number of upcalls in iteration or ms. */
    uint32_t upcall_cycles;     /* Cycles spent in upcalls in it. or ms. */
    uint32_t batches;           /* Number of rx batches in iteration or ms. */
    uint32_t max_vhost_qfill;   /* Maximum fill level in iteration or ms. */
};

#define HISTORY_LEN 1000        /* Length of recorded history
                                   (iterations and ms). */
#define DEF_HIST_SHOW 20        /* Default number of history samples to
                                   display. */

struct history {
    size_t idx;                 /* Slot to which next call to history_store()
                                   will write. */
    struct iter_stats sample[HISTORY_LEN];
};
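
/* A minimal usage sketch for the circular buffer (the variable names are
 * illustrative only): history_store() writes the sample to the slot at idx
 * and advances idx modulo HISTORY_LEN, so the buffer always holds the most
 * recent HISTORY_LEN samples:
 *
 *     struct iter_stats stats = { .timestamp = now, .pkts = rx_packets };
 *
 *     history_store(&s->iterations, &stats);
 */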

/* Container for all performance metrics of a PMD within the struct
 * dp_netdev_pmd_thread. The metrics must be updated from within the PMD
 * thread but can be read from any thread. The basic PMD counters in
 * struct pmd_counters can be read without protection against concurrent
 * clearing. The other metrics may only be safely read with the clear_mutex
 * held to protect against concurrent clearing. */

struct pmd_perf_stats {
    /* Prevents interference between PMD polling and stats clearing. */
    struct ovs_mutex stats_mutex;
    /* Set by CLI thread to order clearing of PMD stats. */
    volatile bool clear;
    /* Prevents stats retrieval while clearing is in progress. */
    struct ovs_mutex clear_mutex;
    /* Start of the current performance measurement period. */
    uint64_t start_ms;
    /* Counter for PMD iterations. */
    uint64_t iteration_cnt;
    /* Start of the current iteration. */
    uint64_t start_tsc;
    /* Latest TSC time stamp taken in PMD. */
    uint64_t last_tsc;
    /* Used to space certain checks in time. */
    uint64_t next_check_tsc;
    /* If non-NULL, outermost cycle timer currently running in PMD. */
    struct cycle_timer *cur_timer;
    /* Set of PMD counters with their zero offsets. */
    struct pmd_counters counters;
    /* Statistics of the current iteration. */
    struct iter_stats current;
    /* Totals for the current millisecond. */
    struct iter_stats totals;
    /* Histograms for the PMD metrics. */
    struct histogram cycles;
    struct histogram pkts;
    struct histogram cycles_per_pkt;
    struct histogram upcalls;
    struct histogram cycles_per_upcall;
    struct histogram pkts_per_batch;
    struct histogram max_vhost_qfill;
    /* Iteration history buffer. */
    struct history iterations;
    /* Millisecond history buffer. */
    struct history milliseconds;
    /* Suspicious iteration to be logged. */
    uint32_t log_susp_it;
    /* Start of iteration range to log. */
    uint32_t log_begin_it;
    /* End of iteration range to log. */
    uint32_t log_end_it;
    /* Reason for logging suspicious iteration. */
    char *log_reason;
};

#ifdef __linux__
static inline uint64_t
rdtsc_syscall(struct pmd_perf_stats *s)
{
    struct timespec val;
    uint64_t v;

    if (clock_gettime(CLOCK_MONOTONIC_RAW, &val) != 0) {
        return s->last_tsc;
    }

    v = val.tv_sec * UINT64_C(1000000000) + val.tv_nsec;
    return s->last_tsc = v;
}
#endif

/* Support for accurate timing of PMD execution on TSC clock cycle level.
 * These functions are intended to be invoked in the context of pmd threads. */

/* Read the TSC cycle register and cache it. Any function not requiring clock
 * cycle accuracy should read the cached value using cycles_counter_get() to
 * avoid the overhead of reading the TSC register. */

static inline uint64_t
cycles_counter_update(struct pmd_perf_stats *s)
{
#ifdef DPDK_NETDEV
    return s->last_tsc = rte_get_tsc_cycles();
#elif !defined(_MSC_VER) && defined(__x86_64__)
    uint32_t h, l;
    asm volatile("rdtsc" : "=a" (l), "=d" (h));

    return s->last_tsc = ((uint64_t) h << 32) | l;
#elif !defined(_MSC_VER) && defined(__aarch64__)
    asm volatile("mrs %0, cntvct_el0" : "=r" (s->last_tsc));

    return s->last_tsc;
#elif defined(__linux__)
    return rdtsc_syscall(s);
#else
    return s->last_tsc = 0;
#endif
}

static inline uint64_t
cycles_counter_get(struct pmd_perf_stats *s)
{
    return s->last_tsc;
}
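
/* For illustration, the intended division of labor between the two functions
 * above (the processing step is illustrative only):
 *
 *     uint64_t start = cycles_counter_update(s);  // Reads the TSC register.
 *     process_rx_batch();                         // Illustrative work.
 *     uint64_t now = cycles_counter_get(s);       // Cheap cached read.
 */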

void pmd_perf_estimate_tsc_frequency(void);

/* A nestable timer for measuring execution time in TSC cycles.
 *
 * Usage:
 * struct cycle_timer timer;
 *
 * cycle_timer_start(pmd, &timer);
 * <Timed execution>
 * uint64_t cycles = cycle_timer_stop(pmd, &timer);
 *
 * The caller must guarantee that a call to cycle_timer_start() is always
 * paired with a call to cycle_timer_stop().
 *
 * It is possible to have nested cycle timers within the timed code. The
 * execution time measured by the nested timers is excluded from the time
 * measured by the embracing timer.
 */

struct cycle_timer {
    uint64_t start;
    uint64_t suspended;
    struct cycle_timer *interrupted;
};

static inline void
cycle_timer_start(struct pmd_perf_stats *s,
                  struct cycle_timer *timer)
{
    struct cycle_timer *cur_timer = s->cur_timer;
    uint64_t now = cycles_counter_update(s);

    if (cur_timer) {
        cur_timer->suspended = now;
    }
    timer->interrupted = cur_timer;
    timer->start = now;
    timer->suspended = 0;
    s->cur_timer = timer;
}

static inline uint64_t
cycle_timer_stop(struct pmd_perf_stats *s,
                 struct cycle_timer *timer)
{
    /* Assert that this is the current cycle timer. */
    ovs_assert(s->cur_timer == timer);
    uint64_t now = cycles_counter_update(s);
    struct cycle_timer *intr_timer = timer->interrupted;

    if (intr_timer) {
        /* Adjust the start offset by the suspended cycles. */
        intr_timer->start += now - intr_timer->suspended;
    }
    /* Restore suspended timer, if any. */
    s->cur_timer = intr_timer;
    return now - timer->start;
}
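
/* A minimal sketch of nesting (the work steps are illustrative only): when
 * the inner timer stops, it shifts the interrupted outer timer's start
 * forward by the suspended cycles, so the outer result excludes the inner
 * measurement:
 *
 *     struct cycle_timer outer, inner;
 *
 *     cycle_timer_start(s, &outer);
 *     <outer work>
 *     cycle_timer_start(s, &inner);
 *     <inner work>
 *     uint64_t inner_cycles = cycle_timer_stop(s, &inner);
 *     <more outer work>
 *     uint64_t outer_cycles = cycle_timer_stop(s, &outer);
 */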

/* Functions to initialize and reset the PMD performance metrics. */

void pmd_perf_stats_init(struct pmd_perf_stats *s);
void pmd_perf_stats_clear(struct pmd_perf_stats *s);
void pmd_perf_stats_clear_lock(struct pmd_perf_stats *s);

/* Functions to read and update PMD counters. */

void pmd_perf_read_counters(struct pmd_perf_stats *s,
                            uint64_t stats[PMD_N_STATS]);
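
/* For illustration, a sketch of reading the counters from a non-PMD thread
 * (the hit aggregation shown is illustrative only):
 *
 *     uint64_t stats[PMD_N_STATS];
 *
 *     pmd_perf_read_counters(&pmd->perf_stats, stats);
 *     uint64_t hits = stats[PMD_STAT_EXACT_HIT] + stats[PMD_STAT_SMC_HIT]
 *                     + stats[PMD_STAT_MASKED_HIT];
 */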

/* PMD performance counters are updated lock-less. For real PMDs
 * they are only updated from the PMD thread itself. In the case of the
 * non-PMD thread they might be updated from multiple threads, but we can
 * live with losing a rare update as 100% accuracy is not required.
 * However, as counters are read for display from outside the PMD thread
 * with e.g. pmd-stats-show, we make sure that the 64-bit read and store
 * operations are atomic also on 32-bit systems so that readers cannot
 * read garbage. On 64-bit systems this incurs no overhead. */

static inline void
pmd_perf_update_counter(struct pmd_perf_stats *s,
                        enum pmd_stat_type counter, int delta)
{
    uint64_t tmp;

    atomic_read_relaxed(&s->counters.n[counter], &tmp);
    tmp += delta;
    atomic_store_relaxed(&s->counters.n[counter], tmp);
}
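
/* For example, the datapath would account a received batch of packets as:
 *
 *     pmd_perf_update_counter(s, PMD_STAT_RECV, batch_size);
 *
 * where batch_size is an illustrative variable holding the rx batch's
 * packet count. */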

/* Functions to manipulate a sample history. */

static inline void
histogram_add_sample(struct histogram *hist, uint32_t val)
{
    /* TODO: Can do better with binary search? */
    for (int i = 0; i < NUM_BINS - 1; i++) {
        if (val <= hist->wall[i]) {
            hist->bin[i]++;
            return;
        }
    }
    hist->bin[NUM_BINS - 1]++;
}
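
/* A sketch of the binary search variant hinted at by the TODO above, not
 * part of this API: since wall[] is sorted in ascending order, the first
 * wall >= val can be found in O(log NUM_BINS) steps:
 *
 *     static inline void
 *     histogram_add_sample_bsearch(struct histogram *hist, uint32_t val)
 *     {
 *         int lo = 0, hi = NUM_BINS - 1;
 *
 *         while (lo < hi) {
 *             int mid = (lo + hi) / 2;
 *             if (val <= hist->wall[mid]) {
 *                 hi = mid;
 *             } else {
 *                 lo = mid + 1;
 *             }
 *         }
 *         hist->bin[lo]++;    // Last bin catches values above all walls.
 *     }
 */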

uint64_t histogram_samples(const struct histogram *hist);

/* This function is used to advance the given history index by a positive
 * offset in the circular history buffer. */
static inline uint32_t
history_add(uint32_t idx, uint32_t offset)
{
    return (idx + offset) % HISTORY_LEN;
}

/* This function computes the difference between two indices into the
 * circular history buffer. The result is always positive in the range
 * 0 .. HISTORY_LEN-1 and specifies the number of steps to reach idx1
 * starting from idx2. It can also be used to retreat the history index
 * idx1 by idx2 steps. */
static inline uint32_t
history_sub(uint32_t idx1, uint32_t idx2)
{
    return (idx1 + HISTORY_LEN - idx2) % HISTORY_LEN;
}
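
/* For example, with HISTORY_LEN of 1000, history_sub(2, 998) returns
 * (2 + 1000 - 998) % 1000 = 4: starting from slot 998 it takes four
 * forward steps, wrapping at 1000, to reach slot 2. */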

static inline struct iter_stats *
history_current(struct history *h)
{
    return &h->sample[h->idx];
}

static inline struct iter_stats *
history_next(struct history *h)
{
    size_t next_idx = history_add(h->idx, 1);
    struct iter_stats *next = &h->sample[next_idx];

    memset(next, 0, sizeof(*next));
    h->idx = next_idx;
    return next;
}

static inline struct iter_stats *
history_store(struct history *h, struct iter_stats *is)
{
    if (is) {
        h->sample[h->idx] = *is;
    }
    /* Advance the history pointer. */
    return history_next(h);
}

/* Data and functions related to logging of suspicious iterations. */

extern bool log_enabled;
extern bool log_extend;
extern uint32_t log_q_thr;
extern uint64_t iter_cycle_threshold;

void pmd_perf_set_log_susp_iteration(struct pmd_perf_stats *s, char *reason);
void pmd_perf_log_susp_iteration_neighborhood(struct pmd_perf_stats *s);

/* Functions recording PMD metrics per iteration. */

void
pmd_perf_start_iteration(struct pmd_perf_stats *s);
void
pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
                       int tx_packets, bool full_metrics);
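
/* A minimal sketch of the intended call pattern in the PMD polling loop
 * (the polling step and packet counts are illustrative only):
 *
 *     for (;;) {
 *         pmd_perf_start_iteration(s);
 *         <poll rx queues, process and transmit packets>
 *         pmd_perf_end_iteration(s, rx_packets, tx_packets, true);
 *     }
 */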

/* Formatting the output of commands. */

struct pmd_perf_params {
    int command_type;
    bool histograms;
    size_t iter_hist_len;
    size_t ms_hist_len;
};

void pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
                                   double duration);
void pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s);
void pmd_perf_format_iteration_history(struct ds *str,
                                       struct pmd_perf_stats *s,
                                       int n_iter);
void pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s,
                                int n_ms);
void pmd_perf_log_set_cmd(struct unixctl_conn *conn,
                          int argc, const char *argv[],
                          void *aux OVS_UNUSED);

#ifdef __cplusplus
}
#endif

#endif /* DPIF_NETDEV_PERF_H */