/* Source: lib/dpif-netdev-perf.h from mirror_ovs.git (git.proxmox.com mirror),
 * captured at commit "ofproto-dpif-upcall: Echo HASH attribute back to
 * datapath." */
1 /*
2 * Copyright (c) 2017 Ericsson AB.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef DPIF_NETDEV_PERF_H
18 #define DPIF_NETDEV_PERF_H 1
19
20 #include <stdbool.h>
21 #include <stddef.h>
22 #include <stdint.h>
23 #include <string.h>
24 #include <time.h>
25 #include <math.h>
26
27 #ifdef DPDK_NETDEV
28 #include <rte_config.h>
29 #include <rte_cycles.h>
30 #endif
31
32 #include "openvswitch/vlog.h"
33 #include "ovs-atomic.h"
34 #include "timeval.h"
35 #include "unixctl.h"
36 #include "util.h"
37
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41
42 /* This module encapsulates data structures and functions to maintain basic PMD
43 * performance metrics such as packet counters, execution cycles as well as
44 * histograms and time series recording for more detailed PMD metrics.
45 *
 * It provides a clean API for dpif-netdev to initialize, update, read, and
 * reset these metrics.
48 *
49 * The basic set of PMD counters is implemented as atomic_uint64_t variables
50 * to guarantee correct read also in 32-bit systems.
51 *
52 * The detailed PMD performance metrics are only supported on 64-bit systems
53 * with atomic 64-bit read and store semantics for plain uint64_t counters.
54 */
55
56 /* Set of counter types maintained in pmd_perf_stats. */
57
enum pmd_stat_type {
    PMD_STAT_EXACT_HIT,     /* Packets that had an exact match (emc). */
    PMD_STAT_SMC_HIT,       /* Packets that had a sig match hit (SMC). */
    PMD_STAT_MASKED_HIT,    /* Packets that matched in the flow table. */
    PMD_STAT_MISS,          /* Packets that did not match and upcall was ok. */
    PMD_STAT_LOST,          /* Packets that did not match and upcall failed. */
                            /* The above statistics account for the total
                             * number of packet passes through the datapath
                             * pipeline and should not be overlapping with each
                             * other. */
    PMD_STAT_MASKED_LOOKUP, /* Number of subtable lookups for flow table
                               hits. Each MASKED_HIT hit will have >= 1
                               MASKED_LOOKUP(s). */
    PMD_STAT_RECV,          /* Packets entering the datapath pipeline from an
                             * interface. */
    PMD_STAT_RECIRC,        /* Packets reentering the datapath pipeline due to
                             * recirculation. */
    PMD_STAT_SENT_PKTS,     /* Packets that have been sent. */
    PMD_STAT_SENT_BATCHES,  /* Number of batches sent. */
    PMD_CYCLES_ITER_IDLE,   /* Cycles spent in idle iterations. */
    PMD_CYCLES_ITER_BUSY,   /* Cycles spent in busy iterations. */
    PMD_CYCLES_UPCALL,      /* Cycles spent processing upcalls. */
    PMD_N_STATS             /* Number of counters; must remain last, it sizes
                             * the counter arrays in struct pmd_counters. */
};
82
83 /* Array of PMD counters indexed by enum pmd_stat_type.
84 * The n[] array contains the actual counter values since initialization
85 * of the PMD. Counters are atomically updated from the PMD but are
86 * read and cleared also from other processes. To clear the counters at
87 * PMD run-time, the current counter values are copied over to the zero[]
88 * array. To read counters we subtract zero[] value from n[]. */
89
struct pmd_counters {
    atomic_uint64_t n[PMD_N_STATS];     /* Value since _init(). */
    uint64_t zero[PMD_N_STATS];         /* Value at last _clear(); readers
                                         * report n[] - zero[]. */
};
94
95 /* Data structure to collect statistical distribution of an integer measurement
96 * type in form of a histogram. The wall[] array contains the inclusive
97 * upper boundaries of the bins, while the bin[] array contains the actual
98 * counters per bin. The histogram walls are typically set automatically
99 * using the functions provided below.*/
100
#define NUM_BINS 32             /* Number of histogram bins. */

struct histogram {
    uint32_t wall[NUM_BINS];    /* Inclusive upper boundary of each bin. */
    uint64_t bin[NUM_BINS];     /* Sample count per bin. */
};
107
108 /* Data structure to record details PMD execution metrics per iteration for
109 * a history period of up to HISTORY_LEN iterations in circular buffer.
110 * Also used to record up to HISTORY_LEN millisecond averages/totals of these
111 * metrics.*/
112
/* One record of PMD metrics, covering either a single iteration or the
 * aggregate of one millisecond, depending on which history it is stored in. */
struct iter_stats {
    uint64_t timestamp;         /* Iteration no. or millisecond. */
    uint64_t cycles;            /* Number of TSC cycles spent in it. or ms. */
    uint64_t busy_cycles;       /* Cycles spent in busy iterations or ms. */
    uint32_t iterations;        /* Iterations in ms. */
    uint32_t pkts;              /* Packets processed in iteration or ms. */
    uint32_t upcalls;           /* Number of upcalls in iteration or ms. */
    uint32_t upcall_cycles;     /* Cycles spent in upcalls in it. or ms. */
    uint32_t batches;           /* Number of rx batches in iteration or ms. */
    uint32_t max_vhost_qfill;   /* Maximum fill level in iteration or ms. */
};
124
125 #define HISTORY_LEN 1000 /* Length of recorded history
126 (iterations and ms). */
127 #define DEF_HIST_SHOW 20 /* Default number of history samples to
128 display. */
129
/* Circular buffer of HISTORY_LEN iter_stats samples. */
struct history {
    size_t idx;                 /* Slot to which next call to history_store()
                                   will write. */
    struct iter_stats sample[HISTORY_LEN];  /* The recorded samples. */
};
135
136 /* Container for all performance metrics of a PMD within the struct
137 * dp_netdev_pmd_thread. The metrics must be updated from within the PMD
138 * thread but can be read from any thread. The basic PMD counters in
139 * struct pmd_counters can be read without protection against concurrent
140 * clearing. The other metrics may only be safely read with the clear_mutex
141 * held to protect against concurrent clearing. */
142
struct pmd_perf_stats {
    /* Prevents interference between PMD polling and stats clearing. */
    struct ovs_mutex stats_mutex;
    /* Set by CLI thread to order clearing of PMD stats. */
    volatile bool clear;
    /* Prevents stats retrieval while clearing is in progress. */
    struct ovs_mutex clear_mutex;
    /* Start of the current performance measurement period. */
    uint64_t start_ms;
    /* Counter for PMD iterations. */
    uint64_t iteration_cnt;
    /* Start of the current iteration. */
    uint64_t start_tsc;
    /* Latest TSC time stamp taken in PMD.  Cached by
     * cycles_counter_update() and read by cycles_counter_get(). */
    uint64_t last_tsc;
    /* Used to space certain checks in time. */
    uint64_t next_check_tsc;
    /* If non-NULL, outermost cycle timer currently running in PMD.
     * Maintained by cycle_timer_start()/cycle_timer_stop(). */
    struct cycle_timer *cur_timer;
    /* Set of PMD counters with their zero offsets. */
    struct pmd_counters counters;
    /* Statistics of the current iteration. */
    struct iter_stats current;
    /* Totals for the current millisecond. */
    struct iter_stats totals;
    /* Histograms for the PMD metrics. */
    struct histogram cycles;
    struct histogram pkts;
    struct histogram cycles_per_pkt;
    struct histogram upcalls;
    struct histogram cycles_per_upcall;
    struct histogram pkts_per_batch;
    struct histogram max_vhost_qfill;
    /* Iteration history buffer. */
    struct history iterations;
    /* Millisecond history buffer. */
    struct history milliseconds;
    /* Suspicious iteration log.  NOTE(review): presumably the iteration
     * number being logged -- confirm in dpif-netdev-perf.c. */
    uint32_t log_susp_it;
    /* Start of iteration range to log. */
    uint32_t log_begin_it;
    /* End of iteration range to log. */
    uint32_t log_end_it;
    /* Reason for logging suspicious iteration. */
    char *log_reason;
};
189
190 #ifdef __linux__
191 static inline uint64_t
192 rdtsc_syscall(struct pmd_perf_stats *s)
193 {
194 struct timespec val;
195 uint64_t v;
196
197 if (clock_gettime(CLOCK_MONOTONIC_RAW, &val) != 0) {
198 return s->last_tsc;
199 }
200
201 v = val.tv_sec * UINT64_C(1000000000) + val.tv_nsec;
202 return s->last_tsc = v;
203 }
204 #endif
205
206 /* Support for accurate timing of PMD execution on TSC clock cycle level.
207 * These functions are intended to be invoked in the context of pmd threads. */
208
209 /* Read the TSC cycle register and cache it. Any function not requiring clock
210 * cycle accuracy should read the cached value using cycles_counter_get() to
211 * avoid the overhead of reading the TSC register. */
212
static inline uint64_t
cycles_counter_update(struct pmd_perf_stats *s)
{
#ifdef DPDK_NETDEV
    /* DPDK provides a portable TSC wrapper. */
    return s->last_tsc = rte_get_tsc_cycles();
#elif !defined(_MSC_VER) && defined(__x86_64__)
    /* Inline RDTSC; the 64-bit counter is returned in EDX:EAX. */
    uint32_t h, l;
    asm volatile("rdtsc" : "=a" (l), "=d" (h));

    return s->last_tsc = ((uint64_t) h << 32) | l;
#elif defined(__linux__)
    /* No direct TSC access: fall back to CLOCK_MONOTONIC_RAW nanoseconds. */
    return rdtsc_syscall(s);
#else
    /* No cycle source available; cycle-based metrics will read as zero. */
    return s->last_tsc = 0;
#endif
}
229
/* Return the TSC value cached by the last cycles_counter_update(), without
 * the cost of reading the TSC register again. */
static inline uint64_t
cycles_counter_get(struct pmd_perf_stats *s)
{
    return s->last_tsc;
}
235
236 void pmd_perf_estimate_tsc_frequency(void);
237
238 /* A nestable timer for measuring execution time in TSC cycles.
239 *
240 * Usage:
241 * struct cycle_timer timer;
242 *
243 * cycle_timer_start(pmd, &timer);
244 * <Timed execution>
245 * uint64_t cycles = cycle_timer_stop(pmd, &timer);
246 *
 * The caller must guarantee that a call to cycle_timer_start() is always
 * paired with a call to cycle_timer_stop().
 *
 * It is possible to have nested cycle timers within the timed code. The
251 * execution time measured by the nested timers is excluded from the time
252 * measured by the embracing timer.
253 */
254
struct cycle_timer {
    uint64_t start;                     /* TSC at cycle_timer_start(). */
    uint64_t suspended;                 /* TSC when a nested timer was
                                         * started on top of this one. */
    struct cycle_timer *interrupted;    /* Enclosing timer, if nested. */
};
260
261 static inline void
262 cycle_timer_start(struct pmd_perf_stats *s,
263 struct cycle_timer *timer)
264 {
265 struct cycle_timer *cur_timer = s->cur_timer;
266 uint64_t now = cycles_counter_update(s);
267
268 if (cur_timer) {
269 cur_timer->suspended = now;
270 }
271 timer->interrupted = cur_timer;
272 timer->start = now;
273 timer->suspended = 0;
274 s->cur_timer = timer;
275 }
276
277 static inline uint64_t
278 cycle_timer_stop(struct pmd_perf_stats *s,
279 struct cycle_timer *timer)
280 {
281 /* Assert that this is the current cycle timer. */
282 ovs_assert(s->cur_timer == timer);
283 uint64_t now = cycles_counter_update(s);
284 struct cycle_timer *intr_timer = timer->interrupted;
285
286 if (intr_timer) {
287 /* Adjust the start offset by the suspended cycles. */
288 intr_timer->start += now - intr_timer->suspended;
289 }
290 /* Restore suspended timer, if any. */
291 s->cur_timer = intr_timer;
292 return now - timer->start;
293 }
294
295 /* Functions to initialize and reset the PMD performance metrics. */
296
297 void pmd_perf_stats_init(struct pmd_perf_stats *s);
298 void pmd_perf_stats_clear(struct pmd_perf_stats *s);
299 void pmd_perf_stats_clear_lock(struct pmd_perf_stats *s);
300
301 /* Functions to read and update PMD counters. */
302
303 void pmd_perf_read_counters(struct pmd_perf_stats *s,
304 uint64_t stats[PMD_N_STATS]);
305
306 /* PMD performance counters are updated lock-less. For real PMDs
307 * they are only updated from the PMD thread itself. In the case of the
308 * NON-PMD they might be updated from multiple threads, but we can live
309 * with losing a rare update as 100% accuracy is not required.
310 * However, as counters are read for display from outside the PMD thread
311 * with e.g. pmd-stats-show, we make sure that the 64-bit read and store
 * operations are atomic also on 32-bit systems so that readers cannot
 * read garbage. On 64-bit systems this incurs no overhead. */
314
/* Add 'delta' to counter 'counter' of 's'.  The read-modify-write is
 * deliberately not atomic as a whole (see the comment above); only the
 * individual 64-bit load and store are, so concurrent readers on 32-bit
 * systems never observe a torn value. */
static inline void
pmd_perf_update_counter(struct pmd_perf_stats *s,
                        enum pmd_stat_type counter, int delta)
{
    uint64_t tmp;
    atomic_read_relaxed(&s->counters.n[counter], &tmp);
    tmp += delta;
    atomic_store_relaxed(&s->counters.n[counter], tmp);
}
324
325 /* Functions to manipulate a sample history. */
326
327 static inline void
328 histogram_add_sample(struct histogram *hist, uint32_t val)
329 {
330 /* TODO: Can do better with binary search? */
331 for (int i = 0; i < NUM_BINS-1; i++) {
332 if (val <= hist->wall[i]) {
333 hist->bin[i]++;
334 return;
335 }
336 }
337 hist->bin[NUM_BINS-1]++;
338 }
339
340 uint64_t histogram_samples(const struct histogram *hist);
341
342 /* This function is used to advance the given history index by positive
343 * offset in the circular history buffer. */
344 static inline uint32_t
345 history_add(uint32_t idx, uint32_t offset)
346 {
347 return (idx + offset) % HISTORY_LEN;
348 }
349
350 /* This function computes the difference between two indices into the
351 * circular history buffer. The result is always positive in the range
352 * 0 .. HISTORY_LEN-1 and specifies the number of steps to reach idx1
353 * starting from idx2. It can also be used to retreat the history index
354 * idx1 by idx2 steps. */
355 static inline uint32_t
356 history_sub(uint32_t idx1, uint32_t idx2)
357 {
358 return (idx1 + HISTORY_LEN - idx2) % HISTORY_LEN;
359 }
360
361 static inline struct iter_stats *
362 history_current(struct history *h)
363 {
364 return &h->sample[h->idx];
365 }
366
367 static inline struct iter_stats *
368 history_next(struct history *h)
369 {
370 size_t next_idx = history_add(h->idx, 1);
371 struct iter_stats *next = &h->sample[next_idx];
372
373 memset(next, 0, sizeof(*next));
374 h->idx = next_idx;
375 return next;
376 }
377
378 static inline struct iter_stats *
379 history_store(struct history *h, struct iter_stats *is)
380 {
381 if (is) {
382 h->sample[h->idx] = *is;
383 }
384 /* Advance the history pointer */
385 return history_next(h);
386 }
387
388 /* Data and function related to logging of suspicious iterations. */
389
390 extern bool log_enabled;
391 extern bool log_extend;
392 extern uint32_t log_q_thr;
393 extern uint64_t iter_cycle_threshold;
394
395 void pmd_perf_set_log_susp_iteration(struct pmd_perf_stats *s, char *reason);
396 void pmd_perf_log_susp_iteration_neighborhood(struct pmd_perf_stats *s);
397
398 /* Functions recording PMD metrics per iteration. */
399
400 void
401 pmd_perf_start_iteration(struct pmd_perf_stats *s);
402 void
403 pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
404 int tx_packets, bool full_metrics);
405
406 /* Formatting the output of commands. */
407
/* Parameters controlling the formatting of the pmd-perf show commands. */
struct pmd_perf_params {
    int command_type;       /* Selects which output to produce.
                             * NOTE(review): value set defined in
                             * dpif-netdev-perf.c -- confirm there. */
    bool histograms;        /* Presumably: include detailed histograms in the
                             * output -- confirm against the formatter. */
    size_t iter_hist_len;   /* Number of iteration history samples to show. */
    size_t ms_hist_len;     /* Number of millisecond history samples to
                             * show. */
};
414
415 void pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
416 double duration);
417 void pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s);
418 void pmd_perf_format_iteration_history(struct ds *str,
419 struct pmd_perf_stats *s,
420 int n_iter);
421 void pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s,
422 int n_ms);
423 void pmd_perf_log_set_cmd(struct unixctl_conn *conn,
424 int argc, const char *argv[],
425 void *aux OVS_UNUSED);
426
427 #ifdef __cplusplus
428 }
429 #endif
430
431 #endif /* DPIF_NETDEV_PERF_H */