/*
 * Copyright (c) 2017 Ericsson AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include <stdint.h>

#include "dpdk.h"
#include "dpif-netdev-perf.h"
#include "openvswitch/dynamic-string.h"
#include "openvswitch/vlog.h"
#include "ovs-numa.h"
#include "ovs-thread.h"
#include "timeval.h"

VLOG_DEFINE_THIS_MODULE(pmd_perf);

#define ITER_US_THRESHOLD 250   /* Warning threshold for iteration duration
                                   in microseconds. */
#define VHOST_QUEUE_FULL 128    /* Size of the virtio TX queue. */
#define LOG_IT_BEFORE 5         /* Number of iterations to log before
                                   suspicious iteration. */
#define LOG_IT_AFTER 5          /* Number of iterations to log after
                                   suspicious iteration. */

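/* Configuration of the logging of suspicious iterations.  These values
 * can be changed at run time through the "dpif-netdev/pmd-perf-log-set"
 * unixctl command implemented by pmd_perf_log_set_cmd() below. */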
bool log_enabled = false;
bool log_extend = false;
static uint32_t log_it_before = LOG_IT_BEFORE;
static uint32_t log_it_after = LOG_IT_AFTER;
static uint32_t log_us_thr = ITER_US_THRESHOLD;
uint32_t log_q_thr = VHOST_QUEUE_FULL;
uint64_t iter_cycle_threshold;

static struct vlog_rate_limit latency_rl = VLOG_RATE_LIMIT_INIT(600, 600);

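/* The TSC frequency in cycles per second, determined at startup by
 * pmd_perf_estimate_tsc_frequency().  It stays at 1 when no reliable
 * value is available, which keeps divisions by tsc_hz safe. */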
static uint64_t tsc_hz = 1;

void
pmd_perf_estimate_tsc_frequency(void)
{
#ifdef DPDK_NETDEV
    if (dpdk_available()) {
        tsc_hz = rte_get_tsc_hz();
    }
    if (tsc_hz > 1) {
        VLOG_INFO("DPDK provided TSC frequency: %"PRIu64" KHz",
                  tsc_hz / 1000);
        return;
    }
#endif
    struct ovs_numa_dump *affinity;
    struct pmd_perf_stats s;
    uint64_t start, stop;

    /* DPDK is not available or returned an unreliable value.
     * Trying to estimate. */
    affinity = ovs_numa_thread_getaffinity_dump();
    if (affinity) {
        const struct ovs_numa_info_core *core;

        FOR_EACH_CORE_ON_DUMP (core, affinity) {
            /* Setting affinity to a single core from the affinity mask to
             * avoid re-scheduling to another core while sleeping. */
            ovs_numa_thread_setaffinity_core(core->core_id);
            break;
        }
    }

    memset(&s, 0, sizeof s);
    start = cycles_counter_update(&s);
    /* Using xnanosleep as it's interrupt resistant.
     * Sleeping only 100 ms to avoid holding the main thread for too long. */
    xnanosleep(1E8);
    stop = cycles_counter_update(&s);

    if (affinity) {
        /* Restoring previous affinity. */
        ovs_numa_thread_setaffinity_dump(affinity);
        ovs_numa_dump_destroy(affinity);
    }

    if (stop <= start) {
        VLOG_WARN("TSC source is unreliable.");
        tsc_hz = 1;
    } else {
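        /* The sleep above lasted 100 ms, so multiplying the measured
         * cycle delta by 10 yields cycles per second. */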
        tsc_hz = (stop - start) * 10;
    }

    VLOG_INFO("Estimated TSC frequency: %"PRIu64" KHz", tsc_hz / 1000);
}

/* Histogram functions. */

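/* Each histogram has NUM_BINS bins.  wall[i] holds the upper bound of
 * bin i; the last wall is UINT32_MAX so that the final bin collects all
 * samples above the configured maximum. */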
static void
histogram_walls_set_lin(struct histogram *hist, uint32_t min, uint32_t max)
{
    int i;

    ovs_assert(min < max);
    for (i = 0; i < NUM_BINS-1; i++) {
        hist->wall[i] = min + (i * (max - min)) / (NUM_BINS - 2);
    }
    hist->wall[NUM_BINS-1] = UINT32_MAX;
}

static void
histogram_walls_set_log(struct histogram *hist, uint32_t min, uint32_t max)
{
    int i, start, bins, wall;
    double log_min, log_max;

    ovs_assert(min < max);
    if (min > 0) {
        log_min = log(min);
        log_max = log(max);
        start = 0;
        bins = NUM_BINS - 1;
    } else {
        hist->wall[0] = 0;
        log_min = log(1);
        log_max = log(max);
        start = 1;
        bins = NUM_BINS - 2;
    }
    wall = start;
    for (i = 0; i < bins; i++) {
        /* Make sure each wall is monotonically increasing. */
        wall = MAX(wall, exp(log_min + (i * (log_max - log_min)) / (bins-1)));
        hist->wall[start + i] = wall++;
    }
    if (hist->wall[NUM_BINS-2] < max) {
        hist->wall[NUM_BINS-2] = max;
    }
    hist->wall[NUM_BINS-1] = UINT32_MAX;
}

uint64_t
histogram_samples(const struct histogram *hist)
{
    uint64_t samples = 0;

    for (int i = 0; i < NUM_BINS; i++) {
        samples += hist->bin[i];
    }
    return samples;
}

static void
histogram_clear(struct histogram *hist)
{
    int i;

    for (i = 0; i < NUM_BINS; i++) {
        hist->bin[i] = 0;
    }
}

static void
history_init(struct history *h)
{
    memset(h, 0, sizeof(*h));
}

void
pmd_perf_stats_init(struct pmd_perf_stats *s)
{
    memset(s, 0, sizeof(*s));
    ovs_mutex_init(&s->stats_mutex);
    ovs_mutex_init(&s->clear_mutex);
    /* Logarithmic histogram for cycles/it ranging from 500 to 24M
     * (corresponding to 200 ns to 9.6 ms at 2.5 GHz TSC clock). */
    histogram_walls_set_log(&s->cycles, 500, 24000000);
    /* Logarithmic histogram for pkts/it ranging from 0 to 1000. */
    histogram_walls_set_log(&s->pkts, 0, 1000);
    /* Linear histogram for cycles/pkt ranging from 100 to 30K. */
    histogram_walls_set_lin(&s->cycles_per_pkt, 100, 30000);
    /* Linear histogram for pkts/rx batch ranging from 0 to 32,
     * the maximum rx batch size in OVS. */
    histogram_walls_set_lin(&s->pkts_per_batch, 0, 32);
    /* Linear histogram for upcalls/it ranging from 0 to 30. */
    histogram_walls_set_lin(&s->upcalls, 0, 30);
    /* Logarithmic histogram for cycles/upcall ranging from 1000 to 1M
     * (corresponding to 400 ns to 400 us at 2.5 GHz TSC clock). */
    histogram_walls_set_log(&s->cycles_per_upcall, 1000, 1000000);
    /* Logarithmic histogram for max vhostuser queue fill level from 0 to
     * 512.  512 is the maximum fill level for a virtio queue with 1024
     * descriptors (maximum configurable length in QEMU), with the
     * DPDK 17.11 virtio PMD in the guest. */
    histogram_walls_set_log(&s->max_vhost_qfill, 0, 512);
    s->iteration_cnt = 0;
    s->start_ms = time_msec();
    s->log_susp_it = UINT32_MAX;
    s->log_begin_it = UINT32_MAX;
    s->log_end_it = UINT32_MAX;
    s->log_reason = NULL;
}

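/* Formats the overall performance statistics collected in 's' into 'str'.
 * 'duration' is the length of the measurement period in seconds, used to
 * derive packet rates and the share of used TSC cycles. */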
void
pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
                              double duration)
{
    uint64_t stats[PMD_N_STATS];
    double us_per_cycle = 1000000.0 / tsc_hz;

    if (duration == 0) {
        return;
    }

    pmd_perf_read_counters(s, stats);
    uint64_t tot_cycles = stats[PMD_CYCLES_ITER_IDLE] +
                          stats[PMD_CYCLES_ITER_BUSY];
    uint64_t rx_packets = stats[PMD_STAT_RECV];
    uint64_t tx_packets = stats[PMD_STAT_SENT_PKTS];
    uint64_t tx_batches = stats[PMD_STAT_SENT_BATCHES];
    uint64_t passes = stats[PMD_STAT_RECV] +
                      stats[PMD_STAT_RECIRC];
    uint64_t upcalls = stats[PMD_STAT_MISS];
    uint64_t upcall_cycles = stats[PMD_CYCLES_UPCALL];
    uint64_t tot_iter = histogram_samples(&s->pkts);
    uint64_t idle_iter = s->pkts.bin[0];
    uint64_t busy_iter = tot_iter >= idle_iter ? tot_iter - idle_iter : 0;

    ds_put_format(str,
            "  Iterations:        %12"PRIu64"  (%.2f us/it)\n"
            "  - Used TSC cycles: %12"PRIu64"  (%5.1f %% of total cycles)\n"
            "  - idle iterations: %12"PRIu64"  (%5.1f %% of used cycles)\n"
            "  - busy iterations: %12"PRIu64"  (%5.1f %% of used cycles)\n",
            tot_iter, tot_cycles * us_per_cycle / tot_iter,
            tot_cycles, 100.0 * (tot_cycles / duration) / tsc_hz,
            idle_iter,
            100.0 * stats[PMD_CYCLES_ITER_IDLE] / tot_cycles,
            busy_iter,
            100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles);
    if (rx_packets > 0) {
        ds_put_format(str,
            "  Rx packets:        %12"PRIu64"  (%.0f Kpps, %.0f cycles/pkt)\n"
            "  Datapath passes:   %12"PRIu64"  (%.2f passes/pkt)\n"
            "  - EMC hits:        %12"PRIu64"  (%5.1f %%)\n"
            "  - SMC hits:        %12"PRIu64"  (%5.1f %%)\n"
            "  - Megaflow hits:   %12"PRIu64"  (%5.1f %%, %.2f "
            "subtbl lookups/hit)\n"
            "  - Upcalls:         %12"PRIu64"  (%5.1f %%, %.1f us/upcall)\n"
            "  - Lost upcalls:    %12"PRIu64"  (%5.1f %%)\n",
            rx_packets, (rx_packets / duration) / 1000,
            1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets,
            passes, rx_packets ? 1.0 * passes / rx_packets : 0,
            stats[PMD_STAT_EXACT_HIT],
            100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
            stats[PMD_STAT_SMC_HIT],
            100.0 * stats[PMD_STAT_SMC_HIT] / passes,
            stats[PMD_STAT_MASKED_HIT],
            100.0 * stats[PMD_STAT_MASKED_HIT] / passes,
            stats[PMD_STAT_MASKED_HIT]
            ? 1.0 * stats[PMD_STAT_MASKED_LOOKUP] / stats[PMD_STAT_MASKED_HIT]
            : 0,
            upcalls, 100.0 * upcalls / passes,
            upcalls ? (upcall_cycles * us_per_cycle) / upcalls : 0,
            stats[PMD_STAT_LOST],
            100.0 * stats[PMD_STAT_LOST] / passes);
    } else {
        ds_put_format(str, "  Rx packets:        %12d\n", 0);
    }
    if (tx_packets > 0) {
        ds_put_format(str,
            "  Tx packets:        %12"PRIu64"  (%.0f Kpps)\n"
            "  Tx batches:        %12"PRIu64"  (%.2f pkts/batch)\n",
            tx_packets, (tx_packets / duration) / 1000,
            tx_batches, 1.0 * tx_packets / tx_batches);
    } else {
        ds_put_format(str, "  Tx packets:        %12d\n\n", 0);
    }
}

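/* Formats the performance histograms in 's' into 'str': a (wall, count)
 * column pair per histogram, a final ">" row for samples above the last
 * wall, and average values per iteration when totals are available. */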
void
pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
{
    int i;

    ds_put_cstr(str, "Histograms\n");
    ds_put_format(str,
                  " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
                  "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
                  "max vhost qlen", "upcalls/it", "cycles/upcall");
    for (i = 0; i < NUM_BINS-1; i++) {
        ds_put_format(str,
            " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
            " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
            " %-9d %-11"PRIu64"\n",
            s->cycles.wall[i], s->cycles.bin[i],
            s->pkts.wall[i], s->pkts.bin[i],
            s->cycles_per_pkt.wall[i], s->cycles_per_pkt.bin[i],
            s->pkts_per_batch.wall[i], s->pkts_per_batch.bin[i],
            s->max_vhost_qfill.wall[i], s->max_vhost_qfill.bin[i],
            s->upcalls.wall[i], s->upcalls.bin[i],
            s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i]);
    }
    ds_put_format(str,
        " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
        " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
        " %-9s %-11"PRIu64"\n",
        ">", s->cycles.bin[i],
        ">", s->pkts.bin[i],
        ">", s->cycles_per_pkt.bin[i],
        ">", s->pkts_per_batch.bin[i],
        ">", s->max_vhost_qfill.bin[i],
        ">", s->upcalls.bin[i],
        ">", s->cycles_per_upcall.bin[i]);
    if (s->totals.iterations > 0) {
        ds_put_cstr(str,
                    "-----------------------------------------------------"
                    "-----------------------------------------------------"
                    "------------------------------------------------\n");
        ds_put_format(str,
                      " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
                      "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
                      "vhost qlen", "upcalls/it", "cycles/upcall");
        ds_put_format(str,
                      " %-21"PRIu64" %-21.5f %-21"PRIu64
                      " %-21.5f %-21.5f %-21.5f %-21"PRIu32"\n",
                      s->totals.cycles / s->totals.iterations,
                      1.0 * s->totals.pkts / s->totals.iterations,
                      s->totals.pkts
                          ? s->totals.busy_cycles / s->totals.pkts : 0,
                      s->totals.batches
                          ? 1.0 * s->totals.pkts / s->totals.batches : 0,
                      1.0 * s->totals.max_vhost_qfill / s->totals.iterations,
                      1.0 * s->totals.upcalls / s->totals.iterations,
                      s->totals.upcalls
                          ? s->totals.upcall_cycles / s->totals.upcalls : 0);
    }
}

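/* Formats the stats of the 'n_iter' most recent iterations in 's' into
 * 'str', one line per iteration, most recent first. */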
void
pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
                                  int n_iter)
{
    struct iter_stats *is;
    size_t index;
    int i;

    if (n_iter == 0) {
        return;
    }
    ds_put_format(str, " %-17s %-10s %-10s %-10s %-10s "
                  "%-10s %-10s %-10s\n",
                  "iter", "cycles", "packets", "cycles/pkt", "pkts/batch",
                  "vhost qlen", "upcalls", "cycles/upcall");
    for (i = 1; i <= n_iter; i++) {
        index = history_sub(s->iterations.idx, i);
        is = &s->iterations.sample[index];
        ds_put_format(str,
                      " %-17"PRIu64" %-11"PRIu64" %-11"PRIu32
                      " %-11"PRIu64" %-11"PRIu32" %-11"PRIu32
                      " %-11"PRIu32" %-11"PRIu32"\n",
                      is->timestamp,
                      is->cycles,
                      is->pkts,
                      is->pkts ? is->cycles / is->pkts : 0,
                      is->batches ? is->pkts / is->batches : 0,
                      is->max_vhost_qfill,
                      is->upcalls,
                      is->upcalls ? is->upcall_cycles / is->upcalls : 0);
    }
}

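/* Formats the aggregated per-millisecond stats of the last 'n_ms'
 * milliseconds in 's' into 'str', one line per millisecond, most recent
 * first. */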
void
pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms)
{
    struct iter_stats *is;
    size_t index;
    int i;

    if (n_ms == 0) {
        return;
    }
    ds_put_format(str,
                  " %-12s %-10s %-10s %-10s %-10s"
                  " %-10s %-10s %-10s %-10s\n",
                  "ms", "iterations", "cycles/it", "Kpps", "cycles/pkt",
                  "pkts/batch", "vhost qlen", "upcalls", "cycles/upcall");
    for (i = 1; i <= n_ms; i++) {
        index = history_sub(s->milliseconds.idx, i);
        is = &s->milliseconds.sample[index];
        ds_put_format(str,
                      " %-12"PRIu64" %-11"PRIu32" %-11"PRIu64
                      " %-11"PRIu32" %-11"PRIu64" %-11"PRIu32
                      " %-11"PRIu32" %-11"PRIu32" %-11"PRIu32"\n",
                      is->timestamp,
                      is->iterations,
                      is->iterations ? is->cycles / is->iterations : 0,
                      is->pkts,
                      is->pkts ? is->busy_cycles / is->pkts : 0,
                      is->batches ? is->pkts / is->batches : 0,
                      is->iterations
                          ? is->max_vhost_qfill / is->iterations : 0,
                      is->upcalls,
                      is->upcalls ? is->upcall_cycles / is->upcalls : 0);
    }
}

void
pmd_perf_read_counters(struct pmd_perf_stats *s,
                       uint64_t stats[PMD_N_STATS])
{
    uint64_t val;

    /* This loop subtracts reference values (.zero[*]) from the counters.
     * Since loads and stores are relaxed, it might be possible for a .zero[*]
     * value to be more recent than the current value we're reading from the
     * counter.  This is not a big problem, since these numbers are not
     * supposed to be 100% accurate, but we should at least make sure that
     * the result is not negative. */
    for (int i = 0; i < PMD_N_STATS; i++) {
        atomic_read_relaxed(&s->counters.n[i], &val);
        if (val > s->counters.zero[i]) {
            stats[i] = val - s->counters.zero[i];
        } else {
            stats[i] = 0;
        }
    }
}

/* This function clears the PMD performance counters from within the PMD
 * thread or from another thread when the PMD thread is not executing its
 * poll loop. */
void
pmd_perf_stats_clear_lock(struct pmd_perf_stats *s)
    OVS_REQUIRES(s->stats_mutex)
{
    ovs_mutex_lock(&s->clear_mutex);
    for (int i = 0; i < PMD_N_STATS; i++) {
        atomic_read_relaxed(&s->counters.n[i], &s->counters.zero[i]);
    }
    /* The following stats are only applicable in the PMD thread. */
    memset(&s->current, 0, sizeof(struct iter_stats));
    memset(&s->totals, 0, sizeof(struct iter_stats));
    histogram_clear(&s->cycles);
    histogram_clear(&s->pkts);
    histogram_clear(&s->cycles_per_pkt);
    histogram_clear(&s->upcalls);
    histogram_clear(&s->cycles_per_upcall);
    histogram_clear(&s->pkts_per_batch);
    histogram_clear(&s->max_vhost_qfill);
    history_init(&s->iterations);
    history_init(&s->milliseconds);
    s->start_ms = time_msec();
    s->milliseconds.sample[0].timestamp = s->start_ms;
    s->log_susp_it = UINT32_MAX;
    s->log_begin_it = UINT32_MAX;
    s->log_end_it = UINT32_MAX;
    s->log_reason = NULL;
    /* Clearing finished. */
    s->clear = false;
    ovs_mutex_unlock(&s->clear_mutex);
}

/* This function can be called from anywhere to clear the stats
 * of PMD and non-PMD threads. */
void
pmd_perf_stats_clear(struct pmd_perf_stats *s)
{
    if (ovs_mutex_trylock(&s->stats_mutex) == 0) {
        /* Locking successful. PMD not polling. */
        pmd_perf_stats_clear_lock(s);
        ovs_mutex_unlock(&s->stats_mutex);
    } else {
        /* Request the polling PMD to clear the stats. There is no need to
         * block here as stats retrieval is prevented during clearing. */
        s->clear = true;
    }
}

/* Functions recording PMD metrics per iteration. */

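/* A PMD thread brackets each iteration of its poll loop with these
 * functions, roughly as in the following simplified sketch:
 *
 *     pmd_perf_start_iteration(s);
 *     ...poll rx queues and process the received packets...
 *     pmd_perf_end_iteration(s, rx_packets, tx_packets, full_metrics);
 */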
void
pmd_perf_start_iteration(struct pmd_perf_stats *s)
    OVS_REQUIRES(s->stats_mutex)
{
    if (s->clear) {
        /* Clear the PMD stats before starting next iteration. */
        pmd_perf_stats_clear_lock(s);
    }
    s->iteration_cnt++;
    /* Initialize the current interval stats. */
    memset(&s->current, 0, sizeof(struct iter_stats));
    if (OVS_LIKELY(s->last_tsc)) {
        /* We assume here that last_tsc was updated immediately prior, at
         * the end of the previous iteration, or just before the first
         * iteration. */
        s->start_tsc = s->last_tsc;
    } else {
        /* In case last_tsc has never been set before. */
        s->start_tsc = cycles_counter_update(s);
    }
}

void
pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
                       int tx_packets, bool full_metrics)
{
    uint64_t now_tsc = cycles_counter_update(s);
    struct iter_stats *cum_ms;
    uint64_t cycles, cycles_per_pkt = 0;
    char *reason = NULL;

    cycles = now_tsc - s->start_tsc;
    s->current.timestamp = s->iteration_cnt;
    s->current.cycles = cycles;
    s->current.pkts = rx_packets;

    if (rx_packets + tx_packets > 0) {
        pmd_perf_update_counter(s, PMD_CYCLES_ITER_BUSY, cycles);
    } else {
        pmd_perf_update_counter(s, PMD_CYCLES_ITER_IDLE, cycles);
    }
    /* Add iteration samples to histograms. */
    histogram_add_sample(&s->cycles, cycles);
    histogram_add_sample(&s->pkts, rx_packets);

    if (!full_metrics) {
        return;
    }

    s->counters.n[PMD_CYCLES_UPCALL] += s->current.upcall_cycles;

    if (rx_packets > 0) {
        cycles_per_pkt = cycles / rx_packets;
        histogram_add_sample(&s->cycles_per_pkt, cycles_per_pkt);
    }
    histogram_add_sample(&s->upcalls, s->current.upcalls);
    histogram_add_sample(&s->max_vhost_qfill, s->current.max_vhost_qfill);

    /* Add iteration samples to millisecond stats. */
    cum_ms = history_current(&s->milliseconds);
    cum_ms->iterations++;
    cum_ms->cycles += cycles;
    if (rx_packets > 0) {
        cum_ms->busy_cycles += cycles;
    }
    cum_ms->pkts += s->current.pkts;
    cum_ms->upcalls += s->current.upcalls;
    cum_ms->upcall_cycles += s->current.upcall_cycles;
    cum_ms->batches += s->current.batches;
    cum_ms->max_vhost_qfill += s->current.max_vhost_qfill;

    if (log_enabled) {
        /* Log suspicious iterations. */
        if (cycles > iter_cycle_threshold) {
            reason = "Excessive total cycles";
        } else if (s->current.max_vhost_qfill >= log_q_thr) {
            reason = "Vhost RX queue full";
        }
        if (OVS_UNLIKELY(reason)) {
            pmd_perf_set_log_susp_iteration(s, reason);
            cycles_counter_update(s);
        }

        /* Log iteration interval around suspicious iteration when reaching
         * the end of the range to be logged. */
        if (OVS_UNLIKELY(s->log_end_it == s->iterations.idx)) {
            pmd_perf_log_susp_iteration_neighborhood(s);
            cycles_counter_update(s);
        }
    }

    /* Store in iteration history. This advances the iteration idx and
     * clears the next slot in the iteration history. */
    history_store(&s->iterations, &s->current);

    if (now_tsc > s->next_check_tsc) {
        /* Check if ms is completed and store in milliseconds history. */
        uint64_t now = time_msec();
        if (now != cum_ms->timestamp) {
            /* Add ms stats to totals. */
            s->totals.iterations += cum_ms->iterations;
            s->totals.cycles += cum_ms->cycles;
            s->totals.busy_cycles += cum_ms->busy_cycles;
            s->totals.pkts += cum_ms->pkts;
            s->totals.upcalls += cum_ms->upcalls;
            s->totals.upcall_cycles += cum_ms->upcall_cycles;
            s->totals.batches += cum_ms->batches;
            s->totals.max_vhost_qfill += cum_ms->max_vhost_qfill;
            cum_ms = history_next(&s->milliseconds);
            cum_ms->timestamp = now;
        }
        /* Do the next check after 4 us (10K cycles at 2.5 GHz TSC clock). */
        s->next_check_tsc = cycles_counter_update(s) + tsc_hz / 250000;
    }
}

/* Delay logging of the suspicious iteration and the range of iterations
 * around it until after the last iteration in the range to be logged.
 * This avoids any distortion of the measurements through the cost of
 * logging itself. */

void
pmd_perf_set_log_susp_iteration(struct pmd_perf_stats *s,
                                char *reason)
{
    if (s->log_susp_it == UINT32_MAX) {
        /* No logging scheduled yet. */
        s->log_susp_it = s->iterations.idx;
        s->log_reason = reason;
        s->log_begin_it = history_sub(s->iterations.idx, log_it_before);
        s->log_end_it = history_add(s->iterations.idx, log_it_after + 1);
    } else if (log_extend) {
        /* Logging was initiated earlier, we log the previous suspicious
         * iteration now and extend the logging interval, if possible. */
        struct iter_stats *susp = &s->iterations.sample[s->log_susp_it];
        uint32_t new_end_it, old_range, new_range;

        VLOG_WARN_RL(&latency_rl,
                     "Suspicious iteration (%s): iter=%"PRIu64
                     " duration=%"PRIu64" us\n",
                     s->log_reason,
                     susp->timestamp,
                     (1000000L * susp->cycles) / tsc_hz);

        new_end_it = history_add(s->iterations.idx, log_it_after + 1);
        new_range = history_sub(new_end_it, s->log_begin_it);
        old_range = history_sub(s->log_end_it, s->log_begin_it);
        if (new_range < old_range) {
            /* Extended range exceeds history length. */
            new_end_it = s->log_begin_it;
        }
        s->log_susp_it = s->iterations.idx;
        s->log_reason = reason;
        s->log_end_it = new_end_it;
    }
}

void
pmd_perf_log_susp_iteration_neighborhood(struct pmd_perf_stats *s)
{
    ovs_assert(s->log_reason != NULL);
    ovs_assert(s->log_susp_it != UINT32_MAX);

    struct ds log = DS_EMPTY_INITIALIZER;
    struct iter_stats *susp = &s->iterations.sample[s->log_susp_it];
    uint32_t range = history_sub(s->log_end_it, s->log_begin_it);

    VLOG_WARN_RL(&latency_rl,
                 "Suspicious iteration (%s): iter=%"PRIu64
                 " duration=%"PRIu64" us\n",
                 s->log_reason,
                 susp->timestamp,
                 (1000000L * susp->cycles) / tsc_hz);

    pmd_perf_format_iteration_history(&log, s, range);
    VLOG_WARN_RL(&latency_rl,
                 "Neighborhood of suspicious iteration:\n"
                 "%s", ds_cstr(&log));
    ds_destroy(&log);
    s->log_susp_it = s->log_end_it = s->log_begin_it = UINT32_MAX;
    s->log_reason = NULL;

    if (range > 100) {
        /* Reset to safe default values to avoid disturbance. */
        log_it_before = LOG_IT_BEFORE;
        log_it_after = LOG_IT_AFTER;
        log_extend = false;
    }
}

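/* Implements the unixctl command "dpif-netdev/pmd-perf-log-set".  For
 * example,
 *
 *     ovs-appctl dpif-netdev/pmd-perf-log-set on -b 10 -a 20 -e -us 300
 *
 * enables logging, records 10 iterations before and 20 iterations after
 * a suspicious iteration, allows the logged range to be extended, and
 * flags iterations longer than 300 microseconds as suspicious. */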
void
pmd_perf_log_set_cmd(struct unixctl_conn *conn,
                     int argc, const char *argv[],
                     void *aux OVS_UNUSED)
{
    unsigned int it_before, it_after, us_thr, q_thr;
    bool on, extend;
    bool usage = false;

    on = log_enabled;
    extend = log_extend;
    it_before = log_it_before;
    it_after = log_it_after;
    q_thr = log_q_thr;
    us_thr = log_us_thr;

    while (argc > 1) {
        if (!strcmp(argv[1], "on")) {
            on = true;
            argc--;
            argv++;
        } else if (!strcmp(argv[1], "off")) {
            on = false;
            argc--;
            argv++;
        } else if (!strcmp(argv[1], "-e")) {
            extend = true;
            argc--;
            argv++;
        } else if (!strcmp(argv[1], "-ne")) {
            extend = false;
            argc--;
            argv++;
        } else if (!strcmp(argv[1], "-a") && argc > 2) {
            if (str_to_uint(argv[2], 10, &it_after)) {
                if (it_after > HISTORY_LEN - 2) {
                    it_after = HISTORY_LEN - 2;
                }
            } else {
                usage = true;
                break;
            }
            argc -= 2;
            argv += 2;
        } else if (!strcmp(argv[1], "-b") && argc > 2) {
            if (str_to_uint(argv[2], 10, &it_before)) {
                if (it_before > HISTORY_LEN - 2) {
                    it_before = HISTORY_LEN - 2;
                }
            } else {
                usage = true;
                break;
            }
            argc -= 2;
            argv += 2;
        } else if (!strcmp(argv[1], "-q") && argc > 2) {
            if (!str_to_uint(argv[2], 10, &q_thr)) {
                usage = true;
                break;
            }
            argc -= 2;
            argv += 2;
        } else if (!strcmp(argv[1], "-us") && argc > 2) {
            if (!str_to_uint(argv[2], 10, &us_thr)) {
                usage = true;
                break;
            }
            argc -= 2;
            argv += 2;
        } else {
            usage = true;
            break;
        }
    }
    if (it_before + it_after > HISTORY_LEN - 2) {
        it_after = HISTORY_LEN - 2 - it_before;
    }

    if (usage) {
        unixctl_command_reply_error(conn,
                "Usage: ovs-appctl dpif-netdev/pmd-perf-log-set "
                "[on|off] [-b before] [-a after] [-e|-ne] "
                "[-us usec] [-q qlen]");
        return;
    }

    VLOG_INFO("pmd-perf-log-set: %s, before=%d, after=%d, extend=%s, "
              "us_thr=%d, q_thr=%d\n",
              on ? "on" : "off", it_before, it_after,
              extend ? "true" : "false", us_thr, q_thr);
    log_enabled = on;
    log_extend = extend;
    log_it_before = it_before;
    log_it_after = it_after;
    log_q_thr = q_thr;
    log_us_thr = us_thr;
    iter_cycle_threshold = (log_us_thr * tsc_hz) / 1000000L;

    unixctl_command_reply(conn, "");
}