/*
 * Copyright (c) 2017 Ericsson AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <config.h>
#include <stdint.h>

#include "dpif-netdev-perf.h"
#include "openvswitch/dynamic-string.h"
#include "openvswitch/vlog.h"
#include "ovs-thread.h"
#include "timeval.h"

VLOG_DEFINE_THIS_MODULE(pmd_perf);

#define ITER_US_THRESHOLD 250   /* Warning threshold for iteration duration
                                   in microseconds. */
#define VHOST_QUEUE_FULL 128    /* Size of the virtio TX queue. */
#define LOG_IT_BEFORE 5         /* Number of iterations to log before
                                   suspicious iteration. */
#define LOG_IT_AFTER 5          /* Number of iterations to log after
                                   suspicious iteration. */

bool log_enabled = false;
bool log_extend = false;
static uint32_t log_it_before = LOG_IT_BEFORE;
static uint32_t log_it_after = LOG_IT_AFTER;
static uint32_t log_us_thr = ITER_US_THRESHOLD;
uint32_t log_q_thr = VHOST_QUEUE_FULL;
uint64_t iter_cycle_threshold;

static struct vlog_rate_limit latency_rl = VLOG_RATE_LIMIT_INIT(600, 600);

#ifdef DPDK_NETDEV
static uint64_t
get_tsc_hz(void)
{
    return rte_get_tsc_hz();
}
#else
/* This function is only invoked from PMD threads which depend on DPDK.
 * A dummy function is sufficient when building without DPDK_NETDEV. */
static uint64_t
get_tsc_hz(void)
{
    return 1;
}
#endif
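
/* A cycle count converts to wall-clock time as us = cycles * 1e6 / tsc_hz.
 * Worked example (illustrative, assuming the 2.5 GHz TSC clock referenced
 * in the comments below): the default iteration warning threshold of
 * ITER_US_THRESHOLD = 250 us corresponds to
 * 250 * 2,500,000,000 / 1,000,000 = 625,000 TSC cycles. This is exactly
 * the conversion that pmd_perf_log_set_cmd() below applies to log_us_thr
 * when deriving iter_cycle_threshold. */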

/* Histogram functions. */

static void
histogram_walls_set_lin(struct histogram *hist, uint32_t min, uint32_t max)
{
    int i;

    ovs_assert(min < max);
    for (i = 0; i < NUM_BINS - 1; i++) {
        hist->wall[i] = min + (i * (max - min)) / (NUM_BINS - 2);
    }
    hist->wall[NUM_BINS - 1] = UINT32_MAX;
}

static void
histogram_walls_set_log(struct histogram *hist, uint32_t min, uint32_t max)
{
    int i, start, bins, wall;
    double log_min, log_max;

    ovs_assert(min < max);
    if (min > 0) {
        log_min = log(min);
        log_max = log(max);
        start = 0;
        bins = NUM_BINS - 1;
    } else {
        hist->wall[0] = 0;
        log_min = log(1);
        log_max = log(max);
        start = 1;
        bins = NUM_BINS - 2;
    }
    wall = start;
    for (i = 0; i < bins; i++) {
        /* Make sure each wall is monotonically increasing. */
        wall = MAX(wall,
                   exp(log_min + (i * (log_max - log_min)) / (bins - 1)));
        hist->wall[start + i] = wall++;
    }
    if (hist->wall[NUM_BINS - 2] < max) {
        hist->wall[NUM_BINS - 2] = max;
    }
    hist->wall[NUM_BINS - 1] = UINT32_MAX;
}
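
/* Worked example of the wall placement above (a sketch, assuming
 * NUM_BINS == 32 as defined in dpif-netdev-perf.h): for min = 0 and
 * max = 1000, wall[0] is pinned at 0, wall[1] through wall[30] are spaced
 * evenly on a logarithmic scale between 1 and 1000 (bumped up by at least
 * one per bin at the low end, where pure log spacing would yield duplicate
 * integer walls), and wall[31] = UINT32_MAX catches all outliers. */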

uint64_t
histogram_samples(const struct histogram *hist)
{
    uint64_t samples = 0;

    for (int i = 0; i < NUM_BINS; i++) {
        samples += hist->bin[i];
    }
    return samples;
}

static void
histogram_clear(struct histogram *hist)
{
    int i;

    for (i = 0; i < NUM_BINS; i++) {
        hist->bin[i] = 0;
    }
}

static void
history_init(struct history *h)
{
    memset(h, 0, sizeof(*h));
}

void
pmd_perf_stats_init(struct pmd_perf_stats *s)
{
    memset(s, 0, sizeof(*s));
    ovs_mutex_init(&s->stats_mutex);
    ovs_mutex_init(&s->clear_mutex);
    /* Logarithmic histogram for cycles/it ranging from 500 to 24M
     * (corresponding to 200 ns to 9.6 ms at a 2.5 GHz TSC clock). */
    histogram_walls_set_log(&s->cycles, 500, 24000000);
    /* Logarithmic histogram for pkts/it ranging from 0 to 1000. */
    histogram_walls_set_log(&s->pkts, 0, 1000);
    /* Linear histogram for cycles/pkt ranging from 100 to 30K. */
    histogram_walls_set_lin(&s->cycles_per_pkt, 100, 30000);
    /* Linear histogram for pkts/rx batch ranging from 0 to 32,
     * the maximum rx batch size in OVS. */
    histogram_walls_set_lin(&s->pkts_per_batch, 0, 32);
    /* Linear histogram for upcalls/it ranging from 0 to 30. */
    histogram_walls_set_lin(&s->upcalls, 0, 30);
    /* Logarithmic histogram for cycles/upcall ranging from 1000 to 1M
     * (corresponding to 400 ns to 400 us at a 2.5 GHz TSC clock). */
    histogram_walls_set_log(&s->cycles_per_upcall, 1000, 1000000);
    /* Logarithmic histogram for max vhostuser queue fill level from 0
     * to 512. 512 is the maximum fill level for a virtio queue with
     * 1024 descriptors (the maximum configurable length in Qemu), with
     * the DPDK 17.11 virtio PMD in the guest. */
    histogram_walls_set_log(&s->max_vhost_qfill, 0, 512);
    s->iteration_cnt = 0;
    s->start_ms = time_msec();
    s->log_susp_it = UINT32_MAX;
    s->log_begin_it = UINT32_MAX;
    s->log_end_it = UINT32_MAX;
    s->log_reason = NULL;
}

void
pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
                              double duration)
{
    uint64_t stats[PMD_N_STATS];
    uint64_t tsc_hz = get_tsc_hz();
    double us_per_cycle = 1000000.0 / tsc_hz;

    if (duration == 0) {
        return;
    }

    pmd_perf_read_counters(s, stats);
    uint64_t tot_cycles = stats[PMD_CYCLES_ITER_IDLE] +
                          stats[PMD_CYCLES_ITER_BUSY];
    uint64_t rx_packets = stats[PMD_STAT_RECV];
    uint64_t tx_packets = stats[PMD_STAT_SENT_PKTS];
    uint64_t tx_batches = stats[PMD_STAT_SENT_BATCHES];
    uint64_t passes = stats[PMD_STAT_RECV] +
                      stats[PMD_STAT_RECIRC];
    uint64_t upcalls = stats[PMD_STAT_MISS];
    uint64_t upcall_cycles = stats[PMD_CYCLES_UPCALL];
    uint64_t tot_iter = histogram_samples(&s->pkts);
    uint64_t idle_iter = s->pkts.bin[0];
    uint64_t busy_iter = tot_iter >= idle_iter ? tot_iter - idle_iter : 0;

    ds_put_format(str,
            "  Iterations:        %12"PRIu64"  (%.2f us/it)\n"
            "  - Used TSC cycles: %12"PRIu64"  (%5.1f %% of total cycles)\n"
            "  - idle iterations: %12"PRIu64"  (%5.1f %% of used cycles)\n"
            "  - busy iterations: %12"PRIu64"  (%5.1f %% of used cycles)\n",
            tot_iter, tot_cycles * us_per_cycle / tot_iter,
            tot_cycles, 100.0 * (tot_cycles / duration) / tsc_hz,
            idle_iter,
            100.0 * stats[PMD_CYCLES_ITER_IDLE] / tot_cycles,
            busy_iter,
            100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles);
    if (rx_packets > 0) {
        ds_put_format(str,
            "  Rx packets:        %12"PRIu64"  (%.0f Kpps, %.0f cycles/pkt)\n"
            "  Datapath passes:   %12"PRIu64"  (%.2f passes/pkt)\n"
            "  - EMC hits:        %12"PRIu64"  (%5.1f %%)\n"
            "  - SMC hits:        %12"PRIu64"  (%5.1f %%)\n"
            "  - Megaflow hits:   %12"PRIu64"  (%5.1f %%, %.2f "
            "subtbl lookups/hit)\n"
            "  - Upcalls:         %12"PRIu64"  (%5.1f %%, %.1f us/upcall)\n"
            "  - Lost upcalls:    %12"PRIu64"  (%5.1f %%)\n",
            rx_packets, (rx_packets / duration) / 1000,
            1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets,
            passes, rx_packets ? 1.0 * passes / rx_packets : 0,
            stats[PMD_STAT_EXACT_HIT],
            100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
            stats[PMD_STAT_SMC_HIT],
            100.0 * stats[PMD_STAT_SMC_HIT] / passes,
            stats[PMD_STAT_MASKED_HIT],
            100.0 * stats[PMD_STAT_MASKED_HIT] / passes,
            stats[PMD_STAT_MASKED_HIT]
            ? 1.0 * stats[PMD_STAT_MASKED_LOOKUP] / stats[PMD_STAT_MASKED_HIT]
            : 0,
            upcalls, 100.0 * upcalls / passes,
            upcalls ? (upcall_cycles * us_per_cycle) / upcalls : 0,
            stats[PMD_STAT_LOST],
            100.0 * stats[PMD_STAT_LOST] / passes);
    } else {
        ds_put_format(str, "  Rx packets:        %12d\n", 0);
    }
    if (tx_packets > 0) {
        ds_put_format(str,
            "  Tx packets:        %12"PRIu64"  (%.0f Kpps)\n"
            "  Tx batches:        %12"PRIu64"  (%.2f pkts/batch)\n",
            tx_packets, (tx_packets / duration) / 1000,
            tx_batches, 1.0 * tx_packets / tx_batches);
    } else {
        ds_put_format(str, "  Tx packets:        %12d\n\n", 0);
    }
}

void
pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
{
    int i;

    ds_put_cstr(str, "Histograms\n");
    ds_put_format(str,
                  " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
                  "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
                  "max vhost qlen", "upcalls/it", "cycles/upcall");
    for (i = 0; i < NUM_BINS - 1; i++) {
        ds_put_format(str,
                      " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
                      " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
                      " %-9d %-11"PRIu64"\n",
                      s->cycles.wall[i], s->cycles.bin[i],
                      s->pkts.wall[i], s->pkts.bin[i],
                      s->cycles_per_pkt.wall[i], s->cycles_per_pkt.bin[i],
                      s->pkts_per_batch.wall[i], s->pkts_per_batch.bin[i],
                      s->max_vhost_qfill.wall[i], s->max_vhost_qfill.bin[i],
                      s->upcalls.wall[i], s->upcalls.bin[i],
                      s->cycles_per_upcall.wall[i],
                      s->cycles_per_upcall.bin[i]);
    }
    ds_put_format(str,
                  " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
                  " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
                  " %-9s %-11"PRIu64"\n",
                  ">", s->cycles.bin[i],
                  ">", s->pkts.bin[i],
                  ">", s->cycles_per_pkt.bin[i],
                  ">", s->pkts_per_batch.bin[i],
                  ">", s->max_vhost_qfill.bin[i],
                  ">", s->upcalls.bin[i],
                  ">", s->cycles_per_upcall.bin[i]);
    if (s->totals.iterations > 0) {
        ds_put_cstr(str,
                    "-----------------------------------------------------"
                    "-----------------------------------------------------"
                    "------------------------------------------------\n");
        ds_put_format(str,
                      " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
                      "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
                      "vhost qlen", "upcalls/it", "cycles/upcall");
        ds_put_format(str,
                      " %-21"PRIu64" %-21.5f %-21"PRIu64
                      " %-21.5f %-21.5f %-21.5f %-21"PRIu32"\n",
                      s->totals.cycles / s->totals.iterations,
                      1.0 * s->totals.pkts / s->totals.iterations,
                      s->totals.pkts
                          ? s->totals.busy_cycles / s->totals.pkts : 0,
                      s->totals.batches
                          ? 1.0 * s->totals.pkts / s->totals.batches : 0,
                      1.0 * s->totals.max_vhost_qfill / s->totals.iterations,
                      1.0 * s->totals.upcalls / s->totals.iterations,
                      s->totals.upcalls
                          ? s->totals.upcall_cycles / s->totals.upcalls : 0);
    }
}

void
pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
                                  int n_iter)
{
    struct iter_stats *is;
    size_t index;
    int i;

    if (n_iter == 0) {
        return;
    }
    ds_put_format(str, " %-17s %-10s %-10s %-10s %-10s "
                  "%-10s %-10s %-10s\n",
                  "iter", "cycles", "packets", "cycles/pkt", "pkts/batch",
                  "vhost qlen", "upcalls", "cycles/upcall");
    for (i = 1; i <= n_iter; i++) {
        index = history_sub(s->iterations.idx, i);
        is = &s->iterations.sample[index];
        ds_put_format(str,
                      " %-17"PRIu64" %-11"PRIu64" %-11"PRIu32
                      " %-11"PRIu64" %-11"PRIu32" %-11"PRIu32
                      " %-11"PRIu32" %-11"PRIu32"\n",
                      is->timestamp,
                      is->cycles,
                      is->pkts,
                      is->pkts ? is->cycles / is->pkts : 0,
                      is->batches ? is->pkts / is->batches : 0,
                      is->max_vhost_qfill,
                      is->upcalls,
                      is->upcalls ? is->upcall_cycles / is->upcalls : 0);
    }
}

void
pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms)
{
    struct iter_stats *is;
    size_t index;
    int i;

    if (n_ms == 0) {
        return;
    }
    ds_put_format(str,
                  " %-12s %-10s %-10s %-10s %-10s"
                  " %-10s %-10s %-10s %-10s\n",
                  "ms", "iterations", "cycles/it", "Kpps", "cycles/pkt",
                  "pkts/batch", "vhost qlen", "upcalls", "cycles/upcall");
    for (i = 1; i <= n_ms; i++) {
        index = history_sub(s->milliseconds.idx, i);
        is = &s->milliseconds.sample[index];
        ds_put_format(str,
                      " %-12"PRIu64" %-11"PRIu32" %-11"PRIu64
                      " %-11"PRIu32" %-11"PRIu64" %-11"PRIu32
                      " %-11"PRIu32" %-11"PRIu32" %-11"PRIu32"\n",
                      is->timestamp,
                      is->iterations,
                      is->iterations ? is->cycles / is->iterations : 0,
                      is->pkts,
                      is->pkts ? is->busy_cycles / is->pkts : 0,
                      is->batches ? is->pkts / is->batches : 0,
                      is->iterations
                          ? is->max_vhost_qfill / is->iterations : 0,
                      is->upcalls,
                      is->upcalls ? is->upcall_cycles / is->upcalls : 0);
    }
}

void
pmd_perf_read_counters(struct pmd_perf_stats *s,
                       uint64_t stats[PMD_N_STATS])
{
    uint64_t val;

    /* This loop subtracts reference values (.zero[*]) from the counters.
     * Since loads and stores are relaxed, it might be possible for a .zero[*]
     * value to be more recent than the current value we're reading from the
     * counter. This is not a big problem, since these numbers are not
     * supposed to be 100% accurate, but we should at least make sure that
     * the result is not negative. */
    for (int i = 0; i < PMD_N_STATS; i++) {
        atomic_read_relaxed(&s->counters.n[i], &val);
        if (val > s->counters.zero[i]) {
            stats[i] = val - s->counters.zero[i];
        } else {
            stats[i] = 0;
        }
    }
}
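
/* Example for the baseline subtraction above: if n[PMD_STAT_RECV] has
 * advanced to 1500 and the last clear stored zero[PMD_STAT_RECV] == 500,
 * the reported stats[PMD_STAT_RECV] is 1000, i.e. the packets received
 * since the last clear. (The numbers are illustrative only.) */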

/* This function clears the PMD performance counters from within the PMD
 * thread or from another thread when the PMD thread is not executing its
 * poll loop. */
void
pmd_perf_stats_clear_lock(struct pmd_perf_stats *s)
    OVS_REQUIRES(s->stats_mutex)
{
    ovs_mutex_lock(&s->clear_mutex);
    for (int i = 0; i < PMD_N_STATS; i++) {
        atomic_read_relaxed(&s->counters.n[i], &s->counters.zero[i]);
    }
    /* The following stats are only updated from within the PMD thread. */
    memset(&s->current, 0, sizeof(struct iter_stats));
    memset(&s->totals, 0, sizeof(struct iter_stats));
    histogram_clear(&s->cycles);
    histogram_clear(&s->pkts);
    histogram_clear(&s->cycles_per_pkt);
    histogram_clear(&s->upcalls);
    histogram_clear(&s->cycles_per_upcall);
    histogram_clear(&s->pkts_per_batch);
    histogram_clear(&s->max_vhost_qfill);
    history_init(&s->iterations);
    history_init(&s->milliseconds);
    s->start_ms = time_msec();
    s->milliseconds.sample[0].timestamp = s->start_ms;
    s->log_susp_it = UINT32_MAX;
    s->log_begin_it = UINT32_MAX;
    s->log_end_it = UINT32_MAX;
    s->log_reason = NULL;
    /* Clearing finished. */
    s->clear = false;
    ovs_mutex_unlock(&s->clear_mutex);
}

/* This function can be called from anywhere to clear the stats
 * of PMD and non-PMD threads. */
void
pmd_perf_stats_clear(struct pmd_perf_stats *s)
{
    if (ovs_mutex_trylock(&s->stats_mutex) == 0) {
        /* Locking successful. PMD not polling. */
        pmd_perf_stats_clear_lock(s);
        ovs_mutex_unlock(&s->stats_mutex);
    } else {
        /* Request the polling PMD to clear the stats. There is no need to
         * block here as stats retrieval is prevented during clearing. */
        s->clear = true;
    }
}
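
/* Sketch of the resulting clear handshake between a control thread and a
 * polling PMD thread:
 *
 *   control thread                        PMD thread
 *   --------------                        ----------
 *   pmd_perf_stats_clear()
 *     trylock(stats_mutex) fails
 *     s->clear = true
 *                                         pmd_perf_start_iteration()
 *                                           sees s->clear set
 *                                           pmd_perf_stats_clear_lock()
 *                                             s->clear = false
 */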

/* Functions recording PMD metrics per iteration. */

void
pmd_perf_start_iteration(struct pmd_perf_stats *s)
    OVS_REQUIRES(s->stats_mutex)
{
    if (s->clear) {
        /* Clear the PMD stats before starting the next iteration. */
        pmd_perf_stats_clear_lock(s);
    }
    s->iteration_cnt++;
    /* Initialize the current interval stats. */
    memset(&s->current, 0, sizeof(struct iter_stats));
    if (OVS_LIKELY(s->last_tsc)) {
        /* We assume that last_tsc was updated immediately before this call,
         * either at the end of the previous iteration or just before the
         * first iteration. */
        s->start_tsc = s->last_tsc;
    } else {
        /* In case last_tsc has never been set before. */
        s->start_tsc = cycles_counter_update(s);
    }
}

void
pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
                       int tx_packets, bool full_metrics)
{
    uint64_t now_tsc = cycles_counter_update(s);
    struct iter_stats *cum_ms;
    uint64_t cycles, cycles_per_pkt = 0;
    char *reason = NULL;

    cycles = now_tsc - s->start_tsc;
    s->current.timestamp = s->iteration_cnt;
    s->current.cycles = cycles;
    s->current.pkts = rx_packets;

    if (rx_packets + tx_packets > 0) {
        pmd_perf_update_counter(s, PMD_CYCLES_ITER_BUSY, cycles);
    } else {
        pmd_perf_update_counter(s, PMD_CYCLES_ITER_IDLE, cycles);
    }
    /* Add iteration samples to histograms. */
    histogram_add_sample(&s->cycles, cycles);
    histogram_add_sample(&s->pkts, rx_packets);

    if (!full_metrics) {
        return;
    }

    s->counters.n[PMD_CYCLES_UPCALL] += s->current.upcall_cycles;

    if (rx_packets > 0) {
        cycles_per_pkt = cycles / rx_packets;
        histogram_add_sample(&s->cycles_per_pkt, cycles_per_pkt);
    }
    if (s->current.batches > 0) {
        histogram_add_sample(&s->pkts_per_batch,
                             rx_packets / s->current.batches);
    }
    histogram_add_sample(&s->upcalls, s->current.upcalls);
    if (s->current.upcalls > 0) {
        histogram_add_sample(&s->cycles_per_upcall,
                             s->current.upcall_cycles / s->current.upcalls);
    }
    histogram_add_sample(&s->max_vhost_qfill, s->current.max_vhost_qfill);

    /* Add iteration samples to millisecond stats. */
    cum_ms = history_current(&s->milliseconds);
    cum_ms->iterations++;
    cum_ms->cycles += cycles;
    if (rx_packets > 0) {
        cum_ms->busy_cycles += cycles;
    }
    cum_ms->pkts += s->current.pkts;
    cum_ms->upcalls += s->current.upcalls;
    cum_ms->upcall_cycles += s->current.upcall_cycles;
    cum_ms->batches += s->current.batches;
    cum_ms->max_vhost_qfill += s->current.max_vhost_qfill;

    if (log_enabled) {
        /* Log suspicious iterations. */
        if (cycles > iter_cycle_threshold) {
            reason = "Excessive total cycles";
        } else if (s->current.max_vhost_qfill >= log_q_thr) {
            reason = "Vhost RX queue full";
        }
        if (OVS_UNLIKELY(reason)) {
            pmd_perf_set_log_susp_iteration(s, reason);
            cycles_counter_update(s);
        }

        /* Log the iteration interval around a suspicious iteration when
         * reaching the end of the range to be logged. */
        if (OVS_UNLIKELY(s->log_end_it == s->iterations.idx)) {
            pmd_perf_log_susp_iteration_neighborhood(s);
            cycles_counter_update(s);
        }
    }

    /* Store in iteration history. This advances the iteration idx and
     * clears the next slot in the iteration history. */
    history_store(&s->iterations, &s->current);

    if (now_tsc > s->next_check_tsc) {
        /* Check if the ms is completed and store in milliseconds history. */
        uint64_t now = time_msec();
        if (now != cum_ms->timestamp) {
            /* Add ms stats to totals. */
            s->totals.iterations += cum_ms->iterations;
            s->totals.cycles += cum_ms->cycles;
            s->totals.busy_cycles += cum_ms->busy_cycles;
            s->totals.pkts += cum_ms->pkts;
            s->totals.upcalls += cum_ms->upcalls;
            s->totals.upcall_cycles += cum_ms->upcall_cycles;
            s->totals.batches += cum_ms->batches;
            s->totals.max_vhost_qfill += cum_ms->max_vhost_qfill;
            cum_ms = history_next(&s->milliseconds);
            cum_ms->timestamp = now;
        }
        /* Do the next check after 10K cycles (4 us at a 2.5 GHz TSC). */
        s->next_check_tsc = cycles_counter_update(s) + 10000;
    }
}
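
/* Example timeline for the millisecond aggregation above (illustrative):
 * iterations ending while time_msec() == 17 accumulate into the cum_ms
 * bucket stamped 17; the first iteration that observes time_msec() == 18
 * folds that bucket into s->totals and opens a new bucket stamped 18.
 * The time_msec() call itself is made at most once per 10K TSC cycles,
 * rate-limited through s->next_check_tsc. */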

/* Delay logging of the suspicious iteration and the range of iterations
 * around it until after the last iteration in the range to be logged.
 * This avoids any distortion of the measurements through the cost of
 * logging itself. */

void
pmd_perf_set_log_susp_iteration(struct pmd_perf_stats *s,
                                char *reason)
{
    if (s->log_susp_it == UINT32_MAX) {
        /* No logging scheduled yet. */
        s->log_susp_it = s->iterations.idx;
        s->log_reason = reason;
        s->log_begin_it = history_sub(s->iterations.idx, log_it_before);
        s->log_end_it = history_add(s->iterations.idx, log_it_after + 1);
    } else if (log_extend) {
        /* Logging was initiated earlier. Log the previous suspicious
         * iteration now and extend the logging interval, if possible. */
        struct iter_stats *susp = &s->iterations.sample[s->log_susp_it];
        uint32_t new_end_it, old_range, new_range;

        VLOG_WARN_RL(&latency_rl,
                     "Suspicious iteration (%s): iter=%"PRIu64
                     " duration=%"PRIu64" us\n",
                     s->log_reason,
                     susp->timestamp,
                     (1000000L * susp->cycles) / get_tsc_hz());

        new_end_it = history_add(s->iterations.idx, log_it_after + 1);
        new_range = history_sub(new_end_it, s->log_begin_it);
        old_range = history_sub(s->log_end_it, s->log_begin_it);
        if (new_range < old_range) {
            /* Extended range exceeds history length. */
            new_end_it = s->log_begin_it;
        }
        s->log_susp_it = s->iterations.idx;
        s->log_reason = reason;
        s->log_end_it = new_end_it;
    }
}
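
/* Example of the circular history arithmetic above (a sketch, assuming
 * HISTORY_LEN == 1000 as defined in dpif-netdev-perf.h and the default
 * log_it_before == log_it_after == 5): for a suspicious iteration at
 * iterations.idx == 2, history_sub(2, 5) yields log_begin_it == 997 and
 * history_add(2, 6) yields log_end_it == 8, so the logged neighborhood
 * wraps around the end of the iteration history. */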

void
pmd_perf_log_susp_iteration_neighborhood(struct pmd_perf_stats *s)
{
    ovs_assert(s->log_reason != NULL);
    ovs_assert(s->log_susp_it != UINT32_MAX);

    struct ds log = DS_EMPTY_INITIALIZER;
    struct iter_stats *susp = &s->iterations.sample[s->log_susp_it];
    uint32_t range = history_sub(s->log_end_it, s->log_begin_it);

    VLOG_WARN_RL(&latency_rl,
                 "Suspicious iteration (%s): iter=%"PRIu64
                 " duration=%"PRIu64" us\n",
                 s->log_reason,
                 susp->timestamp,
                 (1000000L * susp->cycles) / get_tsc_hz());

    pmd_perf_format_iteration_history(&log, s, range);
    VLOG_WARN_RL(&latency_rl,
                 "Neighborhood of suspicious iteration:\n"
                 "%s", ds_cstr(&log));
    ds_destroy(&log);
    s->log_susp_it = s->log_end_it = s->log_begin_it = UINT32_MAX;
    s->log_reason = NULL;

    if (range > 100) {
        /* Reset to safe default values to avoid disturbance. */
        log_it_before = LOG_IT_BEFORE;
        log_it_after = LOG_IT_AFTER;
        log_extend = false;
    }
}

void
pmd_perf_log_set_cmd(struct unixctl_conn *conn,
                     int argc, const char *argv[],
                     void *aux OVS_UNUSED)
{
    unsigned int it_before, it_after, us_thr, q_thr;
    bool on, extend;
    bool usage = false;

    on = log_enabled;
    extend = log_extend;
    it_before = log_it_before;
    it_after = log_it_after;
    q_thr = log_q_thr;
    us_thr = log_us_thr;

    while (argc > 1) {
        if (!strcmp(argv[1], "on")) {
            on = true;
            argc--;
            argv++;
        } else if (!strcmp(argv[1], "off")) {
            on = false;
            argc--;
            argv++;
        } else if (!strcmp(argv[1], "-e")) {
            extend = true;
            argc--;
            argv++;
        } else if (!strcmp(argv[1], "-ne")) {
            extend = false;
            argc--;
            argv++;
        } else if (!strcmp(argv[1], "-a") && argc > 2) {
            if (str_to_uint(argv[2], 10, &it_after)) {
                if (it_after > HISTORY_LEN - 2) {
                    it_after = HISTORY_LEN - 2;
                }
            } else {
                usage = true;
                break;
            }
            argc -= 2;
            argv += 2;
        } else if (!strcmp(argv[1], "-b") && argc > 2) {
            if (str_to_uint(argv[2], 10, &it_before)) {
                if (it_before > HISTORY_LEN - 2) {
                    it_before = HISTORY_LEN - 2;
                }
            } else {
                usage = true;
                break;
            }
            argc -= 2;
            argv += 2;
        } else if (!strcmp(argv[1], "-q") && argc > 2) {
            if (!str_to_uint(argv[2], 10, &q_thr)) {
                usage = true;
                break;
            }
            argc -= 2;
            argv += 2;
        } else if (!strcmp(argv[1], "-us") && argc > 2) {
            if (!str_to_uint(argv[2], 10, &us_thr)) {
                usage = true;
                break;
            }
            argc -= 2;
            argv += 2;
        } else {
            usage = true;
            break;
        }
    }
    if (it_before + it_after > HISTORY_LEN - 2) {
        it_after = HISTORY_LEN - 2 - it_before;
    }

    if (usage) {
        unixctl_command_reply_error(conn,
                "Usage: ovs-appctl dpif-netdev/pmd-perf-log-set "
                "[on|off] [-b before] [-a after] [-e|-ne] "
                "[-us usec] [-q qlen]");
        return;
    }

    VLOG_INFO("pmd-perf-log-set: %s, before=%d, after=%d, extend=%s, "
              "us_thr=%d, q_thr=%d\n",
              on ? "on" : "off", it_before, it_after,
              extend ? "true" : "false", us_thr, q_thr);
    log_enabled = on;
    log_extend = extend;
    log_it_before = it_before;
    log_it_after = it_after;
    log_q_thr = q_thr;
    log_us_thr = us_thr;
    iter_cycle_threshold = (log_us_thr * get_tsc_hz()) / 1000000L;

    unixctl_command_reply(conn, "");
}
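
/* Example invocation (illustrative values): enable supervision, log 10
 * iterations before and 20 after each suspicious iteration, extend the
 * logging interval on back-to-back suspicious iterations, and warn at an
 * iteration duration of 300 us:
 *
 *   ovs-appctl dpif-netdev/pmd-perf-log-set on -b 10 -a 20 -e -us 300
 */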