2 * Copyright (c) 2017 Ericsson AB.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at:
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
21 #include "dpif-netdev-perf.h"
22 #include "openvswitch/dynamic-string.h"
23 #include "openvswitch/vlog.h"
25 #include "ovs-thread.h"
VLOG_DEFINE_THIS_MODULE(pmd_perf);

#define ITER_US_THRESHOLD 250   /* Warning threshold for iteration duration
                                 * in microseconds (comment continuation was
                                 * lost in this excerpt). */
#define VHOST_QUEUE_FULL 128    /* Size of the virtio TX queue. */
#define LOG_IT_BEFORE 5         /* Number of iterations to log before
                                   suspicious iteration. */
#define LOG_IT_AFTER 5          /* Number of iterations to log after
                                   suspicious iteration. */

/* Runtime knobs for suspicious-iteration logging.  The non-static ones are
 * read from other datapath modules; values are adjusted through the
 * "pmd-perf-log-set" unixctl command implemented later in this file. */
bool log_enabled = false;
bool log_extend = false;
static uint32_t log_it_before = LOG_IT_BEFORE;
static uint32_t log_it_after = LOG_IT_AFTER;
static uint32_t log_us_thr = ITER_US_THRESHOLD;
uint32_t log_q_thr = VHOST_QUEUE_FULL;
/* log_us_thr converted to TSC cycles; recomputed whenever the threshold or
 * the TSC frequency changes ("(log_us_thr * tsc_hz) / 1000000L"). */
uint64_t iter_cycle_threshold;

/* Rate limiter for the suspicious-iteration warning logs. */
static struct vlog_rate_limit latency_rl = VLOG_RATE_LIMIT_INIT(600, 600);

/* TSC frequency in Hz; stays at 1 until estimated or provided by DPDK, so
 * divisions by tsc_hz are always safe. */
static uint64_t tsc_hz = 1;
51 pmd_perf_estimate_tsc_frequency(void)
54 if (dpdk_available()) {
55 tsc_hz
= rte_get_tsc_hz();
58 VLOG_INFO("DPDK provided TSC frequency: %"PRIu64
" KHz", tsc_hz
/ 1000);
62 struct ovs_numa_dump
*affinity
;
63 struct pmd_perf_stats s
;
66 /* DPDK is not available or returned unreliable value.
67 * Trying to estimate. */
68 affinity
= ovs_numa_thread_getaffinity_dump();
70 const struct ovs_numa_info_core
*core
;
72 FOR_EACH_CORE_ON_DUMP (core
, affinity
) {
73 /* Setting affinity to a single core from the affinity mask to
74 * avoid re-scheduling to another core while sleeping. */
75 ovs_numa_thread_setaffinity_core(core
->core_id
);
80 memset(&s
, 0, sizeof s
);
81 start
= cycles_counter_update(&s
);
82 /* Using xnanosleep as it's interrupt resistant.
83 * Sleeping only 100 ms to avoid holding the main thread for too long. */
85 stop
= cycles_counter_update(&s
);
88 /* Restoring previous affinity. */
89 ovs_numa_thread_setaffinity_dump(affinity
);
90 ovs_numa_dump_destroy(affinity
);
94 VLOG_WARN("TSC source is unreliable.");
97 tsc_hz
= (stop
- start
) * 10;
100 VLOG_INFO("Estimated TSC frequency: %"PRIu64
" KHz", tsc_hz
/ 1000);
103 /* Histogram functions. */
106 histogram_walls_set_lin(struct histogram
*hist
, uint32_t min
, uint32_t max
)
110 ovs_assert(min
< max
);
111 for (i
= 0; i
< NUM_BINS
-1; i
++) {
112 hist
->wall
[i
] = min
+ (i
* (max
- min
)) / (NUM_BINS
- 2);
114 hist
->wall
[NUM_BINS
-1] = UINT32_MAX
;
118 histogram_walls_set_log(struct histogram
*hist
, uint32_t min
, uint32_t max
)
120 int i
, start
, bins
, wall
;
121 double log_min
, log_max
;
123 ovs_assert(min
< max
);
137 for (i
= 0; i
< bins
; i
++) {
138 /* Make sure each wall is monotonically increasing. */
139 wall
= MAX(wall
, exp(log_min
+ (i
* (log_max
- log_min
)) / (bins
-1)));
140 hist
->wall
[start
+ i
] = wall
++;
142 if (hist
->wall
[NUM_BINS
-2] < max
) {
143 hist
->wall
[NUM_BINS
-2] = max
;
145 hist
->wall
[NUM_BINS
-1] = UINT32_MAX
;
149 histogram_samples(const struct histogram
*hist
)
151 uint64_t samples
= 0;
153 for (int i
= 0; i
< NUM_BINS
; i
++) {
154 samples
+= hist
->bin
[i
];
160 histogram_clear(struct histogram
*hist
)
164 for (i
= 0; i
< NUM_BINS
; i
++) {
170 history_init(struct history
*h
)
172 memset(h
, 0, sizeof(*h
));
176 pmd_perf_stats_init(struct pmd_perf_stats
*s
)
178 memset(s
, 0, sizeof(*s
));
179 ovs_mutex_init(&s
->stats_mutex
);
180 ovs_mutex_init(&s
->clear_mutex
);
181 /* Logarithmic histogram for cycles/it ranging from 500 to 24M
182 * (corresponding to 200 ns to 9.6 ms at 2.5 GHz TSC clock). */
183 histogram_walls_set_log(&s
->cycles
, 500, 24000000);
184 /* Logarithmic histogram for pkts/it ranging from 0 to 1000. */
185 histogram_walls_set_log(&s
->pkts
, 0, 1000);
186 /* Linear histogram for cycles/pkt ranging from 100 to 30K. */
187 histogram_walls_set_lin(&s
->cycles_per_pkt
, 100, 30000);
188 /* Linear histogram for pkts/rx batch ranging from 0 to 32,
189 * the maximum rx batch size in OVS. */
190 histogram_walls_set_lin(&s
->pkts_per_batch
, 0, 32);
191 /* Linear histogram for upcalls/it ranging from 0 to 30. */
192 histogram_walls_set_lin(&s
->upcalls
, 0, 30);
193 /* Logarithmic histogram for cycles/upcall ranging from 1000 to 1M
194 * (corresponding to 400 ns to 400 us at 2.5 GHz TSC clock). */
195 histogram_walls_set_log(&s
->cycles_per_upcall
, 1000, 1000000);
196 /* Log. histogram for max vhostuser queue fill level from 0 to 512.
197 * 512 is the maximum fill level for a virtio queue with 1024
198 * descriptors (maximum configurable length in Qemu), with the
199 * DPDK 17.11 virtio PMD in the guest. */
200 histogram_walls_set_log(&s
->max_vhost_qfill
, 0, 512);
201 s
->iteration_cnt
= 0;
202 s
->start_ms
= time_msec();
203 s
->log_susp_it
= UINT32_MAX
;
204 s
->log_begin_it
= UINT32_MAX
;
205 s
->log_end_it
= UINT32_MAX
;
206 s
->log_reason
= NULL
;
/* Format the overall performance totals of PMD 's' into 'str': iteration
 * and TSC-cycle usage, rx/tx packet rates, datapath pass breakdown
 * (EMC/SMC/megaflow/upcall) and upcall cost.  'duration' is used as the
 * time base for rates.
 *
 * NOTE(review): this excerpt is missing scattered lines (return type,
 * braces, an early-out guard after the locals, several ds_put_format()
 * call heads and argument lines); the visible tokens are preserved below
 * unchanged.  Column alignment inside the format strings may also have
 * been collapsed by the extraction — do not trust the spacing shown. */
pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s,
    /* [line(s) missing in excerpt] */
    uint64_t stats[PMD_N_STATS];
    /* Conversion factor from TSC cycles to microseconds. */
    double us_per_cycle = 1000000.0 / tsc_hz;
    /* [line(s) missing in excerpt] */
    pmd_perf_read_counters(s, stats);
    uint64_t tot_cycles = stats[PMD_CYCLES_ITER_IDLE] +
                          stats[PMD_CYCLES_ITER_BUSY];
    uint64_t rx_packets = stats[PMD_STAT_RECV];
    uint64_t tx_packets = stats[PMD_STAT_SENT_PKTS];
    uint64_t tx_batches = stats[PMD_STAT_SENT_BATCHES];
    uint64_t passes = stats[PMD_STAT_RECV] +
                      stats[PMD_STAT_RECIRC];
    uint64_t upcalls = stats[PMD_STAT_MISS];
    uint64_t upcall_cycles = stats[PMD_CYCLES_UPCALL];
    /* Iteration counts derived from the pkts/iteration histogram;
     * bin 0 holds the iterations that received no packets. */
    uint64_t tot_iter = histogram_samples(&s->pkts);
    uint64_t idle_iter = s->pkts.bin[0];
    uint64_t busy_iter = tot_iter >= idle_iter ? tot_iter - idle_iter : 0;
    /* [ds_put_format() call head missing in excerpt] */
            " Iterations: %12"PRIu64" (%.2f us/it)\n"
            " - Used TSC cycles: %12"PRIu64" (%5.1f %% of total cycles)\n"
            " - idle iterations: %12"PRIu64" (%5.1f %% of used cycles)\n"
            " - busy iterations: %12"PRIu64" (%5.1f %% of used cycles)\n",
            tot_iter, tot_cycles * us_per_cycle / tot_iter,
            tot_cycles, 100.0 * (tot_cycles / duration) / tsc_hz,
            /* [argument line missing in excerpt] */
            100.0 * stats[PMD_CYCLES_ITER_IDLE] / tot_cycles,
            /* [argument line missing in excerpt] */
            100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles);
    if (rx_packets > 0) {
        /* [ds_put_format() call head missing in excerpt] */
            " Rx packets: %12"PRIu64" (%.0f Kpps, %.0f cycles/pkt)\n"
            " Datapath passes: %12"PRIu64" (%.2f passes/pkt)\n"
            " - EMC hits: %12"PRIu64" (%5.1f %%)\n"
            " - SMC hits: %12"PRIu64" (%5.1f %%)\n"
            " - Megaflow hits: %12"PRIu64" (%5.1f %%, %.2f "
            "subtbl lookups/hit)\n"
            " - Upcalls: %12"PRIu64" (%5.1f %%, %.1f us/upcall)\n"
            " - Lost upcalls: %12"PRIu64" (%5.1f %%)\n",
            rx_packets, (rx_packets / duration) / 1000,
            1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets,
            passes, rx_packets ? 1.0 * passes / rx_packets : 0,
            stats[PMD_STAT_EXACT_HIT],
            100.0 * stats[PMD_STAT_EXACT_HIT] / passes,
            stats[PMD_STAT_SMC_HIT],
            100.0 * stats[PMD_STAT_SMC_HIT] / passes,
            stats[PMD_STAT_MASKED_HIT],
            100.0 * stats[PMD_STAT_MASKED_HIT] / passes,
            /* Guards against division by zero when no megaflow hits. */
            stats[PMD_STAT_MASKED_HIT]
            ? 1.0 * stats[PMD_STAT_MASKED_LOOKUP] / stats[PMD_STAT_MASKED_HIT]
            /* [": 0," line of the ternary missing in excerpt] */
            upcalls, 100.0 * upcalls / passes,
            upcalls ? (upcall_cycles * us_per_cycle) / upcalls : 0,
            stats[PMD_STAT_LOST],
            100.0 * stats[PMD_STAT_LOST] / passes);
    /* ["} else {" missing in excerpt] */
        ds_put_format(str, " Rx packets: %12d\n", 0);
    if (tx_packets > 0) {
        /* [ds_put_format() call head missing in excerpt] */
            " Tx packets: %12"PRIu64" (%.0f Kpps)\n"
            " Tx batches: %12"PRIu64" (%.2f pkts/batch)\n",
            tx_packets, (tx_packets / duration) / 1000,
            tx_batches, 1.0 * tx_packets / tx_batches);
    /* ["} else {" missing in excerpt] */
        ds_put_format(str, " Tx packets: %12d\n\n", 0);
/* Format all performance histograms of PMD 's' into 'str': one column per
 * metric, one row per bin (lower wall value and sample count), a final
 * ">" row for samples above the last wall, and — when iterations were
 * recorded — a trailing table of averages over the measurement period.
 *
 * NOTE(review): this excerpt is missing the return type, braces, the
 * loop-variable declaration and several ds_put_format() call heads;
 * visible tokens are preserved below unchanged.  Format-string spacing
 * may have been collapsed by the extraction. */
pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s)
    ds_put_cstr(str, "Histograms\n");
    /* [ds_put_format() call head missing in excerpt] */
            " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
            "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
            "max vhost qlen", "upcalls/it", "cycles/upcall");
    /* Per-bin rows: wall value and bin count for each of the 7 metrics. */
    for (i = 0; i < NUM_BINS-1; i++) {
        /* [ds_put_format() call head missing in excerpt] */
            " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
            " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64
            " %-9d %-11"PRIu64"\n",
            s->cycles.wall[i], s->cycles.bin[i],
            s->pkts.wall[i],s->pkts.bin[i],
            s->cycles_per_pkt.wall[i], s->cycles_per_pkt.bin[i],
            s->pkts_per_batch.wall[i], s->pkts_per_batch.bin[i],
            s->max_vhost_qfill.wall[i], s->max_vhost_qfill.bin[i],
            s->upcalls.wall[i], s->upcalls.bin[i],
            s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i]);
    /* Catch-all row (">") for the final bin of each metric.
     * [loop close and ds_put_format() call head missing in excerpt] */
            " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
            " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64
            " %-9s %-11"PRIu64"\n",
            ">", s->cycles.bin[i],
            /* [pkts ">" argument row missing in excerpt] */
            ">", s->cycles_per_pkt.bin[i],
            ">", s->pkts_per_batch.bin[i],
            ">", s->max_vhost_qfill.bin[i],
            ">", s->upcalls.bin[i],
            ">", s->cycles_per_upcall.bin[i]);
    /* Averages over the measurement period, only when data was recorded. */
    if (s->totals.iterations > 0) {
        /* [ds_put_format() call head missing in excerpt] */
            "-----------------------------------------------------"
            "-----------------------------------------------------"
            "------------------------------------------------\n");
        /* [ds_put_format() call head missing in excerpt] */
            " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n",
            "cycles/it", "packets/it", "cycles/pkt", "pkts/batch",
            "vhost qlen", "upcalls/it", "cycles/upcall");
        /* [ds_put_format() call head missing in excerpt] */
            " %-21"PRIu64" %-21.5f %-21"PRIu64
            " %-21.5f %-21.5f %-21.5f %-21"PRIu32"\n",
            s->totals.cycles / s->totals.iterations,
            1.0 * s->totals.pkts / s->totals.iterations,
            /* [ternary guard operand lines missing in excerpt below:
             * each "? x : 0" protects a division by a possibly-zero
             * totals field] */
            ? s->totals.busy_cycles / s->totals.pkts : 0,
            ? 1.0 * s->totals.pkts / s->totals.batches : 0,
            1.0 * s->totals.max_vhost_qfill / s->totals.iterations,
            1.0 * s->totals.upcalls / s->totals.iterations,
            ? s->totals.upcall_cycles / s->totals.upcalls : 0);
/* Format the most recent entries of the per-iteration history of 's'
 * into 'str', walking backwards from the newest sample.
 *
 * NOTE(review): this excerpt is missing the trailing parameter of the
 * signature (iteration count), local declarations, a ds_put_format()
 * call head and several argument lines; visible tokens preserved
 * unchanged. */
pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s,
    /* [parameter and local-declaration lines missing in excerpt] */
    struct iter_stats *is;
    /* [line(s) missing in excerpt] */
    ds_put_format(str, " %-17s %-10s %-10s %-10s %-10s "
                  "%-10s %-10s %-10s\n",
                  "iter", "cycles", "packets", "cycles/pkt", "pkts/batch",
                  "vhost qlen", "upcalls", "cycles/upcall");
    /* i counts backwards in time; history_sub() wraps within the ring. */
    for (i = 1; i <= n_iter; i++) {
        index = history_sub(s->iterations.idx, i);
        is = &s->iterations.sample[index];
        /* [ds_put_format() call head missing in excerpt] */
            " %-17"PRIu64" %-11"PRIu64" %-11"PRIu32
            " %-11"PRIu64" %-11"PRIu32" %-11"PRIu32
            " %-11"PRIu32" %-11"PRIu32"\n",
            /* [leading argument lines missing in excerpt] */
            is->pkts ? is->cycles / is->pkts : 0,
            is->batches ? is->pkts / is->batches : 0,
            /* [argument lines missing in excerpt] */
            is->upcalls ? is->upcall_cycles / is->upcalls : 0);
/* Format the last 'n_ms' entries of the per-millisecond history of 's'
 * into 'str', walking backwards from the newest slot.
 *
 * NOTE(review): this excerpt is missing braces, local declarations,
 * ds_put_format() call heads and several argument lines; visible tokens
 * preserved unchanged. */
pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms)
    struct iter_stats *is;
    /* [local declarations and ds_put_format() call head missing] */
        " %-12s %-10s %-10s %-10s %-10s"
        " %-10s %-10s %-10s %-10s\n",
        "ms", "iterations", "cycles/it", "Kpps", "cycles/pkt",
        "pkts/batch", "vhost qlen", "upcalls", "cycles/upcall");
    /* i counts backwards in time; history_sub() wraps within the ring. */
    for (i = 1; i <= n_ms; i++) {
        index = history_sub(s->milliseconds.idx, i);
        is = &s->milliseconds.sample[index];
        /* [ds_put_format() call head missing in excerpt] */
            " %-12"PRIu64" %-11"PRIu32" %-11"PRIu64
            " %-11"PRIu32" %-11"PRIu64" %-11"PRIu32
            " %-11"PRIu32" %-11"PRIu32" %-11"PRIu32"\n",
            /* [leading argument lines missing in excerpt; the visible
             * ternaries each guard a division by a possibly-zero field] */
            is->iterations ? is->cycles / is->iterations : 0,
            /* [argument line missing in excerpt] */
            is->pkts ? is->busy_cycles / is->pkts : 0,
            is->batches ? is->pkts / is->batches : 0,
            /* [ternary guard operand line missing in excerpt] */
            ? is->max_vhost_qfill / is->iterations : 0,
            /* [argument line missing in excerpt] */
            is->upcalls ? is->upcall_cycles / is->upcalls : 0);
413 pmd_perf_read_counters(struct pmd_perf_stats
*s
,
414 uint64_t stats
[PMD_N_STATS
])
418 /* These loops subtracts reference values (.zero[*]) from the counters.
419 * Since loads and stores are relaxed, it might be possible for a .zero[*]
420 * value to be more recent than the current value we're reading from the
421 * counter. This is not a big problem, since these numbers are not
422 * supposed to be 100% accurate, but we should at least make sure that
423 * the result is not negative. */
424 for (int i
= 0; i
< PMD_N_STATS
; i
++) {
425 atomic_read_relaxed(&s
->counters
.n
[i
], &val
);
426 if (val
> s
->counters
.zero
[i
]) {
427 stats
[i
] = val
- s
->counters
.zero
[i
];
434 /* This function clears the PMD performance counters from within the PMD
435 * thread or from another thread when the PMD thread is not executing its
438 pmd_perf_stats_clear_lock(struct pmd_perf_stats
*s
)
439 OVS_REQUIRES(s
->stats_mutex
)
441 ovs_mutex_lock(&s
->clear_mutex
);
442 for (int i
= 0; i
< PMD_N_STATS
; i
++) {
443 atomic_read_relaxed(&s
->counters
.n
[i
], &s
->counters
.zero
[i
]);
445 /* The following stats are only applicable in PMD thread and */
446 memset(&s
->current
, 0 , sizeof(struct iter_stats
));
447 memset(&s
->totals
, 0 , sizeof(struct iter_stats
));
448 histogram_clear(&s
->cycles
);
449 histogram_clear(&s
->pkts
);
450 histogram_clear(&s
->cycles_per_pkt
);
451 histogram_clear(&s
->upcalls
);
452 histogram_clear(&s
->cycles_per_upcall
);
453 histogram_clear(&s
->pkts_per_batch
);
454 histogram_clear(&s
->max_vhost_qfill
);
455 history_init(&s
->iterations
);
456 history_init(&s
->milliseconds
);
457 s
->start_ms
= time_msec();
458 s
->milliseconds
.sample
[0].timestamp
= s
->start_ms
;
459 s
->log_susp_it
= UINT32_MAX
;
460 s
->log_begin_it
= UINT32_MAX
;
461 s
->log_end_it
= UINT32_MAX
;
462 s
->log_reason
= NULL
;
463 /* Clearing finished. */
465 ovs_mutex_unlock(&s
->clear_mutex
);
468 /* This function can be called from the anywhere to clear the stats
469 * of PMD and non-PMD threads. */
471 pmd_perf_stats_clear(struct pmd_perf_stats
*s
)
473 if (ovs_mutex_trylock(&s
->stats_mutex
) == 0) {
474 /* Locking successful. PMD not polling. */
475 pmd_perf_stats_clear_lock(s
);
476 ovs_mutex_unlock(&s
->stats_mutex
);
478 /* Request the polling PMD to clear the stats. There is no need to
479 * block here as stats retrieval is prevented during clearing. */
484 /* Functions recording PMD metrics per iteration. */
487 pmd_perf_start_iteration(struct pmd_perf_stats
*s
)
488 OVS_REQUIRES(s
->stats_mutex
)
491 /* Clear the PMD stats before starting next iteration. */
492 pmd_perf_stats_clear_lock(s
);
495 /* Initialize the current interval stats. */
496 memset(&s
->current
, 0, sizeof(struct iter_stats
));
497 if (OVS_LIKELY(s
->last_tsc
)) {
498 /* We assume here that last_tsc was updated immediately prior at
499 * the end of the previous iteration, or just before the first
501 s
->start_tsc
= s
->last_tsc
;
503 /* In case last_tsc has never been set before. */
504 s
->start_tsc
= cycles_counter_update(s
);
509 pmd_perf_end_iteration(struct pmd_perf_stats
*s
, int rx_packets
,
510 int tx_packets
, bool full_metrics
)
512 uint64_t now_tsc
= cycles_counter_update(s
);
513 struct iter_stats
*cum_ms
;
514 uint64_t cycles
, cycles_per_pkt
= 0;
517 cycles
= now_tsc
- s
->start_tsc
;
518 s
->current
.timestamp
= s
->iteration_cnt
;
519 s
->current
.cycles
= cycles
;
520 s
->current
.pkts
= rx_packets
;
522 if (rx_packets
+ tx_packets
> 0) {
523 pmd_perf_update_counter(s
, PMD_CYCLES_ITER_BUSY
, cycles
);
525 pmd_perf_update_counter(s
, PMD_CYCLES_ITER_IDLE
, cycles
);
527 /* Add iteration samples to histograms. */
528 histogram_add_sample(&s
->cycles
, cycles
);
529 histogram_add_sample(&s
->pkts
, rx_packets
);
535 s
->counters
.n
[PMD_CYCLES_UPCALL
] += s
->current
.upcall_cycles
;
537 if (rx_packets
> 0) {
538 cycles_per_pkt
= cycles
/ rx_packets
;
539 histogram_add_sample(&s
->cycles_per_pkt
, cycles_per_pkt
);
541 histogram_add_sample(&s
->upcalls
, s
->current
.upcalls
);
542 histogram_add_sample(&s
->max_vhost_qfill
, s
->current
.max_vhost_qfill
);
544 /* Add iteration samples to millisecond stats. */
545 cum_ms
= history_current(&s
->milliseconds
);
546 cum_ms
->iterations
++;
547 cum_ms
->cycles
+= cycles
;
548 if (rx_packets
> 0) {
549 cum_ms
->busy_cycles
+= cycles
;
551 cum_ms
->pkts
+= s
->current
.pkts
;
552 cum_ms
->upcalls
+= s
->current
.upcalls
;
553 cum_ms
->upcall_cycles
+= s
->current
.upcall_cycles
;
554 cum_ms
->batches
+= s
->current
.batches
;
555 cum_ms
->max_vhost_qfill
+= s
->current
.max_vhost_qfill
;
558 /* Log suspicious iterations. */
559 if (cycles
> iter_cycle_threshold
) {
560 reason
= "Excessive total cycles";
561 } else if (s
->current
.max_vhost_qfill
>= log_q_thr
) {
562 reason
= "Vhost RX queue full";
564 if (OVS_UNLIKELY(reason
)) {
565 pmd_perf_set_log_susp_iteration(s
, reason
);
566 cycles_counter_update(s
);
569 /* Log iteration interval around suspicious iteration when reaching
570 * the end of the range to be logged. */
571 if (OVS_UNLIKELY(s
->log_end_it
== s
->iterations
.idx
)) {
572 pmd_perf_log_susp_iteration_neighborhood(s
);
573 cycles_counter_update(s
);
577 /* Store in iteration history. This advances the iteration idx and
578 * clears the next slot in the iteration history. */
579 history_store(&s
->iterations
, &s
->current
);
581 if (now_tsc
> s
->next_check_tsc
) {
582 /* Check if ms is completed and store in milliseconds history. */
583 uint64_t now
= time_msec();
584 if (now
!= cum_ms
->timestamp
) {
585 /* Add ms stats to totals. */
586 s
->totals
.iterations
+= cum_ms
->iterations
;
587 s
->totals
.cycles
+= cum_ms
->cycles
;
588 s
->totals
.busy_cycles
+= cum_ms
->busy_cycles
;
589 s
->totals
.pkts
+= cum_ms
->pkts
;
590 s
->totals
.upcalls
+= cum_ms
->upcalls
;
591 s
->totals
.upcall_cycles
+= cum_ms
->upcall_cycles
;
592 s
->totals
.batches
+= cum_ms
->batches
;
593 s
->totals
.max_vhost_qfill
+= cum_ms
->max_vhost_qfill
;
594 cum_ms
= history_next(&s
->milliseconds
);
595 cum_ms
->timestamp
= now
;
597 /* Do the next check after 4 us (10K cycles at 2.5 GHz TSC clock). */
598 s
->next_check_tsc
= cycles_counter_update(s
) + tsc_hz
/ 250000;
602 /* Delay logging of the suspicious iteration and the range of iterations
603 * around it until after the last iteration in the range to be logged.
604 * This avoids any distortion of the measurements through the cost of
608 pmd_perf_set_log_susp_iteration(struct pmd_perf_stats
*s
,
611 if (s
->log_susp_it
== UINT32_MAX
) {
612 /* No logging scheduled yet. */
613 s
->log_susp_it
= s
->iterations
.idx
;
614 s
->log_reason
= reason
;
615 s
->log_begin_it
= history_sub(s
->iterations
.idx
, log_it_before
);
616 s
->log_end_it
= history_add(s
->iterations
.idx
, log_it_after
+ 1);
617 } else if (log_extend
) {
618 /* Logging was initiated earlier, we log the previous suspicious
619 * iteration now and extend the logging interval, if possible. */
620 struct iter_stats
*susp
= &s
->iterations
.sample
[s
->log_susp_it
];
621 uint32_t new_end_it
, old_range
, new_range
;
623 VLOG_WARN_RL(&latency_rl
,
624 "Suspicious iteration (%s): iter=%"PRIu64
625 " duration=%"PRIu64
" us\n",
628 (1000000L * susp
->cycles
) / tsc_hz
);
630 new_end_it
= history_add(s
->iterations
.idx
, log_it_after
+ 1);
631 new_range
= history_sub(new_end_it
, s
->log_begin_it
);
632 old_range
= history_sub(s
->log_end_it
, s
->log_begin_it
);
633 if (new_range
< old_range
) {
634 /* Extended range exceeds history length. */
635 new_end_it
= s
->log_begin_it
;
637 s
->log_susp_it
= s
->iterations
.idx
;
638 s
->log_reason
= reason
;
639 s
->log_end_it
= new_end_it
;
644 pmd_perf_log_susp_iteration_neighborhood(struct pmd_perf_stats
*s
)
646 ovs_assert(s
->log_reason
!= NULL
);
647 ovs_assert(s
->log_susp_it
!= UINT32_MAX
);
649 struct ds log
= DS_EMPTY_INITIALIZER
;
650 struct iter_stats
*susp
= &s
->iterations
.sample
[s
->log_susp_it
];
651 uint32_t range
= history_sub(s
->log_end_it
, s
->log_begin_it
);
653 VLOG_WARN_RL(&latency_rl
,
654 "Suspicious iteration (%s): iter=%"PRIu64
655 " duration=%"PRIu64
" us\n",
658 (1000000L * susp
->cycles
) / tsc_hz
);
660 pmd_perf_format_iteration_history(&log
, s
, range
);
661 VLOG_WARN_RL(&latency_rl
,
662 "Neighborhood of suspicious iteration:\n"
663 "%s", ds_cstr(&log
));
665 s
->log_susp_it
= s
->log_end_it
= s
->log_begin_it
= UINT32_MAX
;
666 s
->log_reason
= NULL
;
669 /* Reset to safe default values to avoid disturbance. */
670 log_it_before
= LOG_IT_BEFORE
;
671 log_it_after
= LOG_IT_AFTER
;
677 pmd_perf_log_set_cmd(struct unixctl_conn
*conn
,
678 int argc
, const char *argv
[],
679 void *aux OVS_UNUSED
)
681 unsigned int it_before
, it_after
, us_thr
, q_thr
;
687 it_before
= log_it_before
;
688 it_after
= log_it_after
;
693 if (!strcmp(argv
[1], "on")) {
697 } else if (!strcmp(argv
[1], "off")) {
701 } else if (!strcmp(argv
[1], "-e")) {
705 } else if (!strcmp(argv
[1], "-ne")) {
709 } else if (!strcmp(argv
[1], "-a") && argc
> 2) {
710 if (str_to_uint(argv
[2], 10, &it_after
)) {
711 if (it_after
> HISTORY_LEN
- 2) {
712 it_after
= HISTORY_LEN
- 2;
720 } else if (!strcmp(argv
[1], "-b") && argc
> 2) {
721 if (str_to_uint(argv
[2], 10, &it_before
)) {
722 if (it_before
> HISTORY_LEN
- 2) {
723 it_before
= HISTORY_LEN
- 2;
731 } else if (!strcmp(argv
[1], "-q") && argc
> 2) {
732 if (!str_to_uint(argv
[2], 10, &q_thr
)) {
738 } else if (!strcmp(argv
[1], "-us") && argc
> 2) {
739 if (!str_to_uint(argv
[2], 10, &us_thr
)) {
750 if (it_before
+ it_after
> HISTORY_LEN
- 2) {
751 it_after
= HISTORY_LEN
- 2 - it_before
;
755 unixctl_command_reply_error(conn
,
756 "Usage: ovs-appctl dpif-netdev/pmd-perf-log-set "
757 "[on|off] [-b before] [-a after] [-e|-ne] "
758 "[-us usec] [-q qlen]");
762 VLOG_INFO("pmd-perf-log-set: %s, before=%d, after=%d, extend=%s, "
763 "us_thr=%d, q_thr=%d\n",
764 on
? "on" : "off", it_before
, it_after
,
765 extend
? "true" : "false", us_thr
, q_thr
);
768 log_it_before
= it_before
;
769 log_it_after
= it_after
;
772 iter_cycle_threshold
= (log_us_thr
* tsc_hz
) / 1000000L;
774 unixctl_command_reply(conn
, "");