]>
Commit | Line | Data |
---|---|---|
82a48ead JS |
1 | /* |
2 | * Copyright (c) 2017 Ericsson AB. | |
3 | * | |
4 | * Licensed under the Apache License, Version 2.0 (the "License"); | |
5 | * you may not use this file except in compliance with the License. | |
6 | * You may obtain a copy of the License at: | |
7 | * | |
8 | * http://www.apache.org/licenses/LICENSE-2.0 | |
9 | * | |
10 | * Unless required by applicable law or agreed to in writing, software | |
11 | * distributed under the License is distributed on an "AS IS" BASIS, | |
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
13 | * See the License for the specific language governing permissions and | |
14 | * limitations under the License. | |
15 | */ | |
16 | ||
17 | #include <config.h> | |
79f36875 | 18 | #include <stdint.h> |
82a48ead | 19 | |
1276e3db | 20 | #include "dpdk.h" |
79f36875 | 21 | #include "dpif-netdev-perf.h" |
82a48ead JS |
22 | #include "openvswitch/dynamic-string.h" |
23 | #include "openvswitch/vlog.h" | |
1276e3db | 24 | #include "ovs-numa.h" |
79f36875 | 25 | #include "ovs-thread.h" |
82a48ead JS |
26 | #include "timeval.h" |
27 | ||
28 | VLOG_DEFINE_THIS_MODULE(pmd_perf); | |
29 | ||
7178fefb JS |
30 | #define ITER_US_THRESHOLD 250 /* Warning threshold for iteration duration |
31 | in microseconds. */ | |
32 | #define VHOST_QUEUE_FULL 128 /* Size of the virtio TX queue. */ | |
33 | #define LOG_IT_BEFORE 5 /* Number of iterations to log before | |
34 | suspicious iteration. */ | |
35 | #define LOG_IT_AFTER 5 /* Number of iterations to log after | |
36 | suspicious iteration. */ | |
37 | ||
38 | bool log_enabled = false; | |
39 | bool log_extend = false; | |
40 | static uint32_t log_it_before = LOG_IT_BEFORE; | |
41 | static uint32_t log_it_after = LOG_IT_AFTER; | |
42 | static uint32_t log_us_thr = ITER_US_THRESHOLD; | |
43 | uint32_t log_q_thr = VHOST_QUEUE_FULL; | |
44 | uint64_t iter_cycle_threshold; | |
45 | ||
46 | static struct vlog_rate_limit latency_rl = VLOG_RATE_LIMIT_INIT(600, 600); | |
47 | ||
1276e3db IM |
48 | static uint64_t tsc_hz = 1; |
49 | ||
50 | void | |
51 | pmd_perf_estimate_tsc_frequency(void) | |
79f36875 | 52 | { |
1276e3db IM |
53 | #ifdef DPDK_NETDEV |
54 | if (dpdk_available()) { | |
55 | tsc_hz = rte_get_tsc_hz(); | |
56 | } | |
57 | if (tsc_hz > 1) { | |
58 | VLOG_INFO("DPDK provided TSC frequency: %"PRIu64" KHz", tsc_hz / 1000); | |
59 | return; | |
60 | } | |
79f36875 | 61 | #endif |
1276e3db IM |
62 | struct ovs_numa_dump *affinity; |
63 | struct pmd_perf_stats s; | |
64 | uint64_t start, stop; | |
65 | ||
66 | /* DPDK is not available or returned unreliable value. | |
67 | * Trying to estimate. */ | |
68 | affinity = ovs_numa_thread_getaffinity_dump(); | |
69 | if (affinity) { | |
70 | const struct ovs_numa_info_core *core; | |
71 | ||
72 | FOR_EACH_CORE_ON_DUMP (core, affinity) { | |
73 | /* Setting affinity to a single core from the affinity mask to | |
74 | * avoid re-scheduling to another core while sleeping. */ | |
75 | ovs_numa_thread_setaffinity_core(core->core_id); | |
76 | break; | |
77 | } | |
78 | } | |
79 | ||
96445fa4 | 80 | memset(&s, 0, sizeof s); |
1276e3db IM |
81 | start = cycles_counter_update(&s); |
82 | /* Using xnanosleep as it's interrupt resistant. | |
83 | * Sleeping only 100 ms to avoid holding the main thread for too long. */ | |
84 | xnanosleep(1E8); | |
85 | stop = cycles_counter_update(&s); | |
86 | ||
87 | if (affinity) { | |
88 | /* Restoring previous affinity. */ | |
89 | ovs_numa_thread_setaffinity_dump(affinity); | |
90 | ovs_numa_dump_destroy(affinity); | |
91 | } | |
92 | ||
93 | if (stop <= start) { | |
94 | VLOG_WARN("TSC source is unreliable."); | |
95 | tsc_hz = 1; | |
96 | } else { | |
97 | tsc_hz = (stop - start) * 10; | |
98 | } | |
99 | ||
100 | VLOG_INFO("Estimated TSC frequency: %"PRIu64" KHz", tsc_hz / 1000); | |
101 | } | |
79f36875 JS |
102 | |
103 | /* Histogram functions. */ | |
104 | ||
105 | static void | |
106 | histogram_walls_set_lin(struct histogram *hist, uint32_t min, uint32_t max) | |
107 | { | |
108 | int i; | |
109 | ||
110 | ovs_assert(min < max); | |
111 | for (i = 0; i < NUM_BINS-1; i++) { | |
112 | hist->wall[i] = min + (i * (max - min)) / (NUM_BINS - 2); | |
113 | } | |
114 | hist->wall[NUM_BINS-1] = UINT32_MAX; | |
115 | } | |
116 | ||
117 | static void | |
118 | histogram_walls_set_log(struct histogram *hist, uint32_t min, uint32_t max) | |
119 | { | |
120 | int i, start, bins, wall; | |
121 | double log_min, log_max; | |
122 | ||
123 | ovs_assert(min < max); | |
124 | if (min > 0) { | |
125 | log_min = log(min); | |
126 | log_max = log(max); | |
127 | start = 0; | |
128 | bins = NUM_BINS - 1; | |
129 | } else { | |
130 | hist->wall[0] = 0; | |
131 | log_min = log(1); | |
132 | log_max = log(max); | |
133 | start = 1; | |
134 | bins = NUM_BINS - 2; | |
135 | } | |
136 | wall = start; | |
137 | for (i = 0; i < bins; i++) { | |
138 | /* Make sure each wall is monotonically increasing. */ | |
139 | wall = MAX(wall, exp(log_min + (i * (log_max - log_min)) / (bins-1))); | |
140 | hist->wall[start + i] = wall++; | |
141 | } | |
142 | if (hist->wall[NUM_BINS-2] < max) { | |
143 | hist->wall[NUM_BINS-2] = max; | |
144 | } | |
145 | hist->wall[NUM_BINS-1] = UINT32_MAX; | |
146 | } | |
147 | ||
148 | uint64_t | |
149 | histogram_samples(const struct histogram *hist) | |
150 | { | |
151 | uint64_t samples = 0; | |
152 | ||
153 | for (int i = 0; i < NUM_BINS; i++) { | |
154 | samples += hist->bin[i]; | |
155 | } | |
156 | return samples; | |
157 | } | |
158 | ||
159 | static void | |
160 | histogram_clear(struct histogram *hist) | |
161 | { | |
162 | int i; | |
163 | ||
164 | for (i = 0; i < NUM_BINS; i++) { | |
165 | hist->bin[i] = 0; | |
166 | } | |
167 | } | |
168 | ||
169 | static void | |
170 | history_init(struct history *h) | |
171 | { | |
172 | memset(h, 0, sizeof(*h)); | |
173 | } | |
174 | ||
82a48ead JS |
175 | void |
176 | pmd_perf_stats_init(struct pmd_perf_stats *s) | |
177 | { | |
79f36875 JS |
178 | memset(s, 0, sizeof(*s)); |
179 | ovs_mutex_init(&s->stats_mutex); | |
180 | ovs_mutex_init(&s->clear_mutex); | |
181 | /* Logarithmic histogram for cycles/it ranging from 500 to 24M | |
182 | * (corresponding to 200 ns to 9.6 ms at 2.5 GHz TSC clock). */ | |
183 | histogram_walls_set_log(&s->cycles, 500, 24000000); | |
184 | /* Logarithmic histogram for pkts/it ranging from 0 to 1000. */ | |
185 | histogram_walls_set_log(&s->pkts, 0, 1000); | |
186 | /* Linear histogram for cycles/pkt ranging from 100 to 30K. */ | |
187 | histogram_walls_set_lin(&s->cycles_per_pkt, 100, 30000); | |
188 | /* Linear histogram for pkts/rx batch ranging from 0 to 32, | |
189 | * the maximum rx batch size in OVS. */ | |
190 | histogram_walls_set_lin(&s->pkts_per_batch, 0, 32); | |
191 | /* Linear histogram for upcalls/it ranging from 0 to 30. */ | |
192 | histogram_walls_set_lin(&s->upcalls, 0, 30); | |
193 | /* Logarithmic histogram for cycles/upcall ranging from 1000 to 1M | |
194 | * (corresponding to 400 ns to 400 us at 2.5 GHz TSC clock). */ | |
195 | histogram_walls_set_log(&s->cycles_per_upcall, 1000, 1000000); | |
196 | /* Log. histogram for max vhostuser queue fill level from 0 to 512. | |
197 | * 512 is the maximum fill level for a virtio queue with 1024 | |
198 | * descriptors (maximum configurable length in Qemu), with the | |
199 | * DPDK 17.11 virtio PMD in the guest. */ | |
200 | histogram_walls_set_log(&s->max_vhost_qfill, 0, 512); | |
201 | s->iteration_cnt = 0; | |
202 | s->start_ms = time_msec(); | |
7178fefb JS |
203 | s->log_susp_it = UINT32_MAX; |
204 | s->log_begin_it = UINT32_MAX; | |
205 | s->log_end_it = UINT32_MAX; | |
206 | s->log_reason = NULL; | |
79f36875 JS |
207 | } |
208 | ||
209 | void | |
210 | pmd_perf_format_overall_stats(struct ds *str, struct pmd_perf_stats *s, | |
211 | double duration) | |
212 | { | |
213 | uint64_t stats[PMD_N_STATS]; | |
6794be1d | 214 | double us_per_cycle = 1000000.0 / tsc_hz; |
79f36875 JS |
215 | |
216 | if (duration == 0) { | |
217 | return; | |
218 | } | |
219 | ||
220 | pmd_perf_read_counters(s, stats); | |
221 | uint64_t tot_cycles = stats[PMD_CYCLES_ITER_IDLE] + | |
222 | stats[PMD_CYCLES_ITER_BUSY]; | |
223 | uint64_t rx_packets = stats[PMD_STAT_RECV]; | |
224 | uint64_t tx_packets = stats[PMD_STAT_SENT_PKTS]; | |
225 | uint64_t tx_batches = stats[PMD_STAT_SENT_BATCHES]; | |
226 | uint64_t passes = stats[PMD_STAT_RECV] + | |
227 | stats[PMD_STAT_RECIRC]; | |
228 | uint64_t upcalls = stats[PMD_STAT_MISS]; | |
229 | uint64_t upcall_cycles = stats[PMD_CYCLES_UPCALL]; | |
230 | uint64_t tot_iter = histogram_samples(&s->pkts); | |
231 | uint64_t idle_iter = s->pkts.bin[0]; | |
232 | uint64_t busy_iter = tot_iter >= idle_iter ? tot_iter - idle_iter : 0; | |
233 | ||
234 | ds_put_format(str, | |
6794be1d IM |
235 | " Iterations: %12"PRIu64" (%.2f us/it)\n" |
236 | " - Used TSC cycles: %12"PRIu64" (%5.1f %% of total cycles)\n" | |
237 | " - idle iterations: %12"PRIu64" (%5.1f %% of used cycles)\n" | |
238 | " - busy iterations: %12"PRIu64" (%5.1f %% of used cycles)\n", | |
79f36875 | 239 | tot_iter, tot_cycles * us_per_cycle / tot_iter, |
6794be1d | 240 | tot_cycles, 100.0 * (tot_cycles / duration) / tsc_hz, |
79f36875 JS |
241 | idle_iter, |
242 | 100.0 * stats[PMD_CYCLES_ITER_IDLE] / tot_cycles, | |
243 | busy_iter, | |
244 | 100.0 * stats[PMD_CYCLES_ITER_BUSY] / tot_cycles); | |
245 | if (rx_packets > 0) { | |
246 | ds_put_format(str, | |
6794be1d IM |
247 | " Rx packets: %12"PRIu64" (%.0f Kpps, %.0f cycles/pkt)\n" |
248 | " Datapath passes: %12"PRIu64" (%.2f passes/pkt)\n" | |
249 | " - EMC hits: %12"PRIu64" (%5.1f %%)\n" | |
250 | " - SMC hits: %12"PRIu64" (%5.1f %%)\n" | |
251 | " - Megaflow hits: %12"PRIu64" (%5.1f %%, %.2f " | |
252 | "subtbl lookups/hit)\n" | |
253 | " - Upcalls: %12"PRIu64" (%5.1f %%, %.1f us/upcall)\n" | |
254 | " - Lost upcalls: %12"PRIu64" (%5.1f %%)\n", | |
79f36875 JS |
255 | rx_packets, (rx_packets / duration) / 1000, |
256 | 1.0 * stats[PMD_CYCLES_ITER_BUSY] / rx_packets, | |
257 | passes, rx_packets ? 1.0 * passes / rx_packets : 0, | |
258 | stats[PMD_STAT_EXACT_HIT], | |
259 | 100.0 * stats[PMD_STAT_EXACT_HIT] / passes, | |
21e9b77b IM |
260 | stats[PMD_STAT_SMC_HIT], |
261 | 100.0 * stats[PMD_STAT_SMC_HIT] / passes, | |
79f36875 JS |
262 | stats[PMD_STAT_MASKED_HIT], |
263 | 100.0 * stats[PMD_STAT_MASKED_HIT] / passes, | |
264 | stats[PMD_STAT_MASKED_HIT] | |
265 | ? 1.0 * stats[PMD_STAT_MASKED_LOOKUP] / stats[PMD_STAT_MASKED_HIT] | |
266 | : 0, | |
267 | upcalls, 100.0 * upcalls / passes, | |
268 | upcalls ? (upcall_cycles * us_per_cycle) / upcalls : 0, | |
269 | stats[PMD_STAT_LOST], | |
270 | 100.0 * stats[PMD_STAT_LOST] / passes); | |
271 | } else { | |
6794be1d | 272 | ds_put_format(str, " Rx packets: %12d\n", 0); |
79f36875 JS |
273 | } |
274 | if (tx_packets > 0) { | |
275 | ds_put_format(str, | |
6794be1d IM |
276 | " Tx packets: %12"PRIu64" (%.0f Kpps)\n" |
277 | " Tx batches: %12"PRIu64" (%.2f pkts/batch)\n", | |
79f36875 JS |
278 | tx_packets, (tx_packets / duration) / 1000, |
279 | tx_batches, 1.0 * tx_packets / tx_batches); | |
280 | } else { | |
6794be1d | 281 | ds_put_format(str, " Tx packets: %12d\n\n", 0); |
79f36875 JS |
282 | } |
283 | } | |
284 | ||
285 | void | |
286 | pmd_perf_format_histograms(struct ds *str, struct pmd_perf_stats *s) | |
287 | { | |
288 | int i; | |
289 | ||
290 | ds_put_cstr(str, "Histograms\n"); | |
291 | ds_put_format(str, | |
292 | " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n", | |
293 | "cycles/it", "packets/it", "cycles/pkt", "pkts/batch", | |
294 | "max vhost qlen", "upcalls/it", "cycles/upcall"); | |
295 | for (i = 0; i < NUM_BINS-1; i++) { | |
296 | ds_put_format(str, | |
297 | " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64 | |
298 | " %-9d %-11"PRIu64" %-9d %-11"PRIu64" %-9d %-11"PRIu64 | |
299 | " %-9d %-11"PRIu64"\n", | |
300 | s->cycles.wall[i], s->cycles.bin[i], | |
301 | s->pkts.wall[i],s->pkts.bin[i], | |
302 | s->cycles_per_pkt.wall[i], s->cycles_per_pkt.bin[i], | |
303 | s->pkts_per_batch.wall[i], s->pkts_per_batch.bin[i], | |
304 | s->max_vhost_qfill.wall[i], s->max_vhost_qfill.bin[i], | |
305 | s->upcalls.wall[i], s->upcalls.bin[i], | |
306 | s->cycles_per_upcall.wall[i], s->cycles_per_upcall.bin[i]); | |
307 | } | |
308 | ds_put_format(str, | |
309 | " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64 | |
310 | " %-9s %-11"PRIu64" %-9s %-11"PRIu64" %-9s %-11"PRIu64 | |
311 | " %-9s %-11"PRIu64"\n", | |
312 | ">", s->cycles.bin[i], | |
313 | ">", s->pkts.bin[i], | |
314 | ">", s->cycles_per_pkt.bin[i], | |
315 | ">", s->pkts_per_batch.bin[i], | |
316 | ">", s->max_vhost_qfill.bin[i], | |
317 | ">", s->upcalls.bin[i], | |
318 | ">", s->cycles_per_upcall.bin[i]); | |
319 | if (s->totals.iterations > 0) { | |
320 | ds_put_cstr(str, | |
321 | "-----------------------------------------------------" | |
322 | "-----------------------------------------------------" | |
323 | "------------------------------------------------\n"); | |
324 | ds_put_format(str, | |
325 | " %-21s %-21s %-21s %-21s %-21s %-21s %-21s\n", | |
326 | "cycles/it", "packets/it", "cycles/pkt", "pkts/batch", | |
327 | "vhost qlen", "upcalls/it", "cycles/upcall"); | |
328 | ds_put_format(str, | |
329 | " %-21"PRIu64" %-21.5f %-21"PRIu64 | |
330 | " %-21.5f %-21.5f %-21.5f %-21"PRIu32"\n", | |
331 | s->totals.cycles / s->totals.iterations, | |
332 | 1.0 * s->totals.pkts / s->totals.iterations, | |
333 | s->totals.pkts | |
334 | ? s->totals.busy_cycles / s->totals.pkts : 0, | |
335 | s->totals.batches | |
336 | ? 1.0 * s->totals.pkts / s->totals.batches : 0, | |
337 | 1.0 * s->totals.max_vhost_qfill / s->totals.iterations, | |
338 | 1.0 * s->totals.upcalls / s->totals.iterations, | |
339 | s->totals.upcalls | |
340 | ? s->totals.upcall_cycles / s->totals.upcalls : 0); | |
341 | } | |
342 | } | |
343 | ||
344 | void | |
345 | pmd_perf_format_iteration_history(struct ds *str, struct pmd_perf_stats *s, | |
346 | int n_iter) | |
347 | { | |
348 | struct iter_stats *is; | |
349 | size_t index; | |
350 | int i; | |
351 | ||
352 | if (n_iter == 0) { | |
353 | return; | |
354 | } | |
355 | ds_put_format(str, " %-17s %-10s %-10s %-10s %-10s " | |
356 | "%-10s %-10s %-10s\n", | |
357 | "iter", "cycles", "packets", "cycles/pkt", "pkts/batch", | |
358 | "vhost qlen", "upcalls", "cycles/upcall"); | |
359 | for (i = 1; i <= n_iter; i++) { | |
360 | index = history_sub(s->iterations.idx, i); | |
361 | is = &s->iterations.sample[index]; | |
362 | ds_put_format(str, | |
363 | " %-17"PRIu64" %-11"PRIu64" %-11"PRIu32 | |
364 | " %-11"PRIu64" %-11"PRIu32" %-11"PRIu32 | |
365 | " %-11"PRIu32" %-11"PRIu32"\n", | |
366 | is->timestamp, | |
367 | is->cycles, | |
368 | is->pkts, | |
369 | is->pkts ? is->cycles / is->pkts : 0, | |
370 | is->batches ? is->pkts / is->batches : 0, | |
371 | is->max_vhost_qfill, | |
372 | is->upcalls, | |
373 | is->upcalls ? is->upcall_cycles / is->upcalls : 0); | |
374 | } | |
375 | } | |
376 | ||
377 | void | |
378 | pmd_perf_format_ms_history(struct ds *str, struct pmd_perf_stats *s, int n_ms) | |
379 | { | |
380 | struct iter_stats *is; | |
381 | size_t index; | |
382 | int i; | |
383 | ||
384 | if (n_ms == 0) { | |
385 | return; | |
386 | } | |
387 | ds_put_format(str, | |
388 | " %-12s %-10s %-10s %-10s %-10s" | |
389 | " %-10s %-10s %-10s %-10s\n", | |
390 | "ms", "iterations", "cycles/it", "Kpps", "cycles/pkt", | |
391 | "pkts/batch", "vhost qlen", "upcalls", "cycles/upcall"); | |
392 | for (i = 1; i <= n_ms; i++) { | |
393 | index = history_sub(s->milliseconds.idx, i); | |
394 | is = &s->milliseconds.sample[index]; | |
395 | ds_put_format(str, | |
396 | " %-12"PRIu64" %-11"PRIu32" %-11"PRIu64 | |
397 | " %-11"PRIu32" %-11"PRIu64" %-11"PRIu32 | |
398 | " %-11"PRIu32" %-11"PRIu32" %-11"PRIu32"\n", | |
399 | is->timestamp, | |
400 | is->iterations, | |
401 | is->iterations ? is->cycles / is->iterations : 0, | |
402 | is->pkts, | |
403 | is->pkts ? is->busy_cycles / is->pkts : 0, | |
404 | is->batches ? is->pkts / is->batches : 0, | |
405 | is->iterations | |
406 | ? is->max_vhost_qfill / is->iterations : 0, | |
407 | is->upcalls, | |
408 | is->upcalls ? is->upcall_cycles / is->upcalls : 0); | |
409 | } | |
82a48ead JS |
410 | } |
411 | ||
412 | void | |
413 | pmd_perf_read_counters(struct pmd_perf_stats *s, | |
414 | uint64_t stats[PMD_N_STATS]) | |
415 | { | |
416 | uint64_t val; | |
417 | ||
418 | /* These loops subtracts reference values (.zero[*]) from the counters. | |
419 | * Since loads and stores are relaxed, it might be possible for a .zero[*] | |
420 | * value to be more recent than the current value we're reading from the | |
421 | * counter. This is not a big problem, since these numbers are not | |
422 | * supposed to be 100% accurate, but we should at least make sure that | |
423 | * the result is not negative. */ | |
424 | for (int i = 0; i < PMD_N_STATS; i++) { | |
425 | atomic_read_relaxed(&s->counters.n[i], &val); | |
426 | if (val > s->counters.zero[i]) { | |
427 | stats[i] = val - s->counters.zero[i]; | |
428 | } else { | |
429 | stats[i] = 0; | |
430 | } | |
431 | } | |
432 | } | |
433 | ||
79f36875 JS |
434 | /* This function clears the PMD performance counters from within the PMD |
435 | * thread or from another thread when the PMD thread is not executing its | |
436 | * poll loop. */ | |
82a48ead | 437 | void |
79f36875 JS |
438 | pmd_perf_stats_clear_lock(struct pmd_perf_stats *s) |
439 | OVS_REQUIRES(s->stats_mutex) | |
82a48ead | 440 | { |
79f36875 | 441 | ovs_mutex_lock(&s->clear_mutex); |
82a48ead JS |
442 | for (int i = 0; i < PMD_N_STATS; i++) { |
443 | atomic_read_relaxed(&s->counters.n[i], &s->counters.zero[i]); | |
444 | } | |
79f36875 JS |
445 | /* The following stats are only applicable in PMD thread and */ |
446 | memset(&s->current, 0 , sizeof(struct iter_stats)); | |
447 | memset(&s->totals, 0 , sizeof(struct iter_stats)); | |
448 | histogram_clear(&s->cycles); | |
449 | histogram_clear(&s->pkts); | |
450 | histogram_clear(&s->cycles_per_pkt); | |
451 | histogram_clear(&s->upcalls); | |
452 | histogram_clear(&s->cycles_per_upcall); | |
453 | histogram_clear(&s->pkts_per_batch); | |
454 | histogram_clear(&s->max_vhost_qfill); | |
455 | history_init(&s->iterations); | |
456 | history_init(&s->milliseconds); | |
457 | s->start_ms = time_msec(); | |
458 | s->milliseconds.sample[0].timestamp = s->start_ms; | |
7178fefb JS |
459 | s->log_susp_it = UINT32_MAX; |
460 | s->log_begin_it = UINT32_MAX; | |
461 | s->log_end_it = UINT32_MAX; | |
462 | s->log_reason = NULL; | |
79f36875 JS |
463 | /* Clearing finished. */ |
464 | s->clear = false; | |
465 | ovs_mutex_unlock(&s->clear_mutex); | |
466 | } | |
467 | ||
468 | /* This function can be called from the anywhere to clear the stats | |
469 | * of PMD and non-PMD threads. */ | |
470 | void | |
471 | pmd_perf_stats_clear(struct pmd_perf_stats *s) | |
472 | { | |
473 | if (ovs_mutex_trylock(&s->stats_mutex) == 0) { | |
474 | /* Locking successful. PMD not polling. */ | |
475 | pmd_perf_stats_clear_lock(s); | |
476 | ovs_mutex_unlock(&s->stats_mutex); | |
477 | } else { | |
478 | /* Request the polling PMD to clear the stats. There is no need to | |
479 | * block here as stats retrieval is prevented during clearing. */ | |
480 | s->clear = true; | |
481 | } | |
482 | } | |
483 | ||
484 | /* Functions recording PMD metrics per iteration. */ | |
485 | ||
12028f5f | 486 | void |
79f36875 JS |
487 | pmd_perf_start_iteration(struct pmd_perf_stats *s) |
488 | OVS_REQUIRES(s->stats_mutex) | |
489 | { | |
490 | if (s->clear) { | |
491 | /* Clear the PMD stats before starting next iteration. */ | |
492 | pmd_perf_stats_clear_lock(s); | |
493 | } | |
494 | s->iteration_cnt++; | |
495 | /* Initialize the current interval stats. */ | |
496 | memset(&s->current, 0, sizeof(struct iter_stats)); | |
497 | if (OVS_LIKELY(s->last_tsc)) { | |
498 | /* We assume here that last_tsc was updated immediately prior at | |
499 | * the end of the previous iteration, or just before the first | |
500 | * iteration. */ | |
501 | s->start_tsc = s->last_tsc; | |
502 | } else { | |
503 | /* In case last_tsc has never been set before. */ | |
504 | s->start_tsc = cycles_counter_update(s); | |
505 | } | |
506 | } | |
507 | ||
12028f5f | 508 | void |
79f36875 JS |
509 | pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets, |
510 | int tx_packets, bool full_metrics) | |
511 | { | |
512 | uint64_t now_tsc = cycles_counter_update(s); | |
513 | struct iter_stats *cum_ms; | |
514 | uint64_t cycles, cycles_per_pkt = 0; | |
7178fefb | 515 | char *reason = NULL; |
79f36875 JS |
516 | |
517 | cycles = now_tsc - s->start_tsc; | |
518 | s->current.timestamp = s->iteration_cnt; | |
519 | s->current.cycles = cycles; | |
520 | s->current.pkts = rx_packets; | |
521 | ||
522 | if (rx_packets + tx_packets > 0) { | |
523 | pmd_perf_update_counter(s, PMD_CYCLES_ITER_BUSY, cycles); | |
524 | } else { | |
525 | pmd_perf_update_counter(s, PMD_CYCLES_ITER_IDLE, cycles); | |
526 | } | |
527 | /* Add iteration samples to histograms. */ | |
528 | histogram_add_sample(&s->cycles, cycles); | |
529 | histogram_add_sample(&s->pkts, rx_packets); | |
530 | ||
531 | if (!full_metrics) { | |
532 | return; | |
533 | } | |
534 | ||
535 | s->counters.n[PMD_CYCLES_UPCALL] += s->current.upcall_cycles; | |
536 | ||
537 | if (rx_packets > 0) { | |
538 | cycles_per_pkt = cycles / rx_packets; | |
539 | histogram_add_sample(&s->cycles_per_pkt, cycles_per_pkt); | |
540 | } | |
79f36875 | 541 | histogram_add_sample(&s->upcalls, s->current.upcalls); |
79f36875 JS |
542 | histogram_add_sample(&s->max_vhost_qfill, s->current.max_vhost_qfill); |
543 | ||
544 | /* Add iteration samples to millisecond stats. */ | |
545 | cum_ms = history_current(&s->milliseconds); | |
546 | cum_ms->iterations++; | |
547 | cum_ms->cycles += cycles; | |
548 | if (rx_packets > 0) { | |
549 | cum_ms->busy_cycles += cycles; | |
550 | } | |
551 | cum_ms->pkts += s->current.pkts; | |
552 | cum_ms->upcalls += s->current.upcalls; | |
553 | cum_ms->upcall_cycles += s->current.upcall_cycles; | |
554 | cum_ms->batches += s->current.batches; | |
555 | cum_ms->max_vhost_qfill += s->current.max_vhost_qfill; | |
556 | ||
7178fefb JS |
557 | if (log_enabled) { |
558 | /* Log suspicious iterations. */ | |
559 | if (cycles > iter_cycle_threshold) { | |
560 | reason = "Excessive total cycles"; | |
561 | } else if (s->current.max_vhost_qfill >= log_q_thr) { | |
562 | reason = "Vhost RX queue full"; | |
563 | } | |
564 | if (OVS_UNLIKELY(reason)) { | |
565 | pmd_perf_set_log_susp_iteration(s, reason); | |
566 | cycles_counter_update(s); | |
567 | } | |
568 | ||
569 | /* Log iteration interval around suspicious iteration when reaching | |
570 | * the end of the range to be logged. */ | |
571 | if (OVS_UNLIKELY(s->log_end_it == s->iterations.idx)) { | |
572 | pmd_perf_log_susp_iteration_neighborhood(s); | |
573 | cycles_counter_update(s); | |
574 | } | |
575 | } | |
576 | ||
79f36875 JS |
577 | /* Store in iteration history. This advances the iteration idx and |
578 | * clears the next slot in the iteration history. */ | |
579 | history_store(&s->iterations, &s->current); | |
7178fefb | 580 | |
79f36875 JS |
581 | if (now_tsc > s->next_check_tsc) { |
582 | /* Check if ms is completed and store in milliseconds history. */ | |
583 | uint64_t now = time_msec(); | |
584 | if (now != cum_ms->timestamp) { | |
585 | /* Add ms stats to totals. */ | |
586 | s->totals.iterations += cum_ms->iterations; | |
587 | s->totals.cycles += cum_ms->cycles; | |
588 | s->totals.busy_cycles += cum_ms->busy_cycles; | |
589 | s->totals.pkts += cum_ms->pkts; | |
590 | s->totals.upcalls += cum_ms->upcalls; | |
591 | s->totals.upcall_cycles += cum_ms->upcall_cycles; | |
592 | s->totals.batches += cum_ms->batches; | |
593 | s->totals.max_vhost_qfill += cum_ms->max_vhost_qfill; | |
594 | cum_ms = history_next(&s->milliseconds); | |
595 | cum_ms->timestamp = now; | |
596 | } | |
7f26e411 | 597 | /* Do the next check after 4 us (10K cycles at 2.5 GHz TSC clock). */ |
1276e3db | 598 | s->next_check_tsc = cycles_counter_update(s) + tsc_hz / 250000; |
79f36875 | 599 | } |
82a48ead | 600 | } |
7178fefb JS |
601 | |
602 | /* Delay logging of the suspicious iteration and the range of iterations | |
603 | * around it until after the last iteration in the range to be logged. | |
604 | * This avoids any distortion of the measurements through the cost of | |
605 | * logging itself. */ | |
606 | ||
607 | void | |
608 | pmd_perf_set_log_susp_iteration(struct pmd_perf_stats *s, | |
609 | char *reason) | |
610 | { | |
611 | if (s->log_susp_it == UINT32_MAX) { | |
612 | /* No logging scheduled yet. */ | |
613 | s->log_susp_it = s->iterations.idx; | |
614 | s->log_reason = reason; | |
615 | s->log_begin_it = history_sub(s->iterations.idx, log_it_before); | |
616 | s->log_end_it = history_add(s->iterations.idx, log_it_after + 1); | |
617 | } else if (log_extend) { | |
618 | /* Logging was initiated earlier, we log the previous suspicious | |
619 | * iteration now and extend the logging interval, if possible. */ | |
620 | struct iter_stats *susp = &s->iterations.sample[s->log_susp_it]; | |
621 | uint32_t new_end_it, old_range, new_range; | |
622 | ||
623 | VLOG_WARN_RL(&latency_rl, | |
624 | "Suspicious iteration (%s): iter=%"PRIu64 | |
625 | " duration=%"PRIu64" us\n", | |
626 | s->log_reason, | |
627 | susp->timestamp, | |
1276e3db | 628 | (1000000L * susp->cycles) / tsc_hz); |
7178fefb JS |
629 | |
630 | new_end_it = history_add(s->iterations.idx, log_it_after + 1); | |
631 | new_range = history_sub(new_end_it, s->log_begin_it); | |
632 | old_range = history_sub(s->log_end_it, s->log_begin_it); | |
633 | if (new_range < old_range) { | |
634 | /* Extended range exceeds history length. */ | |
635 | new_end_it = s->log_begin_it; | |
636 | } | |
637 | s->log_susp_it = s->iterations.idx; | |
638 | s->log_reason = reason; | |
639 | s->log_end_it = new_end_it; | |
640 | } | |
641 | } | |
642 | ||
643 | void | |
644 | pmd_perf_log_susp_iteration_neighborhood(struct pmd_perf_stats *s) | |
645 | { | |
646 | ovs_assert(s->log_reason != NULL); | |
647 | ovs_assert(s->log_susp_it != UINT32_MAX); | |
648 | ||
649 | struct ds log = DS_EMPTY_INITIALIZER; | |
650 | struct iter_stats *susp = &s->iterations.sample[s->log_susp_it]; | |
651 | uint32_t range = history_sub(s->log_end_it, s->log_begin_it); | |
652 | ||
653 | VLOG_WARN_RL(&latency_rl, | |
654 | "Suspicious iteration (%s): iter=%"PRIu64 | |
655 | " duration=%"PRIu64" us\n", | |
656 | s->log_reason, | |
657 | susp->timestamp, | |
1276e3db | 658 | (1000000L * susp->cycles) / tsc_hz); |
7178fefb JS |
659 | |
660 | pmd_perf_format_iteration_history(&log, s, range); | |
661 | VLOG_WARN_RL(&latency_rl, | |
662 | "Neighborhood of suspicious iteration:\n" | |
663 | "%s", ds_cstr(&log)); | |
664 | ds_destroy(&log); | |
665 | s->log_susp_it = s->log_end_it = s->log_begin_it = UINT32_MAX; | |
666 | s->log_reason = NULL; | |
667 | ||
668 | if (range > 100) { | |
669 | /* Reset to safe default values to avoid disturbance. */ | |
670 | log_it_before = LOG_IT_BEFORE; | |
671 | log_it_after = LOG_IT_AFTER; | |
672 | log_extend = false; | |
673 | } | |
674 | } | |
675 | ||
676 | void | |
677 | pmd_perf_log_set_cmd(struct unixctl_conn *conn, | |
678 | int argc, const char *argv[], | |
679 | void *aux OVS_UNUSED) | |
680 | { | |
681 | unsigned int it_before, it_after, us_thr, q_thr; | |
682 | bool on, extend; | |
683 | bool usage = false; | |
684 | ||
685 | on = log_enabled; | |
686 | extend = log_extend; | |
687 | it_before = log_it_before; | |
688 | it_after = log_it_after; | |
689 | q_thr = log_q_thr; | |
690 | us_thr = log_us_thr; | |
691 | ||
692 | while (argc > 1) { | |
693 | if (!strcmp(argv[1], "on")) { | |
694 | on = true; | |
695 | argc--; | |
696 | argv++; | |
697 | } else if (!strcmp(argv[1], "off")) { | |
698 | on = false; | |
699 | argc--; | |
700 | argv++; | |
701 | } else if (!strcmp(argv[1], "-e")) { | |
702 | extend = true; | |
703 | argc--; | |
704 | argv++; | |
705 | } else if (!strcmp(argv[1], "-ne")) { | |
706 | extend = false; | |
707 | argc--; | |
708 | argv++; | |
709 | } else if (!strcmp(argv[1], "-a") && argc > 2) { | |
710 | if (str_to_uint(argv[2], 10, &it_after)) { | |
711 | if (it_after > HISTORY_LEN - 2) { | |
712 | it_after = HISTORY_LEN - 2; | |
713 | } | |
714 | } else { | |
715 | usage = true; | |
716 | break; | |
717 | } | |
718 | argc -= 2; | |
719 | argv += 2; | |
720 | } else if (!strcmp(argv[1], "-b") && argc > 2) { | |
721 | if (str_to_uint(argv[2], 10, &it_before)) { | |
722 | if (it_before > HISTORY_LEN - 2) { | |
723 | it_before = HISTORY_LEN - 2; | |
724 | } | |
725 | } else { | |
726 | usage = true; | |
727 | break; | |
728 | } | |
729 | argc -= 2; | |
730 | argv += 2; | |
731 | } else if (!strcmp(argv[1], "-q") && argc > 2) { | |
732 | if (!str_to_uint(argv[2], 10, &q_thr)) { | |
733 | usage = true; | |
734 | break; | |
735 | } | |
736 | argc -= 2; | |
737 | argv += 2; | |
738 | } else if (!strcmp(argv[1], "-us") && argc > 2) { | |
739 | if (!str_to_uint(argv[2], 10, &us_thr)) { | |
740 | usage = true; | |
741 | break; | |
742 | } | |
743 | argc -= 2; | |
744 | argv += 2; | |
745 | } else { | |
746 | usage = true; | |
747 | break; | |
748 | } | |
749 | } | |
750 | if (it_before + it_after > HISTORY_LEN - 2) { | |
751 | it_after = HISTORY_LEN - 2 - it_before; | |
752 | } | |
753 | ||
754 | if (usage) { | |
755 | unixctl_command_reply_error(conn, | |
756 | "Usage: ovs-appctl dpif-netdev/pmd-perf-log-set " | |
757 | "[on|off] [-b before] [-a after] [-e|-ne] " | |
758 | "[-us usec] [-q qlen]"); | |
759 | return; | |
760 | } | |
761 | ||
762 | VLOG_INFO("pmd-perf-log-set: %s, before=%d, after=%d, extend=%s, " | |
763 | "us_thr=%d, q_thr=%d\n", | |
764 | on ? "on" : "off", it_before, it_after, | |
765 | extend ? "true" : "false", us_thr, q_thr); | |
766 | log_enabled = on; | |
767 | log_extend = extend; | |
768 | log_it_before = it_before; | |
769 | log_it_after = it_after; | |
770 | log_q_thr = q_thr; | |
771 | log_us_thr = us_thr; | |
1276e3db | 772 | iter_cycle_threshold = (log_us_thr * tsc_hz) / 1000000L; |
7178fefb JS |
773 | |
774 | unixctl_command_reply(conn, ""); | |
775 | } |