/*
 * Copyright (c) 2017 Ericsson AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef DPIF_NETDEV_PERF_H
#define DPIF_NETDEV_PERF_H 1

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <time.h>

#ifdef DPDK_NETDEV
#include <rte_config.h>
#include <rte_cycles.h>
#endif

#include "openvswitch/vlog.h"
#include "ovs-atomic.h"
/* This module encapsulates data structures and functions to maintain basic PMD
 * performance metrics such as packet counters, execution cycles as well as
 * histograms and time series recording for more detailed PMD metrics.
 *
 * It provides a clean API for dpif-netdev to initialize, update and read and
 * reset these metrics.
 *
 * The basic set of PMD counters is implemented as atomic_uint64_t variables
 * to guarantee correct read also in 32-bit systems.
 *
 * The detailed PMD performance metrics are only supported on 64-bit systems
 * with atomic 64-bit read and store semantics for plain uint64_t counters.
 */
/* Set of counter types maintained in pmd_perf_stats. */

enum pmd_stat_type {
    PMD_STAT_EXACT_HIT,     /* Packets that had an exact match (emc). */
    PMD_STAT_SMC_HIT,       /* Packets that had a sig match hit (SMC). */
    PMD_STAT_MASKED_HIT,    /* Packets that matched in the flow table. */
    PMD_STAT_MISS,          /* Packets that did not match and upcall was ok. */
    PMD_STAT_LOST,          /* Packets that did not match and upcall failed. */
                            /* The above statistics account for the total
                             * number of packet passes through the datapath
                             * pipeline and should not be overlapping with each
                             * other. */
    PMD_STAT_MASKED_LOOKUP, /* Number of subtable lookups for flow table
                               hits. Each MASKED_HIT hit will have >= 1
                               MASKED_LOOKUP(s). */
    PMD_STAT_RECV,          /* Packets entering the datapath pipeline from an
                             * interface. */
    PMD_STAT_RECIRC,        /* Packets reentering the datapath pipeline due to
                             * recirculation. */
    PMD_STAT_SENT_PKTS,     /* Packets that have been sent. */
    PMD_STAT_SENT_BATCHES,  /* Number of batches sent. */
    PMD_CYCLES_ITER_IDLE,   /* Cycles spent in idle iterations. */
    PMD_CYCLES_ITER_BUSY,   /* Cycles spent in busy iterations. */
    PMD_CYCLES_UPCALL,      /* Cycles spent processing upcalls. */
    PMD_N_STATS             /* Number of counter types; must be last. */
};
83 /* Array of PMD counters indexed by enum pmd_stat_type.
84 * The n[] array contains the actual counter values since initialization
85 * of the PMD. Counters are atomically updated from the PMD but are
86 * read and cleared also from other processes. To clear the counters at
87 * PMD run-time, the current counter values are copied over to the zero[]
88 * array. To read counters we subtract zero[] value from n[]. */
91 atomic_uint64_t n
[PMD_N_STATS
]; /* Value since _init(). */
92 uint64_t zero
[PMD_N_STATS
]; /* Value at last _clear(). */
/* Data structure to collect statistical distribution of an integer measurement
 * type in form of a histogram. The wall[] array contains the inclusive
 * upper boundaries of the bins, while the bin[] array contains the actual
 * counters per bin. The histogram walls are typically set automatically
 * using the functions provided below. */

#define NUM_BINS 32             /* Number of histogram bins. */

struct histogram {
    uint32_t wall[NUM_BINS];    /* Inclusive upper boundary of each bin. */
    uint64_t bin[NUM_BINS];     /* Sample counter per bin. */
};
/* Data structure to record details PMD execution metrics per iteration for
 * a history period of up to HISTORY_LEN iterations in circular buffer.
 * Also used to record up to HISTORY_LEN millisecond averages/totals of these
 * metrics. */

struct iter_stats {
    uint64_t timestamp;         /* Iteration no. or millisecond. */
    uint64_t cycles;            /* Number of TSC cycles spent in it. or ms. */
    uint64_t busy_cycles;       /* Cycles spent in busy iterations or ms. */
    uint32_t iterations;        /* Iterations in ms. */
    uint32_t pkts;              /* Packets processed in iteration or ms. */
    uint32_t upcalls;           /* Number of upcalls in iteration or ms. */
    uint32_t upcall_cycles;     /* Cycles spent in upcalls in it. or ms. */
    uint32_t batches;           /* Number of rx batches in iteration or ms. */
    uint32_t max_vhost_qfill;   /* Maximum fill level in iteration or ms. */
};
125 #define HISTORY_LEN 1000 /* Length of recorded history
126 (iterations and ms). */
127 #define DEF_HIST_SHOW 20 /* Default number of history samples to
131 size_t idx
; /* Slot to which next call to history_store()
133 struct iter_stats sample
[HISTORY_LEN
];
136 /* Container for all performance metrics of a PMD within the struct
137 * dp_netdev_pmd_thread. The metrics must be updated from within the PMD
138 * thread but can be read from any thread. The basic PMD counters in
139 * struct pmd_counters can be read without protection against concurrent
140 * clearing. The other metrics may only be safely read with the clear_mutex
141 * held to protect against concurrent clearing. */
143 struct pmd_perf_stats
{
144 /* Prevents interference between PMD polling and stats clearing. */
145 struct ovs_mutex stats_mutex
;
146 /* Set by CLI thread to order clearing of PMD stats. */
148 /* Prevents stats retrieval while clearing is in progress. */
149 struct ovs_mutex clear_mutex
;
150 /* Start of the current performance measurement period. */
152 /* Counter for PMD iterations. */
153 uint64_t iteration_cnt
;
154 /* Start of the current iteration. */
156 /* Latest TSC time stamp taken in PMD. */
158 /* Used to space certain checks in time. */
159 uint64_t next_check_tsc
;
160 /* If non-NULL, outermost cycle timer currently running in PMD. */
161 struct cycle_timer
*cur_timer
;
162 /* Set of PMD counters with their zero offsets. */
163 struct pmd_counters counters
;
164 /* Statistics of the current iteration. */
165 struct iter_stats current
;
166 /* Totals for the current millisecond. */
167 struct iter_stats totals
;
168 /* Histograms for the PMD metrics. */
169 struct histogram cycles
;
170 struct histogram pkts
;
171 struct histogram cycles_per_pkt
;
172 struct histogram upcalls
;
173 struct histogram cycles_per_upcall
;
174 struct histogram pkts_per_batch
;
175 struct histogram max_vhost_qfill
;
176 /* Iteration history buffer. */
177 struct history iterations
;
178 /* Millisecond history buffer. */
179 struct history milliseconds
;
180 /* Suspicious iteration log. */
181 uint32_t log_susp_it
;
182 /* Start of iteration range to log. */
183 uint32_t log_begin_it
;
184 /* End of iteration range to log. */
186 /* Reason for logging suspicious iteration. */
191 static inline uint64_t
192 rdtsc_syscall(struct pmd_perf_stats
*s
)
197 if (clock_gettime(CLOCK_MONOTONIC_RAW
, &val
) != 0) {
201 v
= val
.tv_sec
* UINT64_C(1000000000) + val
.tv_nsec
;
202 return s
->last_tsc
= v
;
206 /* Support for accurate timing of PMD execution on TSC clock cycle level.
207 * These functions are intended to be invoked in the context of pmd threads. */
209 /* Read the TSC cycle register and cache it. Any function not requiring clock
210 * cycle accuracy should read the cached value using cycles_counter_get() to
211 * avoid the overhead of reading the TSC register. */
213 static inline uint64_t
214 cycles_counter_update(struct pmd_perf_stats
*s
)
217 return s
->last_tsc
= rte_get_tsc_cycles();
218 #elif !defined(_MSC_VER) && defined(__x86_64__)
220 asm volatile("rdtsc" : "=a" (l
), "=d" (h
));
222 return s
->last_tsc
= ((uint64_t) h
<< 32) | l
;
223 #elif defined(__linux__)
224 return rdtsc_syscall(s
);
226 return s
->last_tsc
= 0;
230 static inline uint64_t
231 cycles_counter_get(struct pmd_perf_stats
*s
)
236 void pmd_perf_estimate_tsc_frequency(void);
/* A nestable timer for measuring execution time in TSC cycles.
 *
 * Usage:
 *   struct cycle_timer timer;
 *
 *   cycle_timer_start(pmd, &timer);
 *   <Timed execution>
 *   uint64_t cycles = cycle_timer_stop(pmd, &timer);
 *
 * The caller must guarantee that a call to cycle_timer_start() is always
 * paired with a call to cycle_timer_stop().
 *
 * It is possible to have nested cycles timers within the timed code. The
 * execution time measured by the nested timers is excluded from the time
 * measured by the embracing timer.
 */
struct cycle_timer {
    uint64_t start;         /* TSC at cycle_timer_start(); advanced by the
                               cycles spent in nested timers. */
    uint64_t suspended;     /* TSC at which a nested timer suspended this
                               one; 0 while running. */
    struct cycle_timer *interrupted;    /* Enclosing timer, if nested. */
};
262 cycle_timer_start(struct pmd_perf_stats
*s
,
263 struct cycle_timer
*timer
)
265 struct cycle_timer
*cur_timer
= s
->cur_timer
;
266 uint64_t now
= cycles_counter_update(s
);
269 cur_timer
->suspended
= now
;
271 timer
->interrupted
= cur_timer
;
273 timer
->suspended
= 0;
274 s
->cur_timer
= timer
;
277 static inline uint64_t
278 cycle_timer_stop(struct pmd_perf_stats
*s
,
279 struct cycle_timer
*timer
)
281 /* Assert that this is the current cycle timer. */
282 ovs_assert(s
->cur_timer
== timer
);
283 uint64_t now
= cycles_counter_update(s
);
284 struct cycle_timer
*intr_timer
= timer
->interrupted
;
287 /* Adjust the start offset by the suspended cycles. */
288 intr_timer
->start
+= now
- intr_timer
->suspended
;
290 /* Restore suspended timer, if any. */
291 s
->cur_timer
= intr_timer
;
292 return now
- timer
->start
;
295 /* Functions to initialize and reset the PMD performance metrics. */
297 void pmd_perf_stats_init(struct pmd_perf_stats
*s
);
298 void pmd_perf_stats_clear(struct pmd_perf_stats
*s
);
299 void pmd_perf_stats_clear_lock(struct pmd_perf_stats
*s
);
301 /* Functions to read and update PMD counters. */
303 void pmd_perf_read_counters(struct pmd_perf_stats
*s
,
304 uint64_t stats
[PMD_N_STATS
]);
/* PMD performance counters are updated lock-less. For real PMDs
 * they are only updated from the PMD thread itself. In the case of the
 * NON-PMD they might be updated from multiple threads, but we can live
 * with losing a rare update as 100% accuracy is not required.
 * However, as counters are read for display from outside the PMD thread
 * with e.g. pmd-stats-show, we make sure that the 64-bit read and store
 * operations are atomic also on 32-bit systems so that readers cannot
 * read garbage. On 64-bit systems this incurs no overhead. */
316 pmd_perf_update_counter(struct pmd_perf_stats
*s
,
317 enum pmd_stat_type counter
, int delta
)
320 atomic_read_relaxed(&s
->counters
.n
[counter
], &tmp
);
322 atomic_store_relaxed(&s
->counters
.n
[counter
], tmp
);
/* Functions to manipulate a sample history. */

/* Count 'val' into the first bin of 'hist' whose inclusive upper wall is
 * >= 'val'. Values above the last wall land in the final overflow bin. */
static inline void
histogram_add_sample(struct histogram *hist, uint32_t val)
{
    /* TODO: Can do better with binary search? */
    for (int i = 0; i < NUM_BINS - 1; i++) {
        if (val <= hist->wall[i]) {
            hist->bin[i]++;
            return;
        }
    }
    hist->bin[NUM_BINS - 1]++;
}
uint64_t histogram_samples(const struct histogram *hist);
/* This function is used to advance the given history index by positive
 * offset in the circular history buffer. */
static inline uint32_t
history_add(uint32_t idx, uint32_t offset)
{
    return (idx + offset) % HISTORY_LEN;
}
/* This function computes the difference between two indices into the
 * circular history buffer. The result is always positive in the range
 * 0 .. HISTORY_LEN-1 and specifies the number of steps to reach idx1
 * starting from idx2. It can also be used to retreat the history index
 * idx1 by idx2 steps. */
static inline uint32_t
history_sub(uint32_t idx1, uint32_t idx2)
{
    return (idx1 + HISTORY_LEN - idx2) % HISTORY_LEN;
}
/* Return a pointer to the current (not yet advanced) slot of 'h'. */
static inline struct iter_stats *
history_current(struct history *h)
{
    return &h->sample[h->idx];
}
367 static inline struct iter_stats
*
368 history_next(struct history
*h
)
370 size_t next_idx
= history_add(h
->idx
, 1);
371 struct iter_stats
*next
= &h
->sample
[next_idx
];
373 memset(next
, 0, sizeof(*next
));
378 static inline struct iter_stats
*
379 history_store(struct history
*h
, struct iter_stats
*is
)
382 h
->sample
[h
->idx
] = *is
;
384 /* Advance the history pointer */
385 return history_next(h
);
/* Data and function related to logging of suspicious iterations. */

extern bool log_enabled;
extern bool log_extend;
extern uint32_t log_q_thr;
extern uint64_t iter_cycle_threshold;

void pmd_perf_set_log_susp_iteration(struct pmd_perf_stats *s, char *reason);
void pmd_perf_log_susp_iteration_neighborhood(struct pmd_perf_stats *s);
/* Functions recording PMD metrics per iteration. */

void
pmd_perf_start_iteration(struct pmd_perf_stats *s);
void
pmd_perf_end_iteration(struct pmd_perf_stats *s, int rx_packets,
                       int tx_packets, bool full_metrics);
/* Formatting the output of commands. */

struct pmd_perf_params {
    int command_type;           /* NOTE(review): field reconstructed --
                                   confirm against upstream. */
    bool histograms;            /* NOTE(review): reconstructed -- confirm. */
    size_t iter_hist_len;
    size_t ms_hist_len;         /* NOTE(review): reconstructed -- confirm. */
};
415 void pmd_perf_format_overall_stats(struct ds
*str
, struct pmd_perf_stats
*s
,
417 void pmd_perf_format_histograms(struct ds
*str
, struct pmd_perf_stats
*s
);
418 void pmd_perf_format_iteration_history(struct ds
*str
,
419 struct pmd_perf_stats
*s
,
421 void pmd_perf_format_ms_history(struct ds
*str
, struct pmd_perf_stats
*s
,
423 void pmd_perf_log_set_cmd(struct unixctl_conn
*conn
,
424 int argc
, const char *argv
[],
425 void *aux OVS_UNUSED
);
431 #endif /* DPIF_NETDEV_PERF_H */