]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2014 Intel Corporation | |
7c673cae FG |
3 | */ |
4 | ||
5 | ||
6 | #include <stdio.h> | |
7 | #include <inttypes.h> | |
8 | #include <rte_ring.h> | |
9 | #include <rte_cycles.h> | |
10 | #include <rte_launch.h> | |
11fdf7f2 | 11 | #include <rte_pause.h> |
7c673cae FG |
12 | |
13 | #include "test.h" | |
14 | ||
15 | /* | |
16 | * Ring | |
17 | * ==== | |
18 | * | |
19 | * Measures performance of various operations using rdtsc | |
20 | * * Empty ring dequeue | |
21 | * * Enqueue/dequeue of bursts in 1 thread | |
22 | * * Enqueue/dequeue of bursts in 2 threads | |
23 | */ | |
24 | ||
25 | #define RING_NAME "RING_PERF" | |
26 | #define RING_SIZE 4096 | |
27 | #define MAX_BURST 32 | |
28 | ||
/*
 * Object counts passed to the enqueue/dequeue calls by the tests in this
 * file. The array is volatile-qualified so that the compiler cannot treat
 * the sizes as compile-time constants.
 */
static const volatile unsigned int bulk_sizes[] = {
	8,
	32,
};
34 | ||
7c673cae FG |
/* Two lcore ids chosen to run the enqueue side and the dequeue side. */
struct lcore_pair {
	unsigned c1;	/* first lcore id of the pair */
	unsigned c2;	/* second lcore id of the pair */
};
38 | ||
/*
 * Rendezvous counter for the two-lcore tests: run_on_core_pair() resets it
 * to 0, enqueue_bulk() and dequeue_bulk() each increment it on entry and
 * spin until it reads 2. File-scope storage starts out as zero.
 */
static volatile unsigned int lcore_count;
40 | ||
41 | /**** Functions to analyse our core mask to get cores for different tests ***/ | |
42 | ||
43 | static int | |
44 | get_two_hyperthreads(struct lcore_pair *lcp) | |
45 | { | |
46 | unsigned id1, id2; | |
47 | unsigned c1, c2, s1, s2; | |
48 | RTE_LCORE_FOREACH(id1) { | |
49 | /* inner loop just re-reads all id's. We could skip the first few | |
50 | * elements, but since number of cores is small there is little point | |
51 | */ | |
52 | RTE_LCORE_FOREACH(id2) { | |
53 | if (id1 == id2) | |
54 | continue; | |
55 | c1 = lcore_config[id1].core_id; | |
56 | c2 = lcore_config[id2].core_id; | |
57 | s1 = lcore_config[id1].socket_id; | |
58 | s2 = lcore_config[id2].socket_id; | |
59 | if ((c1 == c2) && (s1 == s2)){ | |
60 | lcp->c1 = id1; | |
61 | lcp->c2 = id2; | |
62 | return 0; | |
63 | } | |
64 | } | |
65 | } | |
66 | return 1; | |
67 | } | |
68 | ||
69 | static int | |
70 | get_two_cores(struct lcore_pair *lcp) | |
71 | { | |
72 | unsigned id1, id2; | |
73 | unsigned c1, c2, s1, s2; | |
74 | RTE_LCORE_FOREACH(id1) { | |
75 | RTE_LCORE_FOREACH(id2) { | |
76 | if (id1 == id2) | |
77 | continue; | |
78 | c1 = lcore_config[id1].core_id; | |
79 | c2 = lcore_config[id2].core_id; | |
80 | s1 = lcore_config[id1].socket_id; | |
81 | s2 = lcore_config[id2].socket_id; | |
82 | if ((c1 != c2) && (s1 == s2)){ | |
83 | lcp->c1 = id1; | |
84 | lcp->c2 = id2; | |
85 | return 0; | |
86 | } | |
87 | } | |
88 | } | |
89 | return 1; | |
90 | } | |
91 | ||
92 | static int | |
93 | get_two_sockets(struct lcore_pair *lcp) | |
94 | { | |
95 | unsigned id1, id2; | |
96 | unsigned s1, s2; | |
97 | RTE_LCORE_FOREACH(id1) { | |
98 | RTE_LCORE_FOREACH(id2) { | |
99 | if (id1 == id2) | |
100 | continue; | |
101 | s1 = lcore_config[id1].socket_id; | |
102 | s2 = lcore_config[id2].socket_id; | |
103 | if (s1 != s2){ | |
104 | lcp->c1 = id1; | |
105 | lcp->c2 = id2; | |
106 | return 0; | |
107 | } | |
108 | } | |
109 | } | |
110 | return 1; | |
111 | } | |
112 | ||
113 | /* Get cycle counts for dequeuing from an empty ring. Should be 2 or 3 cycles */ | |
114 | static void | |
11fdf7f2 | 115 | test_empty_dequeue(struct rte_ring *r) |
7c673cae FG |
116 | { |
117 | const unsigned iter_shift = 26; | |
118 | const unsigned iterations = 1<<iter_shift; | |
119 | unsigned i = 0; | |
120 | void *burst[MAX_BURST]; | |
121 | ||
122 | const uint64_t sc_start = rte_rdtsc(); | |
123 | for (i = 0; i < iterations; i++) | |
11fdf7f2 | 124 | rte_ring_sc_dequeue_bulk(r, burst, bulk_sizes[0], NULL); |
7c673cae FG |
125 | const uint64_t sc_end = rte_rdtsc(); |
126 | ||
127 | const uint64_t mc_start = rte_rdtsc(); | |
128 | for (i = 0; i < iterations; i++) | |
11fdf7f2 | 129 | rte_ring_mc_dequeue_bulk(r, burst, bulk_sizes[0], NULL); |
7c673cae FG |
130 | const uint64_t mc_end = rte_rdtsc(); |
131 | ||
132 | printf("SC empty dequeue: %.2F\n", | |
133 | (double)(sc_end-sc_start) / iterations); | |
134 | printf("MC empty dequeue: %.2F\n", | |
135 | (double)(mc_end-mc_start) / iterations); | |
136 | } | |
137 | ||
/*
 * Argument block handed to enqueue_bulk() / dequeue_bulk() when they run
 * as a pair on separate lcores. The caller fills in the ring and the burst
 * size; the worker writes back two per-object cycle averages.
 */
struct thread_params {
	struct rte_ring *r;	/* input: ring to operate on */
	unsigned size;		/* input: the burst size */
	double spsc;		/* output: single-producer/consumer timing */
	double mpmc;		/* output: multi-producer/consumer timing */
};
147 | ||
148 | /* | |
149 | * Function that uses rdtsc to measure timing for ring enqueue. Needs pair | |
150 | * thread running dequeue_bulk function | |
151 | */ | |
152 | static int | |
153 | enqueue_bulk(void *p) | |
154 | { | |
155 | const unsigned iter_shift = 23; | |
156 | const unsigned iterations = 1<<iter_shift; | |
157 | struct thread_params *params = p; | |
11fdf7f2 | 158 | struct rte_ring *r = params->r; |
7c673cae FG |
159 | const unsigned size = params->size; |
160 | unsigned i; | |
161 | void *burst[MAX_BURST] = {0}; | |
162 | ||
163 | if ( __sync_add_and_fetch(&lcore_count, 1) != 2 ) | |
164 | while(lcore_count != 2) | |
165 | rte_pause(); | |
166 | ||
167 | const uint64_t sp_start = rte_rdtsc(); | |
168 | for (i = 0; i < iterations; i++) | |
11fdf7f2 | 169 | while (rte_ring_sp_enqueue_bulk(r, burst, size, NULL) == 0) |
7c673cae FG |
170 | rte_pause(); |
171 | const uint64_t sp_end = rte_rdtsc(); | |
172 | ||
173 | const uint64_t mp_start = rte_rdtsc(); | |
174 | for (i = 0; i < iterations; i++) | |
11fdf7f2 | 175 | while (rte_ring_mp_enqueue_bulk(r, burst, size, NULL) == 0) |
7c673cae FG |
176 | rte_pause(); |
177 | const uint64_t mp_end = rte_rdtsc(); | |
178 | ||
179 | params->spsc = ((double)(sp_end - sp_start))/(iterations*size); | |
180 | params->mpmc = ((double)(mp_end - mp_start))/(iterations*size); | |
181 | return 0; | |
182 | } | |
183 | ||
184 | /* | |
185 | * Function that uses rdtsc to measure timing for ring dequeue. Needs pair | |
186 | * thread running enqueue_bulk function | |
187 | */ | |
188 | static int | |
189 | dequeue_bulk(void *p) | |
190 | { | |
191 | const unsigned iter_shift = 23; | |
192 | const unsigned iterations = 1<<iter_shift; | |
193 | struct thread_params *params = p; | |
11fdf7f2 | 194 | struct rte_ring *r = params->r; |
7c673cae FG |
195 | const unsigned size = params->size; |
196 | unsigned i; | |
197 | void *burst[MAX_BURST] = {0}; | |
198 | ||
199 | if ( __sync_add_and_fetch(&lcore_count, 1) != 2 ) | |
200 | while(lcore_count != 2) | |
201 | rte_pause(); | |
202 | ||
203 | const uint64_t sc_start = rte_rdtsc(); | |
204 | for (i = 0; i < iterations; i++) | |
11fdf7f2 | 205 | while (rte_ring_sc_dequeue_bulk(r, burst, size, NULL) == 0) |
7c673cae FG |
206 | rte_pause(); |
207 | const uint64_t sc_end = rte_rdtsc(); | |
208 | ||
209 | const uint64_t mc_start = rte_rdtsc(); | |
210 | for (i = 0; i < iterations; i++) | |
11fdf7f2 | 211 | while (rte_ring_mc_dequeue_bulk(r, burst, size, NULL) == 0) |
7c673cae FG |
212 | rte_pause(); |
213 | const uint64_t mc_end = rte_rdtsc(); | |
214 | ||
215 | params->spsc = ((double)(sc_end - sc_start))/(iterations*size); | |
216 | params->mpmc = ((double)(mc_end - mc_start))/(iterations*size); | |
217 | return 0; | |
218 | } | |
219 | ||
220 | /* | |
221 | * Function that calls the enqueue and dequeue bulk functions on pairs of cores. | |
222 | * used to measure ring perf between hyperthreads, cores and sockets. | |
223 | */ | |
224 | static void | |
11fdf7f2 | 225 | run_on_core_pair(struct lcore_pair *cores, struct rte_ring *r, |
7c673cae FG |
226 | lcore_function_t f1, lcore_function_t f2) |
227 | { | |
228 | struct thread_params param1 = {0}, param2 = {0}; | |
229 | unsigned i; | |
230 | for (i = 0; i < sizeof(bulk_sizes)/sizeof(bulk_sizes[0]); i++) { | |
231 | lcore_count = 0; | |
232 | param1.size = param2.size = bulk_sizes[i]; | |
11fdf7f2 | 233 | param1.r = param2.r = r; |
7c673cae FG |
234 | if (cores->c1 == rte_get_master_lcore()) { |
235 | rte_eal_remote_launch(f2, ¶m2, cores->c2); | |
236 | f1(¶m1); | |
237 | rte_eal_wait_lcore(cores->c2); | |
238 | } else { | |
239 | rte_eal_remote_launch(f1, ¶m1, cores->c1); | |
240 | rte_eal_remote_launch(f2, ¶m2, cores->c2); | |
241 | rte_eal_wait_lcore(cores->c1); | |
242 | rte_eal_wait_lcore(cores->c2); | |
243 | } | |
244 | printf("SP/SC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes[i], | |
245 | param1.spsc + param2.spsc); | |
246 | printf("MP/MC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes[i], | |
247 | param1.mpmc + param2.mpmc); | |
248 | } | |
249 | } | |
250 | ||
/*
 * Time 2^24 enqueue + dequeue round trips of a single object on the
 * calling lcore, first with the sp/sc calls and then with the mp/mc calls,
 * and print the whole-cycle average per round trip. The result is for
 * comparison with the bulk enq+deq figures.
 */
static void
test_single_enqueue_dequeue(struct rte_ring *r)
{
	const unsigned shift = 24;
	const unsigned loops = 1 << shift;
	void *obj = NULL;
	uint64_t t0, sc_cycles, mc_cycles;
	unsigned n;

	t0 = rte_rdtsc();
	for (n = 0; n < loops; n++) {
		rte_ring_sp_enqueue(r, obj);
		rte_ring_sc_dequeue(r, &obj);
	}
	sc_cycles = rte_rdtsc() - t0;

	t0 = rte_rdtsc();
	for (n = 0; n < loops; n++) {
		rte_ring_mp_enqueue(r, obj);
		rte_ring_mc_dequeue(r, &obj);
	}
	mc_cycles = rte_rdtsc() - t0;

	/* shifting by the iteration exponent divides by the iteration count */
	printf("SP/SC single enq/dequeue: %"PRIu64"\n", sc_cycles >> shift);
	printf("MP/MC single enq/dequeue: %"PRIu64"\n", mc_cycles >> shift);
}
282 | ||
283 | /* | |
284 | * Test that does both enqueue and dequeue on a core using the burst() API calls | |
285 | * instead of the bulk() calls used in other tests. Results should be the same | |
286 | * as for the bulk function called on a single lcore. | |
287 | */ | |
288 | static void | |
11fdf7f2 | 289 | test_burst_enqueue_dequeue(struct rte_ring *r) |
7c673cae FG |
290 | { |
291 | const unsigned iter_shift = 23; | |
292 | const unsigned iterations = 1<<iter_shift; | |
293 | unsigned sz, i = 0; | |
294 | void *burst[MAX_BURST] = {0}; | |
295 | ||
296 | for (sz = 0; sz < sizeof(bulk_sizes)/sizeof(bulk_sizes[0]); sz++) { | |
297 | const uint64_t sc_start = rte_rdtsc(); | |
298 | for (i = 0; i < iterations; i++) { | |
11fdf7f2 TL |
299 | rte_ring_sp_enqueue_burst(r, burst, |
300 | bulk_sizes[sz], NULL); | |
301 | rte_ring_sc_dequeue_burst(r, burst, | |
302 | bulk_sizes[sz], NULL); | |
7c673cae FG |
303 | } |
304 | const uint64_t sc_end = rte_rdtsc(); | |
305 | ||
306 | const uint64_t mc_start = rte_rdtsc(); | |
307 | for (i = 0; i < iterations; i++) { | |
11fdf7f2 TL |
308 | rte_ring_mp_enqueue_burst(r, burst, |
309 | bulk_sizes[sz], NULL); | |
310 | rte_ring_mc_dequeue_burst(r, burst, | |
311 | bulk_sizes[sz], NULL); | |
7c673cae FG |
312 | } |
313 | const uint64_t mc_end = rte_rdtsc(); | |
314 | ||
315 | uint64_t mc_avg = ((mc_end-mc_start) >> iter_shift) / bulk_sizes[sz]; | |
316 | uint64_t sc_avg = ((sc_end-sc_start) >> iter_shift) / bulk_sizes[sz]; | |
317 | ||
318 | printf("SP/SC burst enq/dequeue (size: %u): %"PRIu64"\n", bulk_sizes[sz], | |
319 | sc_avg); | |
320 | printf("MP/MC burst enq/dequeue (size: %u): %"PRIu64"\n", bulk_sizes[sz], | |
321 | mc_avg); | |
322 | } | |
323 | } | |
324 | ||
325 | /* Times enqueue and dequeue on a single lcore */ | |
326 | static void | |
11fdf7f2 | 327 | test_bulk_enqueue_dequeue(struct rte_ring *r) |
7c673cae FG |
328 | { |
329 | const unsigned iter_shift = 23; | |
330 | const unsigned iterations = 1<<iter_shift; | |
331 | unsigned sz, i = 0; | |
332 | void *burst[MAX_BURST] = {0}; | |
333 | ||
334 | for (sz = 0; sz < sizeof(bulk_sizes)/sizeof(bulk_sizes[0]); sz++) { | |
335 | const uint64_t sc_start = rte_rdtsc(); | |
336 | for (i = 0; i < iterations; i++) { | |
11fdf7f2 TL |
337 | rte_ring_sp_enqueue_bulk(r, burst, |
338 | bulk_sizes[sz], NULL); | |
339 | rte_ring_sc_dequeue_bulk(r, burst, | |
340 | bulk_sizes[sz], NULL); | |
7c673cae FG |
341 | } |
342 | const uint64_t sc_end = rte_rdtsc(); | |
343 | ||
344 | const uint64_t mc_start = rte_rdtsc(); | |
345 | for (i = 0; i < iterations; i++) { | |
11fdf7f2 TL |
346 | rte_ring_mp_enqueue_bulk(r, burst, |
347 | bulk_sizes[sz], NULL); | |
348 | rte_ring_mc_dequeue_bulk(r, burst, | |
349 | bulk_sizes[sz], NULL); | |
7c673cae FG |
350 | } |
351 | const uint64_t mc_end = rte_rdtsc(); | |
352 | ||
353 | double sc_avg = ((double)(sc_end-sc_start) / | |
354 | (iterations * bulk_sizes[sz])); | |
355 | double mc_avg = ((double)(mc_end-mc_start) / | |
356 | (iterations * bulk_sizes[sz])); | |
357 | ||
358 | printf("SP/SC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes[sz], | |
359 | sc_avg); | |
360 | printf("MP/MC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes[sz], | |
361 | mc_avg); | |
362 | } | |
363 | } | |
364 | ||
365 | static int | |
366 | test_ring_perf(void) | |
367 | { | |
368 | struct lcore_pair cores; | |
11fdf7f2 TL |
369 | struct rte_ring *r = NULL; |
370 | ||
7c673cae | 371 | r = rte_ring_create(RING_NAME, RING_SIZE, rte_socket_id(), 0); |
11fdf7f2 | 372 | if (r == NULL) |
7c673cae FG |
373 | return -1; |
374 | ||
375 | printf("### Testing single element and burst enq/deq ###\n"); | |
11fdf7f2 TL |
376 | test_single_enqueue_dequeue(r); |
377 | test_burst_enqueue_dequeue(r); | |
7c673cae FG |
378 | |
379 | printf("\n### Testing empty dequeue ###\n"); | |
11fdf7f2 | 380 | test_empty_dequeue(r); |
7c673cae FG |
381 | |
382 | printf("\n### Testing using a single lcore ###\n"); | |
11fdf7f2 | 383 | test_bulk_enqueue_dequeue(r); |
7c673cae FG |
384 | |
385 | if (get_two_hyperthreads(&cores) == 0) { | |
386 | printf("\n### Testing using two hyperthreads ###\n"); | |
11fdf7f2 | 387 | run_on_core_pair(&cores, r, enqueue_bulk, dequeue_bulk); |
7c673cae FG |
388 | } |
389 | if (get_two_cores(&cores) == 0) { | |
390 | printf("\n### Testing using two physical cores ###\n"); | |
11fdf7f2 | 391 | run_on_core_pair(&cores, r, enqueue_bulk, dequeue_bulk); |
7c673cae FG |
392 | } |
393 | if (get_two_sockets(&cores) == 0) { | |
394 | printf("\n### Testing using two NUMA nodes ###\n"); | |
11fdf7f2 | 395 | run_on_core_pair(&cores, r, enqueue_bulk, dequeue_bulk); |
7c673cae | 396 | } |
11fdf7f2 | 397 | rte_ring_free(r); |
7c673cae FG |
398 | return 0; |
399 | } | |
400 | ||
401 | REGISTER_TEST_COMMAND(ring_perf_autotest, test_ring_perf); |