]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/dpdk/test/test/test_ring_perf.c
update download target update for octopus release
[ceph.git] / ceph / src / spdk / dpdk / test / test / test_ring_perf.c
CommitLineData
11fdf7f2
TL
1/* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright(c) 2010-2014 Intel Corporation
7c673cae
FG
3 */
4
5
6#include <stdio.h>
7#include <inttypes.h>
8#include <rte_ring.h>
9#include <rte_cycles.h>
10#include <rte_launch.h>
11fdf7f2 11#include <rte_pause.h>
7c673cae
FG
12
13#include "test.h"
14
15/*
16 * Ring
17 * ====
18 *
19 * Measures performance of various operations using rdtsc
20 * * Empty ring dequeue
21 * * Enqueue/dequeue of bursts in 1 threads
22 * * Enqueue/dequeue of bursts in 2 threads
23 */
24
25#define RING_NAME "RING_PERF"
26#define RING_SIZE 4096
27#define MAX_BURST 32
28
/*
 * the sizes to enqueue and dequeue in testing
 * (marked volatile so they won't be seen as compile-time constants)
 * NOTE: indexed by the loops in the bulk/burst tests; keep entries <= MAX_BURST
 */
static const volatile unsigned bulk_sizes[] = { 8, 32 };
34
7c673cae
FG
/* A pair of EAL lcore ids on which the two halves of a test are run. */
struct lcore_pair {
	unsigned c1, c2;
};

/*
 * Rendezvous counter for the paired enqueue/dequeue workers: each worker
 * atomically increments it and spins until both threads have arrived.
 */
static volatile unsigned lcore_count = 0;
41/**** Functions to analyse our core mask to get cores for different tests ***/
42
43static int
44get_two_hyperthreads(struct lcore_pair *lcp)
45{
46 unsigned id1, id2;
47 unsigned c1, c2, s1, s2;
48 RTE_LCORE_FOREACH(id1) {
49 /* inner loop just re-reads all id's. We could skip the first few
50 * elements, but since number of cores is small there is little point
51 */
52 RTE_LCORE_FOREACH(id2) {
53 if (id1 == id2)
54 continue;
55 c1 = lcore_config[id1].core_id;
56 c2 = lcore_config[id2].core_id;
57 s1 = lcore_config[id1].socket_id;
58 s2 = lcore_config[id2].socket_id;
59 if ((c1 == c2) && (s1 == s2)){
60 lcp->c1 = id1;
61 lcp->c2 = id2;
62 return 0;
63 }
64 }
65 }
66 return 1;
67}
68
69static int
70get_two_cores(struct lcore_pair *lcp)
71{
72 unsigned id1, id2;
73 unsigned c1, c2, s1, s2;
74 RTE_LCORE_FOREACH(id1) {
75 RTE_LCORE_FOREACH(id2) {
76 if (id1 == id2)
77 continue;
78 c1 = lcore_config[id1].core_id;
79 c2 = lcore_config[id2].core_id;
80 s1 = lcore_config[id1].socket_id;
81 s2 = lcore_config[id2].socket_id;
82 if ((c1 != c2) && (s1 == s2)){
83 lcp->c1 = id1;
84 lcp->c2 = id2;
85 return 0;
86 }
87 }
88 }
89 return 1;
90}
91
92static int
93get_two_sockets(struct lcore_pair *lcp)
94{
95 unsigned id1, id2;
96 unsigned s1, s2;
97 RTE_LCORE_FOREACH(id1) {
98 RTE_LCORE_FOREACH(id2) {
99 if (id1 == id2)
100 continue;
101 s1 = lcore_config[id1].socket_id;
102 s2 = lcore_config[id2].socket_id;
103 if (s1 != s2){
104 lcp->c1 = id1;
105 lcp->c2 = id2;
106 return 0;
107 }
108 }
109 }
110 return 1;
111}
112
113/* Get cycle counts for dequeuing from an empty ring. Should be 2 or 3 cycles */
114static void
11fdf7f2 115test_empty_dequeue(struct rte_ring *r)
7c673cae
FG
116{
117 const unsigned iter_shift = 26;
118 const unsigned iterations = 1<<iter_shift;
119 unsigned i = 0;
120 void *burst[MAX_BURST];
121
122 const uint64_t sc_start = rte_rdtsc();
123 for (i = 0; i < iterations; i++)
11fdf7f2 124 rte_ring_sc_dequeue_bulk(r, burst, bulk_sizes[0], NULL);
7c673cae
FG
125 const uint64_t sc_end = rte_rdtsc();
126
127 const uint64_t mc_start = rte_rdtsc();
128 for (i = 0; i < iterations; i++)
11fdf7f2 129 rte_ring_mc_dequeue_bulk(r, burst, bulk_sizes[0], NULL);
7c673cae
FG
130 const uint64_t mc_end = rte_rdtsc();
131
132 printf("SC empty dequeue: %.2F\n",
133 (double)(sc_end-sc_start) / iterations);
134 printf("MC empty dequeue: %.2F\n",
135 (double)(mc_end-mc_start) / iterations);
136}
137
/*
 * for the separate enqueue and dequeue threads they take in one param
 * and return two. Input = burst size, output = cycle average for sp/sc & mp/mc
 */
struct thread_params {
	struct rte_ring *r;	/* ring under test (input) */
	unsigned size; /* input value, the burst size */
	double spsc, mpmc; /* output value, the single or multi timings */
};
147
148/*
149 * Function that uses rdtsc to measure timing for ring enqueue. Needs pair
150 * thread running dequeue_bulk function
151 */
152static int
153enqueue_bulk(void *p)
154{
155 const unsigned iter_shift = 23;
156 const unsigned iterations = 1<<iter_shift;
157 struct thread_params *params = p;
11fdf7f2 158 struct rte_ring *r = params->r;
7c673cae
FG
159 const unsigned size = params->size;
160 unsigned i;
161 void *burst[MAX_BURST] = {0};
162
163 if ( __sync_add_and_fetch(&lcore_count, 1) != 2 )
164 while(lcore_count != 2)
165 rte_pause();
166
167 const uint64_t sp_start = rte_rdtsc();
168 for (i = 0; i < iterations; i++)
11fdf7f2 169 while (rte_ring_sp_enqueue_bulk(r, burst, size, NULL) == 0)
7c673cae
FG
170 rte_pause();
171 const uint64_t sp_end = rte_rdtsc();
172
173 const uint64_t mp_start = rte_rdtsc();
174 for (i = 0; i < iterations; i++)
11fdf7f2 175 while (rte_ring_mp_enqueue_bulk(r, burst, size, NULL) == 0)
7c673cae
FG
176 rte_pause();
177 const uint64_t mp_end = rte_rdtsc();
178
179 params->spsc = ((double)(sp_end - sp_start))/(iterations*size);
180 params->mpmc = ((double)(mp_end - mp_start))/(iterations*size);
181 return 0;
182}
183
184/*
185 * Function that uses rdtsc to measure timing for ring dequeue. Needs pair
186 * thread running enqueue_bulk function
187 */
188static int
189dequeue_bulk(void *p)
190{
191 const unsigned iter_shift = 23;
192 const unsigned iterations = 1<<iter_shift;
193 struct thread_params *params = p;
11fdf7f2 194 struct rte_ring *r = params->r;
7c673cae
FG
195 const unsigned size = params->size;
196 unsigned i;
197 void *burst[MAX_BURST] = {0};
198
199 if ( __sync_add_and_fetch(&lcore_count, 1) != 2 )
200 while(lcore_count != 2)
201 rte_pause();
202
203 const uint64_t sc_start = rte_rdtsc();
204 for (i = 0; i < iterations; i++)
11fdf7f2 205 while (rte_ring_sc_dequeue_bulk(r, burst, size, NULL) == 0)
7c673cae
FG
206 rte_pause();
207 const uint64_t sc_end = rte_rdtsc();
208
209 const uint64_t mc_start = rte_rdtsc();
210 for (i = 0; i < iterations; i++)
11fdf7f2 211 while (rte_ring_mc_dequeue_bulk(r, burst, size, NULL) == 0)
7c673cae
FG
212 rte_pause();
213 const uint64_t mc_end = rte_rdtsc();
214
215 params->spsc = ((double)(sc_end - sc_start))/(iterations*size);
216 params->mpmc = ((double)(mc_end - mc_start))/(iterations*size);
217 return 0;
218}
219
220/*
221 * Function that calls the enqueue and dequeue bulk functions on pairs of cores.
222 * used to measure ring perf between hyperthreads, cores and sockets.
223 */
224static void
11fdf7f2 225run_on_core_pair(struct lcore_pair *cores, struct rte_ring *r,
7c673cae
FG
226 lcore_function_t f1, lcore_function_t f2)
227{
228 struct thread_params param1 = {0}, param2 = {0};
229 unsigned i;
230 for (i = 0; i < sizeof(bulk_sizes)/sizeof(bulk_sizes[0]); i++) {
231 lcore_count = 0;
232 param1.size = param2.size = bulk_sizes[i];
11fdf7f2 233 param1.r = param2.r = r;
7c673cae
FG
234 if (cores->c1 == rte_get_master_lcore()) {
235 rte_eal_remote_launch(f2, &param2, cores->c2);
236 f1(&param1);
237 rte_eal_wait_lcore(cores->c2);
238 } else {
239 rte_eal_remote_launch(f1, &param1, cores->c1);
240 rte_eal_remote_launch(f2, &param2, cores->c2);
241 rte_eal_wait_lcore(cores->c1);
242 rte_eal_wait_lcore(cores->c2);
243 }
244 printf("SP/SC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes[i],
245 param1.spsc + param2.spsc);
246 printf("MP/MC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes[i],
247 param1.mpmc + param2.mpmc);
248 }
249}
250
/*
 * Test function that determines how long an enqueue + dequeue of a single item
 * takes on a single lcore. Result is for comparison with the bulk enq+deq.
 */
static void
test_single_enqueue_dequeue(struct rte_ring *r)
{
	const unsigned iter_shift = 24;
	const unsigned iterations = 1 << iter_shift;
	void *obj = NULL;
	unsigned n;

	const uint64_t sc_t0 = rte_rdtsc();
	for (n = 0; n < iterations; n++) {
		rte_ring_sp_enqueue(r, obj);
		rte_ring_sc_dequeue(r, &obj);
	}
	const uint64_t sc_t1 = rte_rdtsc();

	const uint64_t mc_t0 = rte_rdtsc();
	for (n = 0; n < iterations; n++) {
		rte_ring_mp_enqueue(r, obj);
		rte_ring_mc_dequeue(r, &obj);
	}
	const uint64_t mc_t1 = rte_rdtsc();

	/* shift by iter_shift == divide by iterations (power of two) */
	printf("SP/SC single enq/dequeue: %"PRIu64"\n",
			(sc_t1 - sc_t0) >> iter_shift);
	printf("MP/MC single enq/dequeue: %"PRIu64"\n",
			(mc_t1 - mc_t0) >> iter_shift);
}
282
283/*
284 * Test that does both enqueue and dequeue on a core using the burst() API calls
285 * instead of the bulk() calls used in other tests. Results should be the same
286 * as for the bulk function called on a single lcore.
287 */
288static void
11fdf7f2 289test_burst_enqueue_dequeue(struct rte_ring *r)
7c673cae
FG
290{
291 const unsigned iter_shift = 23;
292 const unsigned iterations = 1<<iter_shift;
293 unsigned sz, i = 0;
294 void *burst[MAX_BURST] = {0};
295
296 for (sz = 0; sz < sizeof(bulk_sizes)/sizeof(bulk_sizes[0]); sz++) {
297 const uint64_t sc_start = rte_rdtsc();
298 for (i = 0; i < iterations; i++) {
11fdf7f2
TL
299 rte_ring_sp_enqueue_burst(r, burst,
300 bulk_sizes[sz], NULL);
301 rte_ring_sc_dequeue_burst(r, burst,
302 bulk_sizes[sz], NULL);
7c673cae
FG
303 }
304 const uint64_t sc_end = rte_rdtsc();
305
306 const uint64_t mc_start = rte_rdtsc();
307 for (i = 0; i < iterations; i++) {
11fdf7f2
TL
308 rte_ring_mp_enqueue_burst(r, burst,
309 bulk_sizes[sz], NULL);
310 rte_ring_mc_dequeue_burst(r, burst,
311 bulk_sizes[sz], NULL);
7c673cae
FG
312 }
313 const uint64_t mc_end = rte_rdtsc();
314
315 uint64_t mc_avg = ((mc_end-mc_start) >> iter_shift) / bulk_sizes[sz];
316 uint64_t sc_avg = ((sc_end-sc_start) >> iter_shift) / bulk_sizes[sz];
317
318 printf("SP/SC burst enq/dequeue (size: %u): %"PRIu64"\n", bulk_sizes[sz],
319 sc_avg);
320 printf("MP/MC burst enq/dequeue (size: %u): %"PRIu64"\n", bulk_sizes[sz],
321 mc_avg);
322 }
323}
324
325/* Times enqueue and dequeue on a single lcore */
326static void
11fdf7f2 327test_bulk_enqueue_dequeue(struct rte_ring *r)
7c673cae
FG
328{
329 const unsigned iter_shift = 23;
330 const unsigned iterations = 1<<iter_shift;
331 unsigned sz, i = 0;
332 void *burst[MAX_BURST] = {0};
333
334 for (sz = 0; sz < sizeof(bulk_sizes)/sizeof(bulk_sizes[0]); sz++) {
335 const uint64_t sc_start = rte_rdtsc();
336 for (i = 0; i < iterations; i++) {
11fdf7f2
TL
337 rte_ring_sp_enqueue_bulk(r, burst,
338 bulk_sizes[sz], NULL);
339 rte_ring_sc_dequeue_bulk(r, burst,
340 bulk_sizes[sz], NULL);
7c673cae
FG
341 }
342 const uint64_t sc_end = rte_rdtsc();
343
344 const uint64_t mc_start = rte_rdtsc();
345 for (i = 0; i < iterations; i++) {
11fdf7f2
TL
346 rte_ring_mp_enqueue_bulk(r, burst,
347 bulk_sizes[sz], NULL);
348 rte_ring_mc_dequeue_bulk(r, burst,
349 bulk_sizes[sz], NULL);
7c673cae
FG
350 }
351 const uint64_t mc_end = rte_rdtsc();
352
353 double sc_avg = ((double)(sc_end-sc_start) /
354 (iterations * bulk_sizes[sz]));
355 double mc_avg = ((double)(mc_end-mc_start) /
356 (iterations * bulk_sizes[sz]));
357
358 printf("SP/SC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes[sz],
359 sc_avg);
360 printf("MP/MC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes[sz],
361 mc_avg);
362 }
363}
364
365static int
366test_ring_perf(void)
367{
368 struct lcore_pair cores;
11fdf7f2
TL
369 struct rte_ring *r = NULL;
370
7c673cae 371 r = rte_ring_create(RING_NAME, RING_SIZE, rte_socket_id(), 0);
11fdf7f2 372 if (r == NULL)
7c673cae
FG
373 return -1;
374
375 printf("### Testing single element and burst enq/deq ###\n");
11fdf7f2
TL
376 test_single_enqueue_dequeue(r);
377 test_burst_enqueue_dequeue(r);
7c673cae
FG
378
379 printf("\n### Testing empty dequeue ###\n");
11fdf7f2 380 test_empty_dequeue(r);
7c673cae
FG
381
382 printf("\n### Testing using a single lcore ###\n");
11fdf7f2 383 test_bulk_enqueue_dequeue(r);
7c673cae
FG
384
385 if (get_two_hyperthreads(&cores) == 0) {
386 printf("\n### Testing using two hyperthreads ###\n");
11fdf7f2 387 run_on_core_pair(&cores, r, enqueue_bulk, dequeue_bulk);
7c673cae
FG
388 }
389 if (get_two_cores(&cores) == 0) {
390 printf("\n### Testing using two physical cores ###\n");
11fdf7f2 391 run_on_core_pair(&cores, r, enqueue_bulk, dequeue_bulk);
7c673cae
FG
392 }
393 if (get_two_sockets(&cores) == 0) {
394 printf("\n### Testing using two NUMA nodes ###\n");
11fdf7f2 395 run_on_core_pair(&cores, r, enqueue_bulk, dequeue_bulk);
7c673cae 396 }
11fdf7f2 397 rte_ring_free(r);
7c673cae
FG
398 return 0;
399}
400
401REGISTER_TEST_COMMAND(ring_perf_autotest, test_ring_perf);