]>
git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/dpdk/test/test/test_ring_perf.c
4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 #include <rte_cycles.h>
39 #include <rte_launch.h>
47 * Measures performance of various operations using rdtsc
48 * * Empty ring dequeue
49 * * Enqueue/dequeue of bursts in 1 threads
50 * * Enqueue/dequeue of bursts in 2 threads
53 #define RING_NAME "RING_PERF"
54 #define RING_SIZE 4096
58 * the sizes to enqueue and dequeue in testing
59 * (marked volatile so they won't be seen as compile-time constants)
61 static const volatile unsigned bulk_sizes
[] = { 8, 32 };
63 /* The ring structure used for tests */
64 static struct rte_ring
*r
;
70 static volatile unsigned lcore_count
= 0;
72 /**** Functions to analyse our core mask to get cores for different tests ***/
75 get_two_hyperthreads(struct lcore_pair
*lcp
)
78 unsigned c1
, c2
, s1
, s2
;
79 RTE_LCORE_FOREACH(id1
) {
80 /* inner loop just re-reads all id's. We could skip the first few
81 * elements, but since number of cores is small there is little point
83 RTE_LCORE_FOREACH(id2
) {
86 c1
= lcore_config
[id1
].core_id
;
87 c2
= lcore_config
[id2
].core_id
;
88 s1
= lcore_config
[id1
].socket_id
;
89 s2
= lcore_config
[id2
].socket_id
;
90 if ((c1
== c2
) && (s1
== s2
)){
101 get_two_cores(struct lcore_pair
*lcp
)
104 unsigned c1
, c2
, s1
, s2
;
105 RTE_LCORE_FOREACH(id1
) {
106 RTE_LCORE_FOREACH(id2
) {
109 c1
= lcore_config
[id1
].core_id
;
110 c2
= lcore_config
[id2
].core_id
;
111 s1
= lcore_config
[id1
].socket_id
;
112 s2
= lcore_config
[id2
].socket_id
;
113 if ((c1
!= c2
) && (s1
== s2
)){
124 get_two_sockets(struct lcore_pair
*lcp
)
128 RTE_LCORE_FOREACH(id1
) {
129 RTE_LCORE_FOREACH(id2
) {
132 s1
= lcore_config
[id1
].socket_id
;
133 s2
= lcore_config
[id2
].socket_id
;
144 /* Get cycle counts for dequeuing from an empty ring. Should be 2 or 3 cycles */
146 test_empty_dequeue(void)
148 const unsigned iter_shift
= 26;
149 const unsigned iterations
= 1<<iter_shift
;
151 void *burst
[MAX_BURST
];
153 const uint64_t sc_start
= rte_rdtsc();
154 for (i
= 0; i
< iterations
; i
++)
155 rte_ring_sc_dequeue_bulk(r
, burst
, bulk_sizes
[0], NULL
);
156 const uint64_t sc_end
= rte_rdtsc();
158 const uint64_t mc_start
= rte_rdtsc();
159 for (i
= 0; i
< iterations
; i
++)
160 rte_ring_mc_dequeue_bulk(r
, burst
, bulk_sizes
[0], NULL
);
161 const uint64_t mc_end
= rte_rdtsc();
163 printf("SC empty dequeue: %.2F\n",
164 (double)(sc_end
-sc_start
) / iterations
);
165 printf("MC empty dequeue: %.2F\n",
166 (double)(mc_end
-mc_start
) / iterations
);
170 * for the separate enqueue and dequeue threads they take in one param
171 * and return two. Input = burst size, output = cycle average for sp/sc & mp/mc
173 struct thread_params
{
174 unsigned size
; /* input value, the burst size */
175 double spsc
, mpmc
; /* output value, the single or multi timings */
179 * Function that uses rdtsc to measure timing for ring enqueue. Needs pair
180 * thread running dequeue_bulk function
183 enqueue_bulk(void *p
)
185 const unsigned iter_shift
= 23;
186 const unsigned iterations
= 1<<iter_shift
;
187 struct thread_params
*params
= p
;
188 const unsigned size
= params
->size
;
190 void *burst
[MAX_BURST
] = {0};
192 if ( __sync_add_and_fetch(&lcore_count
, 1) != 2 )
193 while(lcore_count
!= 2)
196 const uint64_t sp_start
= rte_rdtsc();
197 for (i
= 0; i
< iterations
; i
++)
198 while (rte_ring_sp_enqueue_bulk(r
, burst
, size
, NULL
) == 0)
200 const uint64_t sp_end
= rte_rdtsc();
202 const uint64_t mp_start
= rte_rdtsc();
203 for (i
= 0; i
< iterations
; i
++)
204 while (rte_ring_mp_enqueue_bulk(r
, burst
, size
, NULL
) == 0)
206 const uint64_t mp_end
= rte_rdtsc();
208 params
->spsc
= ((double)(sp_end
- sp_start
))/(iterations
*size
);
209 params
->mpmc
= ((double)(mp_end
- mp_start
))/(iterations
*size
);
214 * Function that uses rdtsc to measure timing for ring dequeue. Needs pair
215 * thread running enqueue_bulk function
218 dequeue_bulk(void *p
)
220 const unsigned iter_shift
= 23;
221 const unsigned iterations
= 1<<iter_shift
;
222 struct thread_params
*params
= p
;
223 const unsigned size
= params
->size
;
225 void *burst
[MAX_BURST
] = {0};
227 if ( __sync_add_and_fetch(&lcore_count
, 1) != 2 )
228 while(lcore_count
!= 2)
231 const uint64_t sc_start
= rte_rdtsc();
232 for (i
= 0; i
< iterations
; i
++)
233 while (rte_ring_sc_dequeue_bulk(r
, burst
, size
, NULL
) == 0)
235 const uint64_t sc_end
= rte_rdtsc();
237 const uint64_t mc_start
= rte_rdtsc();
238 for (i
= 0; i
< iterations
; i
++)
239 while (rte_ring_mc_dequeue_bulk(r
, burst
, size
, NULL
) == 0)
241 const uint64_t mc_end
= rte_rdtsc();
243 params
->spsc
= ((double)(sc_end
- sc_start
))/(iterations
*size
);
244 params
->mpmc
= ((double)(mc_end
- mc_start
))/(iterations
*size
);
249 * Function that calls the enqueue and dequeue bulk functions on pairs of cores.
250 * used to measure ring perf between hyperthreads, cores and sockets.
253 run_on_core_pair(struct lcore_pair
*cores
,
254 lcore_function_t f1
, lcore_function_t f2
)
256 struct thread_params param1
= {0}, param2
= {0};
258 for (i
= 0; i
< sizeof(bulk_sizes
)/sizeof(bulk_sizes
[0]); i
++) {
260 param1
.size
= param2
.size
= bulk_sizes
[i
];
261 if (cores
->c1
== rte_get_master_lcore()) {
262 rte_eal_remote_launch(f2
, ¶m2
, cores
->c2
);
264 rte_eal_wait_lcore(cores
->c2
);
266 rte_eal_remote_launch(f1
, ¶m1
, cores
->c1
);
267 rte_eal_remote_launch(f2
, ¶m2
, cores
->c2
);
268 rte_eal_wait_lcore(cores
->c1
);
269 rte_eal_wait_lcore(cores
->c2
);
271 printf("SP/SC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes
[i
],
272 param1
.spsc
+ param2
.spsc
);
273 printf("MP/MC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes
[i
],
274 param1
.mpmc
+ param2
.mpmc
);
279 * Test function that determines how long an enqueue + dequeue of a single item
280 * takes on a single lcore. Result is for comparison with the bulk enq+deq.
283 test_single_enqueue_dequeue(void)
285 const unsigned iter_shift
= 24;
286 const unsigned iterations
= 1<<iter_shift
;
290 const uint64_t sc_start
= rte_rdtsc();
291 for (i
= 0; i
< iterations
; i
++) {
292 rte_ring_sp_enqueue(r
, burst
);
293 rte_ring_sc_dequeue(r
, &burst
);
295 const uint64_t sc_end
= rte_rdtsc();
297 const uint64_t mc_start
= rte_rdtsc();
298 for (i
= 0; i
< iterations
; i
++) {
299 rte_ring_mp_enqueue(r
, burst
);
300 rte_ring_mc_dequeue(r
, &burst
);
302 const uint64_t mc_end
= rte_rdtsc();
304 printf("SP/SC single enq/dequeue: %"PRIu64
"\n",
305 (sc_end
-sc_start
) >> iter_shift
);
306 printf("MP/MC single enq/dequeue: %"PRIu64
"\n",
307 (mc_end
-mc_start
) >> iter_shift
);
311 * Test that does both enqueue and dequeue on a core using the burst() API calls
312 * instead of the bulk() calls used in other tests. Results should be the same
313 * as for the bulk function called on a single lcore.
316 test_burst_enqueue_dequeue(void)
318 const unsigned iter_shift
= 23;
319 const unsigned iterations
= 1<<iter_shift
;
321 void *burst
[MAX_BURST
] = {0};
323 for (sz
= 0; sz
< sizeof(bulk_sizes
)/sizeof(bulk_sizes
[0]); sz
++) {
324 const uint64_t sc_start
= rte_rdtsc();
325 for (i
= 0; i
< iterations
; i
++) {
326 rte_ring_sp_enqueue_burst(r
, burst
,
327 bulk_sizes
[sz
], NULL
);
328 rte_ring_sc_dequeue_burst(r
, burst
,
329 bulk_sizes
[sz
], NULL
);
331 const uint64_t sc_end
= rte_rdtsc();
333 const uint64_t mc_start
= rte_rdtsc();
334 for (i
= 0; i
< iterations
; i
++) {
335 rte_ring_mp_enqueue_burst(r
, burst
,
336 bulk_sizes
[sz
], NULL
);
337 rte_ring_mc_dequeue_burst(r
, burst
,
338 bulk_sizes
[sz
], NULL
);
340 const uint64_t mc_end
= rte_rdtsc();
342 uint64_t mc_avg
= ((mc_end
-mc_start
) >> iter_shift
) / bulk_sizes
[sz
];
343 uint64_t sc_avg
= ((sc_end
-sc_start
) >> iter_shift
) / bulk_sizes
[sz
];
345 printf("SP/SC burst enq/dequeue (size: %u): %"PRIu64
"\n", bulk_sizes
[sz
],
347 printf("MP/MC burst enq/dequeue (size: %u): %"PRIu64
"\n", bulk_sizes
[sz
],
352 /* Times enqueue and dequeue on a single lcore */
354 test_bulk_enqueue_dequeue(void)
356 const unsigned iter_shift
= 23;
357 const unsigned iterations
= 1<<iter_shift
;
359 void *burst
[MAX_BURST
] = {0};
361 for (sz
= 0; sz
< sizeof(bulk_sizes
)/sizeof(bulk_sizes
[0]); sz
++) {
362 const uint64_t sc_start
= rte_rdtsc();
363 for (i
= 0; i
< iterations
; i
++) {
364 rte_ring_sp_enqueue_bulk(r
, burst
,
365 bulk_sizes
[sz
], NULL
);
366 rte_ring_sc_dequeue_bulk(r
, burst
,
367 bulk_sizes
[sz
], NULL
);
369 const uint64_t sc_end
= rte_rdtsc();
371 const uint64_t mc_start
= rte_rdtsc();
372 for (i
= 0; i
< iterations
; i
++) {
373 rte_ring_mp_enqueue_bulk(r
, burst
,
374 bulk_sizes
[sz
], NULL
);
375 rte_ring_mc_dequeue_bulk(r
, burst
,
376 bulk_sizes
[sz
], NULL
);
378 const uint64_t mc_end
= rte_rdtsc();
380 double sc_avg
= ((double)(sc_end
-sc_start
) /
381 (iterations
* bulk_sizes
[sz
]));
382 double mc_avg
= ((double)(mc_end
-mc_start
) /
383 (iterations
* bulk_sizes
[sz
]));
385 printf("SP/SC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes
[sz
],
387 printf("MP/MC bulk enq/dequeue (size: %u): %.2F\n", bulk_sizes
[sz
],
395 struct lcore_pair cores
;
396 r
= rte_ring_create(RING_NAME
, RING_SIZE
, rte_socket_id(), 0);
397 if (r
== NULL
&& (r
= rte_ring_lookup(RING_NAME
)) == NULL
)
400 printf("### Testing single element and burst enq/deq ###\n");
401 test_single_enqueue_dequeue();
402 test_burst_enqueue_dequeue();
404 printf("\n### Testing empty dequeue ###\n");
405 test_empty_dequeue();
407 printf("\n### Testing using a single lcore ###\n");
408 test_bulk_enqueue_dequeue();
410 if (get_two_hyperthreads(&cores
) == 0) {
411 printf("\n### Testing using two hyperthreads ###\n");
412 run_on_core_pair(&cores
, enqueue_bulk
, dequeue_bulk
);
414 if (get_two_cores(&cores
) == 0) {
415 printf("\n### Testing using two physical cores ###\n");
416 run_on_core_pair(&cores
, enqueue_bulk
, dequeue_bulk
);
418 if (get_two_sockets(&cores
) == 0) {
419 printf("\n### Testing using two NUMA nodes ###\n");
420 run_on_core_pair(&cores
, enqueue_bulk
, dequeue_bulk
);
425 REGISTER_TEST_COMMAND(ring_perf_autotest
, test_ring_perf
);