]>
Commit | Line | Data |
---|---|---|
11fdf7f2 TL |
1 | /* SPDX-License-Identifier: BSD-3-Clause |
2 | * Copyright(c) 2010-2014 Intel Corporation | |
7c673cae FG |
3 | */ |
4 | ||
5 | #include <string.h> | |
6 | #include <stdio.h> | |
7 | #include <stdlib.h> | |
8 | #include <stdint.h> | |
9 | #include <inttypes.h> | |
10 | #include <stdarg.h> | |
11 | #include <errno.h> | |
12 | #include <sys/queue.h> | |
13 | ||
14 | #include <rte_common.h> | |
15 | #include <rte_log.h> | |
16 | #include <rte_debug.h> | |
17 | #include <rte_memory.h> | |
7c673cae FG |
18 | #include <rte_launch.h> |
19 | #include <rte_cycles.h> | |
20 | #include <rte_eal.h> | |
21 | #include <rte_per_lcore.h> | |
22 | #include <rte_lcore.h> | |
23 | #include <rte_atomic.h> | |
24 | #include <rte_branch_prediction.h> | |
25 | #include <rte_mempool.h> | |
26 | #include <rte_spinlock.h> | |
27 | #include <rte_malloc.h> | |
11fdf7f2 | 28 | #include <rte_mbuf_pool_ops.h> |
7c673cae FG |
29 | |
30 | #include "test.h" | |
31 | ||
32 | /* | |
33 | * Mempool performance | |
34 | * ======= | |
35 | * | |
36 | * Each core gets *n_keep* objects per bulk of *n_get_bulk*. Then, | |
37 | * objects are put back in the pool per bulk of *n_put_bulk*. | |
38 | * | |
39 | * This sequence is done during TIME_S seconds. | |
40 | * | |
41 | * This test is done on the following configurations: | |
42 | * | |
43 | * - Cores configuration (*cores*) | |
44 | * | |
45 | * - One core with cache | |
46 | * - Two cores with cache | |
47 | * - Max. cores with cache | |
48 | * - One core without cache | |
49 | * - Two cores without cache | |
50 | * - Max. cores without cache | |
51 | * - One core with user-owned cache | |
52 | * - Two cores with user-owned cache | |
53 | * - Max. cores with user-owned cache | |
54 | * | |
55 | * - Bulk size (*n_get_bulk*, *n_put_bulk*) | |
56 | * | |
57 | * - Bulk get from 1 to 32 | |
58 | * - Bulk put from 1 to 32 | |
59 | * | |
60 | * - Number of kept objects (*n_keep*) | |
61 | * | |
62 | * - 32 | |
63 | * - 128 | |
64 | */ | |
65 | ||
/*
 * Number of objects accounted to a lcore each time it re-checks the timer:
 * the worker runs N / n_keep get/put rounds, then adds N to its counter.
 */
#define N 65536
/* Minimum duration, in seconds, of one measurement. */
#define TIME_S 5
/* Element size passed to the mempool creation calls. */
#define MEMPOOL_ELT_SIZE 2048
/* Capacity of the per-lcore object table, hence the largest usable n_keep. */
#define MAX_KEEP 128
/* Number of objects requested for each mempool created by the test. */
#define MEMPOOL_SIZE \
	((rte_lcore_count() * (MAX_KEEP + RTE_MEMPOOL_CACHE_MAX_SIZE)) - 1)

/* Print the function name and line number of the failing check. */
#define LOG_ERR() printf("test failed at %s():%d\n", __func__, __LINE__)

/* Log the failure location and return -1 from the enclosing function. */
#define RET_ERR() do {							\
		LOG_ERR();						\
		return -1;						\
	} while (0)

/*
 * Log the failure location, store -1 in the lvalue `var` and jump to
 * `label`. `var` is parenthesized so that any lvalue expression (for
 * example `*ret_ptr`) expands safely.
 */
#define GOTO_ERR(var, label) do {					\
		LOG_ERR();						\
		(var) = -1;						\
		goto label;						\
	} while (0)
82 | ||
7c673cae FG |
83 | static int use_external_cache; |
84 | static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE; | |
85 | ||
86 | static rte_atomic32_t synchro; | |
87 | ||
88 | /* number of objects in one bulk operation (get or put) */ | |
89 | static unsigned n_get_bulk; | |
90 | static unsigned n_put_bulk; | |
91 | ||
92 | /* number of objects retrived from mempool before putting them back */ | |
93 | static unsigned n_keep; | |
94 | ||
95 | /* number of enqueues / dequeues */ | |
96 | struct mempool_test_stats { | |
97 | uint64_t enq_count; | |
98 | } __rte_cache_aligned; | |
99 | ||
100 | static struct mempool_test_stats stats[RTE_MAX_LCORE]; | |
101 | ||
102 | /* | |
103 | * save the object number in the first 4 bytes of object data. All | |
104 | * other bytes are set to 0. | |
105 | */ | |
106 | static void | |
107 | my_obj_init(struct rte_mempool *mp, __attribute__((unused)) void *arg, | |
108 | void *obj, unsigned i) | |
109 | { | |
110 | uint32_t *objnum = obj; | |
111 | memset(obj, 0, mp->elt_size); | |
112 | *objnum = i; | |
113 | } | |
114 | ||
115 | static int | |
11fdf7f2 | 116 | per_lcore_mempool_test(void *arg) |
7c673cae FG |
117 | { |
118 | void *obj_table[MAX_KEEP]; | |
119 | unsigned i, idx; | |
11fdf7f2 | 120 | struct rte_mempool *mp = arg; |
7c673cae FG |
121 | unsigned lcore_id = rte_lcore_id(); |
122 | int ret = 0; | |
123 | uint64_t start_cycles, end_cycles; | |
124 | uint64_t time_diff = 0, hz = rte_get_timer_hz(); | |
125 | struct rte_mempool_cache *cache; | |
126 | ||
127 | if (use_external_cache) { | |
128 | /* Create a user-owned mempool cache. */ | |
129 | cache = rte_mempool_cache_create(external_cache_size, | |
130 | SOCKET_ID_ANY); | |
131 | if (cache == NULL) | |
132 | RET_ERR(); | |
133 | } else { | |
134 | /* May be NULL if cache is disabled. */ | |
135 | cache = rte_mempool_default_cache(mp, lcore_id); | |
136 | } | |
137 | ||
138 | /* n_get_bulk and n_put_bulk must be divisors of n_keep */ | |
139 | if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep) | |
140 | GOTO_ERR(ret, out); | |
141 | if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep) | |
142 | GOTO_ERR(ret, out); | |
143 | ||
144 | stats[lcore_id].enq_count = 0; | |
145 | ||
146 | /* wait synchro for slaves */ | |
147 | if (lcore_id != rte_get_master_lcore()) | |
148 | while (rte_atomic32_read(&synchro) == 0); | |
149 | ||
150 | start_cycles = rte_get_timer_cycles(); | |
151 | ||
152 | while (time_diff/hz < TIME_S) { | |
153 | for (i = 0; likely(i < (N/n_keep)); i++) { | |
154 | /* get n_keep objects by bulk of n_bulk */ | |
155 | idx = 0; | |
156 | while (idx < n_keep) { | |
157 | ret = rte_mempool_generic_get(mp, | |
158 | &obj_table[idx], | |
159 | n_get_bulk, | |
11fdf7f2 | 160 | cache); |
7c673cae FG |
161 | if (unlikely(ret < 0)) { |
162 | rte_mempool_dump(stdout, mp); | |
163 | /* in this case, objects are lost... */ | |
164 | GOTO_ERR(ret, out); | |
165 | } | |
166 | idx += n_get_bulk; | |
167 | } | |
168 | ||
169 | /* put the objects back */ | |
170 | idx = 0; | |
171 | while (idx < n_keep) { | |
172 | rte_mempool_generic_put(mp, &obj_table[idx], | |
173 | n_put_bulk, | |
11fdf7f2 | 174 | cache); |
7c673cae FG |
175 | idx += n_put_bulk; |
176 | } | |
177 | } | |
178 | end_cycles = rte_get_timer_cycles(); | |
179 | time_diff = end_cycles - start_cycles; | |
180 | stats[lcore_id].enq_count += N; | |
181 | } | |
182 | ||
183 | out: | |
184 | if (use_external_cache) { | |
185 | rte_mempool_cache_flush(cache, mp); | |
186 | rte_mempool_cache_free(cache); | |
187 | } | |
188 | ||
189 | return ret; | |
190 | } | |
191 | ||
192 | /* launch all the per-lcore test, and display the result */ | |
193 | static int | |
11fdf7f2 | 194 | launch_cores(struct rte_mempool *mp, unsigned int cores) |
7c673cae FG |
195 | { |
196 | unsigned lcore_id; | |
197 | uint64_t rate; | |
198 | int ret; | |
199 | unsigned cores_save = cores; | |
200 | ||
201 | rte_atomic32_set(&synchro, 0); | |
202 | ||
203 | /* reset stats */ | |
204 | memset(stats, 0, sizeof(stats)); | |
205 | ||
206 | printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u " | |
207 | "n_put_bulk=%u n_keep=%u ", | |
208 | use_external_cache ? | |
209 | external_cache_size : (unsigned) mp->cache_size, | |
210 | cores, n_get_bulk, n_put_bulk, n_keep); | |
211 | ||
212 | if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) { | |
213 | printf("mempool is not full\n"); | |
214 | return -1; | |
215 | } | |
216 | ||
217 | RTE_LCORE_FOREACH_SLAVE(lcore_id) { | |
218 | if (cores == 1) | |
219 | break; | |
220 | cores--; | |
221 | rte_eal_remote_launch(per_lcore_mempool_test, | |
11fdf7f2 | 222 | mp, lcore_id); |
7c673cae FG |
223 | } |
224 | ||
225 | /* start synchro and launch test on master */ | |
226 | rte_atomic32_set(&synchro, 1); | |
227 | ||
11fdf7f2 | 228 | ret = per_lcore_mempool_test(mp); |
7c673cae FG |
229 | |
230 | cores = cores_save; | |
231 | RTE_LCORE_FOREACH_SLAVE(lcore_id) { | |
232 | if (cores == 1) | |
233 | break; | |
234 | cores--; | |
235 | if (rte_eal_wait_lcore(lcore_id) < 0) | |
236 | ret = -1; | |
237 | } | |
238 | ||
239 | if (ret < 0) { | |
240 | printf("per-lcore test returned -1\n"); | |
241 | return -1; | |
242 | } | |
243 | ||
244 | rate = 0; | |
245 | for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) | |
246 | rate += (stats[lcore_id].enq_count / TIME_S); | |
247 | ||
248 | printf("rate_persec=%" PRIu64 "\n", rate); | |
249 | ||
250 | return 0; | |
251 | } | |
252 | ||
253 | /* for a given number of core, launch all test cases */ | |
254 | static int | |
11fdf7f2 | 255 | do_one_mempool_test(struct rte_mempool *mp, unsigned int cores) |
7c673cae FG |
256 | { |
257 | unsigned bulk_tab_get[] = { 1, 4, 32, 0 }; | |
258 | unsigned bulk_tab_put[] = { 1, 4, 32, 0 }; | |
259 | unsigned keep_tab[] = { 32, 128, 0 }; | |
260 | unsigned *get_bulk_ptr; | |
261 | unsigned *put_bulk_ptr; | |
262 | unsigned *keep_ptr; | |
263 | int ret; | |
264 | ||
265 | for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) { | |
266 | for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) { | |
267 | for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) { | |
268 | ||
269 | n_get_bulk = *get_bulk_ptr; | |
270 | n_put_bulk = *put_bulk_ptr; | |
271 | n_keep = *keep_ptr; | |
11fdf7f2 | 272 | ret = launch_cores(mp, cores); |
7c673cae FG |
273 | |
274 | if (ret < 0) | |
275 | return -1; | |
276 | } | |
277 | } | |
278 | } | |
279 | return 0; | |
280 | } | |
281 | ||
282 | static int | |
283 | test_mempool_perf(void) | |
284 | { | |
11fdf7f2 TL |
285 | struct rte_mempool *mp_cache = NULL; |
286 | struct rte_mempool *mp_nocache = NULL; | |
287 | struct rte_mempool *default_pool = NULL; | |
288 | const char *default_pool_ops; | |
289 | int ret = -1; | |
290 | ||
7c673cae FG |
291 | rte_atomic32_init(&synchro); |
292 | ||
293 | /* create a mempool (without cache) */ | |
11fdf7f2 TL |
294 | mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE, |
295 | MEMPOOL_ELT_SIZE, 0, 0, | |
296 | NULL, NULL, | |
297 | my_obj_init, NULL, | |
298 | SOCKET_ID_ANY, 0); | |
7c673cae | 299 | if (mp_nocache == NULL) |
11fdf7f2 | 300 | goto err; |
7c673cae FG |
301 | |
302 | /* create a mempool (with cache) */ | |
11fdf7f2 TL |
303 | mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE, |
304 | MEMPOOL_ELT_SIZE, | |
305 | RTE_MEMPOOL_CACHE_MAX_SIZE, 0, | |
306 | NULL, NULL, | |
307 | my_obj_init, NULL, | |
308 | SOCKET_ID_ANY, 0); | |
7c673cae | 309 | if (mp_cache == NULL) |
11fdf7f2 TL |
310 | goto err; |
311 | ||
312 | default_pool_ops = rte_mbuf_best_mempool_ops(); | |
313 | /* Create a mempool based on Default handler */ | |
314 | default_pool = rte_mempool_create_empty("default_pool", | |
315 | MEMPOOL_SIZE, | |
316 | MEMPOOL_ELT_SIZE, | |
317 | 0, 0, | |
318 | SOCKET_ID_ANY, 0); | |
319 | ||
320 | if (default_pool == NULL) { | |
321 | printf("cannot allocate %s mempool\n", default_pool_ops); | |
322 | goto err; | |
323 | } | |
324 | ||
325 | if (rte_mempool_set_ops_byname(default_pool, default_pool_ops, NULL) | |
326 | < 0) { | |
327 | printf("cannot set %s handler\n", default_pool_ops); | |
328 | goto err; | |
329 | } | |
330 | ||
331 | if (rte_mempool_populate_default(default_pool) < 0) { | |
332 | printf("cannot populate %s mempool\n", default_pool_ops); | |
333 | goto err; | |
334 | } | |
335 | ||
336 | rte_mempool_obj_iter(default_pool, my_obj_init, NULL); | |
7c673cae FG |
337 | |
338 | /* performance test with 1, 2 and max cores */ | |
339 | printf("start performance test (without cache)\n"); | |
7c673cae | 340 | |
11fdf7f2 TL |
341 | if (do_one_mempool_test(mp_nocache, 1) < 0) |
342 | goto err; | |
7c673cae | 343 | |
11fdf7f2 TL |
344 | if (do_one_mempool_test(mp_nocache, 2) < 0) |
345 | goto err; | |
7c673cae | 346 | |
11fdf7f2 TL |
347 | if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0) |
348 | goto err; | |
349 | ||
350 | /* performance test with 1, 2 and max cores */ | |
351 | printf("start performance test for %s (without cache)\n", | |
352 | default_pool_ops); | |
353 | ||
354 | if (do_one_mempool_test(default_pool, 1) < 0) | |
355 | goto err; | |
356 | ||
357 | if (do_one_mempool_test(default_pool, 2) < 0) | |
358 | goto err; | |
359 | ||
360 | if (do_one_mempool_test(default_pool, rte_lcore_count()) < 0) | |
361 | goto err; | |
7c673cae FG |
362 | |
363 | /* performance test with 1, 2 and max cores */ | |
364 | printf("start performance test (with cache)\n"); | |
7c673cae | 365 | |
11fdf7f2 TL |
366 | if (do_one_mempool_test(mp_cache, 1) < 0) |
367 | goto err; | |
7c673cae | 368 | |
11fdf7f2 TL |
369 | if (do_one_mempool_test(mp_cache, 2) < 0) |
370 | goto err; | |
7c673cae | 371 | |
11fdf7f2 TL |
372 | if (do_one_mempool_test(mp_cache, rte_lcore_count()) < 0) |
373 | goto err; | |
7c673cae FG |
374 | |
375 | /* performance test with 1, 2 and max cores */ | |
376 | printf("start performance test (with user-owned cache)\n"); | |
7c673cae FG |
377 | use_external_cache = 1; |
378 | ||
11fdf7f2 TL |
379 | if (do_one_mempool_test(mp_nocache, 1) < 0) |
380 | goto err; | |
7c673cae | 381 | |
11fdf7f2 TL |
382 | if (do_one_mempool_test(mp_nocache, 2) < 0) |
383 | goto err; | |
7c673cae | 384 | |
11fdf7f2 TL |
385 | if (do_one_mempool_test(mp_nocache, rte_lcore_count()) < 0) |
386 | goto err; | |
7c673cae FG |
387 | |
388 | rte_mempool_list_dump(stdout); | |
389 | ||
11fdf7f2 TL |
390 | ret = 0; |
391 | ||
392 | err: | |
393 | rte_mempool_free(mp_cache); | |
394 | rte_mempool_free(mp_nocache); | |
395 | rte_mempool_free(default_pool); | |
396 | return ret; | |
7c673cae FG |
397 | } |
398 | ||
399 | REGISTER_TEST_COMMAND(mempool_perf_autotest, test_mempool_perf); |