]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*- |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
5 | * All rights reserved. | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in | |
15 | * the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * * Neither the name of Intel Corporation nor the names of its | |
18 | * contributors may be used to endorse or promote products derived | |
19 | * from this software without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | #include <string.h> | |
35 | #include <stdio.h> | |
36 | #include <stdlib.h> | |
37 | #include <stdint.h> | |
38 | #include <inttypes.h> | |
39 | #include <stdarg.h> | |
40 | #include <errno.h> | |
41 | #include <sys/queue.h> | |
42 | ||
43 | #include <rte_common.h> | |
44 | #include <rte_log.h> | |
45 | #include <rte_debug.h> | |
46 | #include <rte_memory.h> | |
47 | #include <rte_memzone.h> | |
48 | #include <rte_launch.h> | |
49 | #include <rte_cycles.h> | |
50 | #include <rte_eal.h> | |
51 | #include <rte_per_lcore.h> | |
52 | #include <rte_lcore.h> | |
53 | #include <rte_atomic.h> | |
54 | #include <rte_branch_prediction.h> | |
55 | #include <rte_mempool.h> | |
56 | #include <rte_spinlock.h> | |
57 | #include <rte_malloc.h> | |
58 | ||
59 | #include "test.h" | |
60 | ||
61 | /* | |
62 | * Mempool performance | |
63 | * ======= | |
64 | * | |
65 | * Each core gets *n_keep* objects per bulk of *n_get_bulk*. Then, | |
66 | * objects are put back in the pool per bulk of *n_put_bulk*. | |
67 | * | |
68 | * This sequence is done during TIME_S seconds. | |
69 | * | |
70 | * This test is done on the following configurations: | |
71 | * | |
72 | * - Cores configuration (*cores*) | |
73 | * | |
74 | * - One core with cache | |
75 | * - Two cores with cache | |
76 | * - Max. cores with cache | |
77 | * - One core without cache | |
78 | * - Two cores without cache | |
79 | * - Max. cores without cache | |
80 | * - One core with user-owned cache | |
81 | * - Two cores with user-owned cache | |
82 | * - Max. cores with user-owned cache | |
83 | * | |
84 | * - Bulk size (*n_get_bulk*, *n_put_bulk*) | |
85 | * | |
86 | * - Bulk get from 1 to 32 | |
87 | * - Bulk put from 1 to 32 | |
88 | * | |
89 | * - Number of kept objects (*n_keep*) | |
90 | * | |
91 | * - 32 | |
92 | * - 128 | |
93 | */ | |
94 | ||
/* Number of objects moved (got, then put back) per pass of the timed loop. */
#define N 65536

/* Duration of one measurement, in seconds. */
#define TIME_S 5

/* Size in bytes of each element stored in the test pools. */
#define MEMPOOL_ELT_SIZE 2048

/* Largest supported n_keep; also the size of the per-lcore object table. */
#define MAX_KEEP 128

/*
 * Number of elements in each test pool: every lcore may hold MAX_KEEP
 * objects plus a full cache worth of objects, minus one.
 */
#define MEMPOOL_SIZE \
	((rte_lcore_count() * (MAX_KEEP + RTE_MEMPOOL_CACHE_MAX_SIZE)) - 1)

/* Print the location (function and line) of a test failure. */
#define LOG_ERR() \
	printf("test failed at %s():%d\n", __func__, __LINE__)

/* Report a failure and return -1 from the calling function. */
#define RET_ERR() \
	do { \
		LOG_ERR(); \
		return -1; \
	} while (0)

/* Report a failure, store -1 in 'var' and jump to 'label'. */
#define GOTO_ERR(var, label) \
	do { \
		LOG_ERR(); \
		(var) = -1; \
		goto label; \
	} while (0)
111 | ||
112 | static struct rte_mempool *mp; | |
113 | static struct rte_mempool *mp_cache, *mp_nocache; | |
114 | static int use_external_cache; | |
115 | static unsigned external_cache_size = RTE_MEMPOOL_CACHE_MAX_SIZE; | |
116 | ||
117 | static rte_atomic32_t synchro; | |
118 | ||
119 | /* number of objects in one bulk operation (get or put) */ | |
120 | static unsigned n_get_bulk; | |
121 | static unsigned n_put_bulk; | |
122 | ||
123 | /* number of objects retrived from mempool before putting them back */ | |
124 | static unsigned n_keep; | |
125 | ||
126 | /* number of enqueues / dequeues */ | |
127 | struct mempool_test_stats { | |
128 | uint64_t enq_count; | |
129 | } __rte_cache_aligned; | |
130 | ||
131 | static struct mempool_test_stats stats[RTE_MAX_LCORE]; | |
132 | ||
133 | /* | |
134 | * save the object number in the first 4 bytes of object data. All | |
135 | * other bytes are set to 0. | |
136 | */ | |
137 | static void | |
138 | my_obj_init(struct rte_mempool *mp, __attribute__((unused)) void *arg, | |
139 | void *obj, unsigned i) | |
140 | { | |
141 | uint32_t *objnum = obj; | |
142 | memset(obj, 0, mp->elt_size); | |
143 | *objnum = i; | |
144 | } | |
145 | ||
146 | static int | |
147 | per_lcore_mempool_test(__attribute__((unused)) void *arg) | |
148 | { | |
149 | void *obj_table[MAX_KEEP]; | |
150 | unsigned i, idx; | |
151 | unsigned lcore_id = rte_lcore_id(); | |
152 | int ret = 0; | |
153 | uint64_t start_cycles, end_cycles; | |
154 | uint64_t time_diff = 0, hz = rte_get_timer_hz(); | |
155 | struct rte_mempool_cache *cache; | |
156 | ||
157 | if (use_external_cache) { | |
158 | /* Create a user-owned mempool cache. */ | |
159 | cache = rte_mempool_cache_create(external_cache_size, | |
160 | SOCKET_ID_ANY); | |
161 | if (cache == NULL) | |
162 | RET_ERR(); | |
163 | } else { | |
164 | /* May be NULL if cache is disabled. */ | |
165 | cache = rte_mempool_default_cache(mp, lcore_id); | |
166 | } | |
167 | ||
168 | /* n_get_bulk and n_put_bulk must be divisors of n_keep */ | |
169 | if (((n_keep / n_get_bulk) * n_get_bulk) != n_keep) | |
170 | GOTO_ERR(ret, out); | |
171 | if (((n_keep / n_put_bulk) * n_put_bulk) != n_keep) | |
172 | GOTO_ERR(ret, out); | |
173 | ||
174 | stats[lcore_id].enq_count = 0; | |
175 | ||
176 | /* wait synchro for slaves */ | |
177 | if (lcore_id != rte_get_master_lcore()) | |
178 | while (rte_atomic32_read(&synchro) == 0); | |
179 | ||
180 | start_cycles = rte_get_timer_cycles(); | |
181 | ||
182 | while (time_diff/hz < TIME_S) { | |
183 | for (i = 0; likely(i < (N/n_keep)); i++) { | |
184 | /* get n_keep objects by bulk of n_bulk */ | |
185 | idx = 0; | |
186 | while (idx < n_keep) { | |
187 | ret = rte_mempool_generic_get(mp, | |
188 | &obj_table[idx], | |
189 | n_get_bulk, | |
190 | cache, 0); | |
191 | if (unlikely(ret < 0)) { | |
192 | rte_mempool_dump(stdout, mp); | |
193 | /* in this case, objects are lost... */ | |
194 | GOTO_ERR(ret, out); | |
195 | } | |
196 | idx += n_get_bulk; | |
197 | } | |
198 | ||
199 | /* put the objects back */ | |
200 | idx = 0; | |
201 | while (idx < n_keep) { | |
202 | rte_mempool_generic_put(mp, &obj_table[idx], | |
203 | n_put_bulk, | |
204 | cache, 0); | |
205 | idx += n_put_bulk; | |
206 | } | |
207 | } | |
208 | end_cycles = rte_get_timer_cycles(); | |
209 | time_diff = end_cycles - start_cycles; | |
210 | stats[lcore_id].enq_count += N; | |
211 | } | |
212 | ||
213 | out: | |
214 | if (use_external_cache) { | |
215 | rte_mempool_cache_flush(cache, mp); | |
216 | rte_mempool_cache_free(cache); | |
217 | } | |
218 | ||
219 | return ret; | |
220 | } | |
221 | ||
222 | /* launch all the per-lcore test, and display the result */ | |
223 | static int | |
224 | launch_cores(unsigned cores) | |
225 | { | |
226 | unsigned lcore_id; | |
227 | uint64_t rate; | |
228 | int ret; | |
229 | unsigned cores_save = cores; | |
230 | ||
231 | rte_atomic32_set(&synchro, 0); | |
232 | ||
233 | /* reset stats */ | |
234 | memset(stats, 0, sizeof(stats)); | |
235 | ||
236 | printf("mempool_autotest cache=%u cores=%u n_get_bulk=%u " | |
237 | "n_put_bulk=%u n_keep=%u ", | |
238 | use_external_cache ? | |
239 | external_cache_size : (unsigned) mp->cache_size, | |
240 | cores, n_get_bulk, n_put_bulk, n_keep); | |
241 | ||
242 | if (rte_mempool_avail_count(mp) != MEMPOOL_SIZE) { | |
243 | printf("mempool is not full\n"); | |
244 | return -1; | |
245 | } | |
246 | ||
247 | RTE_LCORE_FOREACH_SLAVE(lcore_id) { | |
248 | if (cores == 1) | |
249 | break; | |
250 | cores--; | |
251 | rte_eal_remote_launch(per_lcore_mempool_test, | |
252 | NULL, lcore_id); | |
253 | } | |
254 | ||
255 | /* start synchro and launch test on master */ | |
256 | rte_atomic32_set(&synchro, 1); | |
257 | ||
258 | ret = per_lcore_mempool_test(NULL); | |
259 | ||
260 | cores = cores_save; | |
261 | RTE_LCORE_FOREACH_SLAVE(lcore_id) { | |
262 | if (cores == 1) | |
263 | break; | |
264 | cores--; | |
265 | if (rte_eal_wait_lcore(lcore_id) < 0) | |
266 | ret = -1; | |
267 | } | |
268 | ||
269 | if (ret < 0) { | |
270 | printf("per-lcore test returned -1\n"); | |
271 | return -1; | |
272 | } | |
273 | ||
274 | rate = 0; | |
275 | for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) | |
276 | rate += (stats[lcore_id].enq_count / TIME_S); | |
277 | ||
278 | printf("rate_persec=%" PRIu64 "\n", rate); | |
279 | ||
280 | return 0; | |
281 | } | |
282 | ||
283 | /* for a given number of core, launch all test cases */ | |
284 | static int | |
285 | do_one_mempool_test(unsigned cores) | |
286 | { | |
287 | unsigned bulk_tab_get[] = { 1, 4, 32, 0 }; | |
288 | unsigned bulk_tab_put[] = { 1, 4, 32, 0 }; | |
289 | unsigned keep_tab[] = { 32, 128, 0 }; | |
290 | unsigned *get_bulk_ptr; | |
291 | unsigned *put_bulk_ptr; | |
292 | unsigned *keep_ptr; | |
293 | int ret; | |
294 | ||
295 | for (get_bulk_ptr = bulk_tab_get; *get_bulk_ptr; get_bulk_ptr++) { | |
296 | for (put_bulk_ptr = bulk_tab_put; *put_bulk_ptr; put_bulk_ptr++) { | |
297 | for (keep_ptr = keep_tab; *keep_ptr; keep_ptr++) { | |
298 | ||
299 | n_get_bulk = *get_bulk_ptr; | |
300 | n_put_bulk = *put_bulk_ptr; | |
301 | n_keep = *keep_ptr; | |
302 | ret = launch_cores(cores); | |
303 | ||
304 | if (ret < 0) | |
305 | return -1; | |
306 | } | |
307 | } | |
308 | } | |
309 | return 0; | |
310 | } | |
311 | ||
312 | static int | |
313 | test_mempool_perf(void) | |
314 | { | |
315 | rte_atomic32_init(&synchro); | |
316 | ||
317 | /* create a mempool (without cache) */ | |
318 | if (mp_nocache == NULL) | |
319 | mp_nocache = rte_mempool_create("perf_test_nocache", MEMPOOL_SIZE, | |
320 | MEMPOOL_ELT_SIZE, 0, 0, | |
321 | NULL, NULL, | |
322 | my_obj_init, NULL, | |
323 | SOCKET_ID_ANY, 0); | |
324 | if (mp_nocache == NULL) | |
325 | return -1; | |
326 | ||
327 | /* create a mempool (with cache) */ | |
328 | if (mp_cache == NULL) | |
329 | mp_cache = rte_mempool_create("perf_test_cache", MEMPOOL_SIZE, | |
330 | MEMPOOL_ELT_SIZE, | |
331 | RTE_MEMPOOL_CACHE_MAX_SIZE, 0, | |
332 | NULL, NULL, | |
333 | my_obj_init, NULL, | |
334 | SOCKET_ID_ANY, 0); | |
335 | if (mp_cache == NULL) | |
336 | return -1; | |
337 | ||
338 | /* performance test with 1, 2 and max cores */ | |
339 | printf("start performance test (without cache)\n"); | |
340 | mp = mp_nocache; | |
341 | ||
342 | if (do_one_mempool_test(1) < 0) | |
343 | return -1; | |
344 | ||
345 | if (do_one_mempool_test(2) < 0) | |
346 | return -1; | |
347 | ||
348 | if (do_one_mempool_test(rte_lcore_count()) < 0) | |
349 | return -1; | |
350 | ||
351 | /* performance test with 1, 2 and max cores */ | |
352 | printf("start performance test (with cache)\n"); | |
353 | mp = mp_cache; | |
354 | ||
355 | if (do_one_mempool_test(1) < 0) | |
356 | return -1; | |
357 | ||
358 | if (do_one_mempool_test(2) < 0) | |
359 | return -1; | |
360 | ||
361 | if (do_one_mempool_test(rte_lcore_count()) < 0) | |
362 | return -1; | |
363 | ||
364 | /* performance test with 1, 2 and max cores */ | |
365 | printf("start performance test (with user-owned cache)\n"); | |
366 | mp = mp_nocache; | |
367 | use_external_cache = 1; | |
368 | ||
369 | if (do_one_mempool_test(1) < 0) | |
370 | return -1; | |
371 | ||
372 | if (do_one_mempool_test(2) < 0) | |
373 | return -1; | |
374 | ||
375 | if (do_one_mempool_test(rte_lcore_count()) < 0) | |
376 | return -1; | |
377 | ||
378 | rte_mempool_list_dump(stdout); | |
379 | ||
380 | return 0; | |
381 | } | |
382 | ||
383 | REGISTER_TEST_COMMAND(mempool_perf_autotest, test_mempool_perf); |