]>
Commit | Line | Data |
---|---|---|
7c673cae FG |
1 | /*- |
2 | * BSD LICENSE | |
3 | * | |
4 | * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. | |
5 | * All rights reserved. | |
6 | * | |
7 | * Redistribution and use in source and binary forms, with or without | |
8 | * modification, are permitted provided that the following conditions | |
9 | * are met: | |
10 | * | |
11 | * * Redistributions of source code must retain the above copyright | |
12 | * notice, this list of conditions and the following disclaimer. | |
13 | * * Redistributions in binary form must reproduce the above copyright | |
14 | * notice, this list of conditions and the following disclaimer in | |
15 | * the documentation and/or other materials provided with the | |
16 | * distribution. | |
17 | * * Neither the name of Intel Corporation nor the names of its | |
18 | * contributors may be used to endorse or promote products derived | |
19 | * from this software without specific prior written permission. | |
20 | * | |
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 | */ | |
33 | ||
34 | #include <stdio.h> | |
35 | #include <stdint.h> | |
36 | #include <stdlib.h> | |
37 | #include <math.h> | |
38 | ||
39 | #include <rte_cycles.h> | |
40 | #include <rte_random.h> | |
41 | #include <rte_branch_prediction.h> | |
42 | #include <rte_ip.h> | |
43 | #include <rte_lpm.h> | |
44 | ||
45 | #include "test.h" | |
46 | #include "test_xmmt_ops.h" | |
47 | ||
48 | #define TEST_LPM_ASSERT(cond) do { \ | |
49 | if (!(cond)) { \ | |
50 | printf("Error at line %d: \n", __LINE__); \ | |
51 | return -1; \ | |
52 | } \ | |
53 | } while(0) | |
54 | ||
55 | #define ITERATIONS (1 << 10) | |
56 | #define BATCH_SIZE (1 << 12) | |
57 | #define BULK_SIZE 32 | |
58 | ||
59 | #define MAX_RULE_NUM (1200000) | |
60 | ||
61 | struct route_rule { | |
62 | uint32_t ip; | |
63 | uint8_t depth; | |
64 | }; | |
65 | ||
66 | struct route_rule large_route_table[MAX_RULE_NUM]; | |
67 | ||
68 | static uint32_t num_route_entries; | |
69 | #define NUM_ROUTE_ENTRIES num_route_entries | |
70 | ||
71 | enum { | |
72 | IP_CLASS_A, | |
73 | IP_CLASS_B, | |
74 | IP_CLASS_C | |
75 | }; | |
76 | ||
77 | /* struct route_rule_count defines the total number of rules in following a/b/c | |
78 | * each item in a[]/b[]/c[] is the number of common IP address class A/B/C, not | |
79 | * including the ones for private local network. | |
80 | */ | |
81 | struct route_rule_count { | |
82 | uint32_t a[RTE_LPM_MAX_DEPTH]; | |
83 | uint32_t b[RTE_LPM_MAX_DEPTH]; | |
84 | uint32_t c[RTE_LPM_MAX_DEPTH]; | |
85 | }; | |
86 | ||
87 | /* All following numbers of each depth of each common IP class are just | |
88 | * got from previous large constant table in app/test/test_lpm_routes.h . | |
89 | * In order to match similar performance, they keep same depth and IP | |
90 | * address coverage as previous constant table. These numbers don't | |
91 | * include any private local IP address. As previous large const rule | |
92 | * table was just dumped from a real router, there are no any IP address | |
93 | * in class C or D. | |
94 | */ | |
95 | static struct route_rule_count rule_count = { | |
96 | .a = { /* IP class A in which the most significant bit is 0 */ | |
97 | 0, /* depth = 1 */ | |
98 | 0, /* depth = 2 */ | |
99 | 1, /* depth = 3 */ | |
100 | 0, /* depth = 4 */ | |
101 | 2, /* depth = 5 */ | |
102 | 1, /* depth = 6 */ | |
103 | 3, /* depth = 7 */ | |
104 | 185, /* depth = 8 */ | |
105 | 26, /* depth = 9 */ | |
106 | 16, /* depth = 10 */ | |
107 | 39, /* depth = 11 */ | |
108 | 144, /* depth = 12 */ | |
109 | 233, /* depth = 13 */ | |
110 | 528, /* depth = 14 */ | |
111 | 866, /* depth = 15 */ | |
112 | 3856, /* depth = 16 */ | |
113 | 3268, /* depth = 17 */ | |
114 | 5662, /* depth = 18 */ | |
115 | 17301, /* depth = 19 */ | |
116 | 22226, /* depth = 20 */ | |
117 | 11147, /* depth = 21 */ | |
118 | 16746, /* depth = 22 */ | |
119 | 17120, /* depth = 23 */ | |
120 | 77578, /* depth = 24 */ | |
121 | 401, /* depth = 25 */ | |
122 | 656, /* depth = 26 */ | |
123 | 1107, /* depth = 27 */ | |
124 | 1121, /* depth = 28 */ | |
125 | 2316, /* depth = 29 */ | |
126 | 717, /* depth = 30 */ | |
127 | 10, /* depth = 31 */ | |
128 | 66 /* depth = 32 */ | |
129 | }, | |
130 | .b = { /* IP class A in which the most 2 significant bits are 10 */ | |
131 | 0, /* depth = 1 */ | |
132 | 0, /* depth = 2 */ | |
133 | 0, /* depth = 3 */ | |
134 | 0, /* depth = 4 */ | |
135 | 1, /* depth = 5 */ | |
136 | 1, /* depth = 6 */ | |
137 | 1, /* depth = 7 */ | |
138 | 3, /* depth = 8 */ | |
139 | 3, /* depth = 9 */ | |
140 | 30, /* depth = 10 */ | |
141 | 25, /* depth = 11 */ | |
142 | 168, /* depth = 12 */ | |
143 | 305, /* depth = 13 */ | |
144 | 569, /* depth = 14 */ | |
145 | 1129, /* depth = 15 */ | |
146 | 50800, /* depth = 16 */ | |
147 | 1645, /* depth = 17 */ | |
148 | 1820, /* depth = 18 */ | |
149 | 3506, /* depth = 19 */ | |
150 | 3258, /* depth = 20 */ | |
151 | 3424, /* depth = 21 */ | |
152 | 4971, /* depth = 22 */ | |
153 | 6885, /* depth = 23 */ | |
154 | 39771, /* depth = 24 */ | |
155 | 424, /* depth = 25 */ | |
156 | 170, /* depth = 26 */ | |
157 | 433, /* depth = 27 */ | |
158 | 92, /* depth = 28 */ | |
159 | 366, /* depth = 29 */ | |
160 | 377, /* depth = 30 */ | |
161 | 2, /* depth = 31 */ | |
162 | 200 /* depth = 32 */ | |
163 | }, | |
164 | .c = { /* IP class A in which the most 3 significant bits are 110 */ | |
165 | 0, /* depth = 1 */ | |
166 | 0, /* depth = 2 */ | |
167 | 0, /* depth = 3 */ | |
168 | 0, /* depth = 4 */ | |
169 | 0, /* depth = 5 */ | |
170 | 0, /* depth = 6 */ | |
171 | 0, /* depth = 7 */ | |
172 | 12, /* depth = 8 */ | |
173 | 8, /* depth = 9 */ | |
174 | 9, /* depth = 10 */ | |
175 | 33, /* depth = 11 */ | |
176 | 69, /* depth = 12 */ | |
177 | 237, /* depth = 13 */ | |
178 | 1007, /* depth = 14 */ | |
179 | 1717, /* depth = 15 */ | |
180 | 14663, /* depth = 16 */ | |
181 | 8070, /* depth = 17 */ | |
182 | 16185, /* depth = 18 */ | |
183 | 48261, /* depth = 19 */ | |
184 | 36870, /* depth = 20 */ | |
185 | 33960, /* depth = 21 */ | |
186 | 50638, /* depth = 22 */ | |
187 | 61422, /* depth = 23 */ | |
188 | 466549, /* depth = 24 */ | |
189 | 1829, /* depth = 25 */ | |
190 | 4824, /* depth = 26 */ | |
191 | 4927, /* depth = 27 */ | |
192 | 5914, /* depth = 28 */ | |
193 | 10254, /* depth = 29 */ | |
194 | 4905, /* depth = 30 */ | |
195 | 1, /* depth = 31 */ | |
196 | 716 /* depth = 32 */ | |
197 | } | |
198 | }; | |
199 | ||
200 | static void generate_random_rule_prefix(uint32_t ip_class, uint8_t depth) | |
201 | { | |
202 | /* IP address class A, the most significant bit is 0 */ | |
203 | #define IP_HEAD_MASK_A 0x00000000 | |
204 | #define IP_HEAD_BIT_NUM_A 1 | |
205 | ||
206 | /* IP address class B, the most significant 2 bits are 10 */ | |
207 | #define IP_HEAD_MASK_B 0x80000000 | |
208 | #define IP_HEAD_BIT_NUM_B 2 | |
209 | ||
210 | /* IP address class C, the most significant 3 bits are 110 */ | |
211 | #define IP_HEAD_MASK_C 0xC0000000 | |
212 | #define IP_HEAD_BIT_NUM_C 3 | |
213 | ||
214 | uint32_t class_depth; | |
215 | uint32_t range; | |
216 | uint32_t mask; | |
217 | uint32_t step; | |
218 | uint32_t start; | |
219 | uint32_t fixed_bit_num; | |
220 | uint32_t ip_head_mask; | |
221 | uint32_t rule_num; | |
222 | uint32_t k; | |
223 | struct route_rule *ptr_rule; | |
224 | ||
225 | if (ip_class == IP_CLASS_A) { /* IP Address class A */ | |
226 | fixed_bit_num = IP_HEAD_BIT_NUM_A; | |
227 | ip_head_mask = IP_HEAD_MASK_A; | |
228 | rule_num = rule_count.a[depth - 1]; | |
229 | } else if (ip_class == IP_CLASS_B) { /* IP Address class B */ | |
230 | fixed_bit_num = IP_HEAD_BIT_NUM_B; | |
231 | ip_head_mask = IP_HEAD_MASK_B; | |
232 | rule_num = rule_count.b[depth - 1]; | |
233 | } else { /* IP Address class C */ | |
234 | fixed_bit_num = IP_HEAD_BIT_NUM_C; | |
235 | ip_head_mask = IP_HEAD_MASK_C; | |
236 | rule_num = rule_count.c[depth - 1]; | |
237 | } | |
238 | ||
239 | if (rule_num == 0) | |
240 | return; | |
241 | ||
242 | /* the number of rest bits which don't include the most significant | |
243 | * fixed bits for this IP address class | |
244 | */ | |
245 | class_depth = depth - fixed_bit_num; | |
246 | ||
247 | /* range is the maximum number of rules for this depth and | |
248 | * this IP address class | |
249 | */ | |
250 | range = 1 << class_depth; | |
251 | ||
252 | /* only mask the most depth significant generated bits | |
253 | * except fixed bits for IP address class | |
254 | */ | |
255 | mask = range - 1; | |
256 | ||
257 | /* Widen coverage of IP address in generated rules */ | |
258 | if (range <= rule_num) | |
259 | step = 1; | |
260 | else | |
261 | step = round((double)range / rule_num); | |
262 | ||
263 | /* Only generate rest bits except the most significant | |
264 | * fixed bits for IP address class | |
265 | */ | |
266 | start = lrand48() & mask; | |
267 | ptr_rule = &large_route_table[num_route_entries]; | |
268 | for (k = 0; k < rule_num; k++) { | |
269 | ptr_rule->ip = (start << (RTE_LPM_MAX_DEPTH - depth)) | |
270 | | ip_head_mask; | |
271 | ptr_rule->depth = depth; | |
272 | ptr_rule++; | |
273 | start = (start + step) & mask; | |
274 | } | |
275 | num_route_entries += rule_num; | |
276 | } | |
277 | ||
278 | static void insert_rule_in_random_pos(uint32_t ip, uint8_t depth) | |
279 | { | |
280 | uint32_t pos; | |
281 | int try_count = 0; | |
282 | struct route_rule tmp; | |
283 | ||
284 | do { | |
285 | pos = lrand48(); | |
286 | try_count++; | |
287 | } while ((try_count < 10) && (pos > num_route_entries)); | |
288 | ||
289 | if ((pos > num_route_entries) || (pos >= MAX_RULE_NUM)) | |
290 | pos = num_route_entries >> 1; | |
291 | ||
292 | tmp = large_route_table[pos]; | |
293 | large_route_table[pos].ip = ip; | |
294 | large_route_table[pos].depth = depth; | |
295 | if (num_route_entries < MAX_RULE_NUM) | |
296 | large_route_table[num_route_entries++] = tmp; | |
297 | } | |
298 | ||
299 | static void generate_large_route_rule_table(void) | |
300 | { | |
301 | uint32_t ip_class; | |
302 | uint8_t depth; | |
303 | ||
304 | num_route_entries = 0; | |
305 | memset(large_route_table, 0, sizeof(large_route_table)); | |
306 | ||
307 | for (ip_class = IP_CLASS_A; ip_class <= IP_CLASS_C; ip_class++) { | |
308 | for (depth = 1; depth <= RTE_LPM_MAX_DEPTH; depth++) { | |
309 | generate_random_rule_prefix(ip_class, depth); | |
310 | } | |
311 | } | |
312 | ||
313 | /* Add following rules to keep same as previous large constant table, | |
314 | * they are 4 rules with private local IP address and 1 all-zeros prefix | |
315 | * with depth = 8. | |
316 | */ | |
317 | insert_rule_in_random_pos(IPv4(0, 0, 0, 0), 8); | |
318 | insert_rule_in_random_pos(IPv4(10, 2, 23, 147), 32); | |
319 | insert_rule_in_random_pos(IPv4(192, 168, 100, 10), 24); | |
320 | insert_rule_in_random_pos(IPv4(192, 168, 25, 100), 24); | |
321 | insert_rule_in_random_pos(IPv4(192, 168, 129, 124), 32); | |
322 | } | |
323 | ||
324 | static void | |
325 | print_route_distribution(const struct route_rule *table, uint32_t n) | |
326 | { | |
327 | unsigned i, j; | |
328 | ||
329 | printf("Route distribution per prefix width: \n"); | |
330 | printf("DEPTH QUANTITY (PERCENT)\n"); | |
331 | printf("--------------------------- \n"); | |
332 | ||
333 | /* Count depths. */ | |
334 | for (i = 1; i <= 32; i++) { | |
335 | unsigned depth_counter = 0; | |
336 | double percent_hits; | |
337 | ||
338 | for (j = 0; j < n; j++) | |
339 | if (table[j].depth == (uint8_t) i) | |
340 | depth_counter++; | |
341 | ||
342 | percent_hits = ((double)depth_counter)/((double)n) * 100; | |
343 | printf("%.2u%15u (%.2f)\n", i, depth_counter, percent_hits); | |
344 | } | |
345 | printf("\n"); | |
346 | } | |
347 | ||
348 | static int | |
349 | test_lpm_perf(void) | |
350 | { | |
351 | struct rte_lpm *lpm = NULL; | |
352 | struct rte_lpm_config config; | |
353 | ||
354 | config.max_rules = 2000000; | |
355 | config.number_tbl8s = 2048; | |
356 | config.flags = 0; | |
357 | uint64_t begin, total_time, lpm_used_entries = 0; | |
358 | unsigned i, j; | |
359 | uint32_t next_hop_add = 0xAA, next_hop_return = 0; | |
360 | int status = 0; | |
361 | uint64_t cache_line_counter = 0; | |
362 | int64_t count = 0; | |
363 | ||
364 | rte_srand(rte_rdtsc()); | |
365 | ||
366 | generate_large_route_rule_table(); | |
367 | ||
368 | printf("No. routes = %u\n", (unsigned) NUM_ROUTE_ENTRIES); | |
369 | ||
370 | print_route_distribution(large_route_table, (uint32_t) NUM_ROUTE_ENTRIES); | |
371 | ||
372 | lpm = rte_lpm_create(__func__, SOCKET_ID_ANY, &config); | |
373 | TEST_LPM_ASSERT(lpm != NULL); | |
374 | ||
375 | /* Measue add. */ | |
376 | begin = rte_rdtsc(); | |
377 | ||
378 | for (i = 0; i < NUM_ROUTE_ENTRIES; i++) { | |
379 | if (rte_lpm_add(lpm, large_route_table[i].ip, | |
380 | large_route_table[i].depth, next_hop_add) == 0) | |
381 | status++; | |
382 | } | |
383 | /* End Timer. */ | |
384 | total_time = rte_rdtsc() - begin; | |
385 | ||
386 | printf("Unique added entries = %d\n", status); | |
387 | /* Obtain add statistics. */ | |
388 | for (i = 0; i < RTE_LPM_TBL24_NUM_ENTRIES; i++) { | |
389 | if (lpm->tbl24[i].valid) | |
390 | lpm_used_entries++; | |
391 | ||
392 | if (i % 32 == 0) { | |
393 | if ((uint64_t)count < lpm_used_entries) { | |
394 | cache_line_counter++; | |
395 | count = lpm_used_entries; | |
396 | } | |
397 | } | |
398 | } | |
399 | ||
400 | printf("Used table 24 entries = %u (%g%%)\n", | |
401 | (unsigned) lpm_used_entries, | |
402 | (lpm_used_entries * 100.0) / RTE_LPM_TBL24_NUM_ENTRIES); | |
403 | printf("64 byte Cache entries used = %u (%u bytes)\n", | |
404 | (unsigned) cache_line_counter, (unsigned) cache_line_counter * 64); | |
405 | ||
406 | printf("Average LPM Add: %g cycles\n", | |
407 | (double)total_time / NUM_ROUTE_ENTRIES); | |
408 | ||
409 | /* Measure single Lookup */ | |
410 | total_time = 0; | |
411 | count = 0; | |
412 | ||
413 | for (i = 0; i < ITERATIONS; i++) { | |
414 | static uint32_t ip_batch[BATCH_SIZE]; | |
415 | ||
416 | for (j = 0; j < BATCH_SIZE; j++) | |
417 | ip_batch[j] = rte_rand(); | |
418 | ||
419 | /* Lookup per batch */ | |
420 | begin = rte_rdtsc(); | |
421 | ||
422 | for (j = 0; j < BATCH_SIZE; j++) { | |
423 | if (rte_lpm_lookup(lpm, ip_batch[j], &next_hop_return) != 0) | |
424 | count++; | |
425 | } | |
426 | ||
427 | total_time += rte_rdtsc() - begin; | |
428 | ||
429 | } | |
430 | printf("Average LPM Lookup: %.1f cycles (fails = %.1f%%)\n", | |
431 | (double)total_time / ((double)ITERATIONS * BATCH_SIZE), | |
432 | (count * 100.0) / (double)(ITERATIONS * BATCH_SIZE)); | |
433 | ||
434 | /* Measure bulk Lookup */ | |
435 | total_time = 0; | |
436 | count = 0; | |
437 | for (i = 0; i < ITERATIONS; i++) { | |
438 | static uint32_t ip_batch[BATCH_SIZE]; | |
439 | uint32_t next_hops[BULK_SIZE]; | |
440 | ||
441 | /* Create array of random IP addresses */ | |
442 | for (j = 0; j < BATCH_SIZE; j++) | |
443 | ip_batch[j] = rte_rand(); | |
444 | ||
445 | /* Lookup per batch */ | |
446 | begin = rte_rdtsc(); | |
447 | for (j = 0; j < BATCH_SIZE; j += BULK_SIZE) { | |
448 | unsigned k; | |
449 | rte_lpm_lookup_bulk(lpm, &ip_batch[j], next_hops, BULK_SIZE); | |
450 | for (k = 0; k < BULK_SIZE; k++) | |
451 | if (unlikely(!(next_hops[k] & RTE_LPM_LOOKUP_SUCCESS))) | |
452 | count++; | |
453 | } | |
454 | ||
455 | total_time += rte_rdtsc() - begin; | |
456 | } | |
457 | printf("BULK LPM Lookup: %.1f cycles (fails = %.1f%%)\n", | |
458 | (double)total_time / ((double)ITERATIONS * BATCH_SIZE), | |
459 | (count * 100.0) / (double)(ITERATIONS * BATCH_SIZE)); | |
460 | ||
461 | /* Measure LookupX4 */ | |
462 | total_time = 0; | |
463 | count = 0; | |
464 | for (i = 0; i < ITERATIONS; i++) { | |
465 | static uint32_t ip_batch[BATCH_SIZE]; | |
466 | uint32_t next_hops[4]; | |
467 | ||
468 | /* Create array of random IP addresses */ | |
469 | for (j = 0; j < BATCH_SIZE; j++) | |
470 | ip_batch[j] = rte_rand(); | |
471 | ||
472 | /* Lookup per batch */ | |
473 | begin = rte_rdtsc(); | |
474 | for (j = 0; j < BATCH_SIZE; j += RTE_DIM(next_hops)) { | |
475 | unsigned k; | |
476 | xmm_t ipx4; | |
477 | ||
478 | ipx4 = vect_loadu_sil128((xmm_t *)(ip_batch + j)); | |
479 | ipx4 = *(xmm_t *)(ip_batch + j); | |
480 | rte_lpm_lookupx4(lpm, ipx4, next_hops, UINT32_MAX); | |
481 | for (k = 0; k < RTE_DIM(next_hops); k++) | |
482 | if (unlikely(next_hops[k] == UINT32_MAX)) | |
483 | count++; | |
484 | } | |
485 | ||
486 | total_time += rte_rdtsc() - begin; | |
487 | } | |
488 | printf("LPM LookupX4: %.1f cycles (fails = %.1f%%)\n", | |
489 | (double)total_time / ((double)ITERATIONS * BATCH_SIZE), | |
490 | (count * 100.0) / (double)(ITERATIONS * BATCH_SIZE)); | |
491 | ||
492 | /* Delete */ | |
493 | status = 0; | |
494 | begin = rte_rdtsc(); | |
495 | ||
496 | for (i = 0; i < NUM_ROUTE_ENTRIES; i++) { | |
497 | /* rte_lpm_delete(lpm, ip, depth) */ | |
498 | status += rte_lpm_delete(lpm, large_route_table[i].ip, | |
499 | large_route_table[i].depth); | |
500 | } | |
501 | ||
502 | total_time += rte_rdtsc() - begin; | |
503 | ||
504 | printf("Average LPM Delete: %g cycles\n", | |
505 | (double)total_time / NUM_ROUTE_ENTRIES); | |
506 | ||
507 | rte_lpm_delete_all(lpm); | |
508 | rte_lpm_free(lpm); | |
509 | ||
510 | return 0; | |
511 | } | |
512 | ||
513 | REGISTER_TEST_COMMAND(lpm_perf_autotest, test_lpm_perf); |