]> git.proxmox.com Git - ceph.git/blob - ceph/src/dpdk/examples/vmdq/main.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / dpdk / examples / vmdq / main.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <stdint.h>
35 #include <sys/queue.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <stdio.h>
39 #include <assert.h>
40 #include <errno.h>
41 #include <signal.h>
42 #include <stdarg.h>
43 #include <inttypes.h>
44 #include <getopt.h>
45
46 #include <rte_common.h>
47 #include <rte_log.h>
48 #include <rte_memory.h>
49 #include <rte_memcpy.h>
50 #include <rte_memzone.h>
51 #include <rte_eal.h>
52 #include <rte_per_lcore.h>
53 #include <rte_launch.h>
54 #include <rte_atomic.h>
55 #include <rte_cycles.h>
56 #include <rte_prefetch.h>
57 #include <rte_lcore.h>
58 #include <rte_per_lcore.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_interrupts.h>
61 #include <rte_pci.h>
62 #include <rte_random.h>
63 #include <rte_debug.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_log.h>
67 #include <rte_mempool.h>
68 #include <rte_mbuf.h>
69 #include <rte_memcpy.h>
70
#define MAX_QUEUES 1024
/*
 * 1024 queues require to meet the needs of a large number of vmdq_pools.
 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) per port.
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
						RTE_TEST_TX_DESC_DEFAULT))
#define MBUF_CACHE_SIZE 64

/* max packets fetched/sent per rx/tx burst call */
#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 128
#define RTE_TEST_TX_DESC_DEFAULT 512

/* sentinel stored in ports[] for entries rejected by check_ports_num() */
#define INVALID_PORT_ID 0xFF

/* mask of enabled ports */
static uint32_t enabled_port_mask;

/* number of queues/pools (8 by default if the user does not specify any) */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;

/* empty vmdq configuration structure. Filled in programmatically.
 * get_eth_conf() copies this as the base and then overwrites
 * rx_adv_conf.vmdq_rx_conf with the real pool/vlan mapping. */
static const struct rte_eth_conf vmdq_conf_default = {
	.rxmode = {
		.mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
		.split_hdr_size = 0,
		.header_split   = 0, /**< Header Split disabled */
		.hw_ip_checksum = 0, /**< IP checksum offload disabled */
		.hw_vlan_filter = 0, /**< VLAN filtering disabled */
		.jumbo_frame    = 0, /**< Jumbo Frame Support disabled */
	},

	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
	},
	.rx_adv_conf = {
		/*
		 * should be overridden separately in code with
		 * appropriate values
		 */
		.vmdq_rx_conf = {
			.nb_queue_pools = ETH_8_POOLS,
			.enable_default_pool = 0,
			.default_pool = 0,
			.nb_pool_maps = 0,
			.pool_map = {{0, 0},},
		},
	},
};

/* dense table of enabled lcore ids, filled in main() */
static unsigned lcore_ids[RTE_MAX_LCORE];
/* port ids selected by the -p portmask, filled in vmdq_parse_args() */
static uint8_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports; /**< The number of ports specified in command line */

/* array used for printing out statistics; indexed by rx queue id,
 * updated by lcore_main() and read from the SIGHUP handler */
volatile unsigned long rxPackets[MAX_QUEUES] = {0};

/* one vlan tag per possible pool map entry; pool i filters vlan_tags[i] */
const uint16_t vlan_tags[] = {
	0,  1,  2,  3,  4,  5,  6,  7,
	8,  9, 10, 11,	12, 13, 14, 15,
	16, 17, 18, 19, 20, 21, 22, 23,
	24, 25, 26, 27, 28, 29, 30, 31,
	32, 33, 34, 35, 36, 37, 38, 39,
	40, 41, 42, 43, 44, 45, 46, 47,
	48, 49, 50, 51, 52, 53, 54, 55,
	56, 57, 58, 59, 60, 61, 62, 63,
};
const uint16_t num_vlans = RTE_DIM(vlan_tags);
static uint16_t num_pf_queues, num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
/* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
static struct ether_addr pool_addr_template = {
	.addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
};

/* ethernet addresses of ports */
static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];

/* per-NIC-class limits; NOTE(review): these appear unused in this file —
 * possibly kept for reference. Confirm before removing. */
#define MAX_QUEUE_NUM_10G 128
#define MAX_QUEUE_NUM_1G 8
#define MAX_POOL_MAP_NUM_10G 64
#define MAX_POOL_MAP_NUM_1G 32
#define MAX_POOL_NUM_10G 64
#define MAX_POOL_NUM_1G 8
160 /*
161 * Builds up the correct configuration for vmdq based on the vlan tags array
162 * given above, and determine the queue number and pool map number according to
163 * valid pool number
164 */
165 static inline int
166 get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
167 {
168 struct rte_eth_vmdq_rx_conf conf;
169 unsigned i;
170
171 conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
172 conf.nb_pool_maps = num_pools;
173 conf.enable_default_pool = 0;
174 conf.default_pool = 0; /* set explicit value, even if not used */
175
176 for (i = 0; i < conf.nb_pool_maps; i++) {
177 conf.pool_map[i].vlan_id = vlan_tags[i];
178 conf.pool_map[i].pools = (1UL << (i % num_pools));
179 }
180
181 (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
182 (void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
183 sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
184 return 0;
185 }
186
187 /*
188 * Initialises a given port using global settings and with the rx buffers
189 * coming from the mbuf_pool passed as parameter
190 */
191 static inline int
192 port_init(uint8_t port, struct rte_mempool *mbuf_pool)
193 {
194 struct rte_eth_dev_info dev_info;
195 struct rte_eth_rxconf *rxconf;
196 struct rte_eth_conf port_conf;
197 uint16_t rxRings, txRings;
198 const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT, txRingSize = RTE_TEST_TX_DESC_DEFAULT;
199 int retval;
200 uint16_t q;
201 uint16_t queues_per_pool;
202 uint32_t max_nb_pools;
203
204 /*
205 * The max pool number from dev_info will be used to validate the pool
206 * number specified in cmd line
207 */
208 rte_eth_dev_info_get(port, &dev_info);
209 max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
210 /*
211 * We allow to process part of VMDQ pools specified by num_pools in
212 * command line.
213 */
214 if (num_pools > max_nb_pools) {
215 printf("num_pools %d >max_nb_pools %d\n",
216 num_pools, max_nb_pools);
217 return -1;
218 }
219 retval = get_eth_conf(&port_conf, max_nb_pools);
220 if (retval < 0)
221 return retval;
222
223 /*
224 * NIC queues are divided into pf queues and vmdq queues.
225 */
226 /* There is assumption here all ports have the same configuration! */
227 num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
228 queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
229 num_vmdq_queues = num_pools * queues_per_pool;
230 num_queues = num_pf_queues + num_vmdq_queues;
231 vmdq_queue_base = dev_info.vmdq_queue_base;
232 vmdq_pool_base = dev_info.vmdq_pool_base;
233
234 printf("pf queue num: %u, configured vmdq pool num: %u,"
235 " each vmdq pool has %u queues\n",
236 num_pf_queues, num_pools, queues_per_pool);
237 printf("vmdq queue base: %d pool base %d\n",
238 vmdq_queue_base, vmdq_pool_base);
239 if (port >= rte_eth_dev_count())
240 return -1;
241
242 /*
243 * Though in this example, we only receive packets from the first queue
244 * of each pool and send packets through first rte_lcore_count() tx
245 * queues of vmdq queues, all queues including pf queues are setup.
246 * This is because VMDQ queues doesn't always start from zero, and the
247 * PMD layer doesn't support selectively initialising part of rx/tx
248 * queues.
249 */
250 rxRings = (uint16_t)dev_info.max_rx_queues;
251 txRings = (uint16_t)dev_info.max_tx_queues;
252 retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
253 if (retval != 0)
254 return retval;
255
256 rte_eth_dev_info_get(port, &dev_info);
257 rxconf = &dev_info.default_rxconf;
258 rxconf->rx_drop_en = 1;
259 for (q = 0; q < rxRings; q++) {
260 retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
261 rte_eth_dev_socket_id(port),
262 rxconf,
263 mbuf_pool);
264 if (retval < 0) {
265 printf("initialise rx queue %d failed\n", q);
266 return retval;
267 }
268 }
269
270 for (q = 0; q < txRings; q++) {
271 retval = rte_eth_tx_queue_setup(port, q, txRingSize,
272 rte_eth_dev_socket_id(port),
273 NULL);
274 if (retval < 0) {
275 printf("initialise tx queue %d failed\n", q);
276 return retval;
277 }
278 }
279
280 retval = rte_eth_dev_start(port);
281 if (retval < 0) {
282 printf("port %d start failed\n", port);
283 return retval;
284 }
285
286 rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
287 printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
288 " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
289 (unsigned)port,
290 vmdq_ports_eth_addr[port].addr_bytes[0],
291 vmdq_ports_eth_addr[port].addr_bytes[1],
292 vmdq_ports_eth_addr[port].addr_bytes[2],
293 vmdq_ports_eth_addr[port].addr_bytes[3],
294 vmdq_ports_eth_addr[port].addr_bytes[4],
295 vmdq_ports_eth_addr[port].addr_bytes[5]);
296
297 /*
298 * Set mac for each pool.
299 * There is no default mac for the pools in i40.
300 * Removes this after i40e fixes this issue.
301 */
302 for (q = 0; q < num_pools; q++) {
303 struct ether_addr mac;
304 mac = pool_addr_template;
305 mac.addr_bytes[4] = port;
306 mac.addr_bytes[5] = q;
307 printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
308 port, q,
309 mac.addr_bytes[0], mac.addr_bytes[1],
310 mac.addr_bytes[2], mac.addr_bytes[3],
311 mac.addr_bytes[4], mac.addr_bytes[5]);
312 retval = rte_eth_dev_mac_addr_add(port, &mac,
313 q + vmdq_pool_base);
314 if (retval) {
315 printf("mac addr add failed at pool %d\n", q);
316 return retval;
317 }
318 }
319
320 return 0;
321 }
322
323 /* Check num_pools parameter and set it if OK*/
324 static int
325 vmdq_parse_num_pools(const char *q_arg)
326 {
327 char *end = NULL;
328 int n;
329
330 /* parse number string */
331 n = strtol(q_arg, &end, 10);
332 if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
333 return -1;
334
335 if (num_pools > num_vlans) {
336 printf("num_pools %d > num_vlans %d\n", num_pools, num_vlans);
337 return -1;
338 }
339
340 num_pools = n;
341
342 return 0;
343 }
344
345
/*
 * Parse a hexadecimal port-mask string.
 * Returns the mask on success, -1 if the string is empty, contains
 * trailing garbage, or yields a zero mask.
 */
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long mask = strtoul(portmask, &end, 16);

	/* reject empty input or anything left over after the hex digits */
	if (portmask[0] == '\0' || end == NULL || *end != '\0')
		return -1;

	return (mask == 0) ? -1 : (int)mask;
}
362
/* Display usage.
 * FIX: the original usage string contained a stray ']' after PORTMASK. */
static void
vmdq_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
	"	--nb-pools NP: number of pools\n",
	       prgname);
}
371
372 /* Parse the argument (num_pools) given in the command line of the application */
373 static int
374 vmdq_parse_args(int argc, char **argv)
375 {
376 int opt;
377 int option_index;
378 unsigned i;
379 const char *prgname = argv[0];
380 static struct option long_option[] = {
381 {"nb-pools", required_argument, NULL, 0},
382 {NULL, 0, 0, 0}
383 };
384
385 /* Parse command line */
386 while ((opt = getopt_long(argc, argv, "p:", long_option,
387 &option_index)) != EOF) {
388 switch (opt) {
389 /* portmask */
390 case 'p':
391 enabled_port_mask = parse_portmask(optarg);
392 if (enabled_port_mask == 0) {
393 printf("invalid portmask\n");
394 vmdq_usage(prgname);
395 return -1;
396 }
397 break;
398 case 0:
399 if (vmdq_parse_num_pools(optarg) == -1) {
400 printf("invalid number of pools\n");
401 vmdq_usage(prgname);
402 return -1;
403 }
404 break;
405
406 default:
407 vmdq_usage(prgname);
408 return -1;
409 }
410 }
411
412 for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
413 if (enabled_port_mask & (1 << i))
414 ports[num_ports++] = (uint8_t)i;
415 }
416
417 if (num_ports < 2 || num_ports % 2) {
418 printf("Current enabled port number is %u,"
419 "but it should be even and at least 2\n", num_ports);
420 return -1;
421 }
422
423 return 0;
424 }
425
426 static void
427 update_mac_address(struct rte_mbuf *m, unsigned dst_port)
428 {
429 struct ether_hdr *eth;
430 void *tmp;
431
432 eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
433
434 /* 02:00:00:00:00:xx */
435 tmp = &eth->d_addr.addr_bytes[0];
436 *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
437
438 /* src addr */
439 ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
440 }
441
442 /* When we receive a HUP signal, print out our stats */
443 static void
444 sighup_handler(int signum)
445 {
446 unsigned q;
447 for (q = 0; q < num_queues; q++) {
448 if (q % (num_queues/num_pools) == 0)
449 printf("\nPool %u: ", q/(num_queues/num_pools));
450 printf("%lu ", rxPackets[q]);
451 }
452 printf("\nFinished handling signal %d\n", signum);
453 }
454
/*
 * Main thread that does the work, reading from INPUT_PORT
 * and writing to OUTPUT_PORT.
 *
 * Each worker lcore polls its slice of the VMDQ rx queues on every even
 * port of a pair and forwards the packets to the paired odd port (and
 * vice versa), rewriting the MAC addresses on the way. Never returns
 * unless this lcore has no queues assigned.
 */
static int
lcore_main(__attribute__((__unused__)) void *dummy)
{
	const uint16_t lcore_id = (uint16_t)rte_lcore_id();
	const uint16_t num_cores = (uint16_t)rte_lcore_count();
	uint16_t core_id = 0;
	uint16_t startQueue, endQueue;
	uint16_t q, i, p;
	const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);

	/* translate this EAL lcore id into a dense 0..num_cores-1 index */
	for (i = 0; i < num_cores; i++)
		if (lcore_ids[i] == lcore_id) {
			core_id = i;
			break;
		}

	/*
	 * Split num_vmdq_queues as evenly as possible across the cores:
	 * the first 'remainder' cores each take one extra queue.
	 */
	if (remainder != 0) {
		if (core_id < remainder) {
			startQueue = (uint16_t)(core_id *
					(num_vmdq_queues / num_cores + 1));
			endQueue = (uint16_t)(startQueue +
					(num_vmdq_queues / num_cores) + 1);
		} else {
			startQueue = (uint16_t)(core_id *
					(num_vmdq_queues / num_cores) +
					remainder);
			endQueue = (uint16_t)(startQueue +
					(num_vmdq_queues / num_cores));
		}
	} else {
		startQueue = (uint16_t)(core_id *
				(num_vmdq_queues / num_cores));
		endQueue = (uint16_t)(startQueue +
				(num_vmdq_queues / num_cores));
	}

	/* vmdq queue idx doesn't always start from zero.*/
	startQueue += vmdq_queue_base;
	endQueue += vmdq_queue_base;
	printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
		(unsigned)lcore_id, startQueue, endQueue - 1);

	if (startQueue == endQueue) {
		printf("lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	/* forward forever: poll every assigned queue on every port pair */
	for (;;) {
		struct rte_mbuf *buf[MAX_PKT_BURST];
		const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);

		for (p = 0; p < num_ports; p++) {
			const uint8_t sport = ports[p];
			/* 0 <-> 1, 2 <-> 3 etc */
			const uint8_t dport = ports[p ^ 1];
			if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
				continue;

			for (q = startQueue; q < endQueue; q++) {
				const uint16_t rxCount = rte_eth_rx_burst(sport,
					q, buf, buf_size);

				if (unlikely(rxCount == 0))
					continue;

				/* stats read by the SIGHUP handler */
				rxPackets[q] += rxCount;

				for (i = 0; i < rxCount; i++)
					update_mac_address(buf[i], dport);

				/* each worker transmits on its own tx queue */
				const uint16_t txCount = rte_eth_tx_burst(dport,
					vmdq_queue_base + core_id,
					buf,
					rxCount);

				/* free mbufs the NIC did not accept */
				if (txCount != rxCount) {
					for (i = txCount; i < rxCount; i++)
						rte_pktmbuf_free(buf[i]);
				}
			}
		}
	}
}
542
543 /*
544 * Update the global var NUM_PORTS and array PORTS according to system ports number
545 * and return valid ports number
546 */
547 static unsigned check_ports_num(unsigned nb_ports)
548 {
549 unsigned valid_num_ports = num_ports;
550 unsigned portid;
551
552 if (num_ports > nb_ports) {
553 printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
554 num_ports, nb_ports);
555 num_ports = nb_ports;
556 }
557
558 for (portid = 0; portid < num_ports; portid++) {
559 if (ports[portid] >= nb_ports) {
560 printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
561 ports[portid], (nb_ports - 1));
562 ports[portid] = INVALID_PORT_ID;
563 valid_num_ports--;
564 }
565 }
566 return valid_num_ports;
567 }
568
/* Main function, does initialisation and calls the per-lcore functions.
 * Sequence: install SIGHUP stats handler, init EAL, parse app args,
 * build the lcore table, create the mbuf pool, init each enabled port,
 * then launch lcore_main() on every lcore and wait. */
int
main(int argc, char *argv[])
{
	struct rte_mempool *mbuf_pool;
	unsigned lcore_id, core_id = 0;
	int ret;
	unsigned nb_ports, valid_num_ports;
	uint8_t portid;

	/* SIGHUP prints the per-queue rx counters (see sighup_handler) */
	signal(SIGHUP, sighup_handler);

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
	/* skip the EAL arguments consumed above */
	argc -= ret;
	argv += ret;

	/* parse app arguments */
	ret = vmdq_parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");

	/* build the dense table of enabled lcore ids used by lcore_main() */
	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
		if (rte_lcore_is_enabled(lcore_id))
			lcore_ids[core_id++] = lcore_id;

	if (rte_lcore_count() > RTE_MAX_LCORE)
		rte_exit(EXIT_FAILURE, "Not enough cores\n");

	nb_ports = rte_eth_dev_count();

	/*
	 * Update the global var NUM_PORTS and global array PORTS
	 * and get value of var VALID_NUM_PORTS according to system ports number
	 */
	valid_num_ports = check_ports_num(nb_ports);

	/* ports are forwarded in pairs, so an even count >= 2 is required */
	if (valid_num_ports < 2 || valid_num_ports % 2) {
		printf("Current valid ports number is %u\n", valid_num_ports);
		rte_exit(EXIT_FAILURE, "Error with valid ports number is not even or less than 2\n");
	}

	/* single shared pool sized for the worst-case queue/descriptor count */
	mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
		NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
		0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

	/* initialize all ports */
	for (portid = 0; portid < nb_ports; portid++) {
		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}
		if (port_init(portid, mbuf_pool) != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
	}

	/* call lcore_main() on every lcore */
	rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}