/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdint.h>
#include <sys/queue.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdarg.h>
#include <inttypes.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>

#define MAX_QUEUES 1024
/*
 * 1024 queues are required to meet the needs of a large number of vmdq_pools:
 * (RX/TX_queue_nb * RX/TX_ring_descriptors_nb) mbufs are needed per port.
 */
#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
                        RTE_TEST_TX_DESC_DEFAULT))
#define MBUF_CACHE_SIZE 64

#define MAX_PKT_BURST 32

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024

#define INVALID_PORT_ID 0xFF
/* mask of enabled ports */
static uint32_t enabled_port_mask;

/* number of pools (if the user does not specify any, 8 by default) */
static uint32_t num_queues = 8;
static uint32_t num_pools = 8;

/* empty vmdq configuration structure. Filled in programmatically */
static const struct rte_eth_conf vmdq_conf_default = {
    .rxmode = {
        .mq_mode        = ETH_MQ_RX_VMDQ_ONLY,
        .split_hdr_size = 0,
    },

    .txmode = {
        .mq_mode = ETH_MQ_TX_NONE,
    },
    .rx_adv_conf = {
        /*
         * should be overridden separately in code with
         * appropriate values
         */
        .vmdq_rx_conf = {
            .nb_queue_pools = ETH_8_POOLS,
            .enable_default_pool = 0,
            .default_pool = 0,
            .nb_pool_maps = 0,
            .pool_map = {{0, 0},},
        },
    },
};

static unsigned lcore_ids[RTE_MAX_LCORE];
static uint16_t ports[RTE_MAX_ETHPORTS];
static unsigned num_ports; /**< The number of ports specified on the command line */

/* array used for printing out statistics */
volatile unsigned long rxPackets[MAX_QUEUES] = {0};

const uint16_t vlan_tags[] = {
    0,  1,  2,  3,  4,  5,  6,  7,
    8,  9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23,
    24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39,
    40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55,
    56, 57, 58, 59, 60, 61, 62, 63,
};
const uint16_t num_vlans = RTE_DIM(vlan_tags);
static uint16_t num_pf_queues, num_vmdq_queues;
static uint16_t vmdq_pool_base, vmdq_queue_base;
/* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
static struct ether_addr pool_addr_template = {
    .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
};

/* ethernet addresses of ports */
static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
#define MAX_QUEUE_NUM_10G 128
#define MAX_QUEUE_NUM_1G 8
#define MAX_POOL_MAP_NUM_10G 64
#define MAX_POOL_MAP_NUM_1G 32
#define MAX_POOL_NUM_10G 64
#define MAX_POOL_NUM_1G 8
/*
 * Builds up the correct configuration for vmdq based on the vlan_tags array
 * given above, and determines the queue number and pool map number according
 * to the valid pool number.
 */
static inline int
get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
{
    struct rte_eth_vmdq_rx_conf conf;
    unsigned i;

    conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
    conf.nb_pool_maps = num_pools;
    conf.enable_default_pool = 0;
    conf.default_pool = 0; /* set explicit value, even if not used */
    for (i = 0; i < conf.nb_pool_maps; i++) {
        conf.pool_map[i].vlan_id = vlan_tags[i];
        conf.pool_map[i].pools = (1UL << (i % num_pools));
    }

    (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
    (void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
            sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
    return 0;
}

/*
 * Initialises a given port using global settings and with the rx buffers
 * coming from the mbuf_pool passed as a parameter.
 */
static inline int
port_init(uint16_t port, struct rte_mempool *mbuf_pool)
{
    struct rte_eth_dev_info dev_info;
    struct rte_eth_rxconf *rxconf;
    struct rte_eth_txconf *txconf;
    struct rte_eth_conf port_conf;
    uint16_t rxRings, txRings;
    uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
    uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
    int retval;
    uint16_t q;
    uint16_t queues_per_pool;
    uint32_t max_nb_pools;

    /*
     * The max pool number from dev_info will be used to validate the pool
     * number specified on the command line.
     */
    rte_eth_dev_info_get(port, &dev_info);
    max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
    /*
     * Allow processing only part of the VMDQ pools, as specified by
     * num_pools on the command line.
     */
    if (num_pools > max_nb_pools) {
        printf("num_pools %d > max_nb_pools %d\n",
            num_pools, max_nb_pools);
        return -1;
    }
    retval = get_eth_conf(&port_conf, max_nb_pools);
    if (retval < 0)
        return retval;

    /*
     * NIC queues are divided into pf queues and vmdq queues.
     * Note: all ports are assumed to have the same configuration!
     */
    num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
    queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
    num_vmdq_queues = num_pools * queues_per_pool;
    num_queues = num_pf_queues + num_vmdq_queues;
    vmdq_queue_base = dev_info.vmdq_queue_base;
    vmdq_pool_base = dev_info.vmdq_pool_base;

    printf("pf queue num: %u, configured vmdq pool num: %u,"
        " each vmdq pool has %u queues\n",
        num_pf_queues, num_pools, queues_per_pool);
    printf("vmdq queue base: %d pool base %d\n",
        vmdq_queue_base, vmdq_pool_base);
    if (!rte_eth_dev_is_valid_port(port))
        return -1;

    /*
     * Though in this example we only receive packets from the first queue
     * of each pool and send packets through the first rte_lcore_count() tx
     * queues of the vmdq queues, all queues including the pf queues are set
     * up. This is because the VMDQ queues don't always start from zero, and
     * the PMD layer doesn't support selectively initialising part of the
     * rx/tx queues.
     */
    rxRings = (uint16_t)dev_info.max_rx_queues;
    txRings = (uint16_t)dev_info.max_tx_queues;

    rte_eth_dev_info_get(port, &dev_info);
    if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
        port_conf.txmode.offloads |=
            DEV_TX_OFFLOAD_MBUF_FAST_FREE;
    retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
    if (retval != 0)
        return retval;

    retval = rte_eth_dev_adjust_nb_rx_tx_desc(port, &rxRingSize,
            &txRingSize);
    if (retval != 0)
        return retval;
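    /*
     * The mbuf pool is sized from RTE_TEST_RX/TX_DESC_DEFAULT, so if the
     * driver adjusted the descriptor counts above those defaults the pool
     * would be too small.
     */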
    if (RTE_MAX(rxRingSize, txRingSize) > RTE_MAX(RTE_TEST_RX_DESC_DEFAULT,
            RTE_TEST_TX_DESC_DEFAULT)) {
        printf("Mbuf pool has an insufficient size for port %u.\n",
            port);
        return -1;
    }

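    /*
     * Start from the driver's default queue configuration, but enable
     * rx_drop_en so packets are dropped when no RX descriptors are
     * available, and propagate the port-level TX offloads to each queue.
     */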
    rxconf = &dev_info.default_rxconf;
    rxconf->rx_drop_en = 1;
    txconf = &dev_info.default_txconf;
    txconf->offloads = port_conf.txmode.offloads;
    for (q = 0; q < rxRings; q++) {
        retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
                rte_eth_dev_socket_id(port),
                rxconf,
                mbuf_pool);
        if (retval < 0) {
            printf("initialise rx queue %d failed\n", q);
            return retval;
        }
    }

    for (q = 0; q < txRings; q++) {
        retval = rte_eth_tx_queue_setup(port, q, txRingSize,
                rte_eth_dev_socket_id(port),
                txconf);
        if (retval < 0) {
            printf("initialise tx queue %d failed\n", q);
            return retval;
        }
    }

    retval = rte_eth_dev_start(port);
    if (retval < 0) {
        printf("port %d start failed\n", port);
        return retval;
    }

    rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
    printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
            " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
            (unsigned)port,
            vmdq_ports_eth_addr[port].addr_bytes[0],
            vmdq_ports_eth_addr[port].addr_bytes[1],
            vmdq_ports_eth_addr[port].addr_bytes[2],
            vmdq_ports_eth_addr[port].addr_bytes[3],
            vmdq_ports_eth_addr[port].addr_bytes[4],
            vmdq_ports_eth_addr[port].addr_bytes[5]);

    /*
     * Set a mac for each pool.
     * There is no default mac for the pools in i40e.
     * Remove this after i40e fixes this issue.
     */
    for (q = 0; q < num_pools; q++) {
        struct ether_addr mac;
        mac = pool_addr_template;
        mac.addr_bytes[4] = port;
        mac.addr_bytes[5] = q;
        printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
            port, q,
            mac.addr_bytes[0], mac.addr_bytes[1],
            mac.addr_bytes[2], mac.addr_bytes[3],
            mac.addr_bytes[4], mac.addr_bytes[5]);
        retval = rte_eth_dev_mac_addr_add(port, &mac,
                q + vmdq_pool_base);
        if (retval) {
            printf("mac addr add failed at pool %d\n", q);
            return retval;
        }
    }

    return 0;
}

/* Check the num_pools parameter and set it if OK */
static int
vmdq_parse_num_pools(const char *q_arg)
{
    char *end = NULL;
    int n;

    /* parse number string */
    n = strtol(q_arg, &end, 10);
    if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
        return -1;

    if (n > num_vlans) {
        printf("num_pools %d > num_vlans %d\n", n, num_vlans);
        return -1;
    }

    num_pools = n;

    return 0;
}

static int
parse_portmask(const char *portmask)
{
    char *end = NULL;
    unsigned long pm;

    /* parse hexadecimal string */
    pm = strtoul(portmask, &end, 16);
    if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
        return -1;

    if (pm == 0)
        return -1;

    return pm;
}

/* Display usage */
static void
vmdq_usage(const char *prgname)
{
    printf("%s [EAL options] -- -p PORTMASK\n"
        "  --nb-pools NP: number of pools\n",
        prgname);
}

/* Parse the arguments given on the command line of the application */
static int
vmdq_parse_args(int argc, char **argv)
{
    int opt;
    int option_index;
    unsigned i;
    const char *prgname = argv[0];
    static struct option long_option[] = {
        {"nb-pools", required_argument, NULL, 0},
        {NULL, 0, 0, 0}
    };

    /* Parse command line */
    while ((opt = getopt_long(argc, argv, "p:", long_option,
            &option_index)) != EOF) {
        switch (opt) {
        /* portmask */
        case 'p':
            enabled_port_mask = parse_portmask(optarg);
            if (enabled_port_mask == 0) {
                printf("invalid portmask\n");
                vmdq_usage(prgname);
                return -1;
            }
            break;
        case 0:
            if (vmdq_parse_num_pools(optarg) == -1) {
                printf("invalid number of pools\n");
                vmdq_usage(prgname);
                return -1;
            }
            break;

        default:
            vmdq_usage(prgname);
            return -1;
        }
    }

    for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
        if (enabled_port_mask & (1 << i))
            ports[num_ports++] = (uint8_t)i;
    }

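    /*
     * Ports are forwarded in pairs (0 <-> 1, 2 <-> 3, ...), so an even
     * number of at least two ports is required.
     */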
    if (num_ports < 2 || num_ports % 2) {
        printf("Current enabled port number is %u, "
            "but it should be even and at least 2\n", num_ports);
        return -1;
    }

    return 0;
}

static void
update_mac_address(struct rte_mbuf *m, unsigned dst_port)
{
    struct ether_hdr *eth;
    void *tmp;

    eth = rte_pktmbuf_mtod(m, struct ether_hdr *);

    /* 02:00:00:00:00:xx */
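    /*
     * Write the whole destination MAC (02:00:00:00:00:<dst_port>) with a
     * single 64-bit store; the constant assumes a little-endian host, and
     * the two bytes that spill into s_addr are overwritten by the copy
     * below.
     */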
    tmp = &eth->d_addr.addr_bytes[0];
    *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);

    /* src addr */
    ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
}

/* When we receive a HUP signal, print out our stats */
static void
sighup_handler(int signum)
{
    unsigned q;
    for (q = 0; q < num_queues; q++) {
        if (q % (num_queues/num_pools) == 0)
            printf("\nPool %u: ", q/(num_queues/num_pools));
        printf("%lu ", rxPackets[q]);
    }
    printf("\nFinished handling signal %d\n", signum);
}

/*
 * Main per-lcore worker: reads packets from this core's share of the VMDQ
 * RX queues and forwards them to the paired output port.
 */
static int
lcore_main(__attribute__((__unused__)) void *dummy)
{
    const uint16_t lcore_id = (uint16_t)rte_lcore_id();
    const uint16_t num_cores = (uint16_t)rte_lcore_count();
    uint16_t core_id = 0;
    uint16_t startQueue, endQueue;
    uint16_t q, i, p;
    const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);

    for (i = 0; i < num_cores; i++)
        if (lcore_ids[i] == lcore_id) {
            core_id = i;
            break;
        }

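    /*
     * Divide the VMDQ queues evenly among the worker cores; the first
     * 'remainder' cores each take one extra queue.
     */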
    if (remainder != 0) {
        if (core_id < remainder) {
            startQueue = (uint16_t)(core_id *
                (num_vmdq_queues / num_cores + 1));
            endQueue = (uint16_t)(startQueue +
                (num_vmdq_queues / num_cores) + 1);
        } else {
            startQueue = (uint16_t)(core_id *
                (num_vmdq_queues / num_cores) +
                remainder);
            endQueue = (uint16_t)(startQueue +
                (num_vmdq_queues / num_cores));
        }
    } else {
        startQueue = (uint16_t)(core_id *
            (num_vmdq_queues / num_cores));
        endQueue = (uint16_t)(startQueue +
            (num_vmdq_queues / num_cores));
    }

    /* vmdq queue idx doesn't always start from zero. */
    startQueue += vmdq_queue_base;
    endQueue += vmdq_queue_base;
    printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
        (unsigned)lcore_id, startQueue, endQueue - 1);

    if (startQueue == endQueue) {
        printf("lcore %u has nothing to do\n", lcore_id);
        return 0;
    }

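    /*
     * Forwarding loop: for each port pair, poll this core's queue range on
     * the source port and forward the received burst to the paired port.
     */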
    for (;;) {
        struct rte_mbuf *buf[MAX_PKT_BURST];
        const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);

        for (p = 0; p < num_ports; p++) {
            const uint8_t sport = ports[p];
            /* 0 <-> 1, 2 <-> 3 etc */
            const uint8_t dport = ports[p ^ 1];
            if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
                continue;

            for (q = startQueue; q < endQueue; q++) {
                const uint16_t rxCount = rte_eth_rx_burst(sport,
                        q, buf, buf_size);

                if (unlikely(rxCount == 0))
                    continue;

                rxPackets[q] += rxCount;

                for (i = 0; i < rxCount; i++)
                    update_mac_address(buf[i], dport);

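                /*
                 * Each core transmits on its own TX queue
                 * (vmdq_queue_base + core_id), so cores never contend
                 * for the same queue.
                 */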
                const uint16_t txCount = rte_eth_tx_burst(dport,
                        vmdq_queue_base + core_id,
                        buf,
                        rxCount);

                if (txCount != rxCount) {
                    for (i = txCount; i < rxCount; i++)
                        rte_pktmbuf_free(buf[i]);
                }
            }
        }
    }
}

/*
 * Update the global num_ports and the ports[] array according to the number
 * of ports in the system, and return the number of valid ports.
 */
static unsigned check_ports_num(unsigned nb_ports)
{
    unsigned valid_num_ports = num_ports;
    unsigned portid;

    if (num_ports > nb_ports) {
        printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
            num_ports, nb_ports);
        num_ports = nb_ports;
    }

    for (portid = 0; portid < num_ports; portid++) {
        if (!rte_eth_dev_is_valid_port(ports[portid])) {
            printf("\nSpecified port ID(%u) is not valid\n",
                ports[portid]);
            ports[portid] = INVALID_PORT_ID;
            valid_num_ports--;
        }
    }
    return valid_num_ports;
}

/* Main function, does initialisation and calls the per-lcore functions */
int
main(int argc, char *argv[])
{
    struct rte_mempool *mbuf_pool;
    unsigned lcore_id, core_id = 0;
    int ret;
    unsigned nb_ports, valid_num_ports;
    uint16_t portid;

    signal(SIGHUP, sighup_handler);

    /* init EAL */
    ret = rte_eal_init(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
    argc -= ret;
    argv += ret;

    /* parse app arguments */
    ret = vmdq_parse_args(argc, argv);
    if (ret < 0)
        rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");

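    /*
     * Record the enabled lcore IDs in order; lcore_main() later maps its
     * own lcore ID back to this index to pick its queue range and TX queue.
     */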
    for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
        if (rte_lcore_is_enabled(lcore_id))
            lcore_ids[core_id++] = lcore_id;

    if (rte_lcore_count() > RTE_MAX_LCORE)
        rte_exit(EXIT_FAILURE, "Not enough cores\n");

    nb_ports = rte_eth_dev_count_avail();

    /*
     * Update the global num_ports and the ports[] array, and get the
     * number of valid ports according to the system port count.
     */
    valid_num_ports = check_ports_num(nb_ports);

    if (valid_num_ports < 2 || valid_num_ports % 2) {
        printf("Current valid ports number is %u\n", valid_num_ports);
        rte_exit(EXIT_FAILURE, "Error: the number of valid ports must be even and at least 2\n");
    }

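    /*
     * One mempool shared by all ports, sized so that every RX/TX descriptor
     * on every queue of every port can hold an mbuf
     * (NUM_MBUFS_PER_PORT mbufs per port).
     */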
    mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
        NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
        0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
    if (mbuf_pool == NULL)
        rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");

    /* initialize all ports */
    RTE_ETH_FOREACH_DEV(portid) {
        /* skip ports that are not enabled */
        if ((enabled_port_mask & (1 << portid)) == 0) {
            printf("\nSkipping disabled port %d\n", portid);
            continue;
        }
        if (port_init(portid, mbuf_pool) != 0)
            rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
    }

    /* call lcore_main() on every lcore */
    rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
    RTE_LCORE_FOREACH_SLAVE(lcore_id) {
        if (rte_eal_wait_lcore(lcore_id) < 0)
            return -1;
    }

    return 0;
}