/* ceph/src/seastar/dpdk/examples/ip_fragmentation/main.c */
/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <sys/param.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_lpm.h>
#include <rte_lpm6.h>
#include <rte_ip.h>
#include <rte_string_fns.h>

#include <rte_ip_frag.h>

#define RTE_LOGTYPE_IP_FRAG RTE_LOGTYPE_USER1

/* allow max jumbo frame 9.5 KB (0x2600 = 9728 bytes) */
#define JUMBO_FRAME_MAX_SIZE	0x2600

#define ROUNDUP_DIV(a, b)	(((a) + (b) - 1) / (b))

/*
 * Default byte size for the IPv4/IPv6 Maximum Transmission Unit (MTU).
 * This value includes the size of the IP header.
 */
#define IPV4_MTU_DEFAULT	ETHER_MTU
#define IPV6_MTU_DEFAULT	ETHER_MTU

/*
 * The overhead from max frame size to MTU.
 * We have to consider the max possible overhead.
 */
#define MTU_OVERHEAD	\
	(ETHER_HDR_LEN + ETHER_CRC_LEN + 2 * sizeof(struct vlan_hdr))

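/*
 * Worked size check (constants from rte_ether.h): ETHER_HDR_LEN is 14
 * bytes, ETHER_CRC_LEN is 4 bytes and a struct vlan_hdr is 4 bytes, so a
 * double (QinQ) tagged frame adds 14 + 4 + 2 * 4 = 26 bytes to the MTU.
 */
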
/*
 * Default payload in bytes for the IPv4/IPv6 packet.
 */
#define IPV4_DEFAULT_PAYLOAD	(IPV4_MTU_DEFAULT - sizeof(struct ipv4_hdr))
#define IPV6_DEFAULT_PAYLOAD	(IPV6_MTU_DEFAULT - sizeof(struct ipv6_hdr))

/*
 * Max number of fragments per packet expected - defined by config file.
 */
#define MAX_PACKET_FRAG		RTE_LIBRTE_IP_FRAG_MAX_FRAG

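/*
 * Worked example: at the default 1500-byte MTU each fragment can carry at
 * most 1480 payload bytes, the largest multiple of 8 that fits (RFC 791
 * expresses fragment offsets in 8-byte units). A 9714-byte IPv4 packet
 * (9694 payload bytes) therefore splits into ROUNDUP_DIV(9694, 1480) = 7
 * fragments.
 */
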
#define NB_MBUF			8192

#define MAX_PKT_BURST		32
#define BURST_TX_DRAIN_US	100 /* TX drain every ~100us */

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET		3

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

#ifndef IPv4_BYTES
#define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8
#define IPv4_BYTES(addr) \
	(uint8_t) (((addr) >> 24) & 0xFF),\
	(uint8_t) (((addr) >> 16) & 0xFF),\
	(uint8_t) (((addr) >> 8) & 0xFF),\
	(uint8_t) ((addr) & 0xFF)
#endif
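
/*
 * Usage sketch (mirrors init_routing_table() below):
 *   printf(IPv4_BYTES_FMT "\n", IPv4_BYTES(IPv4(100,10,0,0)));
 * prints "100.10.0.0".
 */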

#ifndef IPv6_BYTES
#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
		       "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
#define IPv6_BYTES(addr) \
	addr[0], addr[1], addr[2], addr[3], \
	addr[4], addr[5], addr[6], addr[7], \
	addr[8], addr[9], addr[10], addr[11],\
	addr[12], addr[13], addr[14], addr[15]
#endif

#define IPV6_ADDR_LEN 16

/* mask of enabled ports */
static int enabled_port_mask = 0;

static int rx_queue_per_lcore = 1;

#define MBUF_TABLE_SIZE	(2 * MAX(MAX_PKT_BURST, MAX_PACKET_FRAG))
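/*
 * Sizing note: up to MAX_PKT_BURST - 1 packets may already be queued when
 * one more input packet expands into up to MAX_PACKET_FRAG fragments;
 * since 2 * MAX(a, b) >= a + b, the table can always absorb them.
 */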

struct mbuf_table {
	uint16_t len;
	struct rte_mbuf *m_table[MBUF_TABLE_SIZE];
};

struct rx_queue {
	struct rte_mempool *direct_pool;	/* mbufs for new fragment headers */
	struct rte_mempool *indirect_pool;	/* indirect mbufs referencing input data */
	struct rte_lpm *lpm;			/* IPv4 routing table */
	struct rte_lpm6 *lpm6;			/* IPv6 routing table */
	uint16_t portid;
};

#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT 16
struct lcore_queue_conf {
	uint16_t n_rx_queue;
	uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
	struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
	struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
} __rte_cache_aligned;
struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];

static struct rte_eth_conf port_conf = {
	.rxmode = {
		.max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
		.split_hdr_size = 0,
		.offloads = (DEV_RX_OFFLOAD_CHECKSUM |
			     DEV_RX_OFFLOAD_SCATTER |
			     DEV_RX_OFFLOAD_JUMBO_FRAME),
	},
	.txmode = {
		.mq_mode = ETH_MQ_TX_NONE,
		.offloads = (DEV_TX_OFFLOAD_IPV4_CKSUM |
			     DEV_TX_OFFLOAD_MULTI_SEGS),
	},
};

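/*
 * Offload rationale: JUMBO_FRAME + SCATTER allow jumbo frames to be
 * received into chains of mbufs, MULTI_SEGS allows transmitting the
 * multi-segment fragments produced by the fragmentation library, and
 * IPV4_CKSUM lets the NIC compute the header checksum of each fragment.
 */
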
/*
 * IPv4 forwarding table
 */
struct l3fwd_ipv4_route {
	uint32_t ip;
	uint8_t  depth;
	uint8_t  if_out;
};

/* /16 prefixes mapped one-to-one to output port indices */
struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = {
	{IPv4(100,10,0,0), 16, 0},
	{IPv4(100,20,0,0), 16, 1},
	{IPv4(100,30,0,0), 16, 2},
	{IPv4(100,40,0,0), 16, 3},
	{IPv4(100,50,0,0), 16, 4},
	{IPv4(100,60,0,0), 16, 5},
	{IPv4(100,70,0,0), 16, 6},
	{IPv4(100,80,0,0), 16, 7},
};

/*
 * IPv6 forwarding table
 */

struct l3fwd_ipv6_route {
	uint8_t ip[IPV6_ADDR_LEN];
	uint8_t depth;
	uint8_t if_out;
};

/* /48 prefixes mapped one-to-one to output port indices */
static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = {
	{{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0},
	{{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1},
	{{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2},
	{{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3},
	{{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4},
	{{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5},
	{{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6},
	{{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7},
};

#define LPM_MAX_RULES		1024
#define LPM6_MAX_RULES		1024
#define LPM6_NUMBER_TBL8S	(1 << 16)

struct rte_lpm6_config lpm6_config = {
	.max_rules = LPM6_MAX_RULES,
	.number_tbl8s = LPM6_NUMBER_TBL8S,
	.flags = 0
};

static struct rte_mempool *socket_direct_pool[RTE_MAX_NUMA_NODES];
static struct rte_mempool *socket_indirect_pool[RTE_MAX_NUMA_NODES];
static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES];
static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES];

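/*
 * Mempools and lookup tables are kept per NUMA socket so that each lcore
 * works on memory local to its socket; init_mem() below fills these in.
 */
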
/* Send burst of packets on an output interface */
static inline int
send_burst(struct lcore_queue_conf *qconf, uint16_t n, uint16_t port)
{
	struct rte_mbuf **m_table;
	int ret;
	uint16_t queueid;

	queueid = qconf->tx_queue_id[port];
	m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;

	ret = rte_eth_tx_burst(port, queueid, m_table, n);

	/* packets the NIC could not accept are dropped, not retried */
	if (unlikely(ret < n)) {
		do {
			rte_pktmbuf_free(m_table[ret]);
		} while (++ret < n);
	}

	return 0;
}

static inline void
l3fwd_simple_forward(struct rte_mbuf *m, struct lcore_queue_conf *qconf,
		uint8_t queueid, uint16_t port_in)
{
	struct rx_queue *rxq;
	uint32_t i, len, next_hop;
	uint8_t ipv6;
	uint16_t port_out;
	int32_t len2;

	ipv6 = 0;
	rxq = &qconf->rx_queue_list[queueid];

	/* by default, send everything back to the source port */
	port_out = port_in;

	/* Remove the Ethernet header and trailer from the input packet */
	rte_pktmbuf_adj(m, (uint16_t)sizeof(struct ether_hdr));

	/* Build transmission burst */
	len = qconf->tx_mbufs[port_out].len;

	/* if this is an IPv4 packet */
	if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
		struct ipv4_hdr *ip_hdr;
		uint32_t ip_dst;
		/* Read the lookup key (i.e. ip_dst) from the input packet */
		ip_hdr = rte_pktmbuf_mtod(m, struct ipv4_hdr *);
		ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr);

		/* Find destination port */
		if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop) == 0 &&
				(enabled_port_mask & 1 << next_hop) != 0) {
			port_out = next_hop;

			/* Build transmission burst for new port */
			len = qconf->tx_mbufs[port_out].len;
		}

		/* if we don't need to do any fragmentation */
		if (likely(IPV4_MTU_DEFAULT >= m->pkt_len)) {
			qconf->tx_mbufs[port_out].m_table[len] = m;
			len2 = 1;
		} else {
			/*
			 * Split the packet into MTU-sized fragments; each
			 * fragment is a direct mbuf (new IPv4 header) chained
			 * to indirect mbufs referencing the input data.
			 */
			len2 = rte_ipv4_fragment_packet(m,
				&qconf->tx_mbufs[port_out].m_table[len],
				(uint16_t)(MBUF_TABLE_SIZE - len),
				IPV4_MTU_DEFAULT,
				rxq->direct_pool, rxq->indirect_pool);

			/* Free input packet */
			rte_pktmbuf_free(m);

			/* If we fail to fragment the packet */
			if (unlikely(len2 < 0))
				return;
		}
	} else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
		/* if this is an IPv6 packet */
		struct ipv6_hdr *ip_hdr;

		ipv6 = 1;

		/* Read the lookup key (i.e. ip_dst) from the input packet */
		ip_hdr = rte_pktmbuf_mtod(m, struct ipv6_hdr *);

		/* Find destination port */
		if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr,
						&next_hop) == 0 &&
				(enabled_port_mask & 1 << next_hop) != 0) {
			port_out = next_hop;

			/* Build transmission burst for new port */
			len = qconf->tx_mbufs[port_out].len;
		}

		/* if we don't need to do any fragmentation */
		if (likely(IPV6_MTU_DEFAULT >= m->pkt_len)) {
			qconf->tx_mbufs[port_out].m_table[len] = m;
			len2 = 1;
		} else {
			len2 = rte_ipv6_fragment_packet(m,
				&qconf->tx_mbufs[port_out].m_table[len],
				(uint16_t)(MBUF_TABLE_SIZE - len),
				IPV6_MTU_DEFAULT,
				rxq->direct_pool, rxq->indirect_pool);

			/* Free input packet */
			rte_pktmbuf_free(m);

			/* If we fail to fragment the packet */
			if (unlikely(len2 < 0))
				return;
		}
	}
	/* else, just forward the packet */
	else {
		qconf->tx_mbufs[port_out].m_table[len] = m;
		len2 = 1;
	}

	for (i = len; i < len + len2; i++) {
		void *d_addr_bytes;

		m = qconf->tx_mbufs[port_out].m_table[i];
		struct ether_hdr *eth_hdr = (struct ether_hdr *)
			rte_pktmbuf_prepend(m,
				(uint16_t)sizeof(struct ether_hdr));
		if (eth_hdr == NULL) {
			rte_panic("No headroom in mbuf.\n");
		}

		m->l2_len = sizeof(struct ether_hdr);

		/* dst addr: 02:00:00:00:00:xx, where xx is the output port */
		d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
		*((uint64_t *)d_addr_bytes) = 0x000000000002 +
			((uint64_t)port_out << 40);

		/* src addr */
		ether_addr_copy(&ports_eth_addr[port_out], &eth_hdr->s_addr);
		if (ipv6)
			eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv6);
		else
			eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4);
	}

	len += len2;

	if (likely(len < MAX_PKT_BURST)) {
		qconf->tx_mbufs[port_out].len = (uint16_t)len;
		return;
	}

	/* Transmit packets */
	send_burst(qconf, (uint16_t)len, port_out);
	qconf->tx_mbufs[port_out].len = 0;
}

/* main processing loop */
static int
main_loop(__attribute__((unused)) void *dummy)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	unsigned lcore_id;
	uint64_t prev_tsc, diff_tsc, cur_tsc;
	int i, j, nb_rx;
	uint16_t portid;
	struct lcore_queue_conf *qconf;
	/* number of TSC cycles in BURST_TX_DRAIN_US microseconds */
	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
		US_PER_S * BURST_TX_DRAIN_US;

	prev_tsc = 0;

	lcore_id = rte_lcore_id();
	qconf = &lcore_queue_conf[lcore_id];

	if (qconf->n_rx_queue == 0) {
		RTE_LOG(INFO, IP_FRAG, "lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	RTE_LOG(INFO, IP_FRAG, "entering main loop on lcore %u\n", lcore_id);

	for (i = 0; i < qconf->n_rx_queue; i++) {

		portid = qconf->rx_queue_list[i].portid;
		RTE_LOG(INFO, IP_FRAG, " -- lcoreid=%u portid=%d\n", lcore_id,
			portid);
	}

	while (1) {

		cur_tsc = rte_rdtsc();

		/*
		 * TX burst queue drain
		 */
		diff_tsc = cur_tsc - prev_tsc;
		if (unlikely(diff_tsc > drain_tsc)) {

			/*
			 * This could be optimized (use queueid instead of
			 * portid), but it is not called so often
			 */
			for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
				if (qconf->tx_mbufs[portid].len == 0)
					continue;
				send_burst(&lcore_queue_conf[lcore_id],
					   qconf->tx_mbufs[portid].len,
					   portid);
				qconf->tx_mbufs[portid].len = 0;
			}

			prev_tsc = cur_tsc;
		}

		/*
		 * Read packet from RX queues
		 */
		for (i = 0; i < qconf->n_rx_queue; i++) {

			portid = qconf->rx_queue_list[i].portid;
			nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst,
						 MAX_PKT_BURST);

			/* Prefetch first packets */
			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
				rte_prefetch0(rte_pktmbuf_mtod(
						pkts_burst[j], void *));
			}

			/* Prefetch and forward already prefetched packets */
			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
						j + PREFETCH_OFFSET], void *));
				l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
			}

			/* Forward remaining prefetched packets */
			for (; j < nb_rx; j++) {
				l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
			}
		}
	}
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n"
	       "  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
	       "  -q NQ: number of queues (= ports) per lcore (default is 1)\n",
	       prgname);
}

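/*
 * Example invocation (a sketch; the EAL options -l/-n depend on the local
 * setup):
 *   ./ip_fragmentation -l 0,1 -n 4 -- -p 0x3 -q 1
 * enables ports 0 and 1 (mask 0x3) with one RX queue per lcore.
 */
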
static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}

static int
parse_nqueue(const char *q_arg)
{
	char *end = NULL;
	unsigned long n;

	/* parse decimal string */
	n = strtoul(q_arg, &end, 10);
	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;
	if (n == 0)
		return -1;
	if (n >= MAX_RX_QUEUE_PER_LCORE)
		return -1;

	return n;
}

/* Parse the arguments given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:q:",
				  lgopts, &option_index)) != EOF) {

		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask < 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;

		/* nqueue */
		case 'q':
			rx_queue_per_lcore = parse_nqueue(optarg);
			if (rx_queue_per_lcore < 0) {
				printf("invalid queue number\n");
				print_usage(prgname);
				return -1;
			}
			break;

		/* long options */
		case 0:
			print_usage(prgname);
			return -1;

		default:
			print_usage(prgname);
			return -1;
		}
	}

	if (enabled_port_mask == 0) {
		printf("portmask not specified\n");
		print_usage(prgname);
		return -1;
	}

	if (optind >= 0)
		argv[optind - 1] = prgname;

	ret = optind - 1;
	optind = 1; /* reset getopt lib */
	return ret;
}

static void
print_ethaddr(const char *name, struct ether_addr *eth_addr)
{
	char buf[ETHER_ADDR_FMT_SIZE];
	ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
	printf("%s%s", name, buf);
}

/* Check the link status of all ports in up to 9s, and print them finally */
static void
check_all_ports_link_status(uint32_t port_mask)
{
#define CHECK_INTERVAL 100 /* 100ms */
#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
	uint16_t portid;
	uint8_t count, all_ports_up, print_flag = 0;
	struct rte_eth_link link;

	printf("\nChecking link status");
	fflush(stdout);
	for (count = 0; count <= MAX_CHECK_TIME; count++) {
		all_ports_up = 1;
		RTE_ETH_FOREACH_DEV(portid) {
			if ((port_mask & (1 << portid)) == 0)
				continue;
			memset(&link, 0, sizeof(link));
			rte_eth_link_get_nowait(portid, &link);
			/* print link status if flag set */
			if (print_flag == 1) {
				if (link.link_status)
					printf(
					"Port%d Link Up. Speed %u Mbps - %s\n",
						portid, link.link_speed,
				(link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
					("full-duplex") : ("half-duplex"));
				else
					printf("Port %d Link Down\n", portid);
				continue;
			}
			/* clear all_ports_up flag if any link down */
			if (link.link_status == ETH_LINK_DOWN) {
				all_ports_up = 0;
				break;
			}
		}
		/* after finally printing all link status, get out */
		if (print_flag == 1)
			break;

		if (all_ports_up == 0) {
			printf(".");
			fflush(stdout);
			rte_delay_ms(CHECK_INTERVAL);
		}

		/* set the print_flag if all ports up or timeout */
		if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
			print_flag = 1;
			printf("\ndone\n");
		}
	}
}

/* Check L3 packet type detection capability of the NIC port */
static int
check_ptype(int portid)
{
	int i, ret;
	int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0;
	uint32_t ptype_mask = RTE_PTYPE_L3_MASK;

	/* first call gets the number of supported ptypes */
	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
	if (ret <= 0)
		return 0;

	uint32_t ptypes[ret];

	ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
	for (i = 0; i < ret; ++i) {
		if (ptypes[i] & RTE_PTYPE_L3_IPV4)
			ptype_l3_ipv4 = 1;
		if (ptypes[i] & RTE_PTYPE_L3_IPV6)
			ptype_l3_ipv6 = 1;
	}

	if (ptype_l3_ipv4 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);

	if (ptype_l3_ipv6 == 0)
		printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);

	if (ptype_l3_ipv4 && ptype_l3_ipv6)
		return 1;

	return 0;
}

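/*
 * When check_ptype() returns 0 the HW cannot classify L3 types, and main()
 * registers cb_parse_ptype() below as an RX callback to do it in software.
 */
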
/* Parse packet type of a packet by SW */
static inline void
parse_ptype(struct rte_mbuf *m)
{
	struct ether_hdr *eth_hdr;
	uint32_t packet_type = RTE_PTYPE_UNKNOWN;
	uint16_t ether_type;

	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
	/* compare in big-endian to avoid swapping the field per packet */
	ether_type = eth_hdr->ether_type;
	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
		packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
		packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;

	m->packet_type = packet_type;
}

/* callback function to detect packet type for a queue of a port */
static uint16_t
cb_parse_ptype(uint16_t port __rte_unused, uint16_t queue __rte_unused,
	       struct rte_mbuf *pkts[], uint16_t nb_pkts,
	       uint16_t max_pkts __rte_unused,
	       void *user_param __rte_unused)
{
	uint16_t i;

	for (i = 0; i < nb_pkts; ++i)
		parse_ptype(pkts[i]);

	return nb_pkts;
}

static int
init_routing_table(void)
{
	struct rte_lpm *lpm;
	struct rte_lpm6 *lpm6;
	int socket, ret;
	unsigned i;

	for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
		if (socket_lpm[socket]) {
			lpm = socket_lpm[socket];
			/* populate the LPM table */
			for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) {
				ret = rte_lpm_add(lpm,
					l3fwd_ipv4_route_array[i].ip,
					l3fwd_ipv4_route_array[i].depth,
					l3fwd_ipv4_route_array[i].if_out);

				if (ret < 0) {
					RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
						"LPM table\n", i);
					return -1;
				}

				RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv4_BYTES_FMT
						"/%d (port %d)\n",
					socket,
					IPv4_BYTES(l3fwd_ipv4_route_array[i].ip),
					l3fwd_ipv4_route_array[i].depth,
					l3fwd_ipv4_route_array[i].if_out);
			}
		}

		if (socket_lpm6[socket]) {
			lpm6 = socket_lpm6[socket];
			/* populate the LPM6 table */
			for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) {
				ret = rte_lpm6_add(lpm6,
					l3fwd_ipv6_route_array[i].ip,
					l3fwd_ipv6_route_array[i].depth,
					l3fwd_ipv6_route_array[i].if_out);

				if (ret < 0) {
					RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
						"LPM6 table\n", i);
					return -1;
				}

				RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv6_BYTES_FMT
						"/%d (port %d)\n",
					socket,
					IPv6_BYTES(l3fwd_ipv6_route_array[i].ip),
					l3fwd_ipv6_route_array[i].depth,
					l3fwd_ipv6_route_array[i].if_out);
			}
		}
	}
	return 0;
}

static int
init_mem(void)
{
	char buf[PATH_MAX];
	struct rte_mempool *mp;
	struct rte_lpm *lpm;
	struct rte_lpm6 *lpm6;
	struct rte_lpm_config lpm_config;
	int socket;
	unsigned lcore_id;

	/* traverse through lcores and initialize structures on each socket */

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {

		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		socket = rte_lcore_to_socket_id(lcore_id);

		if (socket == SOCKET_ID_ANY)
			socket = 0;

		if (socket_direct_pool[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating direct mempool on socket %i\n",
					socket);
			snprintf(buf, sizeof(buf), "pool_direct_%i", socket);

			mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32,
				0, RTE_MBUF_DEFAULT_BUF_SIZE, socket);
			if (mp == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create direct mempool\n");
				return -1;
			}
			socket_direct_pool[socket] = mp;
		}

		if (socket_indirect_pool[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating indirect mempool on socket %i\n",
					socket);
			snprintf(buf, sizeof(buf), "pool_indirect_%i", socket);

			/* data room size 0: indirect mbufs carry no data of
			 * their own, they only reference direct mbufs */
			mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32, 0, 0,
				socket);
			if (mp == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create indirect mempool\n");
				return -1;
			}
			socket_indirect_pool[socket] = mp;
		}

		if (socket_lpm[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating LPM table on socket %i\n", socket);
			snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket);

			lpm_config.max_rules = LPM_MAX_RULES;
			lpm_config.number_tbl8s = 256;
			lpm_config.flags = 0;

			lpm = rte_lpm_create(buf, socket, &lpm_config);
			if (lpm == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create LPM table\n");
				return -1;
			}
			socket_lpm[socket] = lpm;
		}

		if (socket_lpm6[socket] == NULL) {
			RTE_LOG(INFO, IP_FRAG, "Creating LPM6 table on socket %i\n", socket);
			snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket);

			lpm6 = rte_lpm6_create(buf, socket, &lpm6_config);
			if (lpm6 == NULL) {
				RTE_LOG(ERR, IP_FRAG, "Cannot create LPM6 table\n");
				return -1;
			}
			socket_lpm6[socket] = lpm6;
		}
	}

	return 0;
}

int
main(int argc, char **argv)
{
	struct lcore_queue_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	struct rx_queue *rxq;
	int socket, ret;
	uint16_t nb_ports;
	uint16_t queueid = 0;
	unsigned lcore_id = 0, rx_lcore_id = 0;
	uint32_t n_tx_queue, nb_lcores;
	uint16_t portid;

	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "rte_eal_init failed\n");
	argc -= ret;
	argv += ret;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid arguments\n");

	nb_ports = rte_eth_dev_count_avail();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "No ports found!\n");

	nb_lcores = rte_lcore_count();

	/* initialize structures (mempools, lpm etc.) */
	if (init_mem() < 0)
		rte_panic("Cannot initialize memory structures!\n");

	/* check if portmask has non-existent ports */
	if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned)))
		rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n");

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(portid) {
		struct rte_eth_conf local_port_conf = port_conf;
		struct rte_eth_rxconf rxq_conf;

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("Skipping disabled port %d\n", portid);
			continue;
		}

		qconf = &lcore_queue_conf[rx_lcore_id];

		/* limit the frame size to the maximum supported by NIC */
		rte_eth_dev_info_get(portid, &dev_info);
		local_port_conf.rxmode.max_rx_pkt_len = RTE_MIN(
		    dev_info.max_rx_pktlen,
		    local_port_conf.rxmode.max_rx_pkt_len);

		/* get the lcore_id for this port */
		while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
		       qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) {

			rx_lcore_id++;
			if (rx_lcore_id >= RTE_MAX_LCORE)
				rte_exit(EXIT_FAILURE, "Not enough cores\n");

			qconf = &lcore_queue_conf[rx_lcore_id];
		}

		socket = (int) rte_lcore_to_socket_id(rx_lcore_id);
		if (socket == SOCKET_ID_ANY)
			socket = 0;

		rxq = &qconf->rx_queue_list[qconf->n_rx_queue];
		rxq->portid = portid;
		rxq->direct_pool = socket_direct_pool[socket];
		rxq->indirect_pool = socket_indirect_pool[socket];
		rxq->lpm = socket_lpm[socket];
		rxq->lpm6 = socket_lpm6[socket];
		qconf->n_rx_queue++;

		/* init port */
		printf("Initializing port %d on lcore %u...", portid,
		       rx_lcore_id);
		fflush(stdout);

		n_tx_queue = nb_lcores;
		if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
			n_tx_queue = MAX_TX_QUEUE_PER_PORT;
		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
			local_port_conf.txmode.offloads |=
				DEV_TX_OFFLOAD_MBUF_FAST_FREE;
		ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue,
					    &local_port_conf);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "Cannot configure device: "
				"err=%d, port=%d\n",
				ret, portid);
		}

		/* set the mtu to the maximum received packet size */
		ret = rte_eth_dev_set_mtu(portid,
			local_port_conf.rxmode.max_rx_pkt_len - MTU_OVERHEAD);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "Set MTU failed: "
				"err=%d, port=%d\n",
				ret, portid);
		}

		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "Cannot adjust number of "
				"descriptors: err=%d, port=%d\n", ret, portid);
		}

		/* init one RX queue */
		rxq_conf = dev_info.default_rxconf;
		rxq_conf.offloads = local_port_conf.rxmode.offloads;
		ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
					     socket, &rxq_conf,
					     socket_direct_pool[socket]);
		if (ret < 0) {
			printf("\n");
			rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
				"err=%d, port=%d\n",
				ret, portid);
		}

		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf("\n");

		/* init one TX queue per couple (lcore,port) */
		queueid = 0;
		for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
			if (rte_lcore_is_enabled(lcore_id) == 0)
				continue;

			socket = (int) rte_lcore_to_socket_id(lcore_id);
			printf("txq=%u,%d ", lcore_id, queueid);
			fflush(stdout);

			txconf = &dev_info.default_txconf;
			txconf->offloads = local_port_conf.txmode.offloads;
			ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
						     socket, txconf);
			if (ret < 0) {
				printf("\n");
				rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
					"err=%d, port=%d\n", ret, portid);
			}

			qconf = &lcore_queue_conf[lcore_id];
			qconf->tx_queue_id[portid] = queueid;
			queueid++;
		}

		printf("\n");
	}

	printf("\n");

	/* start ports */
	RTE_ETH_FOREACH_DEV(portid) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
				ret, portid);

		rte_eth_promiscuous_enable(portid);

		/* fall back to SW packet type parsing if the HW cannot */
		if (check_ptype(portid) == 0) {
			rte_eth_add_rx_callback(portid, 0, cb_parse_ptype, NULL);
			printf("Add Rx callback function to detect L3 packet type by SW:"
				" port = %d\n", portid);
		}
	}

	if (init_routing_table() < 0)
		rte_exit(EXIT_FAILURE, "Cannot init routing table\n");

	check_all_ports_link_status(enabled_port_mask);

	/* launch per-lcore init on every lcore */
	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}