/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2014 Intel Corporation
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include <sys/types.h>
#include <string.h>
#include <sys/queue.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>
#include <signal.h>

#include <rte_common.h>
#include <rte_byteorder.h>
#include <rte_log.h>
#include <rte_memory.h>
#include <rte_memcpy.h>
#include <rte_eal.h>
#include <rte_launch.h>
#include <rte_atomic.h>
#include <rte_spinlock.h>
#include <rte_cycles.h>
#include <rte_prefetch.h>
#include <rte_lcore.h>
#include <rte_per_lcore.h>
#include <rte_branch_prediction.h>
#include <rte_interrupts.h>
#include <rte_random.h>
#include <rte_debug.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_ip.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_string_fns.h>

#define APP_LOOKUP_EXACT_MATCH 0
#define APP_LOOKUP_LPM 1
#define DO_RFC_1812_CHECKS

//#define APP_LOOKUP_METHOD APP_LOOKUP_EXACT_MATCH
#ifndef APP_LOOKUP_METHOD
#define APP_LOOKUP_METHOD APP_LOOKUP_LPM
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
#include <rte_hash.h>
#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
#include <rte_lpm.h>
#else
#error "APP_LOOKUP_METHOD set to incorrect value"
#endif

#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1

#define MEMPOOL_CACHE_SIZE 256

/*
 * This expression calculates the number of mbufs needed from the user's
 * configuration, accounting for the RX and TX hardware rings, the per-lcore
 * mempool cache, and the per-port, per-lcore mbuf tables. RTE_MAX ensures
 * that NB_MBUF never drops below a minimum of 8192.
 */

#define NB_MBUF RTE_MAX( \
	(nb_ports*nb_rx_queue*nb_rxd + \
	nb_ports*nb_lcores*MAX_PKT_BURST + \
	nb_ports*n_tx_queue*nb_txd + \
	nb_lcores*MEMPOOL_CACHE_SIZE), \
	(unsigned)8192)
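/*
 * Illustrative sizing (not from the original source): with nb_ports = 2,
 * nb_rx_queue = 1, n_tx_queue = 1, nb_rxd = nb_txd = 1024 and nb_lcores = 4,
 * the expression yields 2*1*1024 + 2*4*32 + 2*1*1024 + 4*256 = 5376 mbufs,
 * so RTE_MAX() raises the pool size to the 8192 floor.
 */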

/*
 * RX and TX Prefetch, Host, and Write-back threshold values should be
 * carefully set for optimal performance. Consult the network
 * controller's datasheet and supporting DPDK documentation for guidance
 * on how these parameters should be set.
 */
#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */

/*
 * These default values are optimized for use with the Intel(R) 82599 10 GbE
 * Controller and the DPDK ixgbe PMD. Consider using other values for other
 * network controllers and/or network drivers.
 */
#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
#define TX_HTHRESH 0  /**< Default values of TX host threshold reg. */
#define TX_WTHRESH 0  /**< Default values of TX write-back threshold reg. */

#define MAX_PKT_BURST 32
#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */

#define NB_SOCKETS 8

#define SOCKET0 0

/* Configure how many packets ahead to prefetch, when reading packets */
#define PREFETCH_OFFSET 3

/*
 * Configurable number of RX/TX ring descriptors
 */
#define RTE_TEST_RX_DESC_DEFAULT 1024
#define RTE_TEST_TX_DESC_DEFAULT 1024
static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;

/* ethernet addresses of ports */
static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];

/* mask of enabled ports */
static uint32_t enabled_port_mask = 0;
static int numa_on = 1; /**< NUMA is enabled by default. */

struct mbuf_table {
	uint16_t len;
	struct rte_mbuf *m_table[MAX_PKT_BURST];
};

struct lcore_rx_queue {
	uint16_t port_id;
	uint8_t queue_id;
} __rte_cache_aligned;

#define MAX_RX_QUEUE_PER_LCORE 16
#define MAX_TX_QUEUE_PER_PORT 1
#define MAX_RX_QUEUE_PER_PORT 1

#define MAX_LCORE_PARAMS 1024
struct lcore_params {
	uint16_t port_id;
	uint8_t queue_id;
	uint8_t lcore_id;
} __rte_cache_aligned;

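/*
 * Default RX queue-to-lcore mapping: each (port, queue, lcore) tuple assigns
 * one RX queue of one port to the lcore that will poll it. Overridden by the
 * --config command-line option.
 */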
static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
static struct lcore_params lcore_params_array_default[] = {
	{0, 0, 2},
	{0, 1, 2},
	{0, 2, 2},
	{1, 0, 2},
	{1, 1, 2},
	{1, 2, 2},
	{2, 0, 2},
	{3, 0, 3},
	{3, 1, 3},
};

static struct lcore_params * lcore_params = lcore_params_array_default;
static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
				sizeof(lcore_params_array_default[0]);
158
159static struct rte_eth_conf port_conf = {
160 .rxmode = {
161 .mq_mode = ETH_MQ_RX_RSS,
162 .max_rx_pkt_len = ETHER_MAX_LEN,
163 .split_hdr_size = 0,
9f95a23c 164 .offloads = DEV_RX_OFFLOAD_CHECKSUM,
7c673cae
FG
165 },
166 .rx_adv_conf = {
167 .rss_conf = {
168 .rss_key = NULL,
169 .rss_hf = ETH_RSS_IP,
170 },
171 },
172 .txmode = {
173 .mq_mode = ETH_MQ_TX_NONE,
174 },
175};
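/*
 * The RSS configuration above spreads incoming IP flows across the RX queues.
 * The NULL rss_key leaves key selection to the PMD, and rss_hf is masked in
 * main() against the hash types each device actually supports.
 */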

static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];


#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)

#ifdef RTE_ARCH_X86
#include <rte_hash_crc.h>
#define DEFAULT_HASH_FUNC rte_hash_crc
#else
#include <rte_jhash.h>
#define DEFAULT_HASH_FUNC rte_jhash
#endif

struct ipv4_5tuple {
	uint32_t ip_dst;
	uint32_t ip_src;
	uint16_t port_dst;
	uint16_t port_src;
	uint8_t proto;
} __attribute__((__packed__));

struct l3fwd_route {
	struct ipv4_5tuple key;
	uint8_t if_out;
};

static struct l3fwd_route l3fwd_route_array[] = {
	{{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0},
	{{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1},
	{{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2},
	{{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3},
};

typedef struct rte_hash lookup_struct_t;
static lookup_struct_t *l3fwd_lookup_struct[NB_SOCKETS];

#define L3FWD_HASH_ENTRIES 1024
struct rte_hash_parameters l3fwd_hash_params = {
	.name = "l3fwd_hash_0",
	.entries = L3FWD_HASH_ENTRIES,
	.key_len = sizeof(struct ipv4_5tuple),
	.hash_func = DEFAULT_HASH_FUNC,
	.hash_func_init_val = 0,
	.socket_id = SOCKET0,
};

#define L3FWD_NUM_ROUTES \
	(sizeof(l3fwd_route_array) / sizeof(l3fwd_route_array[0]))

static uint8_t l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
struct l3fwd_route {
	uint32_t ip;
	uint8_t depth;
	uint8_t if_out;
};

static struct l3fwd_route l3fwd_route_array[] = {
	{IPv4(1,1,1,0), 24, 0},
	{IPv4(2,1,1,0), 24, 1},
	{IPv4(3,1,1,0), 24, 2},
	{IPv4(4,1,1,0), 24, 3},
	{IPv4(5,1,1,0), 24, 4},
	{IPv4(6,1,1,0), 24, 5},
	{IPv4(7,1,1,0), 24, 6},
	{IPv4(8,1,1,0), 24, 7},
};

#define L3FWD_NUM_ROUTES \
	(sizeof(l3fwd_route_array) / sizeof(l3fwd_route_array[0]))

#define L3FWD_LPM_MAX_RULES 1024

typedef struct rte_lpm lookup_struct_t;
static lookup_struct_t *l3fwd_lookup_struct[NB_SOCKETS];
#endif

struct lcore_conf {
	uint16_t n_rx_queue;
	struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
	uint16_t tx_queue_id;
	struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
	lookup_struct_t * lookup_struct;
} __rte_cache_aligned;

static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
static rte_spinlock_t spinlock_conf[RTE_MAX_ETHPORTS] = {RTE_SPINLOCK_INITIALIZER};
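/*
 * Each port has a single TX queue (MAX_TX_QUEUE_PER_PORT is 1), as is typical
 * for a VF, so lcores must serialize their rte_eth_tx_burst() calls on that
 * queue through the per-port spinlock taken in send_burst().
 */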
/* Send burst of packets on an output interface */
static inline int
send_burst(struct lcore_conf *qconf, uint16_t n, uint16_t port)
{
	struct rte_mbuf **m_table;
	int ret;
	uint16_t queueid;

	queueid = qconf->tx_queue_id;
	m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;

	rte_spinlock_lock(&spinlock_conf[port]);
	ret = rte_eth_tx_burst(port, queueid, m_table, n);
	rte_spinlock_unlock(&spinlock_conf[port]);

	if (unlikely(ret < n)) {
		do {
			rte_pktmbuf_free(m_table[ret]);
		} while (++ret < n);
	}

	return 0;
}

/* Enqueue a single packet, and send burst if queue is filled */
static inline int
send_single_packet(struct rte_mbuf *m, uint16_t port)
{
	uint32_t lcore_id;
	uint16_t len;
	struct lcore_conf *qconf;

	lcore_id = rte_lcore_id();

	qconf = &lcore_conf[lcore_id];
	len = qconf->tx_mbufs[port].len;
	qconf->tx_mbufs[port].m_table[len] = m;
	len++;

	/* enough pkts to be sent */
	if (unlikely(len == MAX_PKT_BURST)) {
		send_burst(qconf, MAX_PKT_BURST, port);
		len = 0;
	}

	qconf->tx_mbufs[port].len = len;
	return 0;
}

#ifdef DO_RFC_1812_CHECKS
static inline int
is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
{
	/* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
	/*
	 * 1. The packet length reported by the Link Layer must be large
	 * enough to hold the minimum length legal IP datagram (20 bytes).
	 */
	if (link_len < sizeof(struct ipv4_hdr))
		return -1;

	/* 2. The IP checksum must be correct. */
	/* this is checked in H/W */

	/*
	 * 3. The IP version number must be 4. If the version number is not 4
	 * then the packet may be another version of IP, such as IPng or
	 * ST-II.
	 */
	if (((pkt->version_ihl) >> 4) != 4)
		return -3;
	/*
	 * 4. The IP header length field must be large enough to hold the
	 * minimum length legal IP datagram (20 bytes = 5 words).
	 */
	if ((pkt->version_ihl & 0xf) < 5)
		return -4;

	/*
	 * 5. The IP total length field must be large enough to hold the IP
	 * datagram header, whose length is specified in the IP header length
	 * field.
	 */
	if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
		return -5;

	return 0;
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
static void
print_key(struct ipv4_5tuple key)
{
	printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, proto = %d\n",
		(unsigned)key.ip_dst, (unsigned)key.ip_src, key.port_dst, key.port_src, key.proto);
}

static inline uint16_t
get_dst_port(struct ipv4_hdr *ipv4_hdr, uint16_t portid,
	     lookup_struct_t *l3fwd_lookup_struct)
{
	struct ipv4_5tuple key;
	struct tcp_hdr *tcp;
	struct udp_hdr *udp;
	int ret = 0;

	key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
	key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr);
	key.proto = ipv4_hdr->next_proto_id;

	switch (ipv4_hdr->next_proto_id) {
	case IPPROTO_TCP:
		tcp = (struct tcp_hdr *)((unsigned char *) ipv4_hdr +
					sizeof(struct ipv4_hdr));
		key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
		key.port_src = rte_be_to_cpu_16(tcp->src_port);
		break;

	case IPPROTO_UDP:
		udp = (struct udp_hdr *)((unsigned char *) ipv4_hdr +
					sizeof(struct ipv4_hdr));
		key.port_dst = rte_be_to_cpu_16(udp->dst_port);
		key.port_src = rte_be_to_cpu_16(udp->src_port);
		break;

	default:
		key.port_dst = 0;
		key.port_src = 0;
	}

	/* Find destination port */
	ret = rte_hash_lookup(l3fwd_lookup_struct, (const void *)&key);
	return ((ret < 0) ? portid : l3fwd_out_if[ret]);
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
static inline uint32_t
get_dst_port(struct ipv4_hdr *ipv4_hdr, uint16_t portid,
	     lookup_struct_t *l3fwd_lookup_struct)
{
	uint32_t next_hop;

	return ((rte_lpm_lookup(l3fwd_lookup_struct,
		rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0) ?
		next_hop : portid);
}
#endif

static inline void
l3fwd_simple_forward(struct rte_mbuf *m, uint16_t portid,
		     lookup_struct_t *l3fwd_lookup_struct)
{
	struct ether_hdr *eth_hdr;
	struct ipv4_hdr *ipv4_hdr;
	void *tmp;
	uint16_t dst_port;

	eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);

	ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
					   sizeof(struct ether_hdr));

#ifdef DO_RFC_1812_CHECKS
	/* Check to make sure the packet is valid (RFC1812) */
	if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
		rte_pktmbuf_free(m);
		return;
	}
#endif

	dst_port = get_dst_port(ipv4_hdr, portid, l3fwd_lookup_struct);
	if (dst_port >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port) == 0)
		dst_port = portid;

	/* 02:00:00:00:00:xx */
	tmp = &eth_hdr->d_addr.addr_bytes[0];
	*((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
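	/*
	 * 0x02 in the first octet marks a locally administered address, and
	 * dst_port shifted left by 40 bits lands in the sixth octet on a
	 * little-endian host. The 8-byte store also spills into the first two
	 * bytes of the source address, which the ether_addr_copy() below
	 * overwrites anyway.
	 */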

#ifdef DO_RFC_1812_CHECKS
	/* Update time to live and header checksum */
	--(ipv4_hdr->time_to_live);
	++(ipv4_hdr->hdr_checksum);
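	/*
	 * Incremental checksum update for the TTL decrement (cf. RFC 1141):
	 * TTL is the high-order byte of its 16-bit header word, so the header
	 * sum drops by 0x0100; on a little-endian host, incrementing the raw
	 * 16-bit checksum bumps its network-order high byte, adding the
	 * compensating 0x0100 (carry corner cases are ignored, as in the
	 * upstream l3fwd examples).
	 */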
#endif

	/* src addr */
	ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);

	send_single_packet(m, dst_port);

}

/* main processing loop */
static int
main_loop(__attribute__((unused)) void *dummy)
{
	struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
	unsigned lcore_id;
	uint64_t prev_tsc, diff_tsc, cur_tsc;
	int i, j, nb_rx;
	uint8_t queueid;
	uint16_t portid;
	struct lcore_conf *qconf;
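	/*
	 * drain_tsc: the number of TSC cycles in BURST_TX_DRAIN_US
	 * microseconds, using a ceiling division of the per-microsecond cycle
	 * count, so partially filled TX buffers are flushed at least every
	 * ~100us.
	 */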
	const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;

	prev_tsc = 0;

	lcore_id = rte_lcore_id();
	qconf = &lcore_conf[lcore_id];

	if (qconf->n_rx_queue == 0) {
		RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
		return 0;
	}

	RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);

	for (i = 0; i < qconf->n_rx_queue; i++) {

		portid = qconf->rx_queue_list[i].port_id;
		queueid = qconf->rx_queue_list[i].queue_id;
		RTE_LOG(INFO, L3FWD, " --lcoreid=%u portid=%u rxqueueid=%hhu\n",
			lcore_id, portid, queueid);
	}

	while (1) {

		cur_tsc = rte_rdtsc();

		/*
		 * TX burst queue drain
		 */
		diff_tsc = cur_tsc - prev_tsc;
		if (unlikely(diff_tsc > drain_tsc)) {

			/*
			 * This could be optimized (use queueid instead of
			 * portid), but it is not called so often
			 */
			for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
				if (qconf->tx_mbufs[portid].len == 0)
					continue;
				send_burst(&lcore_conf[lcore_id],
					qconf->tx_mbufs[portid].len,
					portid);
				qconf->tx_mbufs[portid].len = 0;
			}

			prev_tsc = cur_tsc;
		}

		/*
		 * Read packet from RX queues
		 */
		for (i = 0; i < qconf->n_rx_queue; ++i) {

			portid = qconf->rx_queue_list[i].port_id;
			queueid = qconf->rx_queue_list[i].queue_id;
			nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, MAX_PKT_BURST);

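			/*
			 * Software pipeline: prefetches are issued
			 * PREFETCH_OFFSET packets ahead of the packet being
			 * forwarded, so header data is already in cache when
			 * l3fwd_simple_forward() dereferences it.
			 */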
			/* Prefetch first packets */
			for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
				rte_prefetch0(rte_pktmbuf_mtod(
						pkts_burst[j], void *));
			}

			/* Prefetch and forward already prefetched packets */
			for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
				rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
						j + PREFETCH_OFFSET], void *));
				l3fwd_simple_forward(pkts_burst[j], portid, qconf->lookup_struct);
			}

			/* Forward remaining prefetched packets */
			for (; j < nb_rx; j++) {
				l3fwd_simple_forward(pkts_burst[j], portid, qconf->lookup_struct);
			}
		}
	}
}

static int
check_lcore_params(void)
{
	uint8_t queue, lcore;
	uint16_t i;
	int socketid;

	for (i = 0; i < nb_lcore_params; ++i) {
		queue = lcore_params[i].queue_id;
		if (queue >= MAX_RX_QUEUE_PER_PORT) {
			printf("invalid queue number: %hhu\n", queue);
			return -1;
		}
		lcore = lcore_params[i].lcore_id;
		if (!rte_lcore_is_enabled(lcore)) {
			printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
			return -1;
		}
		socketid = rte_lcore_to_socket_id(lcore);
		if (socketid != 0 && numa_on == 0) {
			printf("warning: lcore %hhu is on socket %d with numa off\n",
				lcore, socketid);
		}
	}
	return 0;
}

static int
check_port_config(void)
{
	unsigned portid;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		portid = lcore_params[i].port_id;
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("port %u is not enabled in port mask\n", portid);
			return -1;
		}
		if (!rte_eth_dev_is_valid_port(portid)) {
			printf("port %u is not present on the board\n", portid);
			return -1;
		}
	}
	return 0;
}

static uint8_t
get_port_n_rx_queues(const uint16_t port)
{
	int queue = -1;
	uint16_t i;

	for (i = 0; i < nb_lcore_params; ++i) {
		if (lcore_params[i].port_id == port && lcore_params[i].queue_id > queue)
			queue = lcore_params[i].queue_id;
	}
	return (uint8_t)(++queue);
}

static int
init_lcore_rx_queues(void)
{
	uint16_t i, nb_rx_queue;
	uint8_t lcore;

	for (i = 0; i < nb_lcore_params; ++i) {
		lcore = lcore_params[i].lcore_id;
		nb_rx_queue = lcore_conf[lcore].n_rx_queue;
		if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
			printf("error: too many queues (%u) for lcore: %u\n",
				(unsigned)nb_rx_queue + 1, (unsigned)lcore);
			return -1;
		} else {
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
				lcore_params[i].port_id;
			lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
				lcore_params[i].queue_id;
			lcore_conf[lcore].n_rx_queue++;
		}
	}
	return 0;
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf ("%s [EAL options] -- -p PORTMASK"
		" [--config (port,queue,lcore)[,(port,queue,lcore)]]\n"
		" -p PORTMASK: hexadecimal bitmask of ports to configure\n"
		" --config (port,queue,lcore): rx queues configuration\n"
		" --no-numa: optional, disable numa awareness\n",
		prgname);
}
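/*
 * Illustrative invocation (not part of the original source; the core list,
 * port mask and queue mapping depend on the target machine):
 *
 *   ./l3fwd-vf -l 1-2 -n 4 -- -p 0x3 --config="(0,0,1),(1,0,2)"
 */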

/* Custom signal handling to close the ports on process termination */
static void
signal_handler(int signum)
{
	uint16_t portid;

	/* When we receive a SIGINT signal */
	if (signum == SIGINT) {
		RTE_ETH_FOREACH_DEV(portid) {
			/* skip ports that are not enabled */
			if ((enabled_port_mask & (1 << portid)) == 0)
				continue;
			rte_eth_dev_close(portid);
		}
	}
	rte_exit(EXIT_SUCCESS, "\n User forced exit\n");
}

static int
parse_portmask(const char *portmask)
{
	char *end = NULL;
	unsigned long pm;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}

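/*
 * Parse the --config option: a list of (port,queue,lcore) tuples that
 * replaces the default lcore_params mapping.
 */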
static int
parse_config(const char *q_arg)
{
	char s[256];
	const char *p, *p0 = q_arg;
	char *end;
	enum fieldnames {
		FLD_PORT = 0,
		FLD_QUEUE,
		FLD_LCORE,
		_NUM_FLD
	};
	unsigned long int_fld[_NUM_FLD];
	char *str_fld[_NUM_FLD];
	int i;
	unsigned size;

	nb_lcore_params = 0;

	while ((p = strchr(p0, '(')) != NULL) {
		++p;
		if ((p0 = strchr(p, ')')) == NULL)
			return -1;

		size = p0 - p;
		if (size >= sizeof(s))
			return -1;

		snprintf(s, sizeof(s), "%.*s", size, p);
		if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
			return -1;
		for (i = 0; i < _NUM_FLD; i++) {
			errno = 0;
			int_fld[i] = strtoul(str_fld[i], &end, 0);
			if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
				return -1;
		}
		if (nb_lcore_params >= MAX_LCORE_PARAMS) {
			printf("exceeded max number of lcore params: %hu\n",
				nb_lcore_params);
			return -1;
		}
		lcore_params_array[nb_lcore_params].port_id = int_fld[FLD_PORT];
		lcore_params_array[nb_lcore_params].queue_id = (uint8_t)int_fld[FLD_QUEUE];
		lcore_params_array[nb_lcore_params].lcore_id = (uint8_t)int_fld[FLD_LCORE];
		++nb_lcore_params;
	}
	lcore_params = lcore_params_array;
	return 0;
}

/* Parse the argument given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt, ret;
	char **argvopt;
	int option_index;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{"config", 1, 0, 0},
		{"no-numa", 0, 0, 0},
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:",
				lgopts, &option_index)) != EOF) {

		switch (opt) {
		/* portmask */
		case 'p':
			enabled_port_mask = parse_portmask(optarg);
			if (enabled_port_mask == 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;

		/* long options */
		case 0:
			if (!strcmp(lgopts[option_index].name, "config")) {
				ret = parse_config(optarg);
				if (ret) {
					printf("invalid config\n");
					print_usage(prgname);
					return -1;
				}
			}

			if (!strcmp(lgopts[option_index].name, "no-numa")) {
				printf("numa is disabled\n");
				numa_on = 0;
			}
			break;

		default:
			print_usage(prgname);
			return -1;
		}
	}

	if (optind >= 0)
		argv[optind-1] = prgname;

	ret = optind-1;
	optind = 1; /* reset getopt lib */
	return ret;
}

static void
print_ethaddr(const char *name, const struct ether_addr *eth_addr)
{
	char buf[ETHER_ADDR_FMT_SIZE];
	ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
	printf("%s%s", name, buf);
}

#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
static void
setup_hash(int socketid)
{
	unsigned i;
	int ret;
	char s[64];

	/* create hashes */
	snprintf(s, sizeof(s), "l3fwd_hash_%d", socketid);
	l3fwd_hash_params.name = s;
	l3fwd_hash_params.socket_id = socketid;
	l3fwd_lookup_struct[socketid] = rte_hash_create(&l3fwd_hash_params);
	if (l3fwd_lookup_struct[socketid] == NULL)
		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
				"socket %d\n", socketid);

	/* populate the hash */
	for (i = 0; i < L3FWD_NUM_ROUTES; i++) {
		ret = rte_hash_add_key (l3fwd_lookup_struct[socketid],
				(void *) &l3fwd_route_array[i].key);
		if (ret < 0) {
			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
				"l3fwd hash on socket %d\n", i, socketid);
		}
		l3fwd_out_if[ret] = l3fwd_route_array[i].if_out;
		printf("Hash: Adding key\n");
		print_key(l3fwd_route_array[i].key);
	}
}
#endif

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
static void
setup_lpm(int socketid)
{
	unsigned i;
	int ret;
	char s[64];

	struct rte_lpm_config lpm_ipv4_config;

	lpm_ipv4_config.max_rules = L3FWD_LPM_MAX_RULES;
	lpm_ipv4_config.number_tbl8s = 256;
	lpm_ipv4_config.flags = 0;

	/* create the LPM table */
	snprintf(s, sizeof(s), "L3FWD_LPM_%d", socketid);
	l3fwd_lookup_struct[socketid] =
		rte_lpm_create(s, socketid, &lpm_ipv4_config);
	if (l3fwd_lookup_struct[socketid] == NULL)
		rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
				" on socket %d\n", socketid);

	/* populate the LPM table */
	for (i = 0; i < L3FWD_NUM_ROUTES; i++) {
		ret = rte_lpm_add(l3fwd_lookup_struct[socketid],
			l3fwd_route_array[i].ip,
			l3fwd_route_array[i].depth,
			l3fwd_route_array[i].if_out);

		if (ret < 0) {
			rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
				"l3fwd LPM table on socket %d\n",
				i, socketid);
		}

		printf("LPM: Adding route 0x%08x / %d (%d)\n",
			(unsigned)l3fwd_route_array[i].ip,
			l3fwd_route_array[i].depth,
			l3fwd_route_array[i].if_out);
	}
}
#endif

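/*
 * Create one mbuf pool, and one lookup table, per NUMA socket in use, so each
 * lcore works from socket-local memory; with --no-numa everything is placed
 * on socket 0.
 */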
static int
init_mem(unsigned nb_mbuf)
{
	struct lcore_conf *qconf;
	int socketid;
	unsigned lcore_id;
	char s[64];

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;

		if (numa_on)
			socketid = rte_lcore_to_socket_id(lcore_id);
		else
			socketid = 0;

		if (socketid >= NB_SOCKETS) {
			rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
				socketid, lcore_id, NB_SOCKETS);
		}
		if (pktmbuf_pool[socketid] == NULL) {
			snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
			pktmbuf_pool[socketid] = rte_pktmbuf_pool_create(s,
				nb_mbuf, MEMPOOL_CACHE_SIZE, 0,
				RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
			if (pktmbuf_pool[socketid] == NULL)
				rte_exit(EXIT_FAILURE, "Cannot init mbuf pool on socket %d\n", socketid);
			else
				printf("Allocated mbuf pool on socket %d\n", socketid);

#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
			setup_lpm(socketid);
#else
			setup_hash(socketid);
#endif
		}
		qconf = &lcore_conf[lcore_id];
		qconf->lookup_struct = l3fwd_lookup_struct[socketid];
	}
	return 0;
}

int
main(int argc, char **argv)
{
	struct lcore_conf *qconf;
	struct rte_eth_dev_info dev_info;
	struct rte_eth_txconf *txconf;
	int ret;
	unsigned nb_ports;
	uint16_t queueid, portid;
	unsigned lcore_id;
	uint32_t nb_lcores;
	uint16_t n_tx_queue;
	uint8_t nb_rx_queue, queue, socketid;

	signal(SIGINT, signal_handler);
	/* init EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
	argc -= ret;
	argv += ret;

	/* parse application arguments (after the EAL ones) */
	ret = parse_args(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "Invalid L3FWD-VF parameters\n");

	if (check_lcore_params() < 0)
		rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");

	ret = init_lcore_rx_queues();
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");

	nb_ports = rte_eth_dev_count_avail();

	if (check_port_config() < 0)
		rte_exit(EXIT_FAILURE, "check_port_config failed\n");

	nb_lcores = rte_lcore_count();

	/* initialize all ports */
	RTE_ETH_FOREACH_DEV(portid) {
		struct rte_eth_conf local_port_conf = port_conf;

		/* skip ports that are not enabled */
		if ((enabled_port_mask & (1 << portid)) == 0) {
			printf("\nSkipping disabled port %d\n", portid);
			continue;
		}

		/* init port */
		printf("Initializing port %d ... ", portid);
		fflush(stdout);

		/* must always be 1, since MAX_RX_QUEUE_PER_PORT is 1 */
		nb_rx_queue = get_port_n_rx_queues(portid);
		n_tx_queue = MAX_TX_QUEUE_PER_PORT;

		printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
			nb_rx_queue, (unsigned)1);

		rte_eth_dev_info_get(portid, &dev_info);
		if (dev_info.tx_offload_capa & DEV_TX_OFFLOAD_MBUF_FAST_FREE)
			local_port_conf.txmode.offloads |=
				DEV_TX_OFFLOAD_MBUF_FAST_FREE;

		local_port_conf.rx_adv_conf.rss_conf.rss_hf &=
			dev_info.flow_type_rss_offloads;
		if (local_port_conf.rx_adv_conf.rss_conf.rss_hf !=
				port_conf.rx_adv_conf.rss_conf.rss_hf) {
			printf("Port %u modified RSS hash function based on hardware support, "
				"requested:%#"PRIx64" configured:%#"PRIx64"\n",
				portid,
				port_conf.rx_adv_conf.rss_conf.rss_hf,
				local_port_conf.rx_adv_conf.rss_conf.rss_hf);
		}

		ret = rte_eth_dev_configure(portid, nb_rx_queue,
					    n_tx_queue, &local_port_conf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
				ret, portid);

		ret = rte_eth_dev_adjust_nb_rx_tx_desc(portid, &nb_rxd,
						       &nb_txd);
		if (ret < 0)
			rte_exit(EXIT_FAILURE,
				"Cannot adjust number of descriptors: err=%d, port=%d\n",
				ret, portid);

		rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
		print_ethaddr(" Address:", &ports_eth_addr[portid]);
		printf(", ");

		ret = init_mem(NB_MBUF);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "init_mem failed\n");

		/* init one TX queue */
		socketid = (uint8_t)rte_lcore_to_socket_id(rte_get_master_lcore());

		printf("txq=%d,%d,%d ", portid, 0, socketid);
		fflush(stdout);

		txconf = &dev_info.default_txconf;
		txconf->offloads = local_port_conf.txmode.offloads;
		ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
					     socketid, txconf);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
				"port=%d\n", ret, portid);

		printf("\n");
	}

	for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
		struct rte_eth_rxconf rxq_conf;

		if (rte_lcore_is_enabled(lcore_id) == 0)
			continue;
		qconf = &lcore_conf[lcore_id];
		qconf->tx_queue_id = 0;

		printf("\nInitializing rx queues on lcore %u ... ", lcore_id);
		fflush(stdout);
		/* init RX queues */
		for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
			struct rte_eth_dev *dev;
			struct rte_eth_conf *conf;

			portid = qconf->rx_queue_list[queue].port_id;
			queueid = qconf->rx_queue_list[queue].queue_id;
			dev = &rte_eth_devices[portid];
			conf = &dev->data->dev_conf;

			if (numa_on)
				socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
			else
				socketid = 0;

			printf("rxq=%d,%d,%d ", portid, queueid, socketid);
			fflush(stdout);

			rte_eth_dev_info_get(portid, &dev_info);
			rxq_conf = dev_info.default_rxconf;
			rxq_conf.offloads = conf->rxmode.offloads;
			ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
						     socketid, &rxq_conf,
						     pktmbuf_pool[socketid]);
			if (ret < 0)
				rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, "
					"port=%d\n", ret, portid);
		}
	}
	printf("\n");

	/* start ports */
	RTE_ETH_FOREACH_DEV(portid) {
		if ((enabled_port_mask & (1 << portid)) == 0) {
			continue;
		}
		/* Start device */
		ret = rte_eth_dev_start(portid);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
				ret, portid);

		printf("done: Port %d\n", portid);

	}

	/* launch per-lcore init on every lcore */
	rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	return 0;
}