4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 #include <rte_ethdev.h>
42 #include <rte_cycles.h>
43 #include <rte_malloc.h>
44 #include <rte_debug.h>
45 #include <rte_prefetch.h>
46 #include <rte_distributor.h>
/* ring size passed to rte_eth_rx_queue_setup() for every RX queue */
48 #define RX_RING_SIZE 256
/* ring size passed to rte_eth_tx_queue_setup() for every TX queue */
49 #define TX_RING_SIZE 512
/* mbufs per port: the pool is created with NUM_MBUFS * nb_ports elements */
50 #define NUM_MBUFS ((64*1024)-1)
/* cache size argument given to rte_pktmbuf_pool_create() */
51 #define MBUF_CACHE_SIZE 250
/* size argument given to rte_ring_create() for the output ring */
53 #define RTE_RING_SZ 1024
/* log type named DISTRAPP in this file's RTE_LOG() calls; aliases USER1 */
55 #define RTE_LOGTYPE_DISTRAPP RTE_LOGTYPE_USER1
/*
 * mask of enabled ports: port N is treated as enabled when bit N is set
 * (tested throughout as enabled_port_mask & (1 << port)); assigned from
 * the -p PORTMASK argument via parse_portmask()
 */
57 /* mask of enabled ports */
58 static uint32_t enabled_port_mask
;
/* loop-exit flag polled by the TX loop (lcore_tx) and the worker loop */
59 volatile uint8_t quit_signal
;
/* loop-exit flag polled by the RX loop (lcore_rx) */
60 volatile uint8_t quit_signal_rx
;
62 static volatile struct app_stats
{
65 uint64_t returned_pkts
;
66 uint64_t enqueued_pkts
;
67 } rx __rte_cache_aligned
;
70 uint64_t dequeue_pkts
;
72 } tx __rte_cache_aligned
;
75 static const struct rte_eth_conf port_conf_default
= {
77 .mq_mode
= ETH_MQ_RX_RSS
,
78 .max_rx_pkt_len
= ETHER_MAX_LEN
,
81 .mq_mode
= ETH_MQ_TX_NONE
,
85 .rss_hf
= ETH_RSS_IP
| ETH_RSS_UDP
|
86 ETH_RSS_TCP
| ETH_RSS_SCTP
,
91 struct output_buffer
{
93 struct rte_mbuf
*mbufs
[BURST_SIZE
];
97 * Initialises a given port using global settings, with the rx buffers
98 * coming from the mbuf_pool passed as a parameter
101 port_init(uint8_t port
, struct rte_mempool
*mbuf_pool
)
103 struct rte_eth_conf port_conf
= port_conf_default
;
104 const uint16_t rxRings
= 1, txRings
= rte_lcore_count() - 1;
108 if (port
>= rte_eth_dev_count())
111 retval
= rte_eth_dev_configure(port
, rxRings
, txRings
, &port_conf
);
115 for (q
= 0; q
< rxRings
; q
++) {
116 retval
= rte_eth_rx_queue_setup(port
, q
, RX_RING_SIZE
,
117 rte_eth_dev_socket_id(port
),
123 for (q
= 0; q
< txRings
; q
++) {
124 retval
= rte_eth_tx_queue_setup(port
, q
, TX_RING_SIZE
,
125 rte_eth_dev_socket_id(port
),
131 retval
= rte_eth_dev_start(port
);
135 struct rte_eth_link link
;
136 rte_eth_link_get_nowait(port
, &link
);
137 if (!link
.link_status
) {
139 rte_eth_link_get_nowait(port
, &link
);
142 if (!link
.link_status
) {
143 printf("Link down on port %"PRIu8
"\n", port
);
147 struct ether_addr addr
;
148 rte_eth_macaddr_get(port
, &addr
);
149 printf("Port %u MAC: %02"PRIx8
" %02"PRIx8
" %02"PRIx8
150 " %02"PRIx8
" %02"PRIx8
" %02"PRIx8
"\n",
152 addr
.addr_bytes
[0], addr
.addr_bytes
[1],
153 addr
.addr_bytes
[2], addr
.addr_bytes
[3],
154 addr
.addr_bytes
[4], addr
.addr_bytes
[5]);
156 rte_eth_promiscuous_enable(port
);
161 struct lcore_params
{
163 struct rte_distributor
*d
;
165 struct rte_mempool
*mem_pool
;
169 quit_workers(struct rte_distributor
*d
, struct rte_mempool
*p
)
171 const unsigned num_workers
= rte_lcore_count() - 2;
173 struct rte_mbuf
*bufs
[num_workers
];
175 if (rte_mempool_get_bulk(p
, (void *)bufs
, num_workers
) != 0) {
176 printf("line %d: Error getting mbufs from pool\n", __LINE__
);
180 for (i
= 0; i
< num_workers
; i
++)
181 bufs
[i
]->hash
.rss
= i
<< 1;
183 rte_distributor_process(d
, bufs
, num_workers
);
184 rte_mempool_put_bulk(p
, (void *)bufs
, num_workers
);
190 lcore_rx(struct lcore_params
*p
)
192 struct rte_distributor
*d
= p
->d
;
193 struct rte_mempool
*mem_pool
= p
->mem_pool
;
194 struct rte_ring
*r
= p
->r
;
195 const uint8_t nb_ports
= rte_eth_dev_count();
196 const int socket_id
= rte_socket_id();
199 for (port
= 0; port
< nb_ports
; port
++) {
200 /* skip ports that are not enabled */
201 if ((enabled_port_mask
& (1 << port
)) == 0)
204 if (rte_eth_dev_socket_id(port
) > 0 &&
205 rte_eth_dev_socket_id(port
) != socket_id
)
206 printf("WARNING, port %u is on remote NUMA node to "
207 "RX thread.\n\tPerformance will not "
208 "be optimal.\n", port
);
211 printf("\nCore %u doing packet RX.\n", rte_lcore_id());
213 while (!quit_signal_rx
) {
215 /* skip ports that are not enabled */
216 if ((enabled_port_mask
& (1 << port
)) == 0) {
217 if (++port
== nb_ports
)
221 struct rte_mbuf
*bufs
[BURST_SIZE
*2];
222 const uint16_t nb_rx
= rte_eth_rx_burst(port
, 0, bufs
,
224 if (unlikely(nb_rx
== 0)) {
225 if (++port
== nb_ports
)
229 app_stats
.rx
.rx_pkts
+= nb_rx
;
231 rte_distributor_process(d
, bufs
, nb_rx
);
232 const uint16_t nb_ret
= rte_distributor_returned_pkts(d
,
234 app_stats
.rx
.returned_pkts
+= nb_ret
;
235 if (unlikely(nb_ret
== 0)) {
236 if (++port
== nb_ports
)
241 uint16_t sent
= rte_ring_enqueue_burst(r
, (void *)bufs
, nb_ret
);
242 app_stats
.rx
.enqueued_pkts
+= sent
;
243 if (unlikely(sent
< nb_ret
)) {
244 RTE_LOG(DEBUG
, DISTRAPP
,
245 "%s:Packet loss due to full ring\n", __func__
);
246 while (sent
< nb_ret
)
247 rte_pktmbuf_free(bufs
[sent
++]);
249 if (++port
== nb_ports
)
252 rte_distributor_process(d
, NULL
, 0);
253 /* flush distributor to bring to known state */
254 rte_distributor_flush(d
);
255 /* set worker & tx threads quit flag */
258 * worker threads may hang in get packet as the
259 * distributor process is not running, so make sure workers
260 * keep getting packets until quit_signal has actually been
261 * received and they shut down gracefully
263 if (quit_workers(d
, mem_pool
) != 0)
265 /* rx thread should quit last */
270 flush_one_port(struct output_buffer
*outbuf
, uint8_t outp
)
272 unsigned nb_tx
= rte_eth_tx_burst(outp
, 0, outbuf
->mbufs
,
274 app_stats
.tx
.tx_pkts
+= nb_tx
;
276 if (unlikely(nb_tx
< outbuf
->count
)) {
277 RTE_LOG(DEBUG
, DISTRAPP
,
278 "%s:Packet loss with tx_burst\n", __func__
);
280 rte_pktmbuf_free(outbuf
->mbufs
[nb_tx
]);
281 } while (++nb_tx
< outbuf
->count
);
287 flush_all_ports(struct output_buffer
*tx_buffers
, uint8_t nb_ports
)
290 for (outp
= 0; outp
< nb_ports
; outp
++) {
291 /* skip ports that are not enabled */
292 if ((enabled_port_mask
& (1 << outp
)) == 0)
295 if (tx_buffers
[outp
].count
== 0)
298 flush_one_port(&tx_buffers
[outp
], outp
);
303 lcore_tx(struct rte_ring
*in_r
)
305 static struct output_buffer tx_buffers
[RTE_MAX_ETHPORTS
];
306 const uint8_t nb_ports
= rte_eth_dev_count();
307 const int socket_id
= rte_socket_id();
310 for (port
= 0; port
< nb_ports
; port
++) {
311 /* skip ports that are not enabled */
312 if ((enabled_port_mask
& (1 << port
)) == 0)
315 if (rte_eth_dev_socket_id(port
) > 0 &&
316 rte_eth_dev_socket_id(port
) != socket_id
)
317 printf("WARNING, port %u is on remote NUMA node to "
318 "TX thread.\n\tPerformance will not "
319 "be optimal.\n", port
);
322 printf("\nCore %u doing packet TX.\n", rte_lcore_id());
323 while (!quit_signal
) {
325 for (port
= 0; port
< nb_ports
; port
++) {
326 /* skip ports that are not enabled */
327 if ((enabled_port_mask
& (1 << port
)) == 0)
330 struct rte_mbuf
*bufs
[BURST_SIZE
];
331 const uint16_t nb_rx
= rte_ring_dequeue_burst(in_r
,
332 (void *)bufs
, BURST_SIZE
);
333 app_stats
.tx
.dequeue_pkts
+= nb_rx
;
335 /* if we get no traffic, flush anything we have */
336 if (unlikely(nb_rx
== 0)) {
337 flush_all_ports(tx_buffers
, nb_ports
);
341 /* for traffic we receive, queue it up for transmit */
343 rte_prefetch_non_temporal((void *)bufs
[0]);
344 rte_prefetch_non_temporal((void *)bufs
[1]);
345 rte_prefetch_non_temporal((void *)bufs
[2]);
346 for (i
= 0; i
< nb_rx
; i
++) {
347 struct output_buffer
*outbuf
;
349 rte_prefetch_non_temporal((void *)bufs
[i
+ 3]);
351 * workers should update in_port to hold the
354 outp
= bufs
[i
]->port
;
355 /* skip ports that are not enabled */
356 if ((enabled_port_mask
& (1 << outp
)) == 0)
359 outbuf
= &tx_buffers
[outp
];
360 outbuf
->mbufs
[outbuf
->count
++] = bufs
[i
];
361 if (outbuf
->count
== BURST_SIZE
)
362 flush_one_port(outbuf
, outp
);
370 int_handler(int sig_num
)
372 printf("Exiting on signal %d\n", sig_num
);
373 /* set quit flag for rx thread to exit */
380 struct rte_eth_stats eth_stats
;
383 printf("\nRX thread stats:\n");
384 printf(" - Received: %"PRIu64
"\n", app_stats
.rx
.rx_pkts
);
385 printf(" - Processed: %"PRIu64
"\n", app_stats
.rx
.returned_pkts
);
386 printf(" - Enqueued: %"PRIu64
"\n", app_stats
.rx
.enqueued_pkts
);
388 printf("\nTX thread stats:\n");
389 printf(" - Dequeued: %"PRIu64
"\n", app_stats
.tx
.dequeue_pkts
);
390 printf(" - Transmitted: %"PRIu64
"\n", app_stats
.tx
.tx_pkts
);
392 for (i
= 0; i
< rte_eth_dev_count(); i
++) {
393 rte_eth_stats_get(i
, &eth_stats
);
394 printf("\nPort %u stats:\n", i
);
395 printf(" - Pkts in: %"PRIu64
"\n", eth_stats
.ipackets
);
396 printf(" - Pkts out: %"PRIu64
"\n", eth_stats
.opackets
);
397 printf(" - In Errs: %"PRIu64
"\n", eth_stats
.ierrors
);
398 printf(" - Out Errs: %"PRIu64
"\n", eth_stats
.oerrors
);
399 printf(" - Mbuf Errs: %"PRIu64
"\n", eth_stats
.rx_nombuf
);
404 lcore_worker(struct lcore_params
*p
)
406 struct rte_distributor
*d
= p
->d
;
407 const unsigned id
= p
->worker_id
;
409 * for single port, xor_val will be zero so we won't modify the output
410 * port, otherwise we send traffic from 0 to 1, 2 to 3, and vice versa
412 const unsigned xor_val
= (rte_eth_dev_count() > 1);
413 struct rte_mbuf
*buf
= NULL
;
415 printf("\nCore %u acting as worker core.\n", rte_lcore_id());
416 while (!quit_signal
) {
417 buf
= rte_distributor_get_pkt(d
, id
, buf
);
418 buf
->port
^= xor_val
;
425 print_usage(const char *prgname
)
427 printf("%s [EAL options] -- -p PORTMASK\n"
428 " -p PORTMASK: hexadecimal bitmask of ports to configure\n",
433 parse_portmask(const char *portmask
)
438 /* parse hexadecimal string */
439 pm
= strtoul(portmask
, &end
, 16);
440 if ((portmask
[0] == '\0') || (end
== NULL
) || (*end
!= '\0'))
449 /* Parse the argument given in the command line of the application */
451 parse_args(int argc
, char **argv
)
456 char *prgname
= argv
[0];
457 static struct option lgopts
[] = {
463 while ((opt
= getopt_long(argc
, argvopt
, "p:",
464 lgopts
, &option_index
)) != EOF
) {
469 enabled_port_mask
= parse_portmask(optarg
);
470 if (enabled_port_mask
== 0) {
471 printf("invalid portmask\n");
472 print_usage(prgname
);
478 print_usage(prgname
);
484 print_usage(prgname
);
488 argv
[optind
-1] = prgname
;
490 optind
= 0; /* reset getopt lib */
494 /* Main function, does initialization and calls the per-lcore functions */
496 main(int argc
, char *argv
[])
498 struct rte_mempool
*mbuf_pool
;
499 struct rte_distributor
*d
;
500 struct rte_ring
*output_ring
;
501 unsigned lcore_id
, worker_id
= 0;
504 uint8_t nb_ports_available
;
506 /* catch ctrl-c so we can print on exit */
507 signal(SIGINT
, int_handler
);
510 int ret
= rte_eal_init(argc
, argv
);
512 rte_exit(EXIT_FAILURE
, "Error with EAL initialization\n");
516 /* parse application arguments (after the EAL ones) */
517 ret
= parse_args(argc
, argv
);
519 rte_exit(EXIT_FAILURE
, "Invalid distributor parameters\n");
521 if (rte_lcore_count() < 3)
522 rte_exit(EXIT_FAILURE
, "Error, This application needs at "
523 "least 3 logical cores to run:\n"
524 "1 lcore for packet RX and distribution\n"
525 "1 lcore for packet TX\n"
526 "and at least 1 lcore for worker threads\n");
528 nb_ports
= rte_eth_dev_count();
530 rte_exit(EXIT_FAILURE
, "Error: no ethernet ports detected\n");
531 if (nb_ports
!= 1 && (nb_ports
& 1))
532 rte_exit(EXIT_FAILURE
, "Error: number of ports must be even, except "
533 "when using a single port\n");
535 mbuf_pool
= rte_pktmbuf_pool_create("MBUF_POOL",
536 NUM_MBUFS
* nb_ports
, MBUF_CACHE_SIZE
, 0,
537 RTE_MBUF_DEFAULT_BUF_SIZE
, rte_socket_id());
538 if (mbuf_pool
== NULL
)
539 rte_exit(EXIT_FAILURE
, "Cannot create mbuf pool\n");
540 nb_ports_available
= nb_ports
;
542 /* initialize all ports */
543 for (portid
= 0; portid
< nb_ports
; portid
++) {
544 /* skip ports that are not enabled */
545 if ((enabled_port_mask
& (1 << portid
)) == 0) {
546 printf("\nSkipping disabled port %d\n", portid
);
547 nb_ports_available
--;
551 printf("Initializing port %u... done\n", (unsigned) portid
);
553 if (port_init(portid
, mbuf_pool
) != 0)
554 rte_exit(EXIT_FAILURE
, "Cannot initialize port %"PRIu8
"\n",
558 if (!nb_ports_available
) {
559 rte_exit(EXIT_FAILURE
,
560 "All available ports are disabled. Please set portmask.\n");
563 d
= rte_distributor_create("PKT_DIST", rte_socket_id(),
564 rte_lcore_count() - 2);
566 rte_exit(EXIT_FAILURE
, "Cannot create distributor\n");
569 * scheduler ring is read only by the transmitter core, but written to
570 * by multiple threads
572 output_ring
= rte_ring_create("Output_ring", RTE_RING_SZ
,
573 rte_socket_id(), RING_F_SC_DEQ
);
574 if (output_ring
== NULL
)
575 rte_exit(EXIT_FAILURE
, "Cannot create output ring\n");
577 RTE_LCORE_FOREACH_SLAVE(lcore_id
) {
578 if (worker_id
== rte_lcore_count() - 2)
579 rte_eal_remote_launch((lcore_function_t
*)lcore_tx
,
580 output_ring
, lcore_id
);
582 struct lcore_params
*p
=
583 rte_malloc(NULL
, sizeof(*p
), 0);
585 rte_panic("malloc failure\n");
586 *p
= (struct lcore_params
){worker_id
, d
, output_ring
, mbuf_pool
};
588 rte_eal_remote_launch((lcore_function_t
*)lcore_worker
,
593 /* call lcore_rx on master core only */
594 struct lcore_params p
= { 0, d
, output_ring
, mbuf_pool
};
596 if (lcore_rx(&p
) != 0)
599 RTE_LCORE_FOREACH_SLAVE(lcore_id
) {
600 if (rte_eal_wait_lcore(lcore_id
) < 0)