/*-
 *   BSD LICENSE
 *
 *   Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of Intel Corporation nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <signal.h>
#include <getopt.h>

#include <rte_eal.h>
#include <rte_common.h>
#include <rte_errno.h>
#include <rte_ethdev.h>
#include <rte_lcore.h>
#include <rte_malloc.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_ring.h>
#include <rte_reorder.h>

#define RX_DESC_PER_QUEUE 128
#define TX_DESC_PER_QUEUE 512

#define MAX_PKTS_BURST 32
#define REORDER_BUFFER_SIZE 8192
#define MBUF_PER_POOL 65535
#define MBUF_POOL_CACHE_SIZE 250

#define RING_SIZE 16384

/* Macros for printing using RTE_LOG */
#define RTE_LOGTYPE_REORDERAPP          RTE_LOGTYPE_USER1
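
/*
 * Pipeline overview: the master lcore runs rx_thread(), which stamps
 * each received mbuf with a sequence number and enqueues it to the
 * rx_to_workers ring; intermediate slave lcores run worker_thread(),
 * which feeds the workers_to_tx ring; the last slave lcore runs either
 * send_thread() (reordering enabled, the default) or tx_thread()
 * (when --disable-reorder is given).
 */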

unsigned int portmask;
unsigned int disable_reorder;
volatile uint8_t quit_signal;

static struct rte_mempool *mbuf_pool;

static struct rte_eth_conf port_conf_default;

struct worker_thread_args {
	struct rte_ring *ring_in;
	struct rte_ring *ring_out;
};

struct send_thread_args {
	struct rte_ring *ring_in;
	struct rte_reorder_buffer *buffer;
};

volatile struct app_stats {
	struct {
		uint64_t rx_pkts;
		uint64_t enqueue_pkts;
		uint64_t enqueue_failed_pkts;
	} rx __rte_cache_aligned;

	struct {
		uint64_t dequeue_pkts;
		uint64_t enqueue_pkts;
		uint64_t enqueue_failed_pkts;
	} wkr __rte_cache_aligned;

	struct {
		uint64_t dequeue_pkts;
		/* Too early pkts transmitted directly w/o reordering */
		uint64_t early_pkts_txtd_woro;
		/* Too early pkts failed from direct transmit */
		uint64_t early_pkts_tx_failed_woro;
		uint64_t ro_tx_pkts;
		uint64_t ro_tx_failed_pkts;
	} tx __rte_cache_aligned;
} app_stats;
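
/*
 * Note on concurrency: the rx and tx counters each have a single writer
 * (the RX and TX lcores respectively), while the wkr counters are shared
 * by all worker lcores and are therefore updated with
 * __sync_fetch_and_add() in worker_thread().
 */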

/**
 * Get the last enabled lcore ID
 *
 * @return
 *   The last enabled lcore ID.
 */
static unsigned int
get_last_lcore_id(void)
{
	int i;

	for (i = RTE_MAX_LCORE - 1; i >= 0; i--)
		if (rte_lcore_is_enabled(i))
			return i;
	return 0;
}

/**
 * Get the previous enabled lcore ID
 *
 * @param id
 *   The current lcore ID
 * @return
 *   The previous enabled lcore ID or the current lcore
 *   ID if it is the first available core.
 */
static unsigned int
get_previous_lcore_id(unsigned int id)
{
	int i;

	for (i = id - 1; i >= 0; i--)
		if (rte_lcore_is_enabled(i))
			return i;
	return id;
}

static inline void
pktmbuf_free_bulk(struct rte_mbuf *mbuf_table[], unsigned n)
{
	unsigned int i;

	for (i = 0; i < n; i++)
		rte_pktmbuf_free(mbuf_table[i]);
}

/* display usage */
static void
print_usage(const char *prgname)
{
	printf("%s [EAL options] -- -p PORTMASK\n"
			"  -p PORTMASK: hexadecimal bitmask of ports to configure\n",
			prgname);
}

static int
parse_portmask(const char *portmask)
{
	unsigned long pm;
	char *end = NULL;

	/* parse hexadecimal string */
	pm = strtoul(portmask, &end, 16);
	if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
		return -1;

	if (pm == 0)
		return -1;

	return pm;
}

/* Parse the arguments given in the command line of the application */
static int
parse_args(int argc, char **argv)
{
	int opt;
	int option_index;
	char **argvopt;
	char *prgname = argv[0];
	static struct option lgopts[] = {
		{"disable-reorder", 0, 0, 0},
		{NULL, 0, 0, 0}
	};

	argvopt = argv;

	while ((opt = getopt_long(argc, argvopt, "p:",
					lgopts, &option_index)) != EOF) {
		switch (opt) {
		/* portmask */
		case 'p':
			portmask = parse_portmask(optarg);
			if (portmask == 0) {
				printf("invalid portmask\n");
				print_usage(prgname);
				return -1;
			}
			break;
		/* long options */
		case 0:
			if (!strcmp(lgopts[option_index].name, "disable-reorder")) {
				printf("reorder disabled\n");
				disable_reorder = 1;
			}
			break;
		default:
			print_usage(prgname);
			return -1;
		}
	}
	if (optind <= 1) {
		print_usage(prgname);
		return -1;
	}

	argv[optind - 1] = prgname;
	optind = 0; /* reset getopt lib */
	return 0;
}
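
/*
 * Example invocation (binary name assumed from the example directory):
 *   ./build/packet_ordering [EAL options] -- -p 0x3 --disable-reorder
 */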

/*
 * Tx buffer error callback
 */
static void
flush_tx_error_callback(struct rte_mbuf **unsent, uint16_t count,
		void *userdata __rte_unused) {

	/* free the mbufs which failed from transmit */
	app_stats.tx.ro_tx_failed_pkts += count;
	RTE_LOG(DEBUG, REORDERAPP, "%s:Packet loss with tx_burst\n", __func__);
	pktmbuf_free_bulk(unsent, count);
}
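
/*
 * The callback above is invoked by the rte_eth_tx_buffer() machinery
 * whenever a buffered burst cannot be fully transmitted: the unsent
 * mbufs are counted as failures and freed.
 */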

static inline int
free_tx_buffers(struct rte_eth_dev_tx_buffer *tx_buffer[]) {
	const uint8_t nb_ports = rte_eth_dev_count();
	unsigned port_id;

	/* free buffers for all enabled ports */
	for (port_id = 0; port_id < nb_ports; port_id++) {
		/* skip ports that are not enabled */
		if ((portmask & (1 << port_id)) == 0)
			continue;

		rte_free(tx_buffer[port_id]);
	}
	return 0;
}

static inline int
configure_tx_buffers(struct rte_eth_dev_tx_buffer *tx_buffer[])
{
	const uint8_t nb_ports = rte_eth_dev_count();
	unsigned port_id;
	int ret;

	/* initialize buffers for all enabled ports */
	for (port_id = 0; port_id < nb_ports; port_id++) {
		/* skip ports that are not enabled */
		if ((portmask & (1 << port_id)) == 0)
			continue;

		/* Initialize TX buffers */
		tx_buffer[port_id] = rte_zmalloc_socket("tx_buffer",
				RTE_ETH_TX_BUFFER_SIZE(MAX_PKTS_BURST), 0,
				rte_eth_dev_socket_id(port_id));
		if (tx_buffer[port_id] == NULL)
			rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
					(unsigned) port_id);

		rte_eth_tx_buffer_init(tx_buffer[port_id], MAX_PKTS_BURST);

		ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[port_id],
				flush_tx_error_callback, NULL);
		if (ret < 0)
			rte_exit(EXIT_FAILURE, "Cannot set error callback for "
					"tx buffer on port %u\n", (unsigned) port_id);
	}
	return 0;
}

static inline int
configure_eth_port(uint8_t port_id)
{
	struct ether_addr addr;
	const uint16_t rxRings = 1, txRings = 1;
	const uint8_t nb_ports = rte_eth_dev_count();
	int ret;
	uint16_t q;

	if (port_id > nb_ports)
		return -1;

	ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf_default);
	if (ret != 0)
		return ret;

	for (q = 0; q < rxRings; q++) {
		ret = rte_eth_rx_queue_setup(port_id, q, RX_DESC_PER_QUEUE,
				rte_eth_dev_socket_id(port_id), NULL,
				mbuf_pool);
		if (ret < 0)
			return ret;
	}

	for (q = 0; q < txRings; q++) {
		ret = rte_eth_tx_queue_setup(port_id, q, TX_DESC_PER_QUEUE,
				rte_eth_dev_socket_id(port_id), NULL);
		if (ret < 0)
			return ret;
	}

	ret = rte_eth_dev_start(port_id);
	if (ret < 0)
		return ret;

	rte_eth_macaddr_get(port_id, &addr);
	printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
			" %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
			(unsigned) port_id,
			addr.addr_bytes[0], addr.addr_bytes[1],
			addr.addr_bytes[2], addr.addr_bytes[3],
			addr.addr_bytes[4], addr.addr_bytes[5]);

	rte_eth_promiscuous_enable(port_id);

	return 0;
}

static void
print_stats(void)
{
	const uint8_t nb_ports = rte_eth_dev_count();
	unsigned i;
	struct rte_eth_stats eth_stats;

	printf("\nRX thread stats:\n");
	printf(" - Pkts rxd:                            %"PRIu64"\n",
			app_stats.rx.rx_pkts);
	printf(" - Pkts enqd to workers ring:           %"PRIu64"\n",
			app_stats.rx.enqueue_pkts);

	printf("\nWorker thread stats:\n");
	printf(" - Pkts deqd from workers ring:         %"PRIu64"\n",
			app_stats.wkr.dequeue_pkts);
	printf(" - Pkts enqd to tx ring:                %"PRIu64"\n",
			app_stats.wkr.enqueue_pkts);
	printf(" - Pkts enq to tx failed:               %"PRIu64"\n",
			app_stats.wkr.enqueue_failed_pkts);

	printf("\nTX stats:\n");
	printf(" - Pkts deqd from tx ring:              %"PRIu64"\n",
			app_stats.tx.dequeue_pkts);
	printf(" - Ro Pkts transmitted:                 %"PRIu64"\n",
			app_stats.tx.ro_tx_pkts);
	printf(" - Ro Pkts tx failed:                   %"PRIu64"\n",
			app_stats.tx.ro_tx_failed_pkts);
	printf(" - Pkts transmitted w/o reorder:        %"PRIu64"\n",
			app_stats.tx.early_pkts_txtd_woro);
	printf(" - Pkts tx failed w/o reorder:          %"PRIu64"\n",
			app_stats.tx.early_pkts_tx_failed_woro);

	for (i = 0; i < nb_ports; i++) {
		rte_eth_stats_get(i, &eth_stats);
		printf("\nPort %u stats:\n", i);
		printf(" - Pkts in:   %"PRIu64"\n", eth_stats.ipackets);
		printf(" - Pkts out:  %"PRIu64"\n", eth_stats.opackets);
		printf(" - In Errs:   %"PRIu64"\n", eth_stats.ierrors);
		printf(" - Out Errs:  %"PRIu64"\n", eth_stats.oerrors);
		printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf);
	}
}

static void
int_handler(int sig_num)
{
	printf("Exiting on signal %d\n", sig_num);
	quit_signal = 1;
}

/**
 * This thread receives mbufs from the port and assigns them an internal
 * sequence number to keep track of their order of arrival through the
 * mbuf's seqn field.
 * The mbufs are then passed to the worker threads via the rx_to_workers
 * ring.
 */
static int
rx_thread(struct rte_ring *ring_out)
{
	const uint8_t nb_ports = rte_eth_dev_count();
	uint32_t seqn = 0;
	uint16_t i, ret = 0;
	uint16_t nb_rx_pkts;
	uint8_t port_id;
	struct rte_mbuf *pkts[MAX_PKTS_BURST];

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			rte_lcore_id());

	while (!quit_signal) {

		for (port_id = 0; port_id < nb_ports; port_id++) {
			if ((portmask & (1 << port_id)) != 0) {

				/* receive packets */
				nb_rx_pkts = rte_eth_rx_burst(port_id, 0,
						pkts, MAX_PKTS_BURST);
				if (nb_rx_pkts == 0) {
					RTE_LOG(DEBUG, REORDERAPP,
						"%s():Received zero packets\n", __func__);
					continue;
				}
				app_stats.rx.rx_pkts += nb_rx_pkts;

				/* mark sequence number */
				for (i = 0; i < nb_rx_pkts; )
					pkts[i++]->seqn = seqn++;

				/* enqueue to rx_to_workers ring */
				ret = rte_ring_enqueue_burst(ring_out,
						(void *) pkts, nb_rx_pkts);
				app_stats.rx.enqueue_pkts += ret;
				if (unlikely(ret < nb_rx_pkts)) {
					app_stats.rx.enqueue_failed_pkts +=
							(nb_rx_pkts - ret);
					pktmbuf_free_bulk(&pkts[ret],
							nb_rx_pkts - ret);
				}
			}
		}
	}
	return 0;
}
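
/*
 * The seqn stamped above is what librte_reorder keys on later:
 * rte_reorder_insert() slots each mbuf by its sequence number so that
 * rte_reorder_drain() in send_thread() can release them in order.
 */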

/**
 * This thread takes bursts of packets from the rx_to_workers ring,
 * changes the input port value to the output port value, and feeds
 * the packets to the workers_to_tx ring.
 */
static int
worker_thread(void *args_ptr)
{
	const uint8_t nb_ports = rte_eth_dev_count();
	uint16_t i, ret = 0;
	uint16_t burst_size = 0;
	struct worker_thread_args *args;
	struct rte_mbuf *burst_buffer[MAX_PKTS_BURST] = { NULL };
	struct rte_ring *ring_in, *ring_out;
	const unsigned xor_val = (nb_ports > 1);

	args = (struct worker_thread_args *) args_ptr;
	ring_in  = args->ring_in;
	ring_out = args->ring_out;

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			rte_lcore_id());

	while (!quit_signal) {

		/* dequeue the mbufs from rx_to_workers ring */
		burst_size = rte_ring_dequeue_burst(ring_in,
				(void *)burst_buffer, MAX_PKTS_BURST);
		if (unlikely(burst_size == 0))
			continue;

		__sync_fetch_and_add(&app_stats.wkr.dequeue_pkts, burst_size);

		/* just do some operation on mbuf */
		for (i = 0; i < burst_size;)
			burst_buffer[i++]->port ^= xor_val;

		/* enqueue the modified mbufs to workers_to_tx ring */
		ret = rte_ring_enqueue_burst(ring_out, (void *)burst_buffer,
				burst_size);
		__sync_fetch_and_add(&app_stats.wkr.enqueue_pkts, ret);
		if (unlikely(ret < burst_size)) {
			/* Return the mbufs to their respective pool, dropping packets */
			__sync_fetch_and_add(&app_stats.wkr.enqueue_failed_pkts,
					(int)burst_size - ret);
			pktmbuf_free_bulk(&burst_buffer[ret], burst_size - ret);
		}
	}
	return 0;
}
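
/*
 * XORing the port with 1 pairs the ports: traffic received on port 2k
 * is sent out on port 2k+1 and vice versa, which is why main() insists
 * on an even port count (or exactly one port, in which case xor_val is
 * 0 and packets are echoed back out the same port).
 */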

/**
 * Dequeue mbufs from the workers_to_tx ring and reorder them before
 * transmitting.
 */
static int
send_thread(struct send_thread_args *args)
{
	int ret;
	unsigned int i, dret;
	uint16_t nb_dq_mbufs;
	uint8_t outp;
	unsigned sent;
	struct rte_mbuf *mbufs[MAX_PKTS_BURST];
	struct rte_mbuf *rombufs[MAX_PKTS_BURST] = {NULL};
	static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			rte_lcore_id());

	configure_tx_buffers(tx_buffer);

	while (!quit_signal) {

		/* dequeue the mbufs from workers_to_tx ring */
		nb_dq_mbufs = rte_ring_dequeue_burst(args->ring_in,
				(void *)mbufs, MAX_PKTS_BURST);

		if (unlikely(nb_dq_mbufs == 0))
			continue;

		app_stats.tx.dequeue_pkts += nb_dq_mbufs;

		for (i = 0; i < nb_dq_mbufs; i++) {
			/* send dequeued mbufs for reordering */
			ret = rte_reorder_insert(args->buffer, mbufs[i]);

			if (ret == -1 && rte_errno == ERANGE) {
				/* Too early pkts should be transmitted out directly */
				RTE_LOG(DEBUG, REORDERAPP,
						"%s():Cannot reorder early packet "
						"direct enqueuing to TX\n", __func__);
				outp = mbufs[i]->port;
				if ((portmask & (1 << outp)) == 0) {
					rte_pktmbuf_free(mbufs[i]);
					continue;
				}
				if (rte_eth_tx_burst(outp, 0, &mbufs[i], 1) != 1) {
					rte_pktmbuf_free(mbufs[i]);
					app_stats.tx.early_pkts_tx_failed_woro++;
				} else
					app_stats.tx.early_pkts_txtd_woro++;
			} else if (ret == -1 && rte_errno == ENOSPC) {
				/*
				 * Early pkts just outside of window should be dropped
				 */
				rte_pktmbuf_free(mbufs[i]);
			}
		}

		/*
		 * drain MAX_PKTS_BURST of reordered
		 * mbufs for transmit
		 */
		dret = rte_reorder_drain(args->buffer, rombufs, MAX_PKTS_BURST);
		for (i = 0; i < dret; i++) {

			struct rte_eth_dev_tx_buffer *outbuf;
			uint8_t outp1;

			outp1 = rombufs[i]->port;
			/* skip ports that are not enabled */
			if ((portmask & (1 << outp1)) == 0) {
				rte_pktmbuf_free(rombufs[i]);
				continue;
			}

			outbuf = tx_buffer[outp1];
			sent = rte_eth_tx_buffer(outp1, 0, outbuf, rombufs[i]);
			if (sent)
				app_stats.tx.ro_tx_pkts += sent;
		}
	}

	free_tx_buffers(tx_buffer);

	return 0;
}
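
/*
 * Per the librte_reorder API: rte_reorder_insert() sets rte_errno to
 * ERANGE for an mbuf far outside the expected sequence window (handled
 * above by transmitting it directly), and to ENOSPC for an early mbuf
 * the buffer cannot accommodate (handled by dropping it).
 */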

/**
 * Dequeue mbufs from the workers_to_tx ring and transmit them
 */
static int
tx_thread(struct rte_ring *ring_in)
{
	uint32_t i, dqnum = 0;
	uint8_t outp;
	unsigned sent;
	struct rte_mbuf *mbufs[MAX_PKTS_BURST];
	struct rte_eth_dev_tx_buffer *outbuf;
	static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];

	RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
			rte_lcore_id());

	configure_tx_buffers(tx_buffer);

	while (!quit_signal) {

		/* dequeue the mbufs from workers_to_tx ring */
		dqnum = rte_ring_dequeue_burst(ring_in,
				(void *)mbufs, MAX_PKTS_BURST);

		if (unlikely(dqnum == 0))
			continue;

		app_stats.tx.dequeue_pkts += dqnum;

		for (i = 0; i < dqnum; i++) {
			outp = mbufs[i]->port;
			/* skip ports that are not enabled */
			if ((portmask & (1 << outp)) == 0) {
				rte_pktmbuf_free(mbufs[i]);
				continue;
			}

			outbuf = tx_buffer[outp];
			sent = rte_eth_tx_buffer(outp, 0, outbuf, mbufs[i]);
			if (sent)
				app_stats.tx.ro_tx_pkts += sent;
		}
	}

	return 0;
}
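
/*
 * tx_thread() is the --disable-reorder counterpart of send_thread():
 * the same dequeue-and-buffered-transmit path, minus the reorder buffer.
 */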

int
main(int argc, char **argv)
{
	int ret;
	unsigned nb_ports;
	unsigned int lcore_id, last_lcore_id, master_lcore_id;
	uint8_t port_id;
	uint8_t nb_ports_available;
	struct worker_thread_args worker_args = {NULL, NULL};
	struct send_thread_args send_args = {NULL, NULL};
	struct rte_ring *rx_to_workers;
	struct rte_ring *workers_to_tx;

	/* catch ctrl-c so we can print on exit */
	signal(SIGINT, int_handler);

	/* Initialize EAL */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		return -1;

	argc -= ret;
	argv += ret;

	/* Parse the application specific arguments */
	ret = parse_args(argc, argv);
	if (ret < 0)
		return -1;

	/* Check if we have enough cores */
	if (rte_lcore_count() < 3)
		rte_exit(EXIT_FAILURE, "Error, this application needs at "
				"least 3 logical cores to run:\n"
				"1 lcore for packet RX\n"
				"1 lcore for packet TX\n"
				"and at least 1 lcore for worker threads\n");

	nb_ports = rte_eth_dev_count();
	if (nb_ports == 0)
		rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
	if (nb_ports != 1 && (nb_ports & 1))
		rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
				"when using a single port\n");

	mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL,
			MBUF_POOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
			rte_socket_id());
	if (mbuf_pool == NULL)
		rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));

	nb_ports_available = nb_ports;

	/* initialize all ports */
	for (port_id = 0; port_id < nb_ports; port_id++) {
		/* skip ports that are not enabled */
		if ((portmask & (1 << port_id)) == 0) {
			printf("\nSkipping disabled port %d\n", port_id);
			nb_ports_available--;
			continue;
		}
		/* init port */
		printf("Initializing port %u... done\n", (unsigned) port_id);

		if (configure_eth_port(port_id) != 0)
			rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
					port_id);
	}

	if (!nb_ports_available) {
		rte_exit(EXIT_FAILURE,
			"All available ports are disabled. Please set portmask.\n");
	}

	/* Create rings for inter core communication */
	rx_to_workers = rte_ring_create("rx_to_workers", RING_SIZE,
			rte_socket_id(), RING_F_SP_ENQ);
	if (rx_to_workers == NULL)
		rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));

	workers_to_tx = rte_ring_create("workers_to_tx", RING_SIZE,
			rte_socket_id(), RING_F_SC_DEQ);
	if (workers_to_tx == NULL)
		rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));

	if (!disable_reorder) {
		send_args.buffer = rte_reorder_create("PKT_RO", rte_socket_id(),
				REORDER_BUFFER_SIZE);
		if (send_args.buffer == NULL)
			rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
	}

	last_lcore_id   = get_last_lcore_id();
	master_lcore_id = rte_get_master_lcore();

	worker_args.ring_in  = rx_to_workers;
	worker_args.ring_out = workers_to_tx;

	/* Start worker_thread() on all the available slave cores but the last 1 */
	for (lcore_id = 0; lcore_id <= get_previous_lcore_id(last_lcore_id);
			lcore_id++)
		if (rte_lcore_is_enabled(lcore_id) && lcore_id != master_lcore_id)
			rte_eal_remote_launch(worker_thread, (void *)&worker_args,
					lcore_id);

	if (disable_reorder) {
		/* Start tx_thread() on the last slave core */
		rte_eal_remote_launch((lcore_function_t *)tx_thread, workers_to_tx,
				last_lcore_id);
	} else {
		send_args.ring_in = workers_to_tx;
		/* Start send_thread() on the last slave core */
		rte_eal_remote_launch((lcore_function_t *)send_thread,
				(void *)&send_args, last_lcore_id);
	}

	/* Start rx_thread() on the master core */
	rx_thread(rx_to_workers);

	RTE_LCORE_FOREACH_SLAVE(lcore_id) {
		if (rte_eal_wait_lcore(lcore_id) < 0)
			return -1;
	}

	/* print stats on exit (see int_handler) */
	print_stats();
	return 0;
}