]> git.proxmox.com Git - ceph.git/blob - ceph/src/dpdk/examples/load_balancer/init.c
add subtree-ish sources for 12.0.3
[ceph.git] / ceph / src / dpdk / examples / load_balancer / init.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <stdint.h>
37 #include <inttypes.h>
38 #include <sys/types.h>
39 #include <string.h>
40 #include <sys/queue.h>
41 #include <stdarg.h>
42 #include <errno.h>
43 #include <getopt.h>
44
45 #include <rte_common.h>
46 #include <rte_byteorder.h>
47 #include <rte_log.h>
48 #include <rte_memory.h>
49 #include <rte_memcpy.h>
50 #include <rte_memzone.h>
51 #include <rte_eal.h>
52 #include <rte_per_lcore.h>
53 #include <rte_launch.h>
54 #include <rte_atomic.h>
55 #include <rte_cycles.h>
56 #include <rte_prefetch.h>
57 #include <rte_lcore.h>
58 #include <rte_per_lcore.h>
59 #include <rte_branch_prediction.h>
60 #include <rte_interrupts.h>
61 #include <rte_pci.h>
62 #include <rte_random.h>
63 #include <rte_debug.h>
64 #include <rte_ether.h>
65 #include <rte_ethdev.h>
66 #include <rte_ring.h>
67 #include <rte_mempool.h>
68 #include <rte_mbuf.h>
69 #include <rte_string_fns.h>
70 #include <rte_ip.h>
71 #include <rte_tcp.h>
72 #include <rte_lpm.h>
73
74 #include "main.h"
75
76 static struct rte_eth_conf port_conf = {
77 .rxmode = {
78 .mq_mode = ETH_MQ_RX_RSS,
79 .split_hdr_size = 0,
80 .header_split = 0, /**< Header Split disabled */
81 .hw_ip_checksum = 1, /**< IP checksum offload enabled */
82 .hw_vlan_filter = 0, /**< VLAN filtering disabled */
83 .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
84 .hw_strip_crc = 0, /**< CRC stripped by hardware */
85 },
86 .rx_adv_conf = {
87 .rss_conf = {
88 .rss_key = NULL,
89 .rss_hf = ETH_RSS_IP,
90 },
91 },
92 .txmode = {
93 .mq_mode = ETH_MQ_TX_NONE,
94 },
95 };
96
97 static void
98 app_assign_worker_ids(void)
99 {
100 uint32_t lcore, worker_id;
101
102 /* Assign ID for each worker */
103 worker_id = 0;
104 for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
105 struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
106
107 if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
108 continue;
109 }
110
111 lp_worker->worker_id = worker_id;
112 worker_id ++;
113 }
114 }
115
116 static void
117 app_init_mbuf_pools(void)
118 {
119 unsigned socket, lcore;
120
121 /* Init the buffer pools */
122 for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) {
123 char name[32];
124 if (app_is_socket_used(socket) == 0) {
125 continue;
126 }
127
128 snprintf(name, sizeof(name), "mbuf_pool_%u", socket);
129 printf("Creating the mbuf pool for socket %u ...\n", socket);
130 app.pools[socket] = rte_pktmbuf_pool_create(
131 name, APP_DEFAULT_MEMPOOL_BUFFERS,
132 APP_DEFAULT_MEMPOOL_CACHE_SIZE,
133 0, APP_DEFAULT_MBUF_DATA_SIZE, socket);
134 if (app.pools[socket] == NULL) {
135 rte_panic("Cannot create mbuf pool on socket %u\n", socket);
136 }
137 }
138
139 for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
140 if (app.lcore_params[lcore].type == e_APP_LCORE_DISABLED) {
141 continue;
142 }
143
144 socket = rte_lcore_to_socket_id(lcore);
145 app.lcore_params[lcore].pool = app.pools[socket];
146 }
147 }
148
149 static void
150 app_init_lpm_tables(void)
151 {
152 unsigned socket, lcore;
153
154 /* Init the LPM tables */
155 for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) {
156 char name[32];
157 uint32_t rule;
158
159 if (app_is_socket_used(socket) == 0) {
160 continue;
161 }
162
163 struct rte_lpm_config lpm_config;
164
165 lpm_config.max_rules = APP_MAX_LPM_RULES;
166 lpm_config.number_tbl8s = 256;
167 lpm_config.flags = 0;
168 snprintf(name, sizeof(name), "lpm_table_%u", socket);
169 printf("Creating the LPM table for socket %u ...\n", socket);
170 app.lpm_tables[socket] = rte_lpm_create(
171 name,
172 socket,
173 &lpm_config);
174 if (app.lpm_tables[socket] == NULL) {
175 rte_panic("Unable to create LPM table on socket %u\n", socket);
176 }
177
178 for (rule = 0; rule < app.n_lpm_rules; rule ++) {
179 int ret;
180
181 ret = rte_lpm_add(app.lpm_tables[socket],
182 app.lpm_rules[rule].ip,
183 app.lpm_rules[rule].depth,
184 app.lpm_rules[rule].if_out);
185
186 if (ret < 0) {
187 rte_panic("Unable to add entry %u (%x/%u => %u) to the LPM table on socket %u (%d)\n",
188 (unsigned) rule,
189 (unsigned) app.lpm_rules[rule].ip,
190 (unsigned) app.lpm_rules[rule].depth,
191 (unsigned) app.lpm_rules[rule].if_out,
192 socket,
193 ret);
194 }
195 }
196
197 }
198
199 for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
200 if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
201 continue;
202 }
203
204 socket = rte_lcore_to_socket_id(lcore);
205 app.lcore_params[lcore].worker.lpm_table = app.lpm_tables[socket];
206 }
207 }
208
209 static void
210 app_init_rings_rx(void)
211 {
212 unsigned lcore;
213
214 /* Initialize the rings for the RX side */
215 for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
216 struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
217 unsigned socket_io, lcore_worker;
218
219 if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
220 (lp_io->rx.n_nic_queues == 0)) {
221 continue;
222 }
223
224 socket_io = rte_lcore_to_socket_id(lcore);
225
226 for (lcore_worker = 0; lcore_worker < APP_MAX_LCORES; lcore_worker ++) {
227 char name[32];
228 struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore_worker].worker;
229 struct rte_ring *ring = NULL;
230
231 if (app.lcore_params[lcore_worker].type != e_APP_LCORE_WORKER) {
232 continue;
233 }
234
235 printf("Creating ring to connect I/O lcore %u (socket %u) with worker lcore %u ...\n",
236 lcore,
237 socket_io,
238 lcore_worker);
239 snprintf(name, sizeof(name), "app_ring_rx_s%u_io%u_w%u",
240 socket_io,
241 lcore,
242 lcore_worker);
243 ring = rte_ring_create(
244 name,
245 app.ring_rx_size,
246 socket_io,
247 RING_F_SP_ENQ | RING_F_SC_DEQ);
248 if (ring == NULL) {
249 rte_panic("Cannot create ring to connect I/O core %u with worker core %u\n",
250 lcore,
251 lcore_worker);
252 }
253
254 lp_io->rx.rings[lp_io->rx.n_rings] = ring;
255 lp_io->rx.n_rings ++;
256
257 lp_worker->rings_in[lp_worker->n_rings_in] = ring;
258 lp_worker->n_rings_in ++;
259 }
260 }
261
262 for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
263 struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
264
265 if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
266 (lp_io->rx.n_nic_queues == 0)) {
267 continue;
268 }
269
270 if (lp_io->rx.n_rings != app_get_lcores_worker()) {
271 rte_panic("Algorithmic error (I/O RX rings)\n");
272 }
273 }
274
275 for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
276 struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
277
278 if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
279 continue;
280 }
281
282 if (lp_worker->n_rings_in != app_get_lcores_io_rx()) {
283 rte_panic("Algorithmic error (worker input rings)\n");
284 }
285 }
286 }
287
288 static void
289 app_init_rings_tx(void)
290 {
291 unsigned lcore;
292
293 /* Initialize the rings for the TX side */
294 for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
295 struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
296 unsigned port;
297
298 if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
299 continue;
300 }
301
302 for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
303 char name[32];
304 struct app_lcore_params_io *lp_io = NULL;
305 struct rte_ring *ring;
306 uint32_t socket_io, lcore_io;
307
308 if (app.nic_tx_port_mask[port] == 0) {
309 continue;
310 }
311
312 if (app_get_lcore_for_nic_tx((uint8_t) port, &lcore_io) < 0) {
313 rte_panic("Algorithmic error (no I/O core to handle TX of port %u)\n",
314 port);
315 }
316
317 lp_io = &app.lcore_params[lcore_io].io;
318 socket_io = rte_lcore_to_socket_id(lcore_io);
319
320 printf("Creating ring to connect worker lcore %u with TX port %u (through I/O lcore %u) (socket %u) ...\n",
321 lcore, port, (unsigned)lcore_io, (unsigned)socket_io);
322 snprintf(name, sizeof(name), "app_ring_tx_s%u_w%u_p%u", socket_io, lcore, port);
323 ring = rte_ring_create(
324 name,
325 app.ring_tx_size,
326 socket_io,
327 RING_F_SP_ENQ | RING_F_SC_DEQ);
328 if (ring == NULL) {
329 rte_panic("Cannot create ring to connect worker core %u with TX port %u\n",
330 lcore,
331 port);
332 }
333
334 lp_worker->rings_out[port] = ring;
335 lp_io->tx.rings[port][lp_worker->worker_id] = ring;
336 }
337 }
338
339 for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
340 struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
341 unsigned i;
342
343 if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
344 (lp_io->tx.n_nic_ports == 0)) {
345 continue;
346 }
347
348 for (i = 0; i < lp_io->tx.n_nic_ports; i ++){
349 unsigned port, j;
350
351 port = lp_io->tx.nic_ports[i];
352 for (j = 0; j < app_get_lcores_worker(); j ++) {
353 if (lp_io->tx.rings[port][j] == NULL) {
354 rte_panic("Algorithmic error (I/O TX rings)\n");
355 }
356 }
357 }
358 }
359 }
360
361 /* Check the link status of all ports in up to 9s, and print them finally */
362 static void
363 check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
364 {
365 #define CHECK_INTERVAL 100 /* 100ms */
366 #define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
367 uint8_t portid, count, all_ports_up, print_flag = 0;
368 struct rte_eth_link link;
369 uint32_t n_rx_queues, n_tx_queues;
370
371 printf("\nChecking link status");
372 fflush(stdout);
373 for (count = 0; count <= MAX_CHECK_TIME; count++) {
374 all_ports_up = 1;
375 for (portid = 0; portid < port_num; portid++) {
376 if ((port_mask & (1 << portid)) == 0)
377 continue;
378 n_rx_queues = app_get_nic_rx_queues_per_port(portid);
379 n_tx_queues = app.nic_tx_port_mask[portid];
380 if ((n_rx_queues == 0) && (n_tx_queues == 0))
381 continue;
382 memset(&link, 0, sizeof(link));
383 rte_eth_link_get_nowait(portid, &link);
384 /* print link status if flag set */
385 if (print_flag == 1) {
386 if (link.link_status)
387 printf("Port %d Link Up - speed %u "
388 "Mbps - %s\n", (uint8_t)portid,
389 (unsigned)link.link_speed,
390 (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
391 ("full-duplex") : ("half-duplex\n"));
392 else
393 printf("Port %d Link Down\n",
394 (uint8_t)portid);
395 continue;
396 }
397 /* clear all_ports_up flag if any link down */
398 if (link.link_status == ETH_LINK_DOWN) {
399 all_ports_up = 0;
400 break;
401 }
402 }
403 /* after finally printing all link status, get out */
404 if (print_flag == 1)
405 break;
406
407 if (all_ports_up == 0) {
408 printf(".");
409 fflush(stdout);
410 rte_delay_ms(CHECK_INTERVAL);
411 }
412
413 /* set the print_flag if all ports up or timeout */
414 if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
415 print_flag = 1;
416 printf("done\n");
417 }
418 }
419 }
420
421 static void
422 app_init_nics(void)
423 {
424 unsigned socket;
425 uint32_t lcore;
426 uint8_t port, queue;
427 int ret;
428 uint32_t n_rx_queues, n_tx_queues;
429
430 /* Init NIC ports and queues, then start the ports */
431 for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
432 struct rte_mempool *pool;
433
434 n_rx_queues = app_get_nic_rx_queues_per_port(port);
435 n_tx_queues = app.nic_tx_port_mask[port];
436
437 if ((n_rx_queues == 0) && (n_tx_queues == 0)) {
438 continue;
439 }
440
441 /* Init port */
442 printf("Initializing NIC port %u ...\n", (unsigned) port);
443 ret = rte_eth_dev_configure(
444 port,
445 (uint8_t) n_rx_queues,
446 (uint8_t) n_tx_queues,
447 &port_conf);
448 if (ret < 0) {
449 rte_panic("Cannot init NIC port %u (%d)\n", (unsigned) port, ret);
450 }
451 rte_eth_promiscuous_enable(port);
452
453 /* Init RX queues */
454 for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) {
455 if (app.nic_rx_queue_mask[port][queue] == 0) {
456 continue;
457 }
458
459 app_get_lcore_for_nic_rx(port, queue, &lcore);
460 socket = rte_lcore_to_socket_id(lcore);
461 pool = app.lcore_params[lcore].pool;
462
463 printf("Initializing NIC port %u RX queue %u ...\n",
464 (unsigned) port,
465 (unsigned) queue);
466 ret = rte_eth_rx_queue_setup(
467 port,
468 queue,
469 (uint16_t) app.nic_rx_ring_size,
470 socket,
471 NULL,
472 pool);
473 if (ret < 0) {
474 rte_panic("Cannot init RX queue %u for port %u (%d)\n",
475 (unsigned) queue,
476 (unsigned) port,
477 ret);
478 }
479 }
480
481 /* Init TX queues */
482 if (app.nic_tx_port_mask[port] == 1) {
483 app_get_lcore_for_nic_tx(port, &lcore);
484 socket = rte_lcore_to_socket_id(lcore);
485 printf("Initializing NIC port %u TX queue 0 ...\n",
486 (unsigned) port);
487 ret = rte_eth_tx_queue_setup(
488 port,
489 0,
490 (uint16_t) app.nic_tx_ring_size,
491 socket,
492 NULL);
493 if (ret < 0) {
494 rte_panic("Cannot init TX queue 0 for port %d (%d)\n",
495 port,
496 ret);
497 }
498 }
499
500 /* Start port */
501 ret = rte_eth_dev_start(port);
502 if (ret < 0) {
503 rte_panic("Cannot start port %d (%d)\n", port, ret);
504 }
505 }
506
507 check_all_ports_link_status(APP_MAX_NIC_PORTS, (~0x0));
508 }
509
510 void
511 app_init(void)
512 {
513 app_assign_worker_ids();
514 app_init_mbuf_pools();
515 app_init_lpm_tables();
516 app_init_rings_rx();
517 app_init_rings_tx();
518 app_init_nics();
519
520 printf("Initialization completed.\n");
521 }