/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright(c) 2010-2017 Intel Corporation
 */
#include <stdlib.h>
#include <netinet/in.h>

#include <rte_mbuf.h>
#include <rte_malloc.h>
#include <rte_ethdev_driver.h>
#include <rte_ethdev_vdev.h>
#include <rte_tcp.h>
#include <rte_udp.h>
#include <rte_ip.h>
#include <rte_ip_frag.h>
#include <rte_devargs.h>
#include <rte_kvargs.h>
#include <rte_bus_vdev.h>
#include <rte_alarm.h>
#include <rte_cycles.h>
#include <rte_string_fns.h>

#include "rte_eth_bond.h"
#include "rte_eth_bond_private.h"
#include "rte_eth_bond_8023ad_private.h"

#define REORDER_PERIOD_MS 10
#define DEFAULT_POLLING_INTERVAL_10_MS (10)
#define BOND_MAX_MAC_ADDRS 16

#define HASH_L4_PORTS(h) ((h)->src_port ^ (h)->dst_port)

/* Table for statistics in mode 5 TLB */
static uint64_t tlb_last_obytets[RTE_MAX_ETHPORTS];

static inline size_t
get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
{
	size_t vlan_offset = 0;

	if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto ||
		rte_cpu_to_be_16(ETHER_TYPE_QINQ) == *proto) {
		struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);

		vlan_offset = sizeof(struct vlan_hdr);
		*proto = vlan_hdr->eth_proto;

		if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
			vlan_hdr = vlan_hdr + 1;
			*proto = vlan_hdr->eth_proto;
			vlan_offset += sizeof(struct vlan_hdr);
		}
	}
	return vlan_offset;
}

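/*
 * Mode 0 (round-robin) receive: poll each active slave in turn, starting
 * from the slave after the one used on the previous burst, until either
 * nb_pkts packets have been gathered or every active slave has been polled.
 */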
static uint16_t
bond_ethdev_rx_burst(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	uint16_t num_rx_total = 0;
	uint16_t slave_count;
	uint16_t active_slave;
	int i;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	internals = bd_rx_q->dev_private;
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Offset of pointer to *bufs increases as packets are received
		 * from other slaves */
		num_rx_slave =
			rte_eth_rx_burst(internals->active_slaves[active_slave],
					 bd_rx_q->queue_id,
					 bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;
		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;
	return num_rx_total;
}

static uint16_t
bond_ethdev_rx_burst_active_backup(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;

	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;

	internals = bd_rx_q->dev_private;

	return rte_eth_rx_burst(internals->current_primary_port,
			bd_rx_q->queue_id, bufs, nb_pkts);
}

static inline uint8_t
is_lacp_packets(uint16_t ethertype, uint8_t subtype, struct rte_mbuf *mbuf)
{
	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);

	return !((mbuf->ol_flags & PKT_RX_VLAN) ? mbuf->vlan_tci : 0) &&
		(ethertype == ether_type_slow_be &&
		(subtype == SLOW_SUBTYPE_MARKER || subtype == SLOW_SUBTYPE_LACP));
}

/*****************************************************************************
 * Flow director's setup for mode 4 optimization
 */

static struct rte_flow_item_eth flow_item_eth_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = RTE_BE16(ETHER_TYPE_SLOW),
};

static struct rte_flow_item_eth flow_item_eth_mask_type_8023ad = {
	.dst.addr_bytes = { 0 },
	.src.addr_bytes = { 0 },
	.type = 0xFFFF,
};

static struct rte_flow_item flow_item_8023ad[] = {
	{
		.type = RTE_FLOW_ITEM_TYPE_ETH,
		.spec = &flow_item_eth_type_8023ad,
		.last = NULL,
		.mask = &flow_item_eth_mask_type_8023ad,
	},
	{
		.type = RTE_FLOW_ITEM_TYPE_END,
		.spec = NULL,
		.last = NULL,
		.mask = NULL,
	}
};

const struct rte_flow_attr flow_attr_8023ad = {
	.group = 0,
	.priority = 0,
	.ingress = 1,
	.egress = 0,
	.reserved = 0,
};

int
bond_ethdev_8023ad_flow_verify(struct rte_eth_dev *bond_dev,
		uint16_t slave_port) {
	struct rte_eth_dev_info slave_info;
	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	const struct rte_flow_action_queue lacp_queue_conf = {
		.index = 0,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	int ret = rte_flow_validate(slave_port, &flow_attr_8023ad,
			flow_item_8023ad, actions, &error);
	if (ret < 0) {
		RTE_BOND_LOG(ERR, "%s: %s (slave_port=%d queue_id=%d)",
				__func__, error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	rte_eth_dev_info_get(slave_port, &slave_info);
	if (slave_info.max_rx_queues < bond_dev->data->nb_rx_queues ||
			slave_info.max_tx_queues < bond_dev->data->nb_tx_queues) {
		RTE_BOND_LOG(ERR,
			"%s: Slave %d capabilities doesn't allow to allocate additional queues",
			__func__, slave_port);
		return -1;
	}

	return 0;
}

int
bond_8023ad_slow_pkt_hw_filter_supported(uint16_t port_id) {
	struct rte_eth_dev *bond_dev = &rte_eth_devices[port_id];
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);
	struct rte_eth_dev_info bond_info;
	uint16_t idx;

	/* Verify if all slaves in bonding supports flow director and */
	if (internals->slave_count > 0) {
		rte_eth_dev_info_get(bond_dev->data->port_id, &bond_info);

		internals->mode4.dedicated_queues.rx_qid = bond_info.nb_rx_queues;
		internals->mode4.dedicated_queues.tx_qid = bond_info.nb_tx_queues;

		for (idx = 0; idx < internals->slave_count; idx++) {
			if (bond_ethdev_8023ad_flow_verify(bond_dev,
					internals->slaves[idx].port_id) != 0)
				return -1;
		}
	}

	return 0;
}

int
bond_ethdev_8023ad_flow_set(struct rte_eth_dev *bond_dev, uint16_t slave_port) {

	struct rte_flow_error error;
	struct bond_dev_private *internals = (struct bond_dev_private *)
			(bond_dev->data->dev_private);

	struct rte_flow_action_queue lacp_queue_conf = {
		.index = internals->mode4.dedicated_queues.rx_qid,
	};

	const struct rte_flow_action actions[] = {
		{
			.type = RTE_FLOW_ACTION_TYPE_QUEUE,
			.conf = &lacp_queue_conf
		},
		{
			.type = RTE_FLOW_ACTION_TYPE_END,
		}
	};

	internals->mode4.dedicated_queues.flow[slave_port] = rte_flow_create(slave_port,
			&flow_attr_8023ad, flow_item_8023ad, actions, &error);
	if (internals->mode4.dedicated_queues.flow[slave_port] == NULL) {
		RTE_BOND_LOG(ERR, "bond_ethdev_8023ad_flow_set: %s "
				"(slave_port=%d queue_id=%d)",
				error.message, slave_port,
				internals->mode4.dedicated_queues.rx_qid);
		return -1;
	}

	return 0;
}

static uint16_t
bond_ethdev_rx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint16_t active_slave;
	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	active_slave = internals->active_slave;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	for (i = 0; i < slave_count && nb_pkts; i++) {
		uint16_t num_rx_slave;

		/* Read packets from this slave */
		num_rx_slave = rte_eth_rx_burst(slaves[active_slave],
						bd_rx_q->queue_id,
						bufs + num_rx_total, nb_pkts);
		num_rx_total += num_rx_slave;
		nb_pkts -= num_rx_slave;

		if (++active_slave == slave_count)
			active_slave = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

static uint16_t
bond_ethdev_tx_burst_8023ad_fast_queue(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);


	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (unlikely(dist_slave_count < 1))
		return 0;

	/*
	 * Populate slaves mbuf with the packets which are to be sent on it
	 * selecting output slave using hash based on xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}


	/* Send packet burst on each slave device */
	for (i = 0; i < dist_slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(dist_slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
			       &slave_bufs[i][slave_tx_count],
			       slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}


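/*
 * Mode 4 (802.3ad) receive path for the non-dedicated-queue case: data and
 * LACP/marker frames arrive on the same queues, so slow-protocol frames (and
 * frames that must not be delivered because the slave is not collecting, or
 * the destination MAC does not match in non-promiscuous mode) are filtered
 * out of the burst and handed to the mode 4 state machine instead.
 */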
static uint16_t
bond_ethdev_rx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	/* Cast to structure, containing bonded device's port id and queue id */
	struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)queue;
	struct bond_dev_private *internals = bd_rx_q->dev_private;
	struct rte_eth_dev *bonded_eth_dev =
				     &rte_eth_devices[internals->port_id];
	struct ether_addr *bond_mac = bonded_eth_dev->data->mac_addrs;
	struct ether_hdr *hdr;

	const uint16_t ether_type_slow_be = rte_be_to_cpu_16(ETHER_TYPE_SLOW);
	uint16_t num_rx_total = 0;	/* Total number of received packets */
	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint16_t slave_count, idx;

	uint8_t collecting;  /* current slave collecting status */
	const uint8_t promisc = internals->promiscuous_en;
	uint8_t subtype;
	uint16_t i;
	uint16_t j;
	uint16_t k;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * slave_count);

	idx = internals->active_slave;
	if (idx >= slave_count) {
		internals->active_slave = 0;
		idx = 0;
	}
	for (i = 0; i < slave_count && num_rx_total < nb_pkts; i++) {
		j = num_rx_total;
		collecting = ACTOR_STATE(&bond_mode_8023ad_ports[slaves[idx]],
					 COLLECTING);

		/* Read packets from this slave */
		num_rx_total += rte_eth_rx_burst(slaves[idx], bd_rx_q->queue_id,
				&bufs[num_rx_total], nb_pkts - num_rx_total);

		for (k = j; k < 2 && k < num_rx_total; k++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[k], void *));

		/* Handle slow protocol packets. */
		while (j < num_rx_total) {

			/* If packet is not pure L2 and is known, skip it */
			if ((bufs[j]->packet_type & ~RTE_PTYPE_L2_ETHER) != 0) {
				j++;
				continue;
			}

			if (j + 3 < num_rx_total)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j + 3], void *));

			hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			subtype = ((struct slow_protocol_frame *)hdr)->slow_protocol.subtype;

			/* Remove packet from array if it is slow packet or slave is not
			 * in collecting state or bonding interface is not in promiscuous
			 * mode and packet address does not match. */
			if (unlikely(is_lacp_packets(hdr->ether_type, subtype, bufs[j]) ||
				!collecting ||
				(!promisc &&
				 !is_multicast_ether_addr(&hdr->d_addr) &&
				 !is_same_ether_addr(bond_mac,
						     &hdr->d_addr)))) {

				if (hdr->ether_type == ether_type_slow_be) {
					bond_mode_8023ad_handle_slow_pkt(
					    internals, slaves[idx], bufs[j]);
				} else
					rte_pktmbuf_free(bufs[j]);

				/* Packet is managed by mode 4 or dropped, shift the array */
				num_rx_total--;
				if (j < num_rx_total) {
					memmove(&bufs[j], &bufs[j + 1], sizeof(bufs[0]) *
						(num_rx_total - j));
				}
			} else
				j++;
		}
		if (unlikely(++idx == slave_count))
			idx = 0;
	}

	if (++internals->active_slave >= slave_count)
		internals->active_slave = 0;

	return num_rx_total;
}

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
uint32_t burstnumberRX;
uint32_t burstnumberTX;

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB

static void
arp_op_name(uint16_t arp_op, char *buf, size_t buf_len)
{
	switch (arp_op) {
	case ARP_OP_REQUEST:
		strlcpy(buf, "ARP Request", buf_len);
		return;
	case ARP_OP_REPLY:
		strlcpy(buf, "ARP Reply", buf_len);
		return;
	case ARP_OP_REVREQUEST:
		strlcpy(buf, "Reverse ARP Request", buf_len);
		return;
	case ARP_OP_REVREPLY:
		strlcpy(buf, "Reverse ARP Reply", buf_len);
		return;
	case ARP_OP_INVREQUEST:
		strlcpy(buf, "Peer Identify Request", buf_len);
		return;
	case ARP_OP_INVREPLY:
		strlcpy(buf, "Peer Identify Reply", buf_len);
		return;
	default:
		break;
	}
	strlcpy(buf, "Unknown", buf_len);
	return;
}
#endif
#define MaxIPv4String 16
static void
ipv4_addr_to_dot(uint32_t be_ipv4_addr, char *buf, uint8_t buf_size)
{
	uint32_t ipv4_addr;

	ipv4_addr = rte_be_to_cpu_32(be_ipv4_addr);
	snprintf(buf, buf_size, "%d.%d.%d.%d", (ipv4_addr >> 24) & 0xFF,
		(ipv4_addr >> 16) & 0xFF, (ipv4_addr >> 8) & 0xFF,
		ipv4_addr & 0xFF);
}

#define MAX_CLIENTS_NUMBER 128
uint8_t active_clients;
struct client_stats_t {
	uint16_t port;
	uint32_t ipv4_addr;
	uint32_t ipv4_rx_packets;
	uint32_t ipv4_tx_packets;
};
struct client_stats_t client_stats[MAX_CLIENTS_NUMBER];

static void
update_client_stats(uint32_t addr, uint16_t port, uint32_t *TXorRXindicator)
{
	int i = 0;

	for (; i < MAX_CLIENTS_NUMBER; i++) {
		if ((client_stats[i].ipv4_addr == addr) && (client_stats[i].port == port)) {
			/* Just update RX packets number for this client */
			if (TXorRXindicator == &burstnumberRX)
				client_stats[i].ipv4_rx_packets++;
			else
				client_stats[i].ipv4_tx_packets++;
			return;
		}
	}
	/* We have a new client. Insert him to the table, and increment stats */
	if (TXorRXindicator == &burstnumberRX)
		client_stats[active_clients].ipv4_rx_packets++;
	else
		client_stats[active_clients].ipv4_tx_packets++;
	client_stats[active_clients].ipv4_addr = addr;
	client_stats[active_clients].port = port;
	active_clients++;

}

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
#define MODE6_DEBUG(info, src_ip, dst_ip, eth_h, arp_op, port, burstnumber) \
	rte_log(RTE_LOG_DEBUG, bond_logtype,				\
		"%s port:%d SrcMAC:%02X:%02X:%02X:%02X:%02X:%02X SrcIP:%s " \
		"DstMAC:%02X:%02X:%02X:%02X:%02X:%02X DstIP:%s %s %d\n", \
		info,							\
		port,							\
		eth_h->s_addr.addr_bytes[0], eth_h->s_addr.addr_bytes[1], \
		eth_h->s_addr.addr_bytes[2], eth_h->s_addr.addr_bytes[3], \
		eth_h->s_addr.addr_bytes[4], eth_h->s_addr.addr_bytes[5], \
		src_ip,							\
		eth_h->d_addr.addr_bytes[0], eth_h->d_addr.addr_bytes[1], \
		eth_h->d_addr.addr_bytes[2], eth_h->d_addr.addr_bytes[3], \
		eth_h->d_addr.addr_bytes[4], eth_h->d_addr.addr_bytes[5], \
		dst_ip,							\
		arp_op, ++burstnumber)
#endif

static void
mode6_debug(const char __attribute__((unused)) *info, struct ether_hdr *eth_h,
		uint16_t port, uint32_t __attribute__((unused)) *burstnumber)
{
	struct ipv4_hdr *ipv4_h;
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	struct arp_hdr *arp_h;
	char dst_ip[16];
	char ArpOp[24];
	char buf[16];
#endif
	char src_ip[16];

	uint16_t ether_type = eth_h->ether_type;
	uint16_t offset = get_vlan_offset(eth_h, &ether_type);

#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	strlcpy(buf, info, 16);
#endif

	if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
		ipv4_h = (struct ipv4_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(ipv4_h->src_addr, src_ip, MaxIPv4String);
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
		ipv4_addr_to_dot(ipv4_h->dst_addr, dst_ip, MaxIPv4String);
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, "", port, *burstnumber);
#endif
		update_client_stats(ipv4_h->src_addr, port, burstnumber);
	}
#ifdef RTE_LIBRTE_BOND_DEBUG_ALB
	else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
		arp_h = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
		ipv4_addr_to_dot(arp_h->arp_data.arp_sip, src_ip, MaxIPv4String);
		ipv4_addr_to_dot(arp_h->arp_data.arp_tip, dst_ip, MaxIPv4String);
		arp_op_name(rte_be_to_cpu_16(arp_h->arp_op),
				ArpOp, sizeof(ArpOp));
		MODE6_DEBUG(buf, src_ip, dst_ip, eth_h, ArpOp, port, *burstnumber);
	}
#endif
}
#endif

static uint16_t
bond_ethdev_rx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;
	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;
	uint16_t nb_recv_pkts;
	int i;

	nb_recv_pkts = bond_ethdev_rx_burst(queue, bufs, nb_pkts);

	for (i = 0; i < nb_recv_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("RX ARP:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
			bond_mode_alb_arp_recv(eth_h, offset, internals);
		}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
		else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
			mode6_debug("RX IPv4:", eth_h, bufs[i]->port, &burstnumberRX);
#endif
	}

	return nb_recv_pkts;
}

static uint16_t
bond_ethdev_tx_burst_round_robin(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_pkts];
	uint16_t slave_nb_pkts[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t num_of_slaves;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	uint16_t num_tx_total = 0, num_tx_slave;

	static int slave_idx = 0;
	int i, cslave_idx = 0, tx_fail_total = 0;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return num_tx_total;

	/* Populate slaves mbuf with which packets are to be sent on it */
	for (i = 0; i < nb_pkts; i++) {
		cslave_idx = (slave_idx + i) % num_of_slaves;
		slave_bufs[cslave_idx][(slave_nb_pkts[cslave_idx])++] = bufs[i];
	}

	/* increment current slave index so the next call to tx burst starts on the
	 * next slave */
	slave_idx = ++cslave_idx;

	/* Send packet burst on each slave device */
	for (i = 0; i < num_of_slaves; i++) {
		if (slave_nb_pkts[i] > 0) {
			num_tx_slave = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					slave_bufs[i], slave_nb_pkts[i]);

			/* if tx burst fails move packets to end of bufs */
			if (unlikely(num_tx_slave < slave_nb_pkts[i])) {
				int tx_fail_slave = slave_nb_pkts[i] - num_tx_slave;

				tx_fail_total += tx_fail_slave;

				memcpy(&bufs[nb_pkts - tx_fail_total],
				       &slave_bufs[i][num_tx_slave],
				       tx_fail_slave * sizeof(bufs[0]));
			}
			num_tx_total += num_tx_slave;
		}
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_active_backup(void *queue,
		struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	if (internals->active_slave_count < 1)
		return 0;

	return rte_eth_tx_burst(internals->current_primary_port, bd_tx_q->queue_id,
			bufs, nb_pkts);
}

static inline uint16_t
ether_hash(struct ether_hdr *eth_hdr)
{
	unaligned_uint16_t *word_src_addr =
		(unaligned_uint16_t *)eth_hdr->s_addr.addr_bytes;
	unaligned_uint16_t *word_dst_addr =
		(unaligned_uint16_t *)eth_hdr->d_addr.addr_bytes;

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]);
}

static inline uint32_t
ipv4_hash(struct ipv4_hdr *ipv4_hdr)
{
	return ipv4_hdr->src_addr ^ ipv4_hdr->dst_addr;
}

static inline uint32_t
ipv6_hash(struct ipv6_hdr *ipv6_hdr)
{
	unaligned_uint32_t *word_src_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->src_addr[0]);
	unaligned_uint32_t *word_dst_addr =
		(unaligned_uint32_t *)&(ipv6_hdr->dst_addr[0]);

	return (word_src_addr[0] ^ word_dst_addr[0]) ^
			(word_src_addr[1] ^ word_dst_addr[1]) ^
			(word_src_addr[2] ^ word_dst_addr[2]) ^
			(word_src_addr[3] ^ word_dst_addr[3]);
}


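/*
 * Transmit hashing policies used by the balance (mode 2) and 802.3ad (mode 4)
 * paths: each variant folds a different set of header fields (L2 addresses,
 * optionally L3 addresses, optionally L4 ports) into one hash per packet and
 * writes the resulting slave index into the caller-provided 'slaves' array.
 */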
void
burst_xmit_l2_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint32_t hash;
	int i;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);

		hash = ether_hash(eth_hdr);

		slaves[i] = (hash ^= hash >> 8) % slave_count;
	}
}

void
burst_xmit_l23_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	uint16_t i;
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	uint32_t hash, l3hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		l3hash = 0;

		proto = eth_hdr->ether_type;
		hash = ether_hash(eth_hdr);

		vlan_offset = get_vlan_offset(eth_hdr, &proto);

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv4_hash(ipv4_hdr);

		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);
		}

		hash = hash ^ l3hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

void
burst_xmit_l34_hash(struct rte_mbuf **buf, uint16_t nb_pkts,
		uint16_t slave_count, uint16_t *slaves)
{
	struct ether_hdr *eth_hdr;
	uint16_t proto;
	size_t vlan_offset;
	int i;

	struct udp_hdr *udp_hdr;
	struct tcp_hdr *tcp_hdr;
	uint32_t hash, l3hash, l4hash;

	for (i = 0; i < nb_pkts; i++) {
		eth_hdr = rte_pktmbuf_mtod(buf[i], struct ether_hdr *);
		size_t pkt_end = (size_t)eth_hdr + rte_pktmbuf_data_len(buf[i]);
		proto = eth_hdr->ether_type;
		vlan_offset = get_vlan_offset(eth_hdr, &proto);
		l3hash = 0;
		l4hash = 0;

		if (rte_cpu_to_be_16(ETHER_TYPE_IPv4) == proto) {
			struct ipv4_hdr *ipv4_hdr = (struct ipv4_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			size_t ip_hdr_offset;

			l3hash = ipv4_hash(ipv4_hdr);

			/* there is no L4 header in fragmented packet */
			if (likely(rte_ipv4_frag_pkt_is_fragmented(ipv4_hdr)
								== 0)) {
				ip_hdr_offset = (ipv4_hdr->version_ihl
					& IPV4_HDR_IHL_MASK) *
					IPV4_IHL_MULTIPLIER;

				if (ipv4_hdr->next_proto_id == IPPROTO_TCP) {
					tcp_hdr = (struct tcp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)tcp_hdr + sizeof(*tcp_hdr)
							< pkt_end)
						l4hash = HASH_L4_PORTS(tcp_hdr);
				} else if (ipv4_hdr->next_proto_id ==
								IPPROTO_UDP) {
					udp_hdr = (struct udp_hdr *)
						((char *)ipv4_hdr +
							ip_hdr_offset);
					if ((size_t)udp_hdr + sizeof(*udp_hdr)
							< pkt_end)
						l4hash = HASH_L4_PORTS(udp_hdr);
				}
			}
		} else if (rte_cpu_to_be_16(ETHER_TYPE_IPv6) == proto) {
			struct ipv6_hdr *ipv6_hdr = (struct ipv6_hdr *)
					((char *)(eth_hdr + 1) + vlan_offset);
			l3hash = ipv6_hash(ipv6_hdr);

			if (ipv6_hdr->proto == IPPROTO_TCP) {
				tcp_hdr = (struct tcp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(tcp_hdr);
			} else if (ipv6_hdr->proto == IPPROTO_UDP) {
				udp_hdr = (struct udp_hdr *)(ipv6_hdr + 1);
				l4hash = HASH_L4_PORTS(udp_hdr);
			}
		}

		hash = l3hash ^ l4hash;
		hash ^= hash >> 16;
		hash ^= hash >> 8;

		slaves[i] = hash % slave_count;
	}
}

struct bwg_slave {
	uint64_t bwg_left_int;
	uint64_t bwg_left_remainder;
	uint16_t slave;
};

void
bond_tlb_activate_slave(struct bond_dev_private *internals) {
	int i;

	for (i = 0; i < internals->active_slave_count; i++) {
		tlb_last_obytets[internals->active_slaves[i]] = 0;
	}
}

static int
bandwidth_cmp(const void *a, const void *b)
{
	const struct bwg_slave *bwg_a = a;
	const struct bwg_slave *bwg_b = b;
	int64_t diff = (int64_t)bwg_b->bwg_left_int - (int64_t)bwg_a->bwg_left_int;
	int64_t diff2 = (int64_t)bwg_b->bwg_left_remainder -
			(int64_t)bwg_a->bwg_left_remainder;
	if (diff > 0)
		return 1;
	else if (diff < 0)
		return -1;
	else if (diff2 > 0)
		return 1;
	else if (diff2 < 0)
		return -1;
	else
		return 0;
}

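/*
 * TLB (mode 5) slave ordering: every REORDER_PERIOD_MS the alarm callback
 * below samples per-slave TX byte counters, estimates how much of each
 * slave's link bandwidth is still unused, and re-sorts tlb_slaves_order so
 * the least-loaded slaves are tried first by the TLB transmit path.
 */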
static void
bandwidth_left(uint16_t port_id, uint64_t load, uint8_t update_idx,
		struct bwg_slave *bwg_slave)
{
	struct rte_eth_link link_status;

	rte_eth_link_get_nowait(port_id, &link_status);
	uint64_t link_bwg = link_status.link_speed * 1000000ULL / 8;
	if (link_bwg == 0)
		return;
	link_bwg = link_bwg * (update_idx+1) * REORDER_PERIOD_MS;
	bwg_slave->bwg_left_int = (link_bwg - 1000*load) / link_bwg;
	bwg_slave->bwg_left_remainder = (link_bwg - 1000*load) % link_bwg;
}

static void
bond_ethdev_update_tlb_slave_cb(void *arg)
{
	struct bond_dev_private *internals = arg;
	struct rte_eth_stats slave_stats;
	struct bwg_slave bwg_array[RTE_MAX_ETHPORTS];
	uint16_t slave_count;
	uint64_t tx_bytes;

	uint8_t update_stats = 0;
	uint16_t slave_id;
	uint16_t i;

	internals->slave_update_idx++;


	if (internals->slave_update_idx >= REORDER_PERIOD_MS)
		update_stats = 1;

	for (i = 0; i < internals->active_slave_count; i++) {
		slave_id = internals->active_slaves[i];
		rte_eth_stats_get(slave_id, &slave_stats);
		tx_bytes = slave_stats.obytes - tlb_last_obytets[slave_id];
		bandwidth_left(slave_id, tx_bytes,
				internals->slave_update_idx, &bwg_array[i]);
		bwg_array[i].slave = slave_id;

		if (update_stats) {
			tlb_last_obytets[slave_id] = slave_stats.obytes;
		}
	}

	if (update_stats == 1)
		internals->slave_update_idx = 0;

	slave_count = i;
	qsort(bwg_array, slave_count, sizeof(bwg_array[0]), bandwidth_cmp);
	for (i = 0; i < slave_count; i++)
		internals->tlb_slaves_order[i] = bwg_array[i].slave;

	rte_eal_alarm_set(REORDER_PERIOD_MS * 1000, bond_ethdev_update_tlb_slave_cb,
			(struct bond_dev_private *)internals);
}

static uint16_t
bond_ethdev_tx_burst_tlb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct rte_eth_dev *primary_port =
			&rte_eth_devices[internals->primary_port];
	uint16_t num_tx_total = 0;
	uint16_t i, j;

	uint16_t num_of_slaves = internals->active_slave_count;
	uint16_t slaves[RTE_MAX_ETHPORTS];

	struct ether_hdr *ether_hdr;
	struct ether_addr primary_slave_addr;
	struct ether_addr active_slave_addr;

	if (num_of_slaves < 1)
		return num_tx_total;

	memcpy(slaves, internals->tlb_slaves_order,
				sizeof(internals->tlb_slaves_order[0]) * num_of_slaves);


	ether_addr_copy(primary_port->data->mac_addrs, &primary_slave_addr);

	if (nb_pkts > 3) {
		for (i = 0; i < 3; i++)
			rte_prefetch0(rte_pktmbuf_mtod(bufs[i], void*));
	}

	for (i = 0; i < num_of_slaves; i++) {
		rte_eth_macaddr_get(slaves[i], &active_slave_addr);
		for (j = num_tx_total; j < nb_pkts; j++) {
			if (j + 3 < nb_pkts)
				rte_prefetch0(rte_pktmbuf_mtod(bufs[j+3], void*));

			ether_hdr = rte_pktmbuf_mtod(bufs[j], struct ether_hdr *);
			if (is_same_ether_addr(&ether_hdr->s_addr, &primary_slave_addr))
				ether_addr_copy(&active_slave_addr, &ether_hdr->s_addr);
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			mode6_debug("TX IPv4:", ether_hdr, slaves[i], &burstnumberTX);
#endif
		}

		num_tx_total += rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
				bufs + num_tx_total, nb_pkts - num_tx_total);

		if (num_tx_total == nb_pkts)
			break;
	}

	return num_tx_total;
}

void
bond_tlb_disable(struct bond_dev_private *internals)
{
	rte_eal_alarm_cancel(bond_ethdev_update_tlb_slave_cb, internals);
}

void
bond_tlb_enable(struct bond_dev_private *internals)
{
	bond_ethdev_update_tlb_slave_cb(internals);
}

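/*
 * ALB (mode 6) transmit: ARP frames are steered by the ALB client table so
 * traffic for a given client stays on the same slave, periodic ARP update
 * packets are generated when the table is marked dirty (mode6.ntt), and all
 * non-ARP traffic falls back to the TLB transmit policy above.
 */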
static uint16_t
bond_ethdev_tx_burst_alb(void *queue, struct rte_mbuf **bufs, uint16_t nb_pkts)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	struct ether_hdr *eth_h;
	uint16_t ether_type, offset;

	struct client_data *client_info;

	/*
	 * We create transmit buffers for every slave and one additional to send
	 * through tlb. In worst case every packet will be sent on one port.
	 */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS + 1][nb_pkts];
	uint16_t slave_bufs_pkts[RTE_MAX_ETHPORTS + 1] = { 0 };

	/*
	 * We create separate transmit buffers for update packets as they won't
	 * be counted in num_tx_total.
	 */
	struct rte_mbuf *update_bufs[RTE_MAX_ETHPORTS][ALB_HASH_TABLE_SIZE];
	uint16_t update_bufs_pkts[RTE_MAX_ETHPORTS] = { 0 };

	struct rte_mbuf *upd_pkt;
	size_t pkt_size;

	uint16_t num_send, num_not_send = 0;
	uint16_t num_tx_total = 0;
	uint16_t slave_idx;

	int i, j;

	/* Search tx buffer for ARP packets and forward them to alb */
	for (i = 0; i < nb_pkts; i++) {
		eth_h = rte_pktmbuf_mtod(bufs[i], struct ether_hdr *);
		ether_type = eth_h->ether_type;
		offset = get_vlan_offset(eth_h, &ether_type);

		if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
			slave_idx = bond_mode_alb_arp_xmit(eth_h, offset, internals);

			/* Change src mac in eth header */
			rte_eth_macaddr_get(slave_idx, &eth_h->s_addr);

			/* Add packet to slave tx buffer */
			slave_bufs[slave_idx][slave_bufs_pkts[slave_idx]] = bufs[i];
			slave_bufs_pkts[slave_idx]++;
		} else {
			/* If packet is not ARP, send it with TLB policy */
			slave_bufs[RTE_MAX_ETHPORTS][slave_bufs_pkts[RTE_MAX_ETHPORTS]] =
					bufs[i];
			slave_bufs_pkts[RTE_MAX_ETHPORTS]++;
		}
	}

	/* Update connected client ARP tables */
	if (internals->mode6.ntt) {
		for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
			client_info = &internals->mode6.client_table[i];

			if (client_info->in_use) {
				/* Allocate new packet to send ARP update on current slave */
				upd_pkt = rte_pktmbuf_alloc(internals->mode6.mempool);
				if (upd_pkt == NULL) {
					RTE_BOND_LOG(ERR,
						     "Failed to allocate ARP packet from pool");
					continue;
				}
				pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr)
						+ client_info->vlan_count * sizeof(struct vlan_hdr);
				upd_pkt->data_len = pkt_size;
				upd_pkt->pkt_len = pkt_size;

				slave_idx = bond_mode_alb_arp_upd(client_info, upd_pkt,
						internals);

				/* Add packet to update tx buffer */
				update_bufs[slave_idx][update_bufs_pkts[slave_idx]] = upd_pkt;
				update_bufs_pkts[slave_idx]++;
			}
		}
		internals->mode6.ntt = 0;
	}

	/* Send ARP packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (slave_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id,
					slave_bufs[i], slave_bufs_pkts[i]);
			for (j = 0; j < slave_bufs_pkts[i] - num_send; j++) {
				bufs[nb_pkts - 1 - num_not_send - j] =
						slave_bufs[i][nb_pkts - 1 - j];
			}

			num_tx_total += num_send;
			num_not_send += slave_bufs_pkts[i] - num_send;

#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
	/* Print TX stats including update packets */
			for (j = 0; j < slave_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(slave_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARP:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send update packets on proper slaves */
	for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
		if (update_bufs_pkts[i] > 0) {
			num_send = rte_eth_tx_burst(i, bd_tx_q->queue_id, update_bufs[i],
					update_bufs_pkts[i]);
			for (j = num_send; j < update_bufs_pkts[i]; j++) {
				rte_pktmbuf_free(update_bufs[i][j]);
			}
#if defined(RTE_LIBRTE_BOND_DEBUG_ALB) || defined(RTE_LIBRTE_BOND_DEBUG_ALB_L1)
			for (j = 0; j < update_bufs_pkts[i]; j++) {
				eth_h = rte_pktmbuf_mtod(update_bufs[i][j], struct ether_hdr *);
				mode6_debug("TX ARPupd:", eth_h, i, &burstnumberTX);
			}
#endif
		}
	}

	/* Send non-ARP packets using tlb policy */
	if (slave_bufs_pkts[RTE_MAX_ETHPORTS] > 0) {
		num_send = bond_ethdev_tx_burst_tlb(queue,
				slave_bufs[RTE_MAX_ETHPORTS],
				slave_bufs_pkts[RTE_MAX_ETHPORTS]);

		for (j = 0; j < slave_bufs_pkts[RTE_MAX_ETHPORTS]; j++) {
			bufs[nb_pkts - 1 - num_not_send - j] =
					slave_bufs[RTE_MAX_ETHPORTS][nb_pkts - 1 - j];
		}

		num_tx_total += num_send;
	}

	return num_tx_total;
}

static uint16_t
bond_ethdev_tx_burst_balance(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	/* Array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[nb_bufs];

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	if (unlikely(nb_bufs == 0))
		return 0;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/*
	 * Populate slaves mbuf with the packets which are to be sent on it
	 * selecting output slave using hash based on xmit policy
	 */
	internals->burst_xmit_hash(bufs, nb_bufs, slave_count,
			bufs_slave_port_idxs);

	for (i = 0; i < nb_bufs; i++) {
		/* Populate slave mbuf arrays with mbufs for that slave. */
		uint16_t slave_idx = bufs_slave_port_idxs[i];

		slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] = bufs[i];
	}

	/* Send packet burst on each slave device */
	for (i = 0; i < slave_count; i++) {
		if (slave_nb_bufs[i] == 0)
			continue;

		slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
				bd_tx_q->queue_id, slave_bufs[i],
				slave_nb_bufs[i]);

		total_tx_count += slave_tx_count;

		/* If tx burst fails move packets to end of bufs */
		if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
			int slave_tx_fail_count = slave_nb_bufs[i] -
					slave_tx_count;
			total_tx_fail_count += slave_tx_fail_count;
			memcpy(&bufs[nb_bufs - total_tx_fail_count],
					&slave_bufs[i][slave_tx_count],
					slave_tx_fail_count * sizeof(bufs[0]));
		}
	}

	return total_tx_count;
}

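/*
 * Mode 4 (802.3ad) transmit for the non-dedicated-queue case: any pending
 * LACP control frames queued by the state machine on each slave's tx_ring
 * are flushed first (and re-enqueued if their transmission fails), then the
 * data burst is hashed across the slaves currently in DISTRIBUTING state.
 */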
static uint16_t
bond_ethdev_tx_burst_8023ad(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_bufs)
{
	struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)queue;
	struct bond_dev_private *internals = bd_tx_q->dev_private;

	uint16_t slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t slave_count;

	uint16_t dist_slave_port_ids[RTE_MAX_ETHPORTS];
	uint16_t dist_slave_count;

	/* 2-D array to sort mbufs for transmission on each slave into */
	struct rte_mbuf *slave_bufs[RTE_MAX_ETHPORTS][nb_bufs];
	/* Number of mbufs for transmission on each slave */
	uint16_t slave_nb_bufs[RTE_MAX_ETHPORTS] = { 0 };
	/* Mapping array generated by hash function to map mbufs to slaves */
	uint16_t bufs_slave_port_idxs[RTE_MAX_ETHPORTS] = { 0 };

	uint16_t slave_tx_count;
	uint16_t total_tx_count = 0, total_tx_fail_count = 0;

	uint16_t i;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	slave_count = internals->active_slave_count;
	if (unlikely(slave_count < 1))
		return 0;

	memcpy(slave_port_ids, internals->active_slaves,
			sizeof(slave_port_ids[0]) * slave_count);

	/* Check for LACP control packets and send if available */
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];
		struct rte_mbuf *ctrl_pkt = NULL;

		if (likely(rte_ring_empty(port->tx_ring)))
			continue;

		if (rte_ring_dequeue(port->tx_ring,
				     (void **)&ctrl_pkt) != -ENOENT) {
			slave_tx_count = rte_eth_tx_burst(slave_port_ids[i],
					bd_tx_q->queue_id, &ctrl_pkt, 1);
			/*
			 * re-enqueue LAG control plane packets to buffering
			 * ring if transmission fails so the packet isn't lost.
			 */
			if (slave_tx_count != 1)
				rte_ring_enqueue(port->tx_ring, ctrl_pkt);
		}
	}

	if (unlikely(nb_bufs == 0))
		return 0;

	dist_slave_count = 0;
	for (i = 0; i < slave_count; i++) {
		struct port *port = &bond_mode_8023ad_ports[slave_port_ids[i]];

		if (ACTOR_STATE(port, DISTRIBUTING))
			dist_slave_port_ids[dist_slave_count++] =
					slave_port_ids[i];
	}

	if (likely(dist_slave_count > 0)) {

		/*
		 * Populate slaves mbuf with the packets which are to be sent
		 * on it, selecting output slave using hash based on xmit policy
		 */
		internals->burst_xmit_hash(bufs, nb_bufs, dist_slave_count,
				bufs_slave_port_idxs);

		for (i = 0; i < nb_bufs; i++) {
			/*
			 * Populate slave mbuf arrays with mbufs for that
			 * slave
			 */
			uint16_t slave_idx = bufs_slave_port_idxs[i];

			slave_bufs[slave_idx][slave_nb_bufs[slave_idx]++] =
					bufs[i];
		}


		/* Send packet burst on each slave device */
		for (i = 0; i < dist_slave_count; i++) {
			if (slave_nb_bufs[i] == 0)
				continue;

			slave_tx_count = rte_eth_tx_burst(
					dist_slave_port_ids[i],
					bd_tx_q->queue_id, slave_bufs[i],
					slave_nb_bufs[i]);

			total_tx_count += slave_tx_count;

			/* If tx burst fails move packets to end of bufs */
			if (unlikely(slave_tx_count < slave_nb_bufs[i])) {
				int slave_tx_fail_count = slave_nb_bufs[i] -
						slave_tx_count;
				total_tx_fail_count += slave_tx_fail_count;

				memcpy(&bufs[nb_bufs - total_tx_fail_count],
				       &slave_bufs[i][slave_tx_count],
				       slave_tx_fail_count * sizeof(bufs[0]));
			}
		}
	}

	return total_tx_count;
}

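/*
 * Broadcast (mode 3) transmit: mbuf reference counts are bumped so the same
 * packets can be handed to every active slave; if any slave fails to send
 * part of the burst, the surplus references are freed and the count from the
 * most successful slave is reported back to the application.
 */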
static uint16_t
bond_ethdev_tx_burst_broadcast(void *queue, struct rte_mbuf **bufs,
		uint16_t nb_pkts)
{
	struct bond_dev_private *internals;
	struct bond_tx_queue *bd_tx_q;

	uint16_t slaves[RTE_MAX_ETHPORTS];
	uint8_t tx_failed_flag = 0;
	uint16_t num_of_slaves;

	uint16_t max_nb_of_tx_pkts = 0;

	int slave_tx_total[RTE_MAX_ETHPORTS];
	int i, most_successful_tx_slave = -1;

	bd_tx_q = (struct bond_tx_queue *)queue;
	internals = bd_tx_q->dev_private;

	/* Copy slave list to protect against slave up/down changes during tx
	 * bursting */
	num_of_slaves = internals->active_slave_count;
	memcpy(slaves, internals->active_slaves,
			sizeof(internals->active_slaves[0]) * num_of_slaves);

	if (num_of_slaves < 1)
		return 0;

	/* Increment reference count on mbufs */
	for (i = 0; i < nb_pkts; i++)
		rte_mbuf_refcnt_update(bufs[i], num_of_slaves - 1);

	/* Transmit burst on each active slave */
	for (i = 0; i < num_of_slaves; i++) {
		slave_tx_total[i] = rte_eth_tx_burst(slaves[i], bd_tx_q->queue_id,
					bufs, nb_pkts);

		if (unlikely(slave_tx_total[i] < nb_pkts))
			tx_failed_flag = 1;

		/* record the value and slave index for the slave which transmits the
		 * maximum number of packets */
		if (slave_tx_total[i] > max_nb_of_tx_pkts) {
			max_nb_of_tx_pkts = slave_tx_total[i];
			most_successful_tx_slave = i;
		}
	}

	/* if slaves fail to transmit packets from burst, the calling application
	 * is not expected to know about multiple references to packets so we must
	 * handle failures of all packets except those of the most successful slave
	 */
	if (unlikely(tx_failed_flag))
		for (i = 0; i < num_of_slaves; i++)
			if (i != most_successful_tx_slave)
				while (slave_tx_total[i] < nb_pkts)
					rte_pktmbuf_free(bufs[slave_tx_total[i]++]);

	return max_nb_of_tx_pkts;
}

static void
link_properties_set(struct rte_eth_dev *ethdev, struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		/**
		 * If in mode 4 then save the link properties of the first
		 * slave, all subsequent slaves must match these properties
		 */
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		bond_link->link_autoneg = slave_link->link_autoneg;
		bond_link->link_duplex = slave_link->link_duplex;
		bond_link->link_speed = slave_link->link_speed;
	} else {
		/**
		 * In any other mode the link properties are set to default
		 * values of AUTONEG/DUPLEX
		 */
		ethdev->data->dev_link.link_autoneg = ETH_LINK_AUTONEG;
		ethdev->data->dev_link.link_duplex = ETH_LINK_FULL_DUPLEX;
	}
}

static int
link_properties_valid(struct rte_eth_dev *ethdev,
		struct rte_eth_link *slave_link)
{
	struct bond_dev_private *bond_ctx = ethdev->data->dev_private;

	if (bond_ctx->mode == BONDING_MODE_8023AD) {
		struct rte_eth_link *bond_link = &bond_ctx->mode4.slave_link;

		if (bond_link->link_duplex != slave_link->link_duplex ||
			bond_link->link_autoneg != slave_link->link_autoneg ||
			bond_link->link_speed != slave_link->link_speed)
			return -1;
	}

	return 0;
}

int
mac_address_get(struct rte_eth_dev *eth_dev, struct ether_addr *dst_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (dst_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	ether_addr_copy(mac_addr, dst_mac_addr);
	return 0;
}

int
mac_address_set(struct rte_eth_dev *eth_dev, struct ether_addr *new_mac_addr)
{
	struct ether_addr *mac_addr;

	if (eth_dev == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer eth_dev specified");
		return -1;
	}

	if (new_mac_addr == NULL) {
		RTE_BOND_LOG(ERR, "NULL pointer MAC specified");
		return -1;
	}

	mac_addr = eth_dev->data->mac_addrs;

	/* If new MAC is different to current MAC then update */
	if (memcmp(mac_addr, new_mac_addr, sizeof(*mac_addr)) != 0)
		memcpy(mac_addr, new_mac_addr, sizeof(*mac_addr));

	return 0;
}

static const struct ether_addr null_mac_addr;

/*
 * Add additional MAC addresses to the slave
 */
int
slave_add_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, ret;
	struct ether_addr *mac_addr;

	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_add(slave_port_id, mac_addr, 0);
		if (ret < 0) {
			/* rollback */
			for (i--; i > 0; i--)
				rte_eth_dev_mac_addr_remove(slave_port_id,
					&bonded_eth_dev->data->mac_addrs[i]);
			return ret;
		}
	}

	return 0;
}

/*
 * Remove additional MAC addresses from the slave
 */
int
slave_remove_mac_addresses(struct rte_eth_dev *bonded_eth_dev,
		uint16_t slave_port_id)
{
	int i, rc, ret;
	struct ether_addr *mac_addr;

	rc = 0;
	for (i = 1; i < BOND_MAX_MAC_ADDRS; i++) {
		mac_addr = &bonded_eth_dev->data->mac_addrs[i];
		if (is_same_ether_addr(mac_addr, &null_mac_addr))
			break;

		ret = rte_eth_dev_mac_addr_remove(slave_port_id, mac_addr);
		/* save only the first error */
		if (ret < 0 && rc == 0)
			rc = ret;
	}

	return rc;
}

int
mac_address_slaves_update(struct rte_eth_dev *bonded_eth_dev)
{
	struct bond_dev_private *internals = bonded_eth_dev->data->dev_private;
	int i;

	/* Update slave devices MAC addresses */
	if (internals->slave_count < 1)
		return -1;

	switch (internals->mode) {
	case BONDING_MODE_ROUND_ROBIN:
	case BONDING_MODE_BALANCE:
	case BONDING_MODE_BROADCAST:
		for (i = 0; i < internals->slave_count; i++) {
			if (rte_eth_dev_default_mac_addr_set(
					internals->slaves[i].port_id,
					bonded_eth_dev->data->mac_addrs)) {
				RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
						internals->slaves[i].port_id);
				return -1;
			}
		}
		break;
	case BONDING_MODE_8023AD:
		bond_mode_8023ad_mac_address_update(bonded_eth_dev);
		break;
	case BONDING_MODE_ACTIVE_BACKUP:
	case BONDING_MODE_TLB:
	case BONDING_MODE_ALB:
	default:
		for (i = 0; i < internals->slave_count; i++) {
			if (internals->slaves[i].port_id ==
					internals->current_primary_port) {
				if (rte_eth_dev_default_mac_addr_set(
						internals->primary_port,
						bonded_eth_dev->data->mac_addrs)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->current_primary_port);
					return -1;
				}
			} else {
				if (rte_eth_dev_default_mac_addr_set(
						internals->slaves[i].port_id,
						&internals->slaves[i].persisted_mac_addr)) {
					RTE_BOND_LOG(ERR, "Failed to update port Id %d MAC address",
							internals->slaves[i].port_id);
					return -1;
				}
			}
		}
	}

	return 0;
}

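/*
 * Select the RX/TX burst handlers that implement the requested bonding mode;
 * for 802.3ad the dedicated-queue ("fast queue") variants are installed when
 * hardware flow steering of LACP frames has been enabled.
 */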
1657int
1658bond_ethdev_mode_set(struct rte_eth_dev *eth_dev, int mode)
1659{
1660 struct bond_dev_private *internals;
1661
1662 internals = eth_dev->data->dev_private;
1663
1664 switch (mode) {
1665 case BONDING_MODE_ROUND_ROBIN:
1666 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_round_robin;
1667 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1668 break;
1669 case BONDING_MODE_ACTIVE_BACKUP:
1670 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_active_backup;
1671 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1672 break;
1673 case BONDING_MODE_BALANCE:
1674 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_balance;
1675 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1676 break;
1677 case BONDING_MODE_BROADCAST:
1678 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_broadcast;
1679 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst;
1680 break;
1681 case BONDING_MODE_8023AD:
1682 if (bond_mode_8023ad_enable(eth_dev) != 0)
1683 return -1;
1684
11fdf7f2
TL
1685 if (internals->mode4.dedicated_queues.enabled == 0) {
1686 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_8023ad;
1687 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_8023ad;
1688 RTE_BOND_LOG(WARNING,
1689 "Using mode 4, it is necessary to do TX burst "
1690 "and RX burst at least every 100ms.");
1691 } else {
1692 /* Use flow director's optimization */
1693 eth_dev->rx_pkt_burst =
1694 bond_ethdev_rx_burst_8023ad_fast_queue;
1695 eth_dev->tx_pkt_burst =
1696 bond_ethdev_tx_burst_8023ad_fast_queue;
1697 }
7c673cae
FG
1698 break;
1699 case BONDING_MODE_TLB:
1700 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_tlb;
1701 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_active_backup;
1702 break;
1703 case BONDING_MODE_ALB:
1704 if (bond_mode_alb_enable(eth_dev) != 0)
1705 return -1;
1706
1707 eth_dev->tx_pkt_burst = bond_ethdev_tx_burst_alb;
1708 eth_dev->rx_pkt_burst = bond_ethdev_rx_burst_alb;
1709 break;
1710 default:
1711 return -1;
1712 }
1713
1714 internals->mode = mode;
1715
1716 return 0;
1717}
1718
11fdf7f2
TL
1719
1720static int
1721slave_configure_slow_queue(struct rte_eth_dev *bonded_eth_dev,
1722 struct rte_eth_dev *slave_eth_dev)
1723{
1724 int errval = 0;
1725 struct bond_dev_private *internals = (struct bond_dev_private *)
1726 bonded_eth_dev->data->dev_private;
9f95a23c 1727 struct port *port = &bond_mode_8023ad_ports[slave_eth_dev->data->port_id];
11fdf7f2
TL
1728
1729 if (port->slow_pool == NULL) {
1730 char mem_name[256];
1731 int slave_id = slave_eth_dev->data->port_id;
1732
1733 snprintf(mem_name, RTE_DIM(mem_name), "slave_port%u_slow_pool",
1734 slave_id);
1735 port->slow_pool = rte_pktmbuf_pool_create(mem_name, 8191,
1736 250, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
1737 slave_eth_dev->data->numa_node);
1738
1739 /* Any memory allocation failure in initialization is critical because
1740 * resources can't be free, so reinitialization is impossible. */
1741 if (port->slow_pool == NULL) {
1742 rte_panic("Slave %u: Failed to create memory pool '%s': %s\n",
1743 slave_id, mem_name, rte_strerror(rte_errno));
1744 }
1745 }
1746
1747 if (internals->mode4.dedicated_queues.enabled == 1) {
1748 /* Configure slow Rx queue */
1749
1750 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id,
1751 internals->mode4.dedicated_queues.rx_qid, 128,
1752 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1753 NULL, port->slow_pool);
1754 if (errval != 0) {
1755 RTE_BOND_LOG(ERR,
1756 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1757 slave_eth_dev->data->port_id,
1758 internals->mode4.dedicated_queues.rx_qid,
1759 errval);
1760 return errval;
1761 }
1762
1763 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id,
1764 internals->mode4.dedicated_queues.tx_qid, 512,
1765 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1766 NULL);
1767 if (errval != 0) {
1768 RTE_BOND_LOG(ERR,
1769 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1770 slave_eth_dev->data->port_id,
1771 internals->mode4.dedicated_queues.tx_qid,
1772 errval);
1773 return errval;
1774 }
1775 }
1776 return 0;
1777}
1778
1779int
1780slave_configure(struct rte_eth_dev *bonded_eth_dev,
1781 struct rte_eth_dev *slave_eth_dev)
1782{
1783 struct bond_rx_queue *bd_rx_q;
1784 struct bond_tx_queue *bd_tx_q;
1785 uint16_t nb_rx_queues;
1786 uint16_t nb_tx_queues;
1787
1788 int errval;
1789 uint16_t q_id;
1790 struct rte_flow_error flow_error;
1791
1792 struct bond_dev_private *internals = (struct bond_dev_private *)
1793 bonded_eth_dev->data->dev_private;
1794
1795 /* Stop slave */
1796 rte_eth_dev_stop(slave_eth_dev->data->port_id);
1797
1798 /* Enable interrupts on slave device if supported */
1799 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)
1800 slave_eth_dev->data->dev_conf.intr_conf.lsc = 1;
1801
1802 /* If RSS is enabled for bonding, try to enable it for slaves */
1803 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS_FLAG) {
1804 if (internals->rss_key_len != 0) {
1805 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len =
1806 internals->rss_key_len;
1807 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key =
1808 internals->rss_key;
1809 } else {
1810 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key = NULL;
1811 }
1812
1813 slave_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf =
1814 bonded_eth_dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
1815 slave_eth_dev->data->dev_conf.rxmode.mq_mode =
1816 bonded_eth_dev->data->dev_conf.rxmode.mq_mode;
1817 }
1818
1819 if (bonded_eth_dev->data->dev_conf.rxmode.offloads &
1820 DEV_RX_OFFLOAD_VLAN_FILTER)
1821 slave_eth_dev->data->dev_conf.rxmode.offloads |=
1822 DEV_RX_OFFLOAD_VLAN_FILTER;
1823 else
1824 slave_eth_dev->data->dev_conf.rxmode.offloads &=
1825 ~DEV_RX_OFFLOAD_VLAN_FILTER;
1826
1827 nb_rx_queues = bonded_eth_dev->data->nb_rx_queues;
1828 nb_tx_queues = bonded_eth_dev->data->nb_tx_queues;
1829
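/* When 802.3ad dedicated queues are enabled, reserve one extra Rx/Tx
 * queue on each slave for the LACP control (slow) traffic. */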
1830 if (internals->mode == BONDING_MODE_8023AD) {
1831 if (internals->mode4.dedicated_queues.enabled == 1) {
1832 nb_rx_queues++;
1833 nb_tx_queues++;
1834 }
1835 }
1836
1837 errval = rte_eth_dev_set_mtu(slave_eth_dev->data->port_id,
1838 bonded_eth_dev->data->mtu);
1839 if (errval != 0 && errval != -ENOTSUP) {
1840 RTE_BOND_LOG(ERR, "rte_eth_dev_set_mtu: port %u, err (%d)",
1841 slave_eth_dev->data->port_id, errval);
1842 return errval;
1843 }
1844
1845 /* Configure device */
1846 errval = rte_eth_dev_configure(slave_eth_dev->data->port_id,
1847 nb_rx_queues, nb_tx_queues,
1848 &(slave_eth_dev->data->dev_conf));
1849 if (errval != 0) {
1850 RTE_BOND_LOG(ERR, "Cannot configure slave device: port %u, err (%d)",
1851 slave_eth_dev->data->port_id, errval);
1852 return errval;
1853 }
1854
1855 /* Setup Rx Queues */
1856 for (q_id = 0; q_id < bonded_eth_dev->data->nb_rx_queues; q_id++) {
1857 bd_rx_q = (struct bond_rx_queue *)bonded_eth_dev->data->rx_queues[q_id];
1858
1859 errval = rte_eth_rx_queue_setup(slave_eth_dev->data->port_id, q_id,
1860 bd_rx_q->nb_rx_desc,
1861 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1862 &(bd_rx_q->rx_conf), bd_rx_q->mb_pool);
1863 if (errval != 0) {
1864 RTE_BOND_LOG(ERR,
1865 "rte_eth_rx_queue_setup: port=%d queue_id %d, err (%d)",
1866 slave_eth_dev->data->port_id, q_id, errval);
1867 return errval;
1868 }
1869 }
1870
1871 /* Setup Tx Queues */
1872 for (q_id = 0; q_id < bonded_eth_dev->data->nb_tx_queues; q_id++) {
1873 bd_tx_q = (struct bond_tx_queue *)bonded_eth_dev->data->tx_queues[q_id];
1874
1875 errval = rte_eth_tx_queue_setup(slave_eth_dev->data->port_id, q_id,
1876 bd_tx_q->nb_tx_desc,
1877 rte_eth_dev_socket_id(slave_eth_dev->data->port_id),
1878 &bd_tx_q->tx_conf);
1879 if (errval != 0) {
1880 RTE_BOND_LOG(ERR,
1881 "rte_eth_tx_queue_setup: port=%d queue_id %d, err (%d)",
1882 slave_eth_dev->data->port_id, q_id, errval);
1883 return errval;
1884 }
1885 }
1886
1887 if (internals->mode == BONDING_MODE_8023AD &&
1888 internals->mode4.dedicated_queues.enabled == 1) {
1889 errval = slave_configure_slow_queue(bonded_eth_dev, slave_eth_dev);
1890 if (errval != 0)
1891 return errval;
1892
1893 if (bond_ethdev_8023ad_flow_verify(bonded_eth_dev,
1894 slave_eth_dev->data->port_id) != 0) {
1895 RTE_BOND_LOG(ERR,
1896 "bond_ethdev_8023ad_flow_verify: port=%d failed",
1897 slave_eth_dev->data->port_id);
1898 return -1;
1899 }
1900
1901 if (internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id] != NULL)
1902 rte_flow_destroy(slave_eth_dev->data->port_id,
1903 internals->mode4.dedicated_queues.flow[slave_eth_dev->data->port_id],
1904 &flow_error);
1905
1906 bond_ethdev_8023ad_flow_set(bonded_eth_dev,
1907 slave_eth_dev->data->port_id);
1908 }
1909
1910 /* Start device */
1911 errval = rte_eth_dev_start(slave_eth_dev->data->port_id);
1912 if (errval != 0) {
1913 RTE_BOND_LOG(ERR, "rte_eth_dev_start: port=%u, err (%d)",
1914 slave_eth_dev->data->port_id, errval);
1915 return -1;
1916 }
1917
1918 /* If RSS is enabled for bonding, synchronize RETA */
1919 if (bonded_eth_dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
1920 int i;
1921 struct bond_dev_private *internals;
1922
1923 internals = bonded_eth_dev->data->dev_private;
1924
1925 for (i = 0; i < internals->slave_count; i++) {
1926 if (internals->slaves[i].port_id == slave_eth_dev->data->port_id) {
1927 errval = rte_eth_dev_rss_reta_update(
1928 slave_eth_dev->data->port_id,
1929 &internals->reta_conf[0],
1930 internals->slaves[i].reta_size);
1931 if (errval != 0) {
1932 RTE_BOND_LOG(WARNING,
1933 "rte_eth_dev_rss_reta_update on slave port %d fails (err %d)."
1934 " RSS Configuration for bonding may be inconsistent.",
1935 slave_eth_dev->data->port_id, errval);
1936 }
1937 break;
1938 }
1939 }
1940 }
1941
1942 /* If lsc interrupt is set, check initial slave's link status */
1943 if (slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) {
1944 slave_eth_dev->dev_ops->link_update(slave_eth_dev, 0);
1945 bond_ethdev_lsc_event_callback(slave_eth_dev->data->port_id,
1946 RTE_ETH_EVENT_INTR_LSC, &bonded_eth_dev->data->port_id,
1947 NULL);
1948 }
1949
1950 return 0;
1951}
1952
1953void
1954slave_remove(struct bond_dev_private *internals,
1955 struct rte_eth_dev *slave_eth_dev)
1956{
1957 uint16_t i;
1958
1959 for (i = 0; i < internals->slave_count; i++)
1960 if (internals->slaves[i].port_id ==
1961 slave_eth_dev->data->port_id)
1962 break;
1963
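/* Close the gap left by the removed slave in the slave array and in
 * every flow's per-slave handle array. */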
1964 if (i < (internals->slave_count - 1)) {
1965 struct rte_flow *flow;
1966
1967 memmove(&internals->slaves[i], &internals->slaves[i + 1],
1968 sizeof(internals->slaves[0]) *
1969 (internals->slave_count - i - 1));
1970 TAILQ_FOREACH(flow, &internals->flow_list, next) {
1971 memmove(&flow->flows[i], &flow->flows[i + 1],
1972 sizeof(flow->flows[0]) *
1973 (internals->slave_count - i - 1));
1974 flow->flows[internals->slave_count - 1] = NULL;
1975 }
1976 }
1977
1978 internals->slave_count--;
1979
1980 /* force reconfiguration of slave interfaces */
1981 _rte_eth_dev_reset(slave_eth_dev);
1982}
1983
1984static void
1985bond_ethdev_slave_link_status_change_monitor(void *cb_arg);
1986
1987void
1988slave_add(struct bond_dev_private *internals,
1989 struct rte_eth_dev *slave_eth_dev)
1990{
1991 struct bond_slave_details *slave_details =
1992 &internals->slaves[internals->slave_count];
1993
1994 slave_details->port_id = slave_eth_dev->data->port_id;
1995 slave_details->last_link_status = 0;
1996
1997 /* Mark slave devices that don't support interrupts so we can
1998 * compensate when we start the bond
1999 */
2000 if (!(slave_eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
2001 slave_details->link_status_poll_enabled = 1;
2002 }
2003
2004 slave_details->link_status_wait_to_complete = 0;
2005 /* Save the slave's current MAC address so it can be restored later */
2006 memcpy(&(slave_details->persisted_mac_addr), slave_eth_dev->data->mac_addrs,
2007 sizeof(struct ether_addr));
2008}
2009
2010void
2011bond_ethdev_primary_set(struct bond_dev_private *internals,
2012 uint16_t slave_port_id)
2013{
2014 int i;
2015
2016 if (internals->active_slave_count < 1)
2017 internals->current_primary_port = slave_port_id;
2018 else
2019 /* Search bonded device slave ports for new proposed primary port */
2020 for (i = 0; i < internals->active_slave_count; i++) {
2021 if (internals->active_slaves[i] == slave_port_id)
2022 internals->current_primary_port = slave_port_id;
2023 }
2024}
2025
2026static void
2027bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev);
2028
2029static int
2030bond_ethdev_start(struct rte_eth_dev *eth_dev)
2031{
2032 struct bond_dev_private *internals;
2033 int i;
2034
2035 /* slave eth dev will be started by bonded device */
2036 if (check_for_bonded_ethdev(eth_dev)) {
2037 RTE_BOND_LOG(ERR, "User tried to explicitly start a slave eth_dev (%d)",
2038 eth_dev->data->port_id);
2039 return -1;
2040 }
2041
2042 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2043 eth_dev->data->dev_started = 1;
2044
2045 internals = eth_dev->data->dev_private;
2046
2047 if (internals->slave_count == 0) {
2048 RTE_BOND_LOG(ERR, "Cannot start port since there are no slave devices");
2049 goto out_err;
2050 }
2051
2052 if (internals->user_defined_mac == 0) {
2053 struct ether_addr *new_mac_addr = NULL;
2054
2055 for (i = 0; i < internals->slave_count; i++)
2056 if (internals->slaves[i].port_id == internals->primary_port)
2057 new_mac_addr = &internals->slaves[i].persisted_mac_addr;
2058
2059 if (new_mac_addr == NULL)
2060 goto out_err;
2061
2062 if (mac_address_set(eth_dev, new_mac_addr) != 0) {
2063 RTE_BOND_LOG(ERR, "bonded port (%d) failed to update MAC address",
2064 eth_dev->data->port_id);
2065 goto out_err;
2066 }
2067 }
2068
2069 /* If bonded device is configured in promiscuous mode then re-apply config */
2070 if (internals->promiscuous_en)
2071 bond_ethdev_promiscuous_enable(eth_dev);
2072
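/* The dedicated LACP slow queues use the queue index right after the
 * data-path queues configured on the bonded device. */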
2073 if (internals->mode == BONDING_MODE_8023AD) {
2074 if (internals->mode4.dedicated_queues.enabled == 1) {
2075 internals->mode4.dedicated_queues.rx_qid =
2076 eth_dev->data->nb_rx_queues;
2077 internals->mode4.dedicated_queues.tx_qid =
2078 eth_dev->data->nb_tx_queues;
2079 }
2080 }
2081
2082
2083 /* Reconfigure each slave device if starting bonded device */
2084 for (i = 0; i < internals->slave_count; i++) {
2085 struct rte_eth_dev *slave_ethdev =
2086 &(rte_eth_devices[internals->slaves[i].port_id]);
2087 if (slave_configure(eth_dev, slave_ethdev) != 0) {
2088 RTE_BOND_LOG(ERR,
2089 "bonded port (%d) failed to reconfigure slave device (%d)",
2090 eth_dev->data->port_id,
2091 internals->slaves[i].port_id);
2092 goto out_err;
2093 }
2094 /* We will need to poll for link status if any slave doesn't
2095 * support interrupts
2096 */
2097 if (internals->slaves[i].link_status_poll_enabled)
2098 internals->link_status_polling_enabled = 1;
2099 }
2100
2101 /* start polling if needed */
2102 if (internals->link_status_polling_enabled) {
2103 rte_eal_alarm_set(
2104 internals->link_status_polling_interval_ms * 1000,
2105 bond_ethdev_slave_link_status_change_monitor,
2106 (void *)&rte_eth_devices[internals->port_id]);
2107 }
2108
2109 /* Update all slave devices' MAC addresses */
2110 if (mac_address_slaves_update(eth_dev) != 0)
2111 goto out_err;
2112
2113 if (internals->user_defined_primary_port)
2114 bond_ethdev_primary_set(internals, internals->primary_port);
2115
2116 if (internals->mode == BONDING_MODE_8023AD)
2117 bond_mode_8023ad_start(eth_dev);
2118
2119 if (internals->mode == BONDING_MODE_TLB ||
2120 internals->mode == BONDING_MODE_ALB)
2121 bond_tlb_enable(internals);
2122
2123 return 0;
2124
2125out_err:
2126 eth_dev->data->dev_started = 0;
2127 return -1;
2128}
2129
2130static void
2131bond_ethdev_free_queues(struct rte_eth_dev *dev)
2132{
2133 uint16_t i;
2134
2135 if (dev->data->rx_queues != NULL) {
2136 for (i = 0; i < dev->data->nb_rx_queues; i++) {
2137 rte_free(dev->data->rx_queues[i]);
2138 dev->data->rx_queues[i] = NULL;
2139 }
2140 dev->data->nb_rx_queues = 0;
2141 }
2142
2143 if (dev->data->tx_queues != NULL) {
2144 for (i = 0; i < dev->data->nb_tx_queues; i++) {
2145 rte_free(dev->data->tx_queues[i]);
2146 dev->data->tx_queues[i] = NULL;
2147 }
2148 dev->data->nb_tx_queues = 0;
2149 }
2150}
2151
2152void
2153bond_ethdev_stop(struct rte_eth_dev *eth_dev)
2154{
2155 struct bond_dev_private *internals = eth_dev->data->dev_private;
2156 uint16_t i;
2157
2158 if (internals->mode == BONDING_MODE_8023AD) {
2159 struct port *port;
2160 void *pkt = NULL;
2161
2162 bond_mode_8023ad_stop(eth_dev);
2163
2164 /* Discard all messages to/from mode 4 state machines */
2165 for (i = 0; i < internals->active_slave_count; i++) {
2166 port = &bond_mode_8023ad_ports[internals->active_slaves[i]];
2167
2168 RTE_ASSERT(port->rx_ring != NULL);
2169 while (rte_ring_dequeue(port->rx_ring, &pkt) != -ENOENT)
2170 rte_pktmbuf_free(pkt);
2171
2172 RTE_ASSERT(port->tx_ring != NULL);
2173 while (rte_ring_dequeue(port->tx_ring, &pkt) != -ENOENT)
2174 rte_pktmbuf_free(pkt);
2175 }
2176 }
2177
2178 if (internals->mode == BONDING_MODE_TLB ||
2179 internals->mode == BONDING_MODE_ALB) {
2180 bond_tlb_disable(internals);
2181 for (i = 0; i < internals->active_slave_count; i++)
2182 tlb_last_obytets[internals->active_slaves[i]] = 0;
2183 }
2184
2185 eth_dev->data->dev_link.link_status = ETH_LINK_DOWN;
2186 eth_dev->data->dev_started = 0;
2187
2188 internals->link_status_polling_enabled = 0;
2189 for (i = 0; i < internals->slave_count; i++) {
2190 uint16_t slave_id = internals->slaves[i].port_id;
2191 if (find_slave_by_id(internals->active_slaves,
2192 internals->active_slave_count, slave_id) !=
2193 internals->active_slave_count) {
2194 internals->slaves[i].last_link_status = 0;
2195 rte_eth_dev_stop(slave_id);
2196 deactivate_slave(eth_dev, slave_id);
2197 }
2198 }
2199}
2200
2201void
2202bond_ethdev_close(struct rte_eth_dev *dev)
2203{
2204 struct bond_dev_private *internals = dev->data->dev_private;
2205 uint16_t bond_port_id = internals->port_id;
2206 int skipped = 0;
2207 struct rte_flow_error ferror;
2208
2209 RTE_BOND_LOG(INFO, "Closing bonded device %s", dev->device->name);
2210 while (internals->slave_count != skipped) {
2211 uint16_t port_id = internals->slaves[skipped].port_id;
2212
2213 rte_eth_dev_stop(port_id);
2214
2215 if (rte_eth_bond_slave_remove(bond_port_id, port_id) != 0) {
2216 RTE_BOND_LOG(ERR,
2217 "Failed to remove port %d from bonded device %s",
2218 port_id, dev->device->name);
2219 skipped++;
2220 }
2221 }
2222 bond_flow_ops.flush(dev, &ferror);
2223 bond_ethdev_free_queues(dev);
2224 rte_bitmap_reset(internals->vlan_filter_bmp);
2225}
2226
2227/* forward declaration */
2228static int bond_ethdev_configure(struct rte_eth_dev *dev);
2229
2230static void
2231bond_ethdev_info(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
2232{
2233 struct bond_dev_private *internals = dev->data->dev_private;
2234
2235 uint16_t max_nb_rx_queues = UINT16_MAX;
2236 uint16_t max_nb_tx_queues = UINT16_MAX;
2237 uint16_t max_rx_desc_lim = UINT16_MAX;
2238 uint16_t max_tx_desc_lim = UINT16_MAX;
2239
2240 dev_info->max_mac_addrs = BOND_MAX_MAC_ADDRS;
2241
2242 dev_info->max_rx_pktlen = internals->candidate_max_rx_pktlen ?
2243 internals->candidate_max_rx_pktlen :
2244 ETHER_MAX_JUMBO_FRAME_LEN;
2245
2246 /* Max number of tx/rx queues that the bonded device can support is the
2247 * minimum of the bonded slaves' values, as all slaves must be capable
2248 * of supporting the same number of tx/rx queues.
2249 */
2250 if (internals->slave_count > 0) {
2251 struct rte_eth_dev_info slave_info;
2252 uint16_t idx;
2253
2254 for (idx = 0; idx < internals->slave_count; idx++) {
2255 rte_eth_dev_info_get(internals->slaves[idx].port_id,
2256 &slave_info);
2257
2258 if (slave_info.max_rx_queues < max_nb_rx_queues)
2259 max_nb_rx_queues = slave_info.max_rx_queues;
2260
2261 if (slave_info.max_tx_queues < max_nb_tx_queues)
2262 max_nb_tx_queues = slave_info.max_tx_queues;
2263
2264 if (slave_info.rx_desc_lim.nb_max < max_rx_desc_lim)
2265 max_rx_desc_lim = slave_info.rx_desc_lim.nb_max;
2266
2267 if (slave_info.tx_desc_lim.nb_max < max_tx_desc_lim)
2268 max_tx_desc_lim = slave_info.tx_desc_lim.nb_max;
2269 }
2270 }
2271
2272 dev_info->max_rx_queues = max_nb_rx_queues;
2273 dev_info->max_tx_queues = max_nb_tx_queues;
2274
2275 memcpy(&dev_info->default_rxconf, &internals->default_rxconf,
2276 sizeof(dev_info->default_rxconf));
2277 memcpy(&dev_info->default_txconf, &internals->default_txconf,
2278 sizeof(dev_info->default_txconf));
2279
2280 dev_info->rx_desc_lim.nb_max = max_rx_desc_lim;
2281 dev_info->tx_desc_lim.nb_max = max_tx_desc_lim;
2282
2283 /**
2284 * If dedicated hw queues enabled for link bonding device in LACP mode
2285 * then we need to reduce the maximum number of data path queues by 1.
2286 */
2287 if (internals->mode == BONDING_MODE_8023AD &&
2288 internals->mode4.dedicated_queues.enabled == 1) {
2289 dev_info->max_rx_queues--;
2290 dev_info->max_tx_queues--;
2291 }
2292
2293 dev_info->min_rx_bufsize = 0;
2294
2295 dev_info->rx_offload_capa = internals->rx_offload_capa;
2296 dev_info->tx_offload_capa = internals->tx_offload_capa;
2297 dev_info->rx_queue_offload_capa = internals->rx_queue_offload_capa;
2298 dev_info->tx_queue_offload_capa = internals->tx_queue_offload_capa;
2299 dev_info->flow_type_rss_offloads = internals->flow_type_rss_offloads;
2300
2301 dev_info->reta_size = internals->reta_size;
2302}
2303
2304static int
2305bond_ethdev_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on)
2306{
2307 int res;
2308 uint16_t i;
2309 struct bond_dev_private *internals = dev->data->dev_private;
2310
2311 /* don't do this while a slave is being added */
2312 rte_spinlock_lock(&internals->lock);
2313
2314 if (on)
2315 rte_bitmap_set(internals->vlan_filter_bmp, vlan_id);
2316 else
2317 rte_bitmap_clear(internals->vlan_filter_bmp, vlan_id);
2318
2319 for (i = 0; i < internals->slave_count; i++) {
2320 uint16_t port_id = internals->slaves[i].port_id;
2321
2322 res = rte_eth_dev_vlan_filter(port_id, vlan_id, on);
2323 if (res == ENOTSUP)
2324 RTE_BOND_LOG(WARNING,
2325 "Setting VLAN filter on slave port %u not supported.",
2326 port_id);
2327 }
2328
2329 rte_spinlock_unlock(&internals->lock);
2330 return 0;
2331}
2332
2333static int
2334bond_ethdev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id,
2335 uint16_t nb_rx_desc, unsigned int socket_id __rte_unused,
2336 const struct rte_eth_rxconf *rx_conf, struct rte_mempool *mb_pool)
2337{
2338 struct bond_rx_queue *bd_rx_q = (struct bond_rx_queue *)
2339 rte_zmalloc_socket(NULL, sizeof(struct bond_rx_queue),
2340 0, dev->data->numa_node);
2341 if (bd_rx_q == NULL)
2342 return -1;
2343
2344 bd_rx_q->queue_id = rx_queue_id;
2345 bd_rx_q->dev_private = dev->data->dev_private;
2346
2347 bd_rx_q->nb_rx_desc = nb_rx_desc;
2348
2349 memcpy(&(bd_rx_q->rx_conf), rx_conf, sizeof(struct rte_eth_rxconf));
2350 bd_rx_q->mb_pool = mb_pool;
2351
2352 dev->data->rx_queues[rx_queue_id] = bd_rx_q;
2353
2354 return 0;
2355}
2356
2357static int
2358bond_ethdev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id,
2359 uint16_t nb_tx_desc, unsigned int socket_id __rte_unused,
2360 const struct rte_eth_txconf *tx_conf)
2361{
2362 struct bond_tx_queue *bd_tx_q = (struct bond_tx_queue *)
2363 rte_zmalloc_socket(NULL, sizeof(struct bond_tx_queue),
2364 0, dev->data->numa_node);
2365
2366 if (bd_tx_q == NULL)
2367 return -1;
2368
2369 bd_tx_q->queue_id = tx_queue_id;
2370 bd_tx_q->dev_private = dev->data->dev_private;
2371
2372 bd_tx_q->nb_tx_desc = nb_tx_desc;
2373 memcpy(&(bd_tx_q->tx_conf), tx_conf, sizeof(bd_tx_q->tx_conf));
2374
2375 dev->data->tx_queues[tx_queue_id] = bd_tx_q;
2376
2377 return 0;
2378}
2379
2380static void
2381bond_ethdev_rx_queue_release(void *queue)
2382{
2383 if (queue == NULL)
2384 return;
2385
2386 rte_free(queue);
2387}
2388
2389static void
2390bond_ethdev_tx_queue_release(void *queue)
2391{
2392 if (queue == NULL)
2393 return;
2394
2395 rte_free(queue);
2396}
2397
2398static void
2399bond_ethdev_slave_link_status_change_monitor(void *cb_arg)
2400{
2401 struct rte_eth_dev *bonded_ethdev, *slave_ethdev;
2402 struct bond_dev_private *internals;
2403
2404 /* Default value for polling slave found is true as we don't want to
2405 * disable the polling thread if we cannot get the lock */
2406 int i, polling_slave_found = 1;
2407
2408 if (cb_arg == NULL)
2409 return;
2410
2411 bonded_ethdev = (struct rte_eth_dev *)cb_arg;
2412 internals = (struct bond_dev_private *)bonded_ethdev->data->dev_private;
2413
2414 if (!bonded_ethdev->data->dev_started ||
2415 !internals->link_status_polling_enabled)
2416 return;
2417
2418 /* If device is currently being configured then don't check slaves link
2419 * status, wait until next period */
2420 if (rte_spinlock_trylock(&internals->lock)) {
2421 if (internals->slave_count > 0)
2422 polling_slave_found = 0;
2423
2424 for (i = 0; i < internals->slave_count; i++) {
2425 if (!internals->slaves[i].link_status_poll_enabled)
2426 continue;
2427
2428 slave_ethdev = &rte_eth_devices[internals->slaves[i].port_id];
2429 polling_slave_found = 1;
2430
2431 /* Update slave link status */
2432 (*slave_ethdev->dev_ops->link_update)(slave_ethdev,
2433 internals->slaves[i].link_status_wait_to_complete);
2434
2435 /* if link status has changed since last checked then call lsc
2436 * event callback */
2437 if (slave_ethdev->data->dev_link.link_status !=
2438 internals->slaves[i].last_link_status) {
2439 internals->slaves[i].last_link_status =
2440 slave_ethdev->data->dev_link.link_status;
2441
2442 bond_ethdev_lsc_event_callback(internals->slaves[i].port_id,
2443 RTE_ETH_EVENT_INTR_LSC,
2444 &bonded_ethdev->data->port_id,
2445 NULL);
2446 }
2447 }
2448 rte_spinlock_unlock(&internals->lock);
2449 }
2450
2451 if (polling_slave_found)
2452 /* Set alarm to continue monitoring link status of slave ethdev's */
2453 rte_eal_alarm_set(internals->link_status_polling_interval_ms * 1000,
2454 bond_ethdev_slave_link_status_change_monitor, cb_arg);
2455}
2456
2457static int
2458bond_ethdev_link_update(struct rte_eth_dev *ethdev, int wait_to_complete)
2459{
2460 void (*link_update)(uint16_t port_id, struct rte_eth_link *eth_link);
2461
2462 struct bond_dev_private *bond_ctx;
2463 struct rte_eth_link slave_link;
2464
2465 uint32_t idx;
2466
2467 bond_ctx = ethdev->data->dev_private;
2468
2469 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2470
2471 if (ethdev->data->dev_started == 0 ||
2472 bond_ctx->active_slave_count == 0) {
2473 ethdev->data->dev_link.link_status = ETH_LINK_DOWN;
2474 return 0;
2475 }
2476
2477 ethdev->data->dev_link.link_status = ETH_LINK_UP;
2478
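/* Pick the blocking or non-blocking slave link query depending on
 * whether the caller asked to wait for link-state completion. */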
2479 if (wait_to_complete)
2480 link_update = rte_eth_link_get;
2481 else
2482 link_update = rte_eth_link_get_nowait;
2483
2484 switch (bond_ctx->mode) {
2485 case BONDING_MODE_BROADCAST:
2486 /**
2487 * Setting link speed to UINT32_MAX to ensure we pick up the
2488 * value of the first active slave
2489 */
2490 ethdev->data->dev_link.link_speed = UINT32_MAX;
2491
2492 /**
2493 * link speed is the minimum of all the slaves' link speeds, as
2494 * packet loss will occur on a slave if transmission at rates
2495 * greater than its own is attempted
2496 */
2497 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2498 link_update(bond_ctx->active_slaves[idx], &slave_link);
2499
2500 if (slave_link.link_speed <
2501 ethdev->data->dev_link.link_speed)
2502 ethdev->data->dev_link.link_speed =
2503 slave_link.link_speed;
2504 }
2505 break;
2506 case BONDING_MODE_ACTIVE_BACKUP:
2507 /* Current primary slave */
2508 link_update(bond_ctx->current_primary_port, &slave_link);
2509
2510 ethdev->data->dev_link.link_speed = slave_link.link_speed;
2511 break;
2512 case BONDING_MODE_8023AD:
2513 ethdev->data->dev_link.link_autoneg =
2514 bond_ctx->mode4.slave_link.link_autoneg;
2515 ethdev->data->dev_link.link_duplex =
2516 bond_ctx->mode4.slave_link.link_duplex;
2517 /* fall through to update link speed */
2518 case BONDING_MODE_ROUND_ROBIN:
2519 case BONDING_MODE_BALANCE:
2520 case BONDING_MODE_TLB:
2521 case BONDING_MODE_ALB:
2522 default:
2523 /**
2524 * In these modes the maximum theoretical link speed is the sum
2525 * of all the slaves' link speeds
2526 */
2527 ethdev->data->dev_link.link_speed = ETH_SPEED_NUM_NONE;
2528
2529 for (idx = 0; idx < bond_ctx->active_slave_count; idx++) {
2530 link_update(bond_ctx->active_slaves[idx], &slave_link);
2531
2532 ethdev->data->dev_link.link_speed +=
2533 slave_link.link_speed;
2534 }
2535 }
2536
2537
2538 return 0;
2539}
2540
2541
2542static int
2543bond_ethdev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats)
2544{
2545 struct bond_dev_private *internals = dev->data->dev_private;
2546 struct rte_eth_stats slave_stats;
2547 int i, j;
2548
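/* Sum both the basic and the per-queue counters of every slave into
 * the bonded device's statistics. */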
2549 for (i = 0; i < internals->slave_count; i++) {
2550 rte_eth_stats_get(internals->slaves[i].port_id, &slave_stats);
2551
2552 stats->ipackets += slave_stats.ipackets;
2553 stats->opackets += slave_stats.opackets;
2554 stats->ibytes += slave_stats.ibytes;
2555 stats->obytes += slave_stats.obytes;
2556 stats->imissed += slave_stats.imissed;
2557 stats->ierrors += slave_stats.ierrors;
2558 stats->oerrors += slave_stats.oerrors;
2559 stats->rx_nombuf += slave_stats.rx_nombuf;
2560
2561 for (j = 0; j < RTE_ETHDEV_QUEUE_STAT_CNTRS; j++) {
2562 stats->q_ipackets[j] += slave_stats.q_ipackets[j];
2563 stats->q_opackets[j] += slave_stats.q_opackets[j];
2564 stats->q_ibytes[j] += slave_stats.q_ibytes[j];
2565 stats->q_obytes[j] += slave_stats.q_obytes[j];
2566 stats->q_errors[j] += slave_stats.q_errors[j];
2567 }
2568
2569 }
2570
2571 return 0;
2572}
2573
2574static void
2575bond_ethdev_stats_reset(struct rte_eth_dev *dev)
2576{
2577 struct bond_dev_private *internals = dev->data->dev_private;
2578 int i;
2579
2580 for (i = 0; i < internals->slave_count; i++)
2581 rte_eth_stats_reset(internals->slaves[i].port_id);
2582}
2583
2584static void
2585bond_ethdev_promiscuous_enable(struct rte_eth_dev *eth_dev)
2586{
2587 struct bond_dev_private *internals = eth_dev->data->dev_private;
2588 int i;
2589
2590 internals->promiscuous_en = 1;
2591
2592 switch (internals->mode) {
2593 /* Promiscuous mode is propagated to all slaves */
2594 case BONDING_MODE_ROUND_ROBIN:
2595 case BONDING_MODE_BALANCE:
2596 case BONDING_MODE_BROADCAST:
2597 for (i = 0; i < internals->slave_count; i++)
2598 rte_eth_promiscuous_enable(internals->slaves[i].port_id);
2599 break;
2600 /* In mode4 promiscuous mode is managed when slave is added/removed */
2601 case BONDING_MODE_8023AD:
2602 break;
2603 /* Promiscuous mode is propagated only to primary slave */
2604 case BONDING_MODE_ACTIVE_BACKUP:
2605 case BONDING_MODE_TLB:
2606 case BONDING_MODE_ALB:
2607 default:
2608 /* Do not touch promisc when there cannot be primary ports */
2609 if (internals->slave_count == 0)
2610 break;
2611 rte_eth_promiscuous_enable(internals->current_primary_port);
2612 }
2613}
2614
2615static void
2616bond_ethdev_promiscuous_disable(struct rte_eth_dev *dev)
2617{
2618 struct bond_dev_private *internals = dev->data->dev_private;
2619 int i;
2620
2621 internals->promiscuous_en = 0;
2622
2623 switch (internals->mode) {
2624 /* Promiscuous mode is propagated to all slaves */
2625 case BONDING_MODE_ROUND_ROBIN:
2626 case BONDING_MODE_BALANCE:
2627 case BONDING_MODE_BROADCAST:
2628 for (i = 0; i < internals->slave_count; i++)
2629 rte_eth_promiscuous_disable(internals->slaves[i].port_id);
2630 break;
2631 /* In mode4 promiscuous mode is managed when slave is added/removed */
2632 case BONDING_MODE_8023AD:
2633 break;
2634 /* Promiscuous mode is propagated only to primary slave */
2635 case BONDING_MODE_ACTIVE_BACKUP:
2636 case BONDING_MODE_TLB:
2637 case BONDING_MODE_ALB:
2638 default:
2639 /* Do not touch promisc when there cannot be primary ports */
2640 if (internals->slave_count == 0)
2641 break;
2642 rte_eth_promiscuous_disable(internals->current_primary_port);
2643 }
2644}
2645
2646static void
2647bond_ethdev_delayed_lsc_propagation(void *arg)
2648{
2649 if (arg == NULL)
2650 return;
2651
2652 _rte_eth_dev_callback_process((struct rte_eth_dev *)arg,
2653 RTE_ETH_EVENT_INTR_LSC, NULL);
2654}
2655
2656int
2657bond_ethdev_lsc_event_callback(uint16_t port_id, enum rte_eth_event_type type,
2658 void *param, void *ret_param __rte_unused)
7c673cae 2659{
2660 struct rte_eth_dev *bonded_eth_dev;
2661 struct bond_dev_private *internals;
2662 struct rte_eth_link link;
2663 int rc = -1;
2664
2665 uint8_t lsc_flag = 0;
2666 int valid_slave = 0;
2667 uint16_t active_pos;
2668 uint16_t i;
2669
2670 if (type != RTE_ETH_EVENT_INTR_LSC || param == NULL)
2671 return rc;
2672
2673 bonded_eth_dev = &rte_eth_devices[*(uint16_t *)param];
2674
2675 if (check_for_bonded_ethdev(bonded_eth_dev))
2676 return rc;
2677
2678 internals = bonded_eth_dev->data->dev_private;
2679
2680 /* If the device isn't started don't handle interrupts */
2681 if (!bonded_eth_dev->data->dev_started)
2682 return rc;
2683
2684 /* verify that port_id is a valid slave of bonded port */
2685 for (i = 0; i < internals->slave_count; i++) {
2686 if (internals->slaves[i].port_id == port_id) {
2687 valid_slave = 1;
2688 break;
2689 }
2690 }
2691
2692 if (!valid_slave)
2693 return rc;
2694
2695 /* Synchronize lsc callback parallel calls either by real link event
2696 * from the slave PMDs or by the bonding PMD itself.
2697 */
2698 rte_spinlock_lock(&internals->lsc_lock);
2699
2700 /* Search for port in active port list */
2701 active_pos = find_slave_by_id(internals->active_slaves,
2702 internals->active_slave_count, port_id);
2703
2704 rte_eth_link_get_nowait(port_id, &link);
2705 if (link.link_status) {
2706 if (active_pos < internals->active_slave_count)
2707 goto link_update;
2708
2709 /* check link state properties if bonded link is up*/
2710 if (bonded_eth_dev->data->dev_link.link_status == ETH_LINK_UP) {
2711 if (link_properties_valid(bonded_eth_dev, &link) != 0)
2712 RTE_BOND_LOG(ERR, "Invalid link properties "
2713 "for slave %d in bonding mode %d",
2714 port_id, internals->mode);
2715 } else {
2716 /* inherit slave link properties */
2717 link_properties_set(bonded_eth_dev, &link);
2718 }
2719
2720 /* If no active slave ports then set this port to be
2721 * the primary port.
2722 */
2723 if (internals->active_slave_count < 1) {
2724 /* If first active slave, then change link status */
2725 bonded_eth_dev->data->dev_link.link_status =
2726 ETH_LINK_UP;
2727 internals->current_primary_port = port_id;
2728 lsc_flag = 1;
2729
2730 mac_address_slaves_update(bonded_eth_dev);
2731 }
2732
2733 activate_slave(bonded_eth_dev, port_id);
2734
2735 /* If the user has defined the primary port then default to
2736 * using it.
2737 */
2738 if (internals->user_defined_primary_port &&
2739 internals->primary_port == port_id)
2740 bond_ethdev_primary_set(internals, port_id);
2741 } else {
2742 if (active_pos == internals->active_slave_count)
2743 goto link_update;
2744
2745 /* Remove from active slave list */
2746 deactivate_slave(bonded_eth_dev, port_id);
2747
2748 if (internals->active_slave_count < 1)
2749 lsc_flag = 1;
2750
2751 /* Update primary id, take first active slave from list or if none
2752 * available fall back to the configured primary port */
2753 if (port_id == internals->current_primary_port) {
2754 if (internals->active_slave_count > 0)
2755 bond_ethdev_primary_set(internals,
2756 internals->active_slaves[0]);
2757 else
2758 internals->current_primary_port = internals->primary_port;
2759 }
2760 }
2761
2762link_update:
2763 /**
2764 * Update bonded device link properties after any change to active
2765 * slaves
2766 */
2767 bond_ethdev_link_update(bonded_eth_dev, 0);
2768
2769 if (lsc_flag) {
2770 /* Cancel any possible outstanding interrupts if delays are enabled */
2771 if (internals->link_up_delay_ms > 0 ||
2772 internals->link_down_delay_ms > 0)
2773 rte_eal_alarm_cancel(bond_ethdev_delayed_lsc_propagation,
2774 bonded_eth_dev);
2775
2776 if (bonded_eth_dev->data->dev_link.link_status) {
2777 if (internals->link_up_delay_ms > 0)
2778 rte_eal_alarm_set(internals->link_up_delay_ms * 1000,
2779 bond_ethdev_delayed_lsc_propagation,
2780 (void *)bonded_eth_dev);
2781 else
2782 _rte_eth_dev_callback_process(bonded_eth_dev,
2783 RTE_ETH_EVENT_INTR_LSC,
2784 NULL);
2785
2786 } else {
2787 if (internals->link_down_delay_ms > 0)
2788 rte_eal_alarm_set(internals->link_down_delay_ms * 1000,
2789 bond_ethdev_delayed_lsc_propagation,
2790 (void *)bonded_eth_dev);
2791 else
2792 _rte_eth_dev_callback_process(bonded_eth_dev,
2793 RTE_ETH_EVENT_INTR_LSC,
2794 NULL);
2795 }
2796 }
2797
2798 rte_spinlock_unlock(&internals->lsc_lock);
2799
2800 return rc;
2801}
2802
2803static int
2804bond_ethdev_rss_reta_update(struct rte_eth_dev *dev,
2805 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2806{
2807 unsigned i, j;
2808 int result = 0;
2809 int slave_reta_size;
2810 unsigned reta_count;
2811 struct bond_dev_private *internals = dev->data->dev_private;
2812
2813 if (reta_size != internals->reta_size)
2814 return -EINVAL;
2815
2816 /* Copy RETA table */
2817 reta_count = reta_size / RTE_RETA_GROUP_SIZE;
2818
2819 for (i = 0; i < reta_count; i++) {
2820 internals->reta_conf[i].mask = reta_conf[i].mask;
2821 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2822 if ((reta_conf[i].mask >> j) & 0x01)
2823 internals->reta_conf[i].reta[j] = reta_conf[i].reta[j];
2824 }
2825
2826 /* Fill rest of array */
2827 for (; i < RTE_DIM(internals->reta_conf); i += reta_count)
2828 memcpy(&internals->reta_conf[i], &internals->reta_conf[0],
2829 sizeof(internals->reta_conf[0]) * reta_count);
2830
2831 /* Propagate RETA over slaves */
2832 for (i = 0; i < internals->slave_count; i++) {
2833 slave_reta_size = internals->slaves[i].reta_size;
2834 result = rte_eth_dev_rss_reta_update(internals->slaves[i].port_id,
2835 &internals->reta_conf[0], slave_reta_size);
2836 if (result < 0)
2837 return result;
2838 }
2839
2840 return 0;
2841}
2842
2843static int
2844bond_ethdev_rss_reta_query(struct rte_eth_dev *dev,
2845 struct rte_eth_rss_reta_entry64 *reta_conf, uint16_t reta_size)
2846{
2847 int i, j;
2848 struct bond_dev_private *internals = dev->data->dev_private;
2849
2850 if (reta_size != internals->reta_size)
2851 return -EINVAL;
2852
2853 /* Copy RETA table */
2854 for (i = 0; i < reta_size / RTE_RETA_GROUP_SIZE; i++)
2855 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
2856 if ((reta_conf[i].mask >> j) & 0x01)
2857 reta_conf[i].reta[j] = internals->reta_conf[i].reta[j];
2858
2859 return 0;
2860}
2861
2862static int
2863bond_ethdev_rss_hash_update(struct rte_eth_dev *dev,
2864 struct rte_eth_rss_conf *rss_conf)
2865{
2866 int i, result = 0;
2867 struct bond_dev_private *internals = dev->data->dev_private;
2868 struct rte_eth_rss_conf bond_rss_conf;
2869
2870 memcpy(&bond_rss_conf, rss_conf, sizeof(struct rte_eth_rss_conf));
2871
2872 bond_rss_conf.rss_hf &= internals->flow_type_rss_offloads;
2873
2874 if (bond_rss_conf.rss_hf != 0)
2875 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf = bond_rss_conf.rss_hf;
2876
2877 if (bond_rss_conf.rss_key && bond_rss_conf.rss_key_len <
2878 sizeof(internals->rss_key)) {
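/* Treat a zero key length as the 40-byte length of the default RSS
 * key used by this PMD. */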
2879 if (bond_rss_conf.rss_key_len == 0)
2880 bond_rss_conf.rss_key_len = 40;
2881 internals->rss_key_len = bond_rss_conf.rss_key_len;
2882 memcpy(internals->rss_key, bond_rss_conf.rss_key,
2883 internals->rss_key_len);
2884 }
2885
2886 for (i = 0; i < internals->slave_count; i++) {
2887 result = rte_eth_dev_rss_hash_update(internals->slaves[i].port_id,
2888 &bond_rss_conf);
2889 if (result < 0)
2890 return result;
2891 }
2892
2893 return 0;
2894}
2895
2896static int
2897bond_ethdev_rss_hash_conf_get(struct rte_eth_dev *dev,
2898 struct rte_eth_rss_conf *rss_conf)
2899{
2900 struct bond_dev_private *internals = dev->data->dev_private;
2901
2902 rss_conf->rss_hf = dev->data->dev_conf.rx_adv_conf.rss_conf.rss_hf;
2903 rss_conf->rss_key_len = internals->rss_key_len;
2904 if (rss_conf->rss_key)
2905 memcpy(rss_conf->rss_key, internals->rss_key, internals->rss_key_len);
2906
2907 return 0;
2908}
2909
2910static int
2911bond_ethdev_mtu_set(struct rte_eth_dev *dev, uint16_t mtu)
2912{
2913 struct rte_eth_dev *slave_eth_dev;
2914 struct bond_dev_private *internals = dev->data->dev_private;
2915 int ret, i;
2916
2917 rte_spinlock_lock(&internals->lock);
2918
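/* First make sure every slave supports mtu_set so the new MTU is
 * either applied to all slaves or to none of them. */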
2919 for (i = 0; i < internals->slave_count; i++) {
2920 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2921 if (*slave_eth_dev->dev_ops->mtu_set == NULL) {
2922 rte_spinlock_unlock(&internals->lock);
2923 return -ENOTSUP;
2924 }
2925 }
2926 for (i = 0; i < internals->slave_count; i++) {
2927 ret = rte_eth_dev_set_mtu(internals->slaves[i].port_id, mtu);
2928 if (ret < 0) {
2929 rte_spinlock_unlock(&internals->lock);
2930 return ret;
2931 }
2932 }
2933
2934 rte_spinlock_unlock(&internals->lock);
2935 return 0;
2936}
2937
2938static int
2939bond_ethdev_mac_address_set(struct rte_eth_dev *dev, struct ether_addr *addr)
2940{
2941 if (mac_address_set(dev, addr)) {
2942 RTE_BOND_LOG(ERR, "Failed to update MAC address");
2943 return -EINVAL;
2944 }
2945
2946 return 0;
2947}
2948
2949static int
2950bond_filter_ctrl(struct rte_eth_dev *dev __rte_unused,
2951 enum rte_filter_type type, enum rte_filter_op op, void *arg)
2952{
2953 if (type == RTE_ETH_FILTER_GENERIC && op == RTE_ETH_FILTER_GET) {
2954 *(const void **)arg = &bond_flow_ops;
2955 return 0;
2956 }
2957 return -ENOTSUP;
2958}
2959
2960static int
2961bond_ethdev_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr,
2962 __rte_unused uint32_t index, uint32_t vmdq)
2963{
2964 struct rte_eth_dev *slave_eth_dev;
2965 struct bond_dev_private *internals = dev->data->dev_private;
2966 int ret, i;
2967
2968 rte_spinlock_lock(&internals->lock);
2969
2970 for (i = 0; i < internals->slave_count; i++) {
2971 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
2972 if (*slave_eth_dev->dev_ops->mac_addr_add == NULL ||
2973 *slave_eth_dev->dev_ops->mac_addr_remove == NULL) {
2974 ret = -ENOTSUP;
2975 goto end;
2976 }
2977 }
2978
2979 for (i = 0; i < internals->slave_count; i++) {
2980 ret = rte_eth_dev_mac_addr_add(internals->slaves[i].port_id,
2981 mac_addr, vmdq);
2982 if (ret < 0) {
2983 /* rollback */
2984 for (i--; i >= 0; i--)
2985 rte_eth_dev_mac_addr_remove(
2986 internals->slaves[i].port_id, mac_addr);
2987 goto end;
2988 }
2989 }
2990
2991 ret = 0;
2992end:
2993 rte_spinlock_unlock(&internals->lock);
2994 return ret;
2995}
2996
2997static void
2998bond_ethdev_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index)
2999{
3000 struct rte_eth_dev *slave_eth_dev;
3001 struct bond_dev_private *internals = dev->data->dev_private;
3002 int i;
3003
3004 rte_spinlock_lock(&internals->lock);
3005
3006 for (i = 0; i < internals->slave_count; i++) {
3007 slave_eth_dev = &rte_eth_devices[internals->slaves[i].port_id];
3008 if (*slave_eth_dev->dev_ops->mac_addr_remove == NULL)
3009 goto end;
3010 }
3011
3012 struct ether_addr *mac_addr = &dev->data->mac_addrs[index];
3013
3014 for (i = 0; i < internals->slave_count; i++)
3015 rte_eth_dev_mac_addr_remove(internals->slaves[i].port_id,
3016 mac_addr);
3017
3018end:
3019 rte_spinlock_unlock(&internals->lock);
3020}
3021
3022const struct eth_dev_ops default_dev_ops = {
3023 .dev_start = bond_ethdev_start,
3024 .dev_stop = bond_ethdev_stop,
3025 .dev_close = bond_ethdev_close,
3026 .dev_configure = bond_ethdev_configure,
3027 .dev_infos_get = bond_ethdev_info,
3028 .vlan_filter_set = bond_ethdev_vlan_filter_set,
3029 .rx_queue_setup = bond_ethdev_rx_queue_setup,
3030 .tx_queue_setup = bond_ethdev_tx_queue_setup,
3031 .rx_queue_release = bond_ethdev_rx_queue_release,
3032 .tx_queue_release = bond_ethdev_tx_queue_release,
3033 .link_update = bond_ethdev_link_update,
3034 .stats_get = bond_ethdev_stats_get,
3035 .stats_reset = bond_ethdev_stats_reset,
3036 .promiscuous_enable = bond_ethdev_promiscuous_enable,
3037 .promiscuous_disable = bond_ethdev_promiscuous_disable,
3038 .reta_update = bond_ethdev_rss_reta_update,
3039 .reta_query = bond_ethdev_rss_reta_query,
3040 .rss_hash_update = bond_ethdev_rss_hash_update,
3041 .rss_hash_conf_get = bond_ethdev_rss_hash_conf_get,
3042 .mtu_set = bond_ethdev_mtu_set,
3043 .mac_addr_set = bond_ethdev_mac_address_set,
3044 .mac_addr_add = bond_ethdev_mac_addr_add,
3045 .mac_addr_remove = bond_ethdev_mac_addr_remove,
3046 .filter_ctrl = bond_filter_ctrl
3047};
3048
3049static int
3050bond_alloc(struct rte_vdev_device *dev, uint8_t mode)
3051{
3052 const char *name = rte_vdev_device_name(dev);
3053 uint8_t socket_id = dev->device.numa_node;
3054 struct bond_dev_private *internals = NULL;
3055 struct rte_eth_dev *eth_dev = NULL;
3056 uint32_t vlan_filter_bmp_size;
3057
3058 /* now do all data allocation - for eth_dev structure, dummy pci driver
3059 * and internal (private) data
3060 */
3061
3062 /* reserve an ethdev entry */
3063 eth_dev = rte_eth_vdev_allocate(dev, sizeof(*internals));
3064 if (eth_dev == NULL) {
3065 RTE_BOND_LOG(ERR, "Unable to allocate rte_eth_dev");
3066 goto err;
3067 }
3068
3069 internals = eth_dev->data->dev_private;
3070 eth_dev->data->nb_rx_queues = (uint16_t)1;
3071 eth_dev->data->nb_tx_queues = (uint16_t)1;
3072
3073 /* Allocate memory for storing MAC addresses */
3074 eth_dev->data->mac_addrs = rte_zmalloc_socket(name, ETHER_ADDR_LEN *
3075 BOND_MAX_MAC_ADDRS, 0, socket_id);
3076 if (eth_dev->data->mac_addrs == NULL) {
3077 RTE_BOND_LOG(ERR,
3078 "Failed to allocate %u bytes needed to store MAC addresses",
3079 ETHER_ADDR_LEN * BOND_MAX_MAC_ADDRS);
3080 goto err;
3081 }
3082
3083 eth_dev->dev_ops = &default_dev_ops;
3084 eth_dev->data->dev_flags = RTE_ETH_DEV_INTR_LSC;
3085
3086 rte_spinlock_init(&internals->lock);
3087 rte_spinlock_init(&internals->lsc_lock);
3088
3089 internals->port_id = eth_dev->data->port_id;
3090 internals->mode = BONDING_MODE_INVALID;
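/* No primary port selected yet: use an out-of-range port id as a sentinel. */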
3091 internals->current_primary_port = RTE_MAX_ETHPORTS + 1;
3092 internals->balance_xmit_policy = BALANCE_XMIT_POLICY_LAYER2;
3093 internals->burst_xmit_hash = burst_xmit_l2_hash;
3094 internals->user_defined_mac = 0;
3095
3096 internals->link_status_polling_enabled = 0;
3097
3098 internals->link_status_polling_interval_ms =
3099 DEFAULT_POLLING_INTERVAL_10_MS;
3100 internals->link_down_delay_ms = 0;
3101 internals->link_up_delay_ms = 0;
3102
3103 internals->slave_count = 0;
3104 internals->active_slave_count = 0;
3105 internals->rx_offload_capa = 0;
3106 internals->tx_offload_capa = 0;
3107 internals->rx_queue_offload_capa = 0;
3108 internals->tx_queue_offload_capa = 0;
3109 internals->candidate_max_rx_pktlen = 0;
3110 internals->max_rx_pktlen = 0;
3111
3112 /* Initially allow to choose any offload type */
3113 internals->flow_type_rss_offloads = ETH_RSS_PROTO_MASK;
3114
3115 memset(&internals->default_rxconf, 0,
3116 sizeof(internals->default_rxconf));
3117 memset(&internals->default_txconf, 0,
3118 sizeof(internals->default_txconf));
3119
3120 memset(&internals->rx_desc_lim, 0, sizeof(internals->rx_desc_lim));
3121 memset(&internals->tx_desc_lim, 0, sizeof(internals->tx_desc_lim));
3122
3123 memset(internals->active_slaves, 0, sizeof(internals->active_slaves));
3124 memset(internals->slaves, 0, sizeof(internals->slaves));
3125
3126 TAILQ_INIT(&internals->flow_list);
3127 internals->flow_isolated_valid = 0;
3128
3129 /* Set mode 4 default configuration */
3130 bond_mode_8023ad_setup(eth_dev, NULL);
3131 if (bond_ethdev_mode_set(eth_dev, mode)) {
3132 RTE_BOND_LOG(ERR, "Failed to set bonded device %d mode to %d",
3133 eth_dev->data->port_id, mode);
3134 goto err;
3135 }
3136
3137 vlan_filter_bmp_size =
3138 rte_bitmap_get_memory_footprint(ETHER_MAX_VLAN_ID + 1);
3139 internals->vlan_filter_bmpmem = rte_malloc(name, vlan_filter_bmp_size,
3140 RTE_CACHE_LINE_SIZE);
3141 if (internals->vlan_filter_bmpmem == NULL) {
3142 RTE_BOND_LOG(ERR,
3143 "Failed to allocate vlan bitmap for bonded device %u",
3144 eth_dev->data->port_id);
3145 goto err;
3146 }
3147
3148 internals->vlan_filter_bmp = rte_bitmap_init(ETHER_MAX_VLAN_ID + 1,
3149 internals->vlan_filter_bmpmem, vlan_filter_bmp_size);
3150 if (internals->vlan_filter_bmp == NULL) {
3151 RTE_BOND_LOG(ERR,
3152 "Failed to init vlan bitmap for bonded device %u",
3153 eth_dev->data->port_id);
3154 rte_free(internals->vlan_filter_bmpmem);
3155 goto err;
3156 }
3157
3158 return eth_dev->data->port_id;
3159
3160err:
3161 rte_free(internals);
3162 if (eth_dev != NULL)
3163 eth_dev->data->dev_private = NULL;
3164 rte_eth_dev_release_port(eth_dev);
3165 return -1;
3166}
3167
3168static int
3169bond_probe(struct rte_vdev_device *dev)
3170{
3171 const char *name;
3172 struct bond_dev_private *internals;
3173 struct rte_kvargs *kvlist;
3174 uint8_t bonding_mode, socket_id/*, agg_mode*/;
3175 int arg_count, port_id;
3176 uint8_t agg_mode;
3177 struct rte_eth_dev *eth_dev;
3178
3179 if (!dev)
3180 return -EINVAL;
3181
3182 name = rte_vdev_device_name(dev);
3183 RTE_BOND_LOG(INFO, "Initializing pmd_bond for %s", name);
3184
3185 if (rte_eal_process_type() == RTE_PROC_SECONDARY) {
3186 eth_dev = rte_eth_dev_attach_secondary(name);
3187 if (!eth_dev) {
3188 RTE_BOND_LOG(ERR, "Failed to probe %s", name);
3189 return -1;
3190 }
3191 /* TODO: request info from primary to set up Rx and Tx */
3192 eth_dev->dev_ops = &default_dev_ops;
3193 eth_dev->device = &dev->device;
3194 rte_eth_dev_probing_finish(eth_dev);
3195 return 0;
3196 }
3197
3198 kvlist = rte_kvargs_parse(rte_vdev_device_args(dev),
3199 pmd_bond_init_valid_arguments);
3200 if (kvlist == NULL)
3201 return -1;
3202
3203 /* Parse link bonding mode */
3204 if (rte_kvargs_count(kvlist, PMD_BOND_MODE_KVARG) == 1) {
3205 if (rte_kvargs_process(kvlist, PMD_BOND_MODE_KVARG,
3206 &bond_ethdev_parse_slave_mode_kvarg,
3207 &bonding_mode) != 0) {
3208 RTE_BOND_LOG(ERR, "Invalid mode for bonded device %s",
3209 name);
3210 goto parse_error;
3211 }
3212 } else {
3213 RTE_BOND_LOG(ERR, "Mode must be specified only once for bonded "
3214 "device %s", name);
3215 goto parse_error;
3216 }
3217
3218 /* Parse socket id to create bonding device on */
3219 arg_count = rte_kvargs_count(kvlist, PMD_BOND_SOCKET_ID_KVARG);
3220 if (arg_count == 1) {
3221 if (rte_kvargs_process(kvlist, PMD_BOND_SOCKET_ID_KVARG,
3222 &bond_ethdev_parse_socket_id_kvarg, &socket_id)
3223 != 0) {
3224 RTE_BOND_LOG(ERR, "Invalid socket Id specified for "
3225 "bonded device %s", name);
3226 goto parse_error;
3227 }
3228 } else if (arg_count > 1) {
3229 RTE_BOND_LOG(ERR, "Socket Id can be specified only once for "
3230 "bonded device %s", name);
3231 goto parse_error;
3232 } else {
3233 socket_id = rte_socket_id();
3234 }
3235
3236 dev->device.numa_node = socket_id;
3237
3238 /* Create link bonding eth device */
3239 port_id = bond_alloc(dev, bonding_mode);
3240 if (port_id < 0) {
3241 RTE_BOND_LOG(ERR, "Failed to create bonded device %s in mode %u on "
3242 "socket %u.", name, bonding_mode, socket_id);
3243 goto parse_error;
3244 }
3245 internals = rte_eth_devices[port_id].data->dev_private;
3246 internals->kvlist = kvlist;
3247
3248 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3249 if (rte_kvargs_process(kvlist,
3250 PMD_BOND_AGG_MODE_KVARG,
3251 &bond_ethdev_parse_slave_agg_mode_kvarg,
3252 &agg_mode) != 0) {
3253 RTE_BOND_LOG(ERR,
3254 "Failed to parse agg selection mode for bonded device %s",
3255 name);
3256 goto parse_error;
3257 }
3258
3259 if (internals->mode == BONDING_MODE_8023AD)
3260 internals->mode4.agg_selection = agg_mode;
3261 } else {
3262 internals->mode4.agg_selection = AGG_STABLE;
3263 }
3264
3265 rte_eth_dev_probing_finish(&rte_eth_devices[port_id]);
3266 RTE_BOND_LOG(INFO, "Create bonded device %s on port %d in mode %u on "
3267 "socket %u.", name, port_id, bonding_mode, socket_id);
3268 return 0;
3269
3270parse_error:
3271 rte_kvargs_free(kvlist);
3272
3273 return -1;
3274}
3275
3276static int
3277bond_remove(struct rte_vdev_device *dev)
3278{
3279 struct rte_eth_dev *eth_dev;
3280 struct bond_dev_private *internals;
3281 const char *name;
3282
3283 if (!dev)
3284 return -EINVAL;
3285
3286 name = rte_vdev_device_name(dev);
3287 RTE_BOND_LOG(INFO, "Uninitializing pmd_bond for %s", name);
3288
3289 /* now free all data allocation - for eth_dev structure,
3290 * dummy pci driver and internal (private) data
3291 */
3292
3293 /* find an ethdev entry */
3294 eth_dev = rte_eth_dev_allocated(name);
3295 if (eth_dev == NULL)
3296 return -ENODEV;
3297
3298 if (rte_eal_process_type() != RTE_PROC_PRIMARY)
3299 return rte_eth_dev_release_port(eth_dev);
3300
3301 RTE_ASSERT(eth_dev->device == &dev->device);
3302
3303 internals = eth_dev->data->dev_private;
3304 if (internals->slave_count != 0)
3305 return -EBUSY;
3306
3307 if (eth_dev->data->dev_started == 1) {
3308 bond_ethdev_stop(eth_dev);
3309 bond_ethdev_close(eth_dev);
3310 }
3311
3312 eth_dev->dev_ops = NULL;
3313 eth_dev->rx_pkt_burst = NULL;
3314 eth_dev->tx_pkt_burst = NULL;
3315
3316 internals = eth_dev->data->dev_private;
3317 /* Try to release mempool used in mode6. If the bond
3318 * device is not in mode 6, freeing a NULL pointer is not a problem.
3319 */
3320 rte_mempool_free(internals->mode6.mempool);
3321 rte_bitmap_free(internals->vlan_filter_bmp);
3322 rte_free(internals->vlan_filter_bmpmem);
3323
3324 rte_eth_dev_release_port(eth_dev);
3325
3326 return 0;
3327}
3328
3329/* this part will resolve the slave port ids after all the other pdevs and vdevs
3330 * have been allocated */
3331static int
3332bond_ethdev_configure(struct rte_eth_dev *dev)
3333{
3334 const char *name = dev->device->name;
3335 struct bond_dev_private *internals = dev->data->dev_private;
3336 struct rte_kvargs *kvlist = internals->kvlist;
3337 int arg_count;
3338 uint16_t port_id = dev - rte_eth_devices;
3339 uint8_t agg_mode;
3340
3341 static const uint8_t default_rss_key[40] = {
3342 0x6D, 0x5A, 0x56, 0xDA, 0x25, 0x5B, 0x0E, 0xC2, 0x41, 0x67, 0x25, 0x3D,
3343 0x43, 0xA3, 0x8F, 0xB0, 0xD0, 0xCA, 0x2B, 0xCB, 0xAE, 0x7B, 0x30, 0xB4,
3344 0x77, 0xCB, 0x2D, 0xA3, 0x80, 0x30, 0xF2, 0x0C, 0x6A, 0x42, 0xB7, 0x3B,
3345 0xBE, 0xAC, 0x01, 0xFA
3346 };
3347
3348 unsigned i, j;
3349
3350 /*
3351 * If RSS is enabled, fill table with default values and
3352 * set key to the value specified in port RSS configuration.
3353 * Fall back to default RSS key if the key is not specified
3354 */
3355 if (dev->data->dev_conf.rxmode.mq_mode & ETH_MQ_RX_RSS) {
3356 if (dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key != NULL) {
3357 internals->rss_key_len =
3358 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key_len;
3359 memcpy(internals->rss_key,
3360 dev->data->dev_conf.rx_adv_conf.rss_conf.rss_key,
3361 internals->rss_key_len);
3362 } else {
3363 internals->rss_key_len = sizeof(default_rss_key);
3364 memcpy(internals->rss_key, default_rss_key,
3365 internals->rss_key_len);
3366 }
3367
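/* Spread the default RETA entries round-robin over the configured Rx queues. */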
3368 for (i = 0; i < RTE_DIM(internals->reta_conf); i++) {
3369 internals->reta_conf[i].mask = ~0LL;
3370 for (j = 0; j < RTE_RETA_GROUP_SIZE; j++)
3371 internals->reta_conf[i].reta[j] =
3372 (i * RTE_RETA_GROUP_SIZE + j) %
3373 dev->data->nb_rx_queues;
3374 }
3375 }
3376
3377 /* set the max_rx_pktlen */
3378 internals->max_rx_pktlen = internals->candidate_max_rx_pktlen;
3379
3380 /*
3381 * if no kvlist, it means that this bonded device has been created
3382 * through the bonding api.
3383 */
3384 if (!kvlist)
3385 return 0;
3386
3387 /* Parse MAC address for bonded device */
3388 arg_count = rte_kvargs_count(kvlist, PMD_BOND_MAC_ADDR_KVARG);
3389 if (arg_count == 1) {
3390 struct ether_addr bond_mac;
3391
3392 if (rte_kvargs_process(kvlist, PMD_BOND_MAC_ADDR_KVARG,
3393 &bond_ethdev_parse_bond_mac_addr_kvarg, &bond_mac) < 0) {
3394 RTE_BOND_LOG(INFO, "Invalid mac address for bonded device %s",
3395 name);
3396 return -1;
3397 }
3398
3399 /* Set MAC address */
3400 if (rte_eth_bond_mac_address_set(port_id, &bond_mac) != 0) {
3401 RTE_BOND_LOG(ERR,
3402 "Failed to set mac address on bonded device %s",
3403 name);
3404 return -1;
3405 }
3406 } else if (arg_count > 1) {
3407 RTE_BOND_LOG(ERR,
3408 "MAC address can be specified only once for bonded device %s",
3409 name);
3410 return -1;
3411 }
3412
3413 /* Parse/set balance mode transmit policy */
3414 arg_count = rte_kvargs_count(kvlist, PMD_BOND_XMIT_POLICY_KVARG);
3415 if (arg_count == 1) {
3416 uint8_t xmit_policy;
3417
3418 if (rte_kvargs_process(kvlist, PMD_BOND_XMIT_POLICY_KVARG,
3419 &bond_ethdev_parse_balance_xmit_policy_kvarg, &xmit_policy) !=
3420 0) {
3421 RTE_BOND_LOG(INFO,
3422 "Invalid xmit policy specified for bonded device %s",
3423 name);
3424 return -1;
3425 }
3426
3427 /* Set balance mode transmit policy*/
3428 if (rte_eth_bond_xmit_policy_set(port_id, xmit_policy) != 0) {
3429 RTE_BOND_LOG(ERR,
3430 "Failed to set balance xmit policy on bonded device %s",
3431 name);
3432 return -1;
3433 }
3434 } else if (arg_count > 1) {
3435 RTE_BOND_LOG(ERR,
3436 "Transmit policy can be specified only once for bonded device %s",
3437 name);
3438 return -1;
3439 }
3440
3441 if (rte_kvargs_count(kvlist, PMD_BOND_AGG_MODE_KVARG) == 1) {
3442 if (rte_kvargs_process(kvlist,
3443 PMD_BOND_AGG_MODE_KVARG,
3444 &bond_ethdev_parse_slave_agg_mode_kvarg,
3445 &agg_mode) != 0) {
3446 RTE_BOND_LOG(ERR,
3447 "Failed to parse agg selection mode for bonded device %s",
3448 name);
3449 }
3450 if (internals->mode == BONDING_MODE_8023AD) {
3451 int ret = rte_eth_bond_8023ad_agg_selection_set(port_id,
3452 agg_mode);
3453 if (ret < 0) {
3454 RTE_BOND_LOG(ERR,
3455 "Invalid args for agg selection set for bonded device %s",
3456 name);
3457 return -1;
3458 }
3459 }
3460 }
3461
3462 /* Parse/add slave ports to bonded device */
3463 if (rte_kvargs_count(kvlist, PMD_BOND_SLAVE_PORT_KVARG) > 0) {
3464 struct bond_ethdev_slave_ports slave_ports;
3465 unsigned i;
3466
3467 memset(&slave_ports, 0, sizeof(slave_ports));
3468
3469 if (rte_kvargs_process(kvlist, PMD_BOND_SLAVE_PORT_KVARG,
3470 &bond_ethdev_parse_slave_port_kvarg, &slave_ports) != 0) {
3471 RTE_BOND_LOG(ERR,
3472 "Failed to parse slave ports for bonded device %s",
3473 name);
3474 return -1;
3475 }
3476
3477 for (i = 0; i < slave_ports.slave_count; i++) {
3478 if (rte_eth_bond_slave_add(port_id, slave_ports.slaves[i]) != 0) {
3479 RTE_BOND_LOG(ERR,
3480 "Failed to add port %d as slave to bonded device %s",
3481 slave_ports.slaves[i], name);
3482 }
3483 }
3484
3485 } else {
3486 RTE_BOND_LOG(INFO, "No slaves specified for bonded device %s", name);
3487 return -1;
3488 }
3489
3490 /* Parse/set primary slave port id*/
3491 arg_count = rte_kvargs_count(kvlist, PMD_BOND_PRIMARY_SLAVE_KVARG);
3492 if (arg_count == 1) {
3493 uint16_t primary_slave_port_id;
3494
3495 if (rte_kvargs_process(kvlist,
3496 PMD_BOND_PRIMARY_SLAVE_KVARG,
3497 &bond_ethdev_parse_primary_slave_port_id_kvarg,
3498 &primary_slave_port_id) < 0) {
3499 RTE_BOND_LOG(INFO,
3500 "Invalid primary slave port id specified for bonded device %s",
3501 name);
3502 return -1;
3503 }
3504
3505 /* Set the primary slave port id */
3506 if (rte_eth_bond_primary_set(port_id, primary_slave_port_id)
3507 != 0) {
3508 RTE_BOND_LOG(ERR,
3509 "Failed to set primary slave port %d on bonded device %s",
3510 primary_slave_port_id, name);
3511 return -1;
3512 }
3513 } else if (arg_count > 1) {
3514 RTE_BOND_LOG(INFO,
3515 "Primary slave can be specified only once for bonded device %s",
3516 name);
3517 return -1;
3518 }
3519
3520 /* Parse link status monitor polling interval */
3521 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LSC_POLL_PERIOD_KVARG);
3522 if (arg_count == 1) {
3523 uint32_t lsc_poll_interval_ms;
3524
3525 if (rte_kvargs_process(kvlist,
3526 PMD_BOND_LSC_POLL_PERIOD_KVARG,
3527 &bond_ethdev_parse_time_ms_kvarg,
3528 &lsc_poll_interval_ms) < 0) {
3529 RTE_BOND_LOG(INFO,
3530 "Invalid lsc polling interval value specified for bonded"
3531 " device %s", name);
3532 return -1;
3533 }
3534
3535 if (rte_eth_bond_link_monitoring_set(port_id, lsc_poll_interval_ms)
3536 != 0) {
3537 RTE_BOND_LOG(ERR,
3538 "Failed to set lsc monitor polling interval (%u ms) on bonded device %s",
3539 lsc_poll_interval_ms, name);
3540 return -1;
3541 }
3542 } else if (arg_count > 1) {
3543 RTE_BOND_LOG(INFO,
3544 "LSC polling interval can be specified only once for bonded"
3545 " device %s", name);
3546 return -1;
3547 }
3548
3549 /* Parse link up interrupt propagation delay */
3550 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_UP_PROP_DELAY_KVARG);
3551 if (arg_count == 1) {
3552 uint32_t link_up_delay_ms;
3553
3554 if (rte_kvargs_process(kvlist,
3555 PMD_BOND_LINK_UP_PROP_DELAY_KVARG,
3556 &bond_ethdev_parse_time_ms_kvarg,
3557 &link_up_delay_ms) < 0) {
3558 RTE_BOND_LOG(INFO,
3559 "Invalid link up propagation delay value specified for"
3560 " bonded device %s", name);
3561 return -1;
3562 }
3563
3564 /* Set the link up propagation delay */
3565 if (rte_eth_bond_link_up_prop_delay_set(port_id, link_up_delay_ms)
3566 != 0) {
3567 RTE_BOND_LOG(ERR,
3568 "Failed to set link up propagation delay (%u ms) on bonded"
3569 " device %s", link_up_delay_ms, name);
3570 return -1;
3571 }
3572 } else if (arg_count > 1) {
3573 RTE_BOND_LOG(INFO,
3574 "Link up propagation delay can be specified only once for"
3575 " bonded device %s", name);
3576 return -1;
3577 }
3578
3579 /* Parse link down interrupt propagation delay */
3580 arg_count = rte_kvargs_count(kvlist, PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG);
3581 if (arg_count == 1) {
3582 uint32_t link_down_delay_ms;
3583
3584 if (rte_kvargs_process(kvlist,
3585 PMD_BOND_LINK_DOWN_PROP_DELAY_KVARG,
3586 &bond_ethdev_parse_time_ms_kvarg,
3587 &link_down_delay_ms) < 0) {
3588 RTE_BOND_LOG(INFO,
3589 "Invalid link down propagation delay value specified for"
3590 " bonded device %s", name);
3591 return -1;
3592 }
3593
3594 /* Set the link down propagation delay */
3595 if (rte_eth_bond_link_down_prop_delay_set(port_id, link_down_delay_ms)
3596 != 0) {
3597 RTE_BOND_LOG(ERR,
3598 "Failed to set link down propagation delay (%u ms) on bonded device %s",
3599 link_down_delay_ms, name);
3600 return -1;
3601 }
3602 } else if (arg_count > 1) {
3603 RTE_BOND_LOG(INFO,
3604 "Link down propagation delay can be specified only once for bonded device %s",
3605 name);
3606 return -1;
3607 }
3608
3609 return 0;
3610}
3611
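For context on the early return above when no kvlist is supplied (the device was created through the bonding API rather than from --vdev arguments), the following is a minimal sketch of that API path. The device name, port ids and mode are illustrative assumptions, not values taken from this file.

#include <rte_lcore.h>
#include <rte_ethdev.h>
#include "rte_eth_bond.h"

/* Hypothetical helper: bond two already-probed ports in round-robin mode. */
static int
create_bond_via_api(uint16_t slave0, uint16_t slave1)
{
	/* Returns the new bonded port id on success, negative on error. */
	int bond_port = rte_eth_bond_create("net_bonding_api0",
			BONDING_MODE_ROUND_ROBIN, (uint8_t)rte_socket_id());
	if (bond_port < 0)
		return bond_port;

	if (rte_eth_bond_slave_add(bond_port, slave0) != 0 ||
			rte_eth_bond_slave_add(bond_port, slave1) != 0)
		return -1;

	/* Optionally pick which slave acts as primary. */
	return rte_eth_bond_primary_set(bond_port, slave0);
}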
3612 static struct rte_vdev_driver pmd_bond_drv = {
3613 .probe = bond_probe,
3614 .remove = bond_remove,
3615};
3616
3617RTE_PMD_REGISTER_VDEV(net_bonding, pmd_bond_drv);
3618RTE_PMD_REGISTER_ALIAS(net_bonding, eth_bond);
3619
3620RTE_PMD_REGISTER_PARAM_STRING(net_bonding,
3621 "slave=<ifc> "
3622 "primary=<ifc> "
3623 "mode=[0-6] "
3624 "xmit_policy=[l2 | l23 | l34] "
3625 "agg_mode=[count | stable | bandwidth] "
3626 "socket_id=<int> "
3627 "mac=<mac addr> "
3628 "lsc_poll_period_ms=<int> "
3629 "up_delay=<int> "
3630 "down_delay=<int>");
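As a usage illustration of the parameter string registered above, an equivalent bonded device could be created either from the EAL command line (e.g. --vdev 'net_bonding0,mode=2,slave=0000:00:04.0,slave=0000:00:05.0,primary=0000:00:04.0') or programmatically via the vdev bus; the PCI addresses below are placeholders, not values from this file.

#include <rte_bus_vdev.h>

/* Sketch: instantiate the bonded vdev with the same kvargs parsed above. */
static int
create_bond_from_devargs(void)
{
	return rte_vdev_init("net_bonding0",
			"mode=2,xmit_policy=l34,"
			"slave=0000:00:04.0,slave=0000:00:05.0,"
			"primary=0000:00:04.0,lsc_poll_period_ms=100");
}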
3631
3632int bond_logtype;
3633
3634RTE_INIT(bond_init_log)
3635{
3636 bond_logtype = rte_log_register("pmd.net.bond");
3637 if (bond_logtype >= 0)
3638 rte_log_set_level(bond_logtype, RTE_LOG_NOTICE);
3639}
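The log type registered above defaults to the NOTICE level. As an illustrative usage note (not part of this file), an application could raise it after EAL initialization roughly as follows; rte_log_register() returns the id of an already-registered name, so this reuses the "pmd.net.bond" type.

#include <rte_log.h>

/* Sketch: enable DEBUG output from the bonding PMD at runtime. */
static void
enable_bond_pmd_debug(void)
{
	int logtype = rte_log_register("pmd.net.bond");

	if (logtype >= 0)
		rte_log_set_level(logtype, RTE_LOG_DEBUG);
}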