]> git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/dpdk/drivers/net/bonding/rte_eth_bond_alb.c
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / seastar / dpdk / drivers / net / bonding / rte_eth_bond_alb.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of Intel Corporation nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include "rte_eth_bond_private.h"
35 #include "rte_eth_bond_alb.h"
36
/*
 * Fold a byte range into a single byte by XOR-ing every byte together.
 *
 * hash_start: first byte of the range to hash.
 * hash_size:  number of bytes to fold; zero or a negative value yields 0.
 *
 * Returns the XOR of all hash_size bytes.
 */
static inline uint8_t
simple_hash(uint8_t *hash_start, int hash_size)
{
	uint8_t *cursor = hash_start;
	uint8_t folded = 0;
	int left;

	for (left = hash_size; left > 0; --left)
		folded ^= *cursor++;

	return folded;
}
49
50 static uint8_t
51 calculate_slave(struct bond_dev_private *internals)
52 {
53 uint8_t idx;
54
55 idx = (internals->mode6.last_slave + 1) % internals->active_slave_count;
56 internals->mode6.last_slave = idx;
57 return internals->active_slaves[idx];
58 }
59
60 int
61 bond_mode_alb_enable(struct rte_eth_dev *bond_dev)
62 {
63 struct bond_dev_private *internals = bond_dev->data->dev_private;
64 struct client_data *hash_table = internals->mode6.client_table;
65
66 uint16_t data_size;
67 char mem_name[RTE_ETH_NAME_MAX_LEN];
68 int socket_id = bond_dev->data->numa_node;
69
70 /* Fill hash table with initial values */
71 memset(hash_table, 0, sizeof(struct client_data) * ALB_HASH_TABLE_SIZE);
72 rte_spinlock_init(&internals->mode6.lock);
73 internals->mode6.last_slave = ALB_NULL_INDEX;
74 internals->mode6.ntt = 0;
75
76 /* Initialize memory pool for ARP packets to send */
77 if (internals->mode6.mempool == NULL) {
78 /*
79 * 256 is size of ETH header, ARP header and nested VLAN headers.
80 * The value is chosen to be cache aligned.
81 */
82 data_size = 256 + RTE_PKTMBUF_HEADROOM;
83 snprintf(mem_name, sizeof(mem_name), "%s_MODE6", bond_dev->data->name);
84 internals->mode6.mempool = rte_pktmbuf_pool_create(mem_name,
85 512 * RTE_MAX_ETHPORTS,
86 RTE_MEMPOOL_CACHE_MAX_SIZE >= 32 ?
87 32 : RTE_MEMPOOL_CACHE_MAX_SIZE,
88 0, data_size, socket_id);
89
90 if (internals->mode6.mempool == NULL) {
91 RTE_LOG(ERR, PMD, "%s: Failed to initialize ALB mempool.\n",
92 bond_dev->data->name);
93 goto mempool_alloc_error;
94 }
95 }
96
97 return 0;
98
99 mempool_alloc_error:
100 return -ENOMEM;
101 }
102
103 void bond_mode_alb_arp_recv(struct ether_hdr *eth_h, uint16_t offset,
104 struct bond_dev_private *internals) {
105 struct arp_hdr *arp;
106
107 struct client_data *hash_table = internals->mode6.client_table;
108 struct client_data *client_info;
109
110 uint8_t hash_index;
111
112 arp = (struct arp_hdr *) ((char *) (eth_h + 1) + offset);
113
114 /* ARP Requests are forwarded to the application with no changes */
115 if (arp->arp_op != rte_cpu_to_be_16(ARP_OP_REPLY))
116 return;
117
118 /* From now on, we analyze only ARP Reply packets */
119 hash_index = simple_hash((uint8_t *) &arp->arp_data.arp_sip,
120 sizeof(arp->arp_data.arp_sip));
121 client_info = &hash_table[hash_index];
122
123 /*
124 * We got reply for ARP Request send by the application. We need to
125 * update client table when received data differ from what is stored
126 * in ALB table and issue sending update packet to that slave.
127 */
128 rte_spinlock_lock(&internals->mode6.lock);
129 if (client_info->in_use == 0 ||
130 client_info->app_ip != arp->arp_data.arp_tip ||
131 client_info->cli_ip != arp->arp_data.arp_sip ||
132 !is_same_ether_addr(&client_info->cli_mac, &arp->arp_data.arp_sha) ||
133 client_info->vlan_count != offset / sizeof(struct vlan_hdr) ||
134 memcmp(client_info->vlan, eth_h + 1, offset) != 0
135 ) {
136 client_info->in_use = 1;
137 client_info->app_ip = arp->arp_data.arp_tip;
138 client_info->cli_ip = arp->arp_data.arp_sip;
139 ether_addr_copy(&arp->arp_data.arp_sha, &client_info->cli_mac);
140 client_info->slave_idx = calculate_slave(internals);
141 rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
142 ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_tha);
143 memcpy(client_info->vlan, eth_h + 1, offset);
144 client_info->vlan_count = offset / sizeof(struct vlan_hdr);
145 }
146 internals->mode6.ntt = 1;
147 rte_spinlock_unlock(&internals->mode6.lock);
148 }
149
150 uint8_t
151 bond_mode_alb_arp_xmit(struct ether_hdr *eth_h, uint16_t offset,
152 struct bond_dev_private *internals)
153 {
154 struct arp_hdr *arp;
155
156 struct client_data *hash_table = internals->mode6.client_table;
157 struct client_data *client_info;
158
159 uint8_t hash_index;
160
161 struct ether_addr bonding_mac;
162
163 arp = (struct arp_hdr *)((char *)(eth_h + 1) + offset);
164
165 /*
166 * Traffic with src MAC other than bonding should be sent on
167 * current primary port.
168 */
169 rte_eth_macaddr_get(internals->port_id, &bonding_mac);
170 if (!is_same_ether_addr(&bonding_mac, &arp->arp_data.arp_sha)) {
171 rte_eth_macaddr_get(internals->current_primary_port,
172 &arp->arp_data.arp_sha);
173 return internals->current_primary_port;
174 }
175
176 hash_index = simple_hash((uint8_t *)&arp->arp_data.arp_tip,
177 sizeof(uint32_t));
178 client_info = &hash_table[hash_index];
179
180 rte_spinlock_lock(&internals->mode6.lock);
181 if (arp->arp_op == rte_cpu_to_be_16(ARP_OP_REPLY)) {
182 if (client_info->in_use) {
183 if (client_info->app_ip == arp->arp_data.arp_sip &&
184 client_info->cli_ip == arp->arp_data.arp_tip) {
185 /* Entry is already assigned to this client */
186 if (!is_broadcast_ether_addr(&arp->arp_data.arp_tha)) {
187 ether_addr_copy(&arp->arp_data.arp_tha,
188 &client_info->cli_mac);
189 }
190 rte_eth_macaddr_get(client_info->slave_idx,
191 &client_info->app_mac);
192 ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha);
193 memcpy(client_info->vlan, eth_h + 1, offset);
194 client_info->vlan_count = offset / sizeof(struct vlan_hdr);
195 rte_spinlock_unlock(&internals->mode6.lock);
196 return client_info->slave_idx;
197 }
198 }
199
200 /* Assign new slave to this client and update src mac in ARP */
201 client_info->in_use = 1;
202 client_info->ntt = 0;
203 client_info->app_ip = arp->arp_data.arp_sip;
204 ether_addr_copy(&arp->arp_data.arp_tha, &client_info->cli_mac);
205 client_info->cli_ip = arp->arp_data.arp_tip;
206 client_info->slave_idx = calculate_slave(internals);
207 rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
208 ether_addr_copy(&client_info->app_mac, &arp->arp_data.arp_sha);
209 memcpy(client_info->vlan, eth_h + 1, offset);
210 client_info->vlan_count = offset / sizeof(struct vlan_hdr);
211 rte_spinlock_unlock(&internals->mode6.lock);
212 return client_info->slave_idx;
213 }
214
215 /* If packet is not ARP Reply, send it on current primary port. */
216 rte_spinlock_unlock(&internals->mode6.lock);
217 rte_eth_macaddr_get(internals->current_primary_port,
218 &arp->arp_data.arp_sha);
219 return internals->current_primary_port;
220 }
221
222 uint8_t
223 bond_mode_alb_arp_upd(struct client_data *client_info,
224 struct rte_mbuf *pkt, struct bond_dev_private *internals)
225 {
226 struct ether_hdr *eth_h;
227 struct arp_hdr *arp_h;
228 uint8_t slave_idx;
229
230 rte_spinlock_lock(&internals->mode6.lock);
231 eth_h = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
232
233 ether_addr_copy(&client_info->app_mac, &eth_h->s_addr);
234 ether_addr_copy(&client_info->cli_mac, &eth_h->d_addr);
235 if (client_info->vlan_count > 0)
236 eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN);
237 else
238 eth_h->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP);
239
240 arp_h = (struct arp_hdr *)((char *)eth_h + sizeof(struct ether_hdr)
241 + client_info->vlan_count * sizeof(struct vlan_hdr));
242
243 memcpy(eth_h + 1, client_info->vlan,
244 client_info->vlan_count * sizeof(struct vlan_hdr));
245
246 ether_addr_copy(&client_info->app_mac, &arp_h->arp_data.arp_sha);
247 arp_h->arp_data.arp_sip = client_info->app_ip;
248 ether_addr_copy(&client_info->cli_mac, &arp_h->arp_data.arp_tha);
249 arp_h->arp_data.arp_tip = client_info->cli_ip;
250
251 arp_h->arp_hrd = rte_cpu_to_be_16(ARP_HRD_ETHER);
252 arp_h->arp_pro = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
253 arp_h->arp_hln = ETHER_ADDR_LEN;
254 arp_h->arp_pln = sizeof(uint32_t);
255 arp_h->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
256
257 slave_idx = client_info->slave_idx;
258 rte_spinlock_unlock(&internals->mode6.lock);
259
260 return slave_idx;
261 }
262
263 void
264 bond_mode_alb_client_list_upd(struct rte_eth_dev *bond_dev)
265 {
266 struct bond_dev_private *internals = bond_dev->data->dev_private;
267 struct client_data *client_info;
268
269 int i;
270
271 /* If active slave count is 0, it's pointless to refresh alb table */
272 if (internals->active_slave_count <= 0)
273 return;
274
275 rte_spinlock_lock(&internals->mode6.lock);
276 internals->mode6.last_slave = ALB_NULL_INDEX;
277
278 for (i = 0; i < ALB_HASH_TABLE_SIZE; i++) {
279 client_info = &internals->mode6.client_table[i];
280 if (client_info->in_use) {
281 client_info->slave_idx = calculate_slave(internals);
282 rte_eth_macaddr_get(client_info->slave_idx, &client_info->app_mac);
283 internals->mode6.ntt = 1;
284 }
285 }
286 rte_spinlock_unlock(&internals->mode6.lock);
287 }