/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#include <unistd.h>

#include <rte_flow.h>
#include <rte_flow_driver.h>
#include <rte_cycles.h>

#include "failsafe_private.h"

/** Print a message out of a flow error. */
static int
fs_flow_complain(struct rte_flow_error *error)
{
	static const char *const errstrlist[] = {
		[RTE_FLOW_ERROR_TYPE_NONE] = "no error",
		[RTE_FLOW_ERROR_TYPE_UNSPECIFIED] = "cause unspecified",
		[RTE_FLOW_ERROR_TYPE_HANDLE] = "flow rule (handle)",
		[RTE_FLOW_ERROR_TYPE_ATTR_GROUP] = "group field",
		[RTE_FLOW_ERROR_TYPE_ATTR_PRIORITY] = "priority field",
		[RTE_FLOW_ERROR_TYPE_ATTR_INGRESS] = "ingress field",
		[RTE_FLOW_ERROR_TYPE_ATTR_EGRESS] = "egress field",
		[RTE_FLOW_ERROR_TYPE_ATTR] = "attributes structure",
		[RTE_FLOW_ERROR_TYPE_ITEM_NUM] = "pattern length",
		[RTE_FLOW_ERROR_TYPE_ITEM] = "specific pattern item",
		[RTE_FLOW_ERROR_TYPE_ACTION_NUM] = "number of actions",
		[RTE_FLOW_ERROR_TYPE_ACTION] = "specific action",
	};
	const char *errstr;
	char buf[32];
	int err = rte_errno;

	if ((unsigned int)error->type >= RTE_DIM(errstrlist) ||
	    !errstrlist[error->type])
		errstr = "unknown type";
	else
		errstr = errstrlist[error->type];
	ERROR("Caught error type %d (%s): %s%s\n",
	      error->type, errstr,
	      error->cause ? (snprintf(buf, sizeof(buf), "cause: %p, ",
				       error->cause), buf) : "",
	      error->message ? error->message : "(no stated reason)");
	return -err;
}

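/*
 * Propagate the failsafe port's flow isolation mode to a sub-device,
 * reporting any rte_flow error returned by the sub-PMD.
 */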
static int
eth_dev_flow_isolate_set(struct rte_eth_dev *dev,
			 struct sub_device *sdev)
{
	struct rte_flow_error ferror;
	int ret;

	if (!PRIV(dev)->flow_isolated) {
		DEBUG("Flow isolation already disabled");
	} else {
		DEBUG("Enabling flow isolation");
		ret = rte_flow_isolate(PORT_ID(sdev),
				       PRIV(dev)->flow_isolated,
				       &ferror);
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
	}
	return 0;
}

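/*
 * Replay the failsafe port's current configuration on a sub-device:
 * RX/TX queues, link status, promiscuous and all-multicast modes, MTU,
 * MAC addresses, VLAN filters and rte_flow rules.
 */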
static int
fs_eth_dev_conf_apply(struct rte_eth_dev *dev,
		      struct sub_device *sdev)
{
	struct rte_eth_dev *edev;
	struct rte_vlan_filter_conf *vfc1;
	struct rte_vlan_filter_conf *vfc2;
	struct rte_flow *flow;
	struct rte_flow_error ferror;
	uint32_t i;
	int ret;

	edev = ETH(sdev);
	/* RX queue setup */
	for (i = 0; i < dev->data->nb_rx_queues; i++) {
		struct rxq *rxq;

		rxq = dev->data->rx_queues[i];
		ret = rte_eth_rx_queue_setup(PORT_ID(sdev), i,
				rxq->info.nb_desc, rxq->socket_id,
				&rxq->info.conf, rxq->info.mp);
		if (ret) {
			ERROR("rx_queue_setup failed");
			return ret;
		}
	}
	/* TX queue setup */
	for (i = 0; i < dev->data->nb_tx_queues; i++) {
		struct txq *txq;

		txq = dev->data->tx_queues[i];
		ret = rte_eth_tx_queue_setup(PORT_ID(sdev), i,
				txq->info.nb_desc, txq->socket_id,
				&txq->info.conf);
		if (ret) {
			ERROR("tx_queue_setup failed");
			return ret;
		}
	}
	/* dev_link.link_status */
	if (dev->data->dev_link.link_status !=
	    edev->data->dev_link.link_status) {
		DEBUG("Configuring link_status");
		if (dev->data->dev_link.link_status)
			ret = rte_eth_dev_set_link_up(PORT_ID(sdev));
		else
			ret = rte_eth_dev_set_link_down(PORT_ID(sdev));
		if (ret) {
			ERROR("Failed to apply link_status");
			return ret;
		}
	} else {
		DEBUG("link_status already set");
	}
	/* promiscuous */
	if (dev->data->promiscuous != edev->data->promiscuous) {
		DEBUG("Configuring promiscuous");
		if (dev->data->promiscuous)
			rte_eth_promiscuous_enable(PORT_ID(sdev));
		else
			rte_eth_promiscuous_disable(PORT_ID(sdev));
	} else {
		DEBUG("promiscuous already set");
	}
	/* all_multicast */
	if (dev->data->all_multicast != edev->data->all_multicast) {
		DEBUG("Configuring all_multicast");
		if (dev->data->all_multicast)
			rte_eth_allmulticast_enable(PORT_ID(sdev));
		else
			rte_eth_allmulticast_disable(PORT_ID(sdev));
	} else {
		DEBUG("all_multicast already set");
	}
	/* MTU */
	if (dev->data->mtu != edev->data->mtu) {
		DEBUG("Configuring MTU");
		ret = rte_eth_dev_set_mtu(PORT_ID(sdev), dev->data->mtu);
		if (ret) {
			ERROR("Failed to apply MTU");
			return ret;
		}
	} else {
		DEBUG("MTU already set");
	}
	/* default MAC */
	DEBUG("Configuring default MAC address");
	ret = rte_eth_dev_default_mac_addr_set(PORT_ID(sdev),
			&dev->data->mac_addrs[0]);
	if (ret) {
		ERROR("Setting default MAC address failed");
		return ret;
	}
	/* additional MAC */
	if (PRIV(dev)->nb_mac_addr > 1)
		DEBUG("Configure additional MAC address%s",
			(PRIV(dev)->nb_mac_addr > 2 ? "es" : ""));
	for (i = 1; i < PRIV(dev)->nb_mac_addr; i++) {
		struct ether_addr *ea;

		ea = &dev->data->mac_addrs[i];
		ret = rte_eth_dev_mac_addr_add(PORT_ID(sdev), ea,
				PRIV(dev)->mac_addr_pool[i]);
		if (ret) {
			char ea_fmt[ETHER_ADDR_FMT_SIZE];

			ether_format_addr(ea_fmt, ETHER_ADDR_FMT_SIZE, ea);
			ERROR("Adding MAC address %s failed", ea_fmt);
			return ret;
		}
	}
	/* VLAN filter */
	vfc1 = &dev->data->vlan_filter_conf;
	vfc2 = &edev->data->vlan_filter_conf;
	if (memcmp(vfc1, vfc2, sizeof(struct rte_vlan_filter_conf))) {
		uint64_t vbit;
		uint64_t ids;
		size_t i;
		uint16_t vlan_id;

		DEBUG("Configuring VLAN filter");
		for (i = 0; i < RTE_DIM(vfc1->ids); i++) {
			if (vfc1->ids[i] == 0)
				continue;
			ids = vfc1->ids[i];
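			/*
			 * Each 64-bit word is a bitmap in which bit N set
			 * means VLAN ID (64 * i + N) is configured; pop
			 * the set bits one at a time.
			 */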
			while (ids) {
				vlan_id = 64 * i;
				/* count trailing zeroes */
				vbit = ~ids & (ids - 1);
				/* clear least significant bit set */
				ids ^= (ids ^ (ids - 1)) ^ vbit;
				for (; vbit; vlan_id++)
					vbit >>= 1;
				ret = rte_eth_dev_vlan_filter(
					PORT_ID(sdev), vlan_id, 1);
				if (ret) {
					ERROR("Failed to apply VLAN filter %hu",
					      vlan_id);
					return ret;
				}
			}
		}
	} else {
		DEBUG("VLAN filter already set");
	}
	/* rte_flow */
	if (TAILQ_EMPTY(&PRIV(dev)->flow_list)) {
		DEBUG("rte_flow already set");
	} else {
		DEBUG("Resetting rte_flow configuration");
		ret = rte_flow_flush(PORT_ID(sdev), &ferror);
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
		i = 0;
		rte_errno = 0;
		DEBUG("Configuring rte_flow");
		TAILQ_FOREACH(flow, &PRIV(dev)->flow_list, next) {
			DEBUG("Creating flow #%" PRIu32, i++);
			flow->flows[SUB_ID(sdev)] =
				rte_flow_create(PORT_ID(sdev),
						&flow->fd->attr,
						flow->fd->items,
						flow->fd->actions,
						&ferror);
			ret = rte_errno;
			if (ret)
				break;
		}
		if (ret) {
			fs_flow_complain(&ferror);
			return ret;
		}
	}
	return 0;
}

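/*
 * Walk a sub-device back down its state ladder, stopping, closing and
 * detaching it as needed, then rearm the hotplug alarm.
 */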
static void
fs_dev_remove(struct sub_device *sdev)
{
	int ret;

	if (sdev == NULL)
		return;
	switch (sdev->state) {
	case DEV_STARTED:
		failsafe_rx_intr_uninstall_subdevice(sdev);
		rte_eth_dev_stop(PORT_ID(sdev));
		sdev->state = DEV_ACTIVE;
		/* fallthrough */
	case DEV_ACTIVE:
		failsafe_eth_dev_unregister_callbacks(sdev);
		rte_eth_dev_close(PORT_ID(sdev));
		sdev->state = DEV_PROBED;
		/* fallthrough */
	case DEV_PROBED:
		ret = rte_eal_hotplug_remove(sdev->bus->name,
					     sdev->dev->name);
		if (ret) {
			ERROR("Bus detach failed for sub_device %u",
			      SUB_ID(sdev));
		} else {
			rte_eth_dev_release_port(ETH(sdev));
		}
		sdev->state = DEV_PARSED;
		/* fallthrough */
	case DEV_PARSED:
	case DEV_UNDEFINED:
		sdev->state = DEV_UNDEFINED;
		/* the end */
		break;
	}
	sdev->remove = 0;
	failsafe_hotplug_alarm_install(sdev->fs_dev);
}

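/*
 * Fold the sub-device's counters into the failsafe accumulator before
 * removal, falling back to the last snapshot when the port can no
 * longer be queried.
 */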
static void
fs_dev_stats_save(struct sub_device *sdev)
{
	struct rte_eth_stats stats;
	int err;

	/* Attempt to read current stats. */
	err = rte_eth_stats_get(PORT_ID(sdev), &stats);
	if (err) {
		uint64_t timestamp = sdev->stats_snapshot.timestamp;

		WARN("Could not access latest statistics from sub-device %d.\n",
		     SUB_ID(sdev));
		if (timestamp != 0)
			WARN("Using latest snapshot taken before %"PRIu64" seconds.\n",
			     (rte_rdtsc() - timestamp) / rte_get_tsc_hz());
	}
	failsafe_stats_increment(&PRIV(sdev->fs_dev)->stats_accumulator,
			err ? &sdev->stats_snapshot.stats : &stats);
	memset(&sdev->stats_snapshot, 0, sizeof(sdev->stats_snapshot));
}

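/*
 * Report whether a sub-device's datapath is quiescent: return 1 when no
 * RX or TX burst is in flight on any of its queues, 0 otherwise.
 */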
static inline int
fs_rxtx_clean(struct sub_device *sdev)
{
	uint16_t i;

	for (i = 0; i < ETH(sdev)->data->nb_rx_queues; i++)
		if (FS_ATOMIC_RX(sdev, i))
			return 0;
	for (i = 0; i < ETH(sdev)->data->nb_tx_queues; i++)
		if (FS_ATOMIC_TX(sdev, i))
			return 0;
	return 1;
}

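/* Unregister the RMV and LSC event callbacks of a sub-device, if any. */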
void
failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev)
{
	int ret;

	if (sdev == NULL)
		return;
	if (sdev->rmv_callback) {
		ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
						RTE_ETH_EVENT_INTR_RMV,
						failsafe_eth_rmv_event_callback,
						sdev);
		if (ret)
			WARN("Failed to unregister RMV callback for sub_device"
			     " %d", SUB_ID(sdev));
		sdev->rmv_callback = 0;
	}
	if (sdev->lsc_callback) {
		ret = rte_eth_dev_callback_unregister(PORT_ID(sdev),
						RTE_ETH_EVENT_INTR_LSC,
						failsafe_eth_lsc_event_callback,
						sdev);
		if (ret)
			WARN("Failed to unregister LSC callback for sub_device"
			     " %d", SUB_ID(sdev));
		sdev->lsc_callback = 0;
	}
}

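/*
 * Remove every sub-device flagged for removal, once its datapath is
 * quiescent, saving its statistics beforehand.
 */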
void
failsafe_dev_remove(struct rte_eth_dev *dev)
{
	struct sub_device *sdev;
	uint8_t i;

	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_ACTIVE)
		if (sdev->remove && fs_rxtx_clean(sdev)) {
			if (fs_lock(dev, 1) != 0)
				return;
			fs_dev_stats_save(sdev);
			fs_dev_remove(sdev);
			fs_unlock(dev, 1);
		}
}

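/*
 * Walk the failsafe port's own state ladder (parsed, probed, active,
 * started) and bring every lagging sub-device up to the same stage.
 * On failure, sub-devices below the failsafe state are flagged for
 * removal.
 */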
int
failsafe_eth_dev_state_sync(struct rte_eth_dev *dev)
{
	struct sub_device *sdev;
	uint32_t inactive;
	int ret;
	uint8_t i;

	if (PRIV(dev)->state < DEV_PARSED)
		return 0;

	ret = failsafe_args_parse_subs(dev);
	if (ret)
		goto err_remove;

	if (PRIV(dev)->state < DEV_PROBED)
		return 0;
	ret = failsafe_eal_init(dev);
	if (ret)
		goto err_remove;
	if (PRIV(dev)->state < DEV_ACTIVE)
		return 0;
	inactive = 0;
	FOREACH_SUBDEV(sdev, i, dev) {
		if (sdev->state == DEV_PROBED) {
			inactive |= UINT32_C(1) << i;
			ret = eth_dev_flow_isolate_set(dev, sdev);
			if (ret) {
				ERROR("Could not apply configuration to sub_device %d",
				      i);
				goto err_remove;
			}
		}
	}
	ret = dev->dev_ops->dev_configure(dev);
	if (ret)
		goto err_remove;
	FOREACH_SUBDEV(sdev, i, dev) {
		if (inactive & (UINT32_C(1) << i)) {
			ret = fs_eth_dev_conf_apply(dev, sdev);
			if (ret) {
				ERROR("Could not apply configuration to sub_device %d",
				      i);
				goto err_remove;
			}
		}
	}
	/*
	 * If new devices have been configured, check if
	 * the link state has changed.
	 */
	if (inactive)
		dev->dev_ops->link_update(dev, 1);
	if (PRIV(dev)->state < DEV_STARTED)
		return 0;
	ret = dev->dev_ops->dev_start(dev);
	if (ret)
		goto err_remove;
	return 0;
err_remove:
	FOREACH_SUBDEV(sdev, i, dev)
		if (sdev->state != PRIV(dev)->state)
			sdev->remove = 1;
	return ret;
}

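/* Accumulate the basic and per-queue counters of *from* into *to*. */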
void
failsafe_stats_increment(struct rte_eth_stats *to, struct rte_eth_stats *from)
{
	uint32_t i;

	RTE_ASSERT(to != NULL && from != NULL);
	to->ipackets += from->ipackets;
	to->opackets += from->opackets;
	to->ibytes += from->ibytes;
	to->obytes += from->obytes;
	to->imissed += from->imissed;
	to->ierrors += from->ierrors;
	to->oerrors += from->oerrors;
	to->rx_nombuf += from->rx_nombuf;
	for (i = 0; i < RTE_ETHDEV_QUEUE_STAT_CNTRS; i++) {
		to->q_ipackets[i] += from->q_ipackets[i];
		to->q_opackets[i] += from->q_opackets[i];
		to->q_ibytes[i] += from->q_ibytes[i];
		to->q_obytes[i] += from->q_obytes[i];
		to->q_errors[i] += from->q_errors[i];
	}
}

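/*
 * RMV event handler: stop steering TX to the removed sub-device and
 * fall back to the safe burst functions, then flag it for removal.
 */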
int
failsafe_eth_rmv_event_callback(uint16_t port_id __rte_unused,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct sub_device *sdev = cb_arg;

	fs_lock(sdev->fs_dev, 0);
	/* Switch the tx_dev away from this sub-device as soon as possible. */
	fs_switch_dev(sdev->fs_dev, sdev);
	/* Use safe bursts in any case. */
	set_burst_fn(sdev->fs_dev, 1);
	/*
	 * Removal is asynchronous: this callback cannot unregister itself
	 * from within the sub-PMD context that invokes it, so only flag
	 * the sub-device for removal here.
	 */
	sdev->remove = 1;
	fs_unlock(sdev->fs_dev, 0);
	return 0;
}

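/*
 * LSC event handler: refresh the failsafe port's link status, then
 * pass the LSC event on to the application if required.
 */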
int
failsafe_eth_lsc_event_callback(uint16_t port_id __rte_unused,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct rte_eth_dev *dev = cb_arg;
	int ret;

	ret = dev->dev_ops->link_update(dev, 0);
	/* We must pass on the LSC event */
	if (ret)
		return _rte_eth_dev_callback_process(dev,
						     RTE_ETH_EVENT_INTR_LSC,
						     NULL);
	else
		return 0;
}

/* Take sub-device ownership before it becomes exposed to the application. */
int
failsafe_eth_new_event_callback(uint16_t port_id,
				enum rte_eth_event_type event __rte_unused,
				void *cb_arg, void *out __rte_unused)
{
	struct rte_eth_dev *fs_dev = cb_arg;
	struct sub_device *sdev;
	struct rte_eth_dev *dev = &rte_eth_devices[port_id];
	uint8_t i;

	FOREACH_SUBDEV_STATE(sdev, i, fs_dev, DEV_PARSED) {
		if (sdev->state >= DEV_PROBED)
			continue;
		if (strcmp(sdev->devargs.name, dev->device->name) != 0)
			continue;
		rte_eth_dev_owner_set(port_id, &PRIV(fs_dev)->my_owner);
		/* The actual owner will be checked after the port probing. */
		break;
	}
	return 0;
}