]> git.proxmox.com Git - ceph.git/blame - ceph/src/spdk/dpdk/drivers/net/failsafe/failsafe_private.h
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / spdk / dpdk / drivers / net / failsafe / failsafe_private.h
CommitLineData
11fdf7f2
TL
1/* SPDX-License-Identifier: BSD-3-Clause
2 * Copyright 2017 6WIND S.A.
3 * Copyright 2017 Mellanox Technologies, Ltd
4 */
5
f67539c2
TL
6#ifndef _ETH_FAILSAFE_PRIVATE_H_
7#define _ETH_FAILSAFE_PRIVATE_H_
11fdf7f2 8
9f95a23c 9#include <stdint.h>
11fdf7f2
TL
10#include <sys/queue.h>
11#include <pthread.h>
12
13#include <rte_atomic.h>
14#include <rte_dev.h>
15#include <rte_ethdev_driver.h>
16#include <rte_devargs.h>
9f95a23c 17#include <rte_flow.h>
11fdf7f2
TL
18#include <rte_interrupts.h>
19
/* Driver and owner identification strings. */
#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
#define FAILSAFE_OWNER_NAME  "Fail-safe"

/* Key names of the "mac" and "hotplug_poll" device arguments. */
#define PMD_FAILSAFE_MAC_KVARG          "mac"
#define PMD_FAILSAFE_HOTPLUG_POLL_KVARG "hotplug_poll"

/* Parameter description string: one entry per supported argument. */
#define PMD_FAILSAFE_PARAM_STRING \
	"dev(<ifc>)," \
	"exec(<shell command>)," \
	"fd(<fd number>)," \
	"mac=mac_addr," \
	"hotplug_poll=u64"

/* Default hot-plug timeout, in milliseconds. */
#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000

/* Sizing limits used by the arrays declared in this header. */
#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR  128

#define DEVARGS_MAXLEN 4096
39
/* State of the Rx proxy service, stored in struct rx_proxy. */
enum rxp_service_state {
	SS_NO_SERVICE = 0,
	SS_REGISTERED = 1,
	SS_READY      = 2,
	SS_RUNNING    = 3,
};
46
47/* TYPES */
48
49struct rx_proxy {
50 /* epoll file descriptor */
51 int efd;
52 /* event vector to be used by epoll */
53 struct rte_epoll_event *evec;
54 /* rte service id */
55 uint32_t sid;
56 /* service core id */
57 uint32_t scid;
58 enum rxp_service_state sstate;
59};
60
f67539c2
TL
/*
 * Compound literal holding the reset value of a struct rx_proxy:
 * invalid epoll fd, no event vector, ids zeroed, no service.
 */
#define FS_RX_PROXY_INIT \
	(struct rx_proxy){ \
		.efd = -1, \
		.evec = NULL, \
		.sid = 0, \
		.scid = 0, \
		.sstate = SS_NO_SERVICE, \
	}
68
11fdf7f2
TL
69struct rxq {
70 struct fs_priv *priv;
71 uint16_t qid;
72 /* next sub_device to poll */
73 struct sub_device *sdev;
74 unsigned int socket_id;
75 int event_fd;
76 unsigned int enable_events:1;
77 struct rte_eth_rxq_info info;
78 rte_atomic64_t refcnt[];
79};
80
81struct txq {
82 struct fs_priv *priv;
83 uint16_t qid;
84 unsigned int socket_id;
85 struct rte_eth_txq_info info;
86 rte_atomic64_t refcnt[];
87};
88
89struct rte_flow {
90 TAILQ_ENTRY(rte_flow) next;
91 /* sub_flows */
92 struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
93 /* flow description for synchronization */
9f95a23c
TL
94 struct rte_flow_conv_rule rule;
95 uint8_t rule_data[];
11fdf7f2
TL
96};
97
/*
 * Device states, in increasing order.
 * States are compared with relational operators elsewhere in this
 * header, so the declaration order is significant.
 */
enum dev_state {
	DEV_UNDEFINED = 0,
	DEV_PARSED    = 1,
	DEV_PROBED    = 2,
	DEV_ACTIVE    = 3,
	DEV_STARTED   = 4,
};
105
106struct fs_stats {
107 struct rte_eth_stats stats;
108 uint64_t timestamp;
109};
110
9f95a23c
TL
111/*
112 * Allocated in shared memory.
113 */
11fdf7f2
TL
114struct sub_device {
115 /* Exhaustive DPDK device description */
116 struct sub_device *next;
117 struct rte_devargs devargs;
9f95a23c
TL
118 struct rte_bus *bus; /* for primary process only. */
119 struct rte_device *dev; /* for primary process only. */
11fdf7f2
TL
120 uint8_t sid;
121 /* Device state machine */
122 enum dev_state state;
123 /* Last stats snapshot passed to user */
124 struct fs_stats stats_snapshot;
125 /* Some device are defined as a command line */
126 char *cmdline;
127 /* Others are retrieved through a file descriptor */
128 char *fd_str;
129 /* fail-safe device backreference */
9f95a23c
TL
130 uint16_t fs_port_id; /* shared between processes */
131 /* sub device port id*/
132 uint16_t sdev_port_id; /* shared between processes */
11fdf7f2
TL
133 /* flag calling for recollection */
134 volatile unsigned int remove:1;
135 /* flow isolation state */
136 int flow_isolated:1;
137 /* RMV callback registration state */
138 unsigned int rmv_callback:1;
139 /* LSC callback registration state */
140 unsigned int lsc_callback:1;
141};
142
9f95a23c
TL
143/*
144 * This is referenced by eth_dev->data->dev_private
145 * This is shared between processes.
146 */
11fdf7f2 147struct fs_priv {
9f95a23c 148 struct rte_eth_dev_data *data; /* backreference to shared data. */
11fdf7f2
TL
149 /*
150 * Set of sub_devices.
151 * subs[0] is the preferred device
152 * any other is just another slave
153 */
9f95a23c 154 struct sub_device *subs; /* shared between processes */
11fdf7f2
TL
155 uint8_t subs_head; /* if head == tail, no subs */
156 uint8_t subs_tail; /* first invalid */
157 uint8_t subs_tx; /* current emitting device */
158 uint8_t current_probed;
159 /* flow mapping */
160 TAILQ_HEAD(sub_flows, rte_flow) flow_list;
161 /* current number of mac_addr slots allocated. */
162 uint32_t nb_mac_addr;
f67539c2 163 struct rte_ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
11fdf7f2 164 uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
9f95a23c 165 uint32_t nb_mcast_addr;
f67539c2 166 struct rte_ether_addr *mcast_addrs;
11fdf7f2 167 /* current capabilities */
11fdf7f2
TL
168 struct rte_eth_dev_owner my_owner; /* Unique owner. */
169 struct rte_intr_handle intr_handle; /* Port interrupt handle. */
170 /*
171 * Fail-safe state machine.
172 * This level will be tracking state of the EAL and eth
173 * layer at large as defined by the user application.
174 * It will then steer the sub_devices toward the same
175 * synchronized state.
176 */
177 enum dev_state state;
178 struct rte_eth_stats stats_accumulator;
179 /*
180 * Rx interrupts/events proxy.
181 * The PMD issues Rx events to the EAL on behalf of its subdevices,
182 * it does that by registering an event-fd for each of its queues with
183 * the EAL. A PMD service thread listens to all the Rx events from the
184 * subdevices, when an Rx event is issued by a subdevice it will be
185 * caught by this service with will trigger an Rx event in the
186 * appropriate failsafe Rx queue.
187 */
188 struct rx_proxy rxp;
189 pthread_mutex_t hotplug_mutex;
190 /* Hot-plug mutex is locked by the alarm mechanism. */
191 volatile unsigned int alarm_lock:1;
192 unsigned int pending_alarm:1; /* An alarm is pending */
193 /* flow isolation state */
194 int flow_isolated:1;
195};
196
197/* FAILSAFE_INTR */
198
int failsafe_rx_intr_install(struct rte_eth_dev *dev);
void failsafe_rx_intr_uninstall(struct rte_eth_dev *dev);
int failsafe_rx_intr_install_subdevice(struct sub_device *sdev);
void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev);

/* MISC */

int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);

/* RX / TX */

void failsafe_set_burst_fn(struct rte_eth_dev *dev, int force_safe);

uint16_t failsafe_rx_burst(void *rxq, struct rte_mbuf **rx_pkts,
			   uint16_t nb_pkts);
uint16_t failsafe_tx_burst(void *txq, struct rte_mbuf **tx_pkts,
			   uint16_t nb_pkts);

uint16_t failsafe_rx_burst_fast(void *rxq, struct rte_mbuf **rx_pkts,
				uint16_t nb_pkts);
uint16_t failsafe_tx_burst_fast(void *txq, struct rte_mbuf **tx_pkts,
				uint16_t nb_pkts);

/* ARGS */

int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
void failsafe_args_free(struct rte_eth_dev *dev);
int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
int failsafe_args_parse_subs(struct rte_eth_dev *dev);

/* EAL */

int failsafe_eal_init(struct rte_eth_dev *dev);
int failsafe_eal_uninit(struct rte_eth_dev *dev);

/* ETH_DEV */

int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
void failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev);
void failsafe_dev_remove(struct rte_eth_dev *dev);
void failsafe_stats_increment(struct rte_eth_stats *to,
			      struct rte_eth_stats *from);
int failsafe_eth_rmv_event_callback(uint16_t port_id,
				    enum rte_eth_event_type type,
				    void *arg, void *out);
int failsafe_eth_lsc_event_callback(uint16_t port_id,
				    enum rte_eth_event_type event,
				    void *cb_arg, void *out);
int failsafe_eth_new_event_callback(uint16_t port_id,
				    enum rte_eth_event_type event,
				    void *cb_arg, void *out);

/* GLOBALS */

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
extern const struct rte_flow_ops fs_flow_ops;
extern uint64_t failsafe_hotplug_poll;
extern int failsafe_mac_from_arg;
11fdf7f2
TL
259
260/* HELPERS */
261
/*
 * dev: (struct rte_eth_dev *) fail-safe device
 * Yields the device private data as a (struct fs_priv *).
 */
#define PRIV(dev) ((struct fs_priv *)(dev)->data->dev_private)

/*
 * sdev: (struct sub_device *)
 * Yields the sub-device ethdev, or NULL when its port id is
 * RTE_MAX_ETHPORTS.
 */
#define ETH(sdev) \
	((sdev)->sdev_port_id == RTE_MAX_ETHPORTS ? \
		NULL : &rte_eth_devices[(sdev)->sdev_port_id])

/* sdev: (struct sub_device *); yields the sub-device port id. */
#define PORT_ID(sdev) ((sdev)->sdev_port_id)

/* sdev: (struct sub_device *); yields the sub-device index (sid). */
#define SUB_ID(sdev) ((sdev)->sid)
278
/**
 * Stateful iterator construct over fail-safe sub-devices.
 * Visits every sub-device whose state is at least `state`;
 * `s` is NULL once the loop has run to completion.
 *   s: (struct sub_device *), iterator
 *   i: (uint8_t), increment
 *   dev: (struct rte_eth_dev *), fail-safe ethdev
 *   state: (enum dev_state), minimum acceptable device state
 */
#define FOREACH_SUBDEV_STATE(s, i, dev, state) \
	for ((s) = fs_find_next((dev), 0, (state), &(i)); \
	     (s) != NULL; \
	     (s) = fs_find_next((dev), (i) + 1, (state), &(i)))

/**
 * Iterator construct over all fail-safe sub-devices, whatever their state.
 *   s: (struct sub_device *), iterator
 *   i: (uint8_t), increment
 *   dev: (struct rte_eth_dev *), fail-safe ethdev
 */
#define FOREACH_SUBDEV(s, i, dev) \
	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
299
/*
 * dev: (struct rte_eth_dev *) fail-safe device
 * Yields the address of subs[0], the preferred sub-device.
 */
#define PREFERRED_SUBDEV(dev) (&PRIV(dev)->subs[0])

/*
 * dev: (struct rte_eth_dev *) fail-safe device
 * Yields the current emitting sub-device, or NULL when subs_tx is
 * past the last valid slot or that sub-device is not yet DEV_PROBED.
 */
#define TX_SUBDEV(dev) \
	((PRIV(dev)->subs_tx < PRIV(dev)->subs_tail && \
	  PRIV(dev)->subs[PRIV(dev)->subs_tx].state >= DEV_PROBED) ? \
		&PRIV(dev)->subs[PRIV(dev)->subs_tx] : NULL)

/**
 * s: (struct sub_device *)
 * ops: (struct eth_dev_ops) member
 * Note: dereferences ETH(s) without checking it for NULL.
 */
#define SUBOPS(s, ops) (ETH(s)->dev_ops->ops)
316
/**
 * Atomic guard
 */

/**
 * a: (rte_atomic64_t)
 * Sets the counter to 1.
 */
#define FS_ATOMIC_P(a) rte_atomic64_set(&(a), 1)

/**
 * a: (rte_atomic64_t)
 * Sets the counter to 0.
 */
#define FS_ATOMIC_V(a) rte_atomic64_set(&(a), 0)

/**
 * s: (struct sub_device *)
 * i: uint16_t qid
 * Reads the counter kept for sub-device s in fail-safe Rx queue i.
 */
#define FS_ATOMIC_RX(s, i) \
	rte_atomic64_read( \
		&((struct rxq *)(fs_dev(s)->data->rx_queues[i])) \
			->refcnt[(s)->sid])

/**
 * s: (struct sub_device *)
 * i: uint16_t qid
 * Reads the counter kept for sub-device s in fail-safe Tx queue i.
 */
#define FS_ATOMIC_TX(s, i) \
	rte_atomic64_read( \
		&((struct txq *)(fs_dev(s)->data->tx_queues[i])) \
			->refcnt[(s)->sid])
11fdf7f2 349
/*
 * Type/format pair selected per execution environment.
 * NOTE(review): presumably used to print a thread id; confirm at call sites.
 */
#if defined(RTE_EXEC_ENV_FREEBSD)
#  define FS_THREADID_TYPE void*
#  define FS_THREADID_FMT "p"
#else
#  define FS_THREADID_TYPE unsigned long
#  define FS_THREADID_FMT "lu"
#endif
357
extern int failsafe_logtype;

/*
 * Logging helpers.
 * Every message is prefixed with "net_failsafe: " and terminated by a
 * newline. The newline travels as the last variadic argument, consumed
 * by a trailing "%c", so that a message without arguments still leaves
 * __VA_ARGS__ non-empty.
 */
#define LOG__(level, fmt, ...) \
	rte_log(RTE_LOG_ ## level, failsafe_logtype, \
		"net_failsafe: " fmt "%c", __VA_ARGS__)

#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')

#define DEBUG(...) LOG_(DEBUG, __VA_ARGS__)
#define INFO(...)  LOG_(INFO, __VA_ARGS__)
#define WARN(...)  LOG_(WARNING, __VA_ARGS__)
#define ERROR(...) LOG_(ERR, __VA_ARGS__)
369
370/* inlined functions */
371
372static inline struct sub_device *
373fs_find_next(struct rte_eth_dev *dev,
374 uint8_t sid,
375 enum dev_state min_state,
376 uint8_t *sid_out)
377{
378 struct sub_device *subs;
379 uint8_t tail;
380
381 subs = PRIV(dev)->subs;
382 tail = PRIV(dev)->subs_tail;
383 while (sid < tail) {
384 if (subs[sid].state >= min_state)
385 break;
386 sid++;
387 }
388 *sid_out = sid;
389 if (sid >= tail)
390 return NULL;
391 return &subs[sid];
392}
393
9f95a23c
TL
394static inline struct rte_eth_dev *
395fs_dev(struct sub_device *sdev) {
396 return &rte_eth_devices[sdev->fs_port_id];
397}
398
11fdf7f2
TL
399/*
400 * Lock hot-plug mutex.
401 * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
402 */
403static inline int
404fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
405{
406 int ret;
407
408 if (is_alarm) {
409 ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
410 if (ret) {
411 DEBUG("Hot-plug mutex lock trying failed(%s), will try"
412 " again later...", strerror(ret));
413 return ret;
414 }
415 PRIV(dev)->alarm_lock = 1;
416 } else {
417 ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
418 if (ret) {
419 ERROR("Cannot lock mutex(%s)", strerror(ret));
420 return ret;
421 }
422 }
11fdf7f2
TL
423 return ret;
424}
425
426/*
427 * Unlock hot-plug mutex.
428 * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
429 */
430static inline void
431fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
432{
433 int ret;
11fdf7f2
TL
434
435 if (is_alarm) {
436 RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
437 PRIV(dev)->alarm_lock = 0;
438 }
439 ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
440 if (ret)
441 ERROR("Cannot unlock hot-plug mutex(%s)", strerror(ret));
11fdf7f2
TL
442}
443
444/*
445 * Switch emitting device.
446 * If banned is set, banned must not be considered for
447 * the role of emitting device.
448 */
449static inline void
450fs_switch_dev(struct rte_eth_dev *dev,
451 struct sub_device *banned)
452{
453 struct sub_device *txd;
454 enum dev_state req_state;
455
456 req_state = PRIV(dev)->state;
457 txd = TX_SUBDEV(dev);
458 if (PREFERRED_SUBDEV(dev)->state >= req_state &&
459 PREFERRED_SUBDEV(dev) != banned) {
460 if (txd != PREFERRED_SUBDEV(dev) &&
461 (txd == NULL ||
462 (req_state == DEV_STARTED) ||
463 (txd && txd->state < DEV_STARTED))) {
464 DEBUG("Switching tx_dev to preferred sub_device");
465 PRIV(dev)->subs_tx = 0;
466 }
467 } else if ((txd && txd->state < req_state) ||
468 txd == NULL ||
469 txd == banned) {
470 struct sub_device *sdev = NULL;
471 uint8_t i;
472
473 /* Using acceptable device */
474 FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) {
475 if (sdev == banned)
476 continue;
477 DEBUG("Switching tx_dev to sub_device %d",
478 i);
479 PRIV(dev)->subs_tx = i;
480 break;
481 }
482 if (i >= PRIV(dev)->subs_tail || sdev == NULL) {
483 DEBUG("No device ready, deactivating tx_dev");
484 PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
485 }
486 } else {
487 return;
488 }
9f95a23c 489 failsafe_set_burst_fn(dev, 0);
11fdf7f2
TL
490 rte_wmb();
491}
492
493/*
494 * Adjust error value and rte_errno to the fail-safe actual error value.
495 */
496static inline int
497fs_err(struct sub_device *sdev, int err)
498{
499 /* A device removal shouldn't be reported as an error. */
500 if (sdev->remove == 1 || err == -EIO)
501 return rte_errno = 0;
502 return err;
503}
f67539c2 504#endif /* _ETH_FAILSAFE_PRIVATE_H_ */