]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * net/switchdev/switchdev.c - Switch device API | |
3 | * Copyright (c) 2014-2015 Jiri Pirko <jiri@resnulli.us> | |
4 | * Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com> | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | */ | |
11 | ||
12 | #include <linux/kernel.h> | |
13 | #include <linux/types.h> | |
14 | #include <linux/init.h> | |
15 | #include <linux/mutex.h> | |
16 | #include <linux/notifier.h> | |
17 | #include <linux/netdevice.h> | |
18 | #include <linux/etherdevice.h> | |
19 | #include <linux/if_bridge.h> | |
20 | #include <linux/list.h> | |
21 | #include <linux/workqueue.h> | |
22 | #include <linux/if_vlan.h> | |
23 | #include <linux/rtnetlink.h> | |
24 | #include <net/ip_fib.h> | |
25 | #include <net/switchdev.h> | |
26 | ||
27 | /** | |
28 | * switchdev_trans_item_enqueue - Enqueue data item to transaction queue | |
29 | * | |
30 | * @trans: transaction | |
31 | * @data: pointer to data being queued | |
32 | * @destructor: data destructor | |
33 | * @tritem: transaction item being queued | |
34 | * | |
35 | * Enqeueue data item to transaction queue. tritem is typically placed in | |
36 | * cointainter pointed at by data pointer. Destructor is called on | |
37 | * transaction abort and after successful commit phase in case | |
38 | * the caller did not dequeue the item before. | |
39 | */ | |
40 | void switchdev_trans_item_enqueue(struct switchdev_trans *trans, | |
41 | void *data, void (*destructor)(void const *), | |
42 | struct switchdev_trans_item *tritem) | |
43 | { | |
44 | tritem->data = data; | |
45 | tritem->destructor = destructor; | |
46 | list_add_tail(&tritem->list, &trans->item_list); | |
47 | } | |
48 | EXPORT_SYMBOL_GPL(switchdev_trans_item_enqueue); | |
49 | ||
50 | static struct switchdev_trans_item * | |
51 | __switchdev_trans_item_dequeue(struct switchdev_trans *trans) | |
52 | { | |
53 | struct switchdev_trans_item *tritem; | |
54 | ||
55 | if (list_empty(&trans->item_list)) | |
56 | return NULL; | |
57 | tritem = list_first_entry(&trans->item_list, | |
58 | struct switchdev_trans_item, list); | |
59 | list_del(&tritem->list); | |
60 | return tritem; | |
61 | } | |
62 | ||
63 | /** | |
64 | * switchdev_trans_item_dequeue - Dequeue data item from transaction queue | |
65 | * | |
66 | * @trans: transaction | |
67 | */ | |
68 | void *switchdev_trans_item_dequeue(struct switchdev_trans *trans) | |
69 | { | |
70 | struct switchdev_trans_item *tritem; | |
71 | ||
72 | tritem = __switchdev_trans_item_dequeue(trans); | |
73 | BUG_ON(!tritem); | |
74 | return tritem->data; | |
75 | } | |
76 | EXPORT_SYMBOL_GPL(switchdev_trans_item_dequeue); | |
77 | ||
78 | static void switchdev_trans_init(struct switchdev_trans *trans) | |
79 | { | |
80 | INIT_LIST_HEAD(&trans->item_list); | |
81 | } | |
82 | ||
83 | static void switchdev_trans_items_destroy(struct switchdev_trans *trans) | |
84 | { | |
85 | struct switchdev_trans_item *tritem; | |
86 | ||
87 | while ((tritem = __switchdev_trans_item_dequeue(trans))) | |
88 | tritem->destructor(tritem->data); | |
89 | } | |
90 | ||
91 | static void switchdev_trans_items_warn_destroy(struct net_device *dev, | |
92 | struct switchdev_trans *trans) | |
93 | { | |
94 | WARN(!list_empty(&trans->item_list), "%s: transaction item queue is not empty.\n", | |
95 | dev->name); | |
96 | switchdev_trans_items_destroy(trans); | |
97 | } | |
98 | ||
99 | static LIST_HEAD(deferred); | |
100 | static DEFINE_SPINLOCK(deferred_lock); | |
101 | ||
102 | typedef void switchdev_deferred_func_t(struct net_device *dev, | |
103 | const void *data); | |
104 | ||
105 | struct switchdev_deferred_item { | |
106 | struct list_head list; | |
107 | struct net_device *dev; | |
108 | switchdev_deferred_func_t *func; | |
109 | unsigned long data[0]; | |
110 | }; | |
111 | ||
112 | static struct switchdev_deferred_item *switchdev_deferred_dequeue(void) | |
113 | { | |
114 | struct switchdev_deferred_item *dfitem; | |
115 | ||
116 | spin_lock_bh(&deferred_lock); | |
117 | if (list_empty(&deferred)) { | |
118 | dfitem = NULL; | |
119 | goto unlock; | |
120 | } | |
121 | dfitem = list_first_entry(&deferred, | |
122 | struct switchdev_deferred_item, list); | |
123 | list_del(&dfitem->list); | |
124 | unlock: | |
125 | spin_unlock_bh(&deferred_lock); | |
126 | return dfitem; | |
127 | } | |
128 | ||
129 | /** | |
130 | * switchdev_deferred_process - Process ops in deferred queue | |
131 | * | |
132 | * Called to flush the ops currently queued in deferred ops queue. | |
133 | * rtnl_lock must be held. | |
134 | */ | |
135 | void switchdev_deferred_process(void) | |
136 | { | |
137 | struct switchdev_deferred_item *dfitem; | |
138 | ||
139 | ASSERT_RTNL(); | |
140 | ||
141 | while ((dfitem = switchdev_deferred_dequeue())) { | |
142 | dfitem->func(dfitem->dev, dfitem->data); | |
143 | dev_put(dfitem->dev); | |
144 | kfree(dfitem); | |
145 | } | |
146 | } | |
147 | EXPORT_SYMBOL_GPL(switchdev_deferred_process); | |
148 | ||
/* Work handler: flush the deferred queue while holding rtnl_lock. */
static void switchdev_deferred_process_work(struct work_struct *work)
{
	rtnl_lock();
	switchdev_deferred_process();
	rtnl_unlock();
}

/* Work item scheduled whenever a deferred operation is queued. */
static DECLARE_WORK(deferred_process_work, switchdev_deferred_process_work);
157 | ||
158 | static int switchdev_deferred_enqueue(struct net_device *dev, | |
159 | const void *data, size_t data_len, | |
160 | switchdev_deferred_func_t *func) | |
161 | { | |
162 | struct switchdev_deferred_item *dfitem; | |
163 | ||
164 | dfitem = kmalloc(sizeof(*dfitem) + data_len, GFP_ATOMIC); | |
165 | if (!dfitem) | |
166 | return -ENOMEM; | |
167 | dfitem->dev = dev; | |
168 | dfitem->func = func; | |
169 | memcpy(dfitem->data, data, data_len); | |
170 | dev_hold(dev); | |
171 | spin_lock_bh(&deferred_lock); | |
172 | list_add_tail(&dfitem->list, &deferred); | |
173 | spin_unlock_bh(&deferred_lock); | |
174 | schedule_work(&deferred_process_work); | |
175 | return 0; | |
176 | } | |
177 | ||
178 | /** | |
179 | * switchdev_port_attr_get - Get port attribute | |
180 | * | |
181 | * @dev: port device | |
182 | * @attr: attribute to get | |
183 | */ | |
184 | int switchdev_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) | |
185 | { | |
186 | const struct switchdev_ops *ops = dev->switchdev_ops; | |
187 | struct net_device *lower_dev; | |
188 | struct list_head *iter; | |
189 | struct switchdev_attr first = { | |
190 | .id = SWITCHDEV_ATTR_ID_UNDEFINED | |
191 | }; | |
192 | int err = -EOPNOTSUPP; | |
193 | ||
194 | if (ops && ops->switchdev_port_attr_get) | |
195 | return ops->switchdev_port_attr_get(dev, attr); | |
196 | ||
197 | if (attr->flags & SWITCHDEV_F_NO_RECURSE) | |
198 | return err; | |
199 | ||
200 | /* Switch device port(s) may be stacked under | |
201 | * bond/team/vlan dev, so recurse down to get attr on | |
202 | * each port. Return -ENODATA if attr values don't | |
203 | * compare across ports. | |
204 | */ | |
205 | ||
206 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
207 | err = switchdev_port_attr_get(lower_dev, attr); | |
208 | if (err) | |
209 | break; | |
210 | if (first.id == SWITCHDEV_ATTR_ID_UNDEFINED) | |
211 | first = *attr; | |
212 | else if (memcmp(&first, attr, sizeof(*attr))) | |
213 | return -ENODATA; | |
214 | } | |
215 | ||
216 | return err; | |
217 | } | |
218 | EXPORT_SYMBOL_GPL(switchdev_port_attr_get); | |
219 | ||
220 | static int __switchdev_port_attr_set(struct net_device *dev, | |
221 | const struct switchdev_attr *attr, | |
222 | struct switchdev_trans *trans) | |
223 | { | |
224 | const struct switchdev_ops *ops = dev->switchdev_ops; | |
225 | struct net_device *lower_dev; | |
226 | struct list_head *iter; | |
227 | int err = -EOPNOTSUPP; | |
228 | ||
229 | if (ops && ops->switchdev_port_attr_set) { | |
230 | err = ops->switchdev_port_attr_set(dev, attr, trans); | |
231 | goto done; | |
232 | } | |
233 | ||
234 | if (attr->flags & SWITCHDEV_F_NO_RECURSE) | |
235 | goto done; | |
236 | ||
237 | /* Switch device port(s) may be stacked under | |
238 | * bond/team/vlan dev, so recurse down to set attr on | |
239 | * each port. | |
240 | */ | |
241 | ||
242 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
243 | err = __switchdev_port_attr_set(lower_dev, attr, trans); | |
244 | if (err) | |
245 | break; | |
246 | } | |
247 | ||
248 | done: | |
249 | if (err == -EOPNOTSUPP && attr->flags & SWITCHDEV_F_SKIP_EOPNOTSUPP) | |
250 | err = 0; | |
251 | ||
252 | return err; | |
253 | } | |
254 | ||
255 | static int switchdev_port_attr_set_now(struct net_device *dev, | |
256 | const struct switchdev_attr *attr) | |
257 | { | |
258 | struct switchdev_trans trans; | |
259 | int err; | |
260 | ||
261 | switchdev_trans_init(&trans); | |
262 | ||
263 | /* Phase I: prepare for attr set. Driver/device should fail | |
264 | * here if there are going to be issues in the commit phase, | |
265 | * such as lack of resources or support. The driver/device | |
266 | * should reserve resources needed for the commit phase here, | |
267 | * but should not commit the attr. | |
268 | */ | |
269 | ||
270 | trans.ph_prepare = true; | |
271 | err = __switchdev_port_attr_set(dev, attr, &trans); | |
272 | if (err) { | |
273 | /* Prepare phase failed: abort the transaction. Any | |
274 | * resources reserved in the prepare phase are | |
275 | * released. | |
276 | */ | |
277 | ||
278 | if (err != -EOPNOTSUPP) | |
279 | switchdev_trans_items_destroy(&trans); | |
280 | ||
281 | return err; | |
282 | } | |
283 | ||
284 | /* Phase II: commit attr set. This cannot fail as a fault | |
285 | * of driver/device. If it does, it's a bug in the driver/device | |
286 | * because the driver said everythings was OK in phase I. | |
287 | */ | |
288 | ||
289 | trans.ph_prepare = false; | |
290 | err = __switchdev_port_attr_set(dev, attr, &trans); | |
291 | WARN(err, "%s: Commit of attribute (id=%d) failed.\n", | |
292 | dev->name, attr->id); | |
293 | switchdev_trans_items_warn_destroy(dev, &trans); | |
294 | ||
295 | return err; | |
296 | } | |
297 | ||
298 | static void switchdev_port_attr_set_deferred(struct net_device *dev, | |
299 | const void *data) | |
300 | { | |
301 | const struct switchdev_attr *attr = data; | |
302 | int err; | |
303 | ||
304 | err = switchdev_port_attr_set_now(dev, attr); | |
305 | if (err && err != -EOPNOTSUPP) | |
306 | netdev_err(dev, "failed (err=%d) to set attribute (id=%d)\n", | |
307 | err, attr->id); | |
308 | if (attr->complete) | |
309 | attr->complete(dev, err, attr->complete_priv); | |
310 | } | |
311 | ||
312 | static int switchdev_port_attr_set_defer(struct net_device *dev, | |
313 | const struct switchdev_attr *attr) | |
314 | { | |
315 | return switchdev_deferred_enqueue(dev, attr, sizeof(*attr), | |
316 | switchdev_port_attr_set_deferred); | |
317 | } | |
318 | ||
319 | /** | |
320 | * switchdev_port_attr_set - Set port attribute | |
321 | * | |
322 | * @dev: port device | |
323 | * @attr: attribute to set | |
324 | * | |
325 | * Use a 2-phase prepare-commit transaction model to ensure | |
326 | * system is not left in a partially updated state due to | |
327 | * failure from driver/device. | |
328 | * | |
329 | * rtnl_lock must be held and must not be in atomic section, | |
330 | * in case SWITCHDEV_F_DEFER flag is not set. | |
331 | */ | |
332 | int switchdev_port_attr_set(struct net_device *dev, | |
333 | const struct switchdev_attr *attr) | |
334 | { | |
335 | if (attr->flags & SWITCHDEV_F_DEFER) | |
336 | return switchdev_port_attr_set_defer(dev, attr); | |
337 | ASSERT_RTNL(); | |
338 | return switchdev_port_attr_set_now(dev, attr); | |
339 | } | |
340 | EXPORT_SYMBOL_GPL(switchdev_port_attr_set); | |
341 | ||
342 | static size_t switchdev_obj_size(const struct switchdev_obj *obj) | |
343 | { | |
344 | switch (obj->id) { | |
345 | case SWITCHDEV_OBJ_ID_PORT_VLAN: | |
346 | return sizeof(struct switchdev_obj_port_vlan); | |
347 | case SWITCHDEV_OBJ_ID_IPV4_FIB: | |
348 | return sizeof(struct switchdev_obj_ipv4_fib); | |
349 | case SWITCHDEV_OBJ_ID_PORT_FDB: | |
350 | return sizeof(struct switchdev_obj_port_fdb); | |
351 | case SWITCHDEV_OBJ_ID_PORT_MDB: | |
352 | return sizeof(struct switchdev_obj_port_mdb); | |
353 | default: | |
354 | BUG(); | |
355 | } | |
356 | return 0; | |
357 | } | |
358 | ||
359 | static int __switchdev_port_obj_add(struct net_device *dev, | |
360 | const struct switchdev_obj *obj, | |
361 | struct switchdev_trans *trans) | |
362 | { | |
363 | const struct switchdev_ops *ops = dev->switchdev_ops; | |
364 | struct net_device *lower_dev; | |
365 | struct list_head *iter; | |
366 | int err = -EOPNOTSUPP; | |
367 | ||
368 | if (ops && ops->switchdev_port_obj_add) | |
369 | return ops->switchdev_port_obj_add(dev, obj, trans); | |
370 | ||
371 | /* Switch device port(s) may be stacked under | |
372 | * bond/team/vlan dev, so recurse down to add object on | |
373 | * each port. | |
374 | */ | |
375 | ||
376 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
377 | err = __switchdev_port_obj_add(lower_dev, obj, trans); | |
378 | if (err) | |
379 | break; | |
380 | } | |
381 | ||
382 | return err; | |
383 | } | |
384 | ||
385 | static int switchdev_port_obj_add_now(struct net_device *dev, | |
386 | const struct switchdev_obj *obj) | |
387 | { | |
388 | struct switchdev_trans trans; | |
389 | int err; | |
390 | ||
391 | ASSERT_RTNL(); | |
392 | ||
393 | switchdev_trans_init(&trans); | |
394 | ||
395 | /* Phase I: prepare for obj add. Driver/device should fail | |
396 | * here if there are going to be issues in the commit phase, | |
397 | * such as lack of resources or support. The driver/device | |
398 | * should reserve resources needed for the commit phase here, | |
399 | * but should not commit the obj. | |
400 | */ | |
401 | ||
402 | trans.ph_prepare = true; | |
403 | err = __switchdev_port_obj_add(dev, obj, &trans); | |
404 | if (err) { | |
405 | /* Prepare phase failed: abort the transaction. Any | |
406 | * resources reserved in the prepare phase are | |
407 | * released. | |
408 | */ | |
409 | ||
410 | if (err != -EOPNOTSUPP) | |
411 | switchdev_trans_items_destroy(&trans); | |
412 | ||
413 | return err; | |
414 | } | |
415 | ||
416 | /* Phase II: commit obj add. This cannot fail as a fault | |
417 | * of driver/device. If it does, it's a bug in the driver/device | |
418 | * because the driver said everythings was OK in phase I. | |
419 | */ | |
420 | ||
421 | trans.ph_prepare = false; | |
422 | err = __switchdev_port_obj_add(dev, obj, &trans); | |
423 | WARN(err, "%s: Commit of object (id=%d) failed.\n", dev->name, obj->id); | |
424 | switchdev_trans_items_warn_destroy(dev, &trans); | |
425 | ||
426 | return err; | |
427 | } | |
428 | ||
429 | static void switchdev_port_obj_add_deferred(struct net_device *dev, | |
430 | const void *data) | |
431 | { | |
432 | const struct switchdev_obj *obj = data; | |
433 | int err; | |
434 | ||
435 | err = switchdev_port_obj_add_now(dev, obj); | |
436 | if (err && err != -EOPNOTSUPP) | |
437 | netdev_err(dev, "failed (err=%d) to add object (id=%d)\n", | |
438 | err, obj->id); | |
439 | if (obj->complete) | |
440 | obj->complete(dev, err, obj->complete_priv); | |
441 | } | |
442 | ||
443 | static int switchdev_port_obj_add_defer(struct net_device *dev, | |
444 | const struct switchdev_obj *obj) | |
445 | { | |
446 | return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), | |
447 | switchdev_port_obj_add_deferred); | |
448 | } | |
449 | ||
450 | /** | |
451 | * switchdev_port_obj_add - Add port object | |
452 | * | |
453 | * @dev: port device | |
454 | * @id: object ID | |
455 | * @obj: object to add | |
456 | * | |
457 | * Use a 2-phase prepare-commit transaction model to ensure | |
458 | * system is not left in a partially updated state due to | |
459 | * failure from driver/device. | |
460 | * | |
461 | * rtnl_lock must be held and must not be in atomic section, | |
462 | * in case SWITCHDEV_F_DEFER flag is not set. | |
463 | */ | |
464 | int switchdev_port_obj_add(struct net_device *dev, | |
465 | const struct switchdev_obj *obj) | |
466 | { | |
467 | if (obj->flags & SWITCHDEV_F_DEFER) | |
468 | return switchdev_port_obj_add_defer(dev, obj); | |
469 | ASSERT_RTNL(); | |
470 | return switchdev_port_obj_add_now(dev, obj); | |
471 | } | |
472 | EXPORT_SYMBOL_GPL(switchdev_port_obj_add); | |
473 | ||
474 | static int switchdev_port_obj_del_now(struct net_device *dev, | |
475 | const struct switchdev_obj *obj) | |
476 | { | |
477 | const struct switchdev_ops *ops = dev->switchdev_ops; | |
478 | struct net_device *lower_dev; | |
479 | struct list_head *iter; | |
480 | int err = -EOPNOTSUPP; | |
481 | ||
482 | if (ops && ops->switchdev_port_obj_del) | |
483 | return ops->switchdev_port_obj_del(dev, obj); | |
484 | ||
485 | /* Switch device port(s) may be stacked under | |
486 | * bond/team/vlan dev, so recurse down to delete object on | |
487 | * each port. | |
488 | */ | |
489 | ||
490 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
491 | err = switchdev_port_obj_del_now(lower_dev, obj); | |
492 | if (err) | |
493 | break; | |
494 | } | |
495 | ||
496 | return err; | |
497 | } | |
498 | ||
499 | static void switchdev_port_obj_del_deferred(struct net_device *dev, | |
500 | const void *data) | |
501 | { | |
502 | const struct switchdev_obj *obj = data; | |
503 | int err; | |
504 | ||
505 | err = switchdev_port_obj_del_now(dev, obj); | |
506 | if (err && err != -EOPNOTSUPP) | |
507 | netdev_err(dev, "failed (err=%d) to del object (id=%d)\n", | |
508 | err, obj->id); | |
509 | if (obj->complete) | |
510 | obj->complete(dev, err, obj->complete_priv); | |
511 | } | |
512 | ||
513 | static int switchdev_port_obj_del_defer(struct net_device *dev, | |
514 | const struct switchdev_obj *obj) | |
515 | { | |
516 | return switchdev_deferred_enqueue(dev, obj, switchdev_obj_size(obj), | |
517 | switchdev_port_obj_del_deferred); | |
518 | } | |
519 | ||
520 | /** | |
521 | * switchdev_port_obj_del - Delete port object | |
522 | * | |
523 | * @dev: port device | |
524 | * @id: object ID | |
525 | * @obj: object to delete | |
526 | * | |
527 | * rtnl_lock must be held and must not be in atomic section, | |
528 | * in case SWITCHDEV_F_DEFER flag is not set. | |
529 | */ | |
530 | int switchdev_port_obj_del(struct net_device *dev, | |
531 | const struct switchdev_obj *obj) | |
532 | { | |
533 | if (obj->flags & SWITCHDEV_F_DEFER) | |
534 | return switchdev_port_obj_del_defer(dev, obj); | |
535 | ASSERT_RTNL(); | |
536 | return switchdev_port_obj_del_now(dev, obj); | |
537 | } | |
538 | EXPORT_SYMBOL_GPL(switchdev_port_obj_del); | |
539 | ||
540 | /** | |
541 | * switchdev_port_obj_dump - Dump port objects | |
542 | * | |
543 | * @dev: port device | |
544 | * @id: object ID | |
545 | * @obj: object to dump | |
546 | * @cb: function to call with a filled object | |
547 | * | |
548 | * rtnl_lock must be held. | |
549 | */ | |
550 | int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, | |
551 | switchdev_obj_dump_cb_t *cb) | |
552 | { | |
553 | const struct switchdev_ops *ops = dev->switchdev_ops; | |
554 | struct net_device *lower_dev; | |
555 | struct list_head *iter; | |
556 | int err = -EOPNOTSUPP; | |
557 | ||
558 | ASSERT_RTNL(); | |
559 | ||
560 | if (ops && ops->switchdev_port_obj_dump) | |
561 | return ops->switchdev_port_obj_dump(dev, obj, cb); | |
562 | ||
563 | /* Switch device port(s) may be stacked under | |
564 | * bond/team/vlan dev, so recurse down to dump objects on | |
565 | * first port at bottom of stack. | |
566 | */ | |
567 | ||
568 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
569 | err = switchdev_port_obj_dump(lower_dev, obj, cb); | |
570 | break; | |
571 | } | |
572 | ||
573 | return err; | |
574 | } | |
575 | EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); | |
576 | ||
577 | static RAW_NOTIFIER_HEAD(switchdev_notif_chain); | |
578 | ||
579 | /** | |
580 | * register_switchdev_notifier - Register notifier | |
581 | * @nb: notifier_block | |
582 | * | |
583 | * Register switch device notifier. This should be used by code | |
584 | * which needs to monitor events happening in particular device. | |
585 | * Return values are same as for atomic_notifier_chain_register(). | |
586 | */ | |
587 | int register_switchdev_notifier(struct notifier_block *nb) | |
588 | { | |
589 | int err; | |
590 | ||
591 | rtnl_lock(); | |
592 | err = raw_notifier_chain_register(&switchdev_notif_chain, nb); | |
593 | rtnl_unlock(); | |
594 | return err; | |
595 | } | |
596 | EXPORT_SYMBOL_GPL(register_switchdev_notifier); | |
597 | ||
598 | /** | |
599 | * unregister_switchdev_notifier - Unregister notifier | |
600 | * @nb: notifier_block | |
601 | * | |
602 | * Unregister switch device notifier. | |
603 | * Return values are same as for atomic_notifier_chain_unregister(). | |
604 | */ | |
605 | int unregister_switchdev_notifier(struct notifier_block *nb) | |
606 | { | |
607 | int err; | |
608 | ||
609 | rtnl_lock(); | |
610 | err = raw_notifier_chain_unregister(&switchdev_notif_chain, nb); | |
611 | rtnl_unlock(); | |
612 | return err; | |
613 | } | |
614 | EXPORT_SYMBOL_GPL(unregister_switchdev_notifier); | |
615 | ||
616 | /** | |
617 | * call_switchdev_notifiers - Call notifiers | |
618 | * @val: value passed unmodified to notifier function | |
619 | * @dev: port device | |
620 | * @info: notifier information data | |
621 | * | |
622 | * Call all network notifier blocks. This should be called by driver | |
623 | * when it needs to propagate hardware event. | |
624 | * Return values are same as for atomic_notifier_call_chain(). | |
625 | * rtnl_lock must be held. | |
626 | */ | |
627 | int call_switchdev_notifiers(unsigned long val, struct net_device *dev, | |
628 | struct switchdev_notifier_info *info) | |
629 | { | |
630 | int err; | |
631 | ||
632 | ASSERT_RTNL(); | |
633 | ||
634 | info->dev = dev; | |
635 | err = raw_notifier_call_chain(&switchdev_notif_chain, val, info); | |
636 | return err; | |
637 | } | |
638 | EXPORT_SYMBOL_GPL(call_switchdev_notifiers); | |
639 | ||
640 | struct switchdev_vlan_dump { | |
641 | struct switchdev_obj_port_vlan vlan; | |
642 | struct sk_buff *skb; | |
643 | u32 filter_mask; | |
644 | u16 flags; | |
645 | u16 begin; | |
646 | u16 end; | |
647 | }; | |
648 | ||
649 | static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump) | |
650 | { | |
651 | struct bridge_vlan_info vinfo; | |
652 | ||
653 | vinfo.flags = dump->flags; | |
654 | ||
655 | if (dump->begin == 0 && dump->end == 0) { | |
656 | return 0; | |
657 | } else if (dump->begin == dump->end) { | |
658 | vinfo.vid = dump->begin; | |
659 | if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, | |
660 | sizeof(vinfo), &vinfo)) | |
661 | return -EMSGSIZE; | |
662 | } else { | |
663 | vinfo.vid = dump->begin; | |
664 | vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; | |
665 | if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, | |
666 | sizeof(vinfo), &vinfo)) | |
667 | return -EMSGSIZE; | |
668 | vinfo.vid = dump->end; | |
669 | vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; | |
670 | vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END; | |
671 | if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, | |
672 | sizeof(vinfo), &vinfo)) | |
673 | return -EMSGSIZE; | |
674 | } | |
675 | ||
676 | return 0; | |
677 | } | |
678 | ||
679 | static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj) | |
680 | { | |
681 | struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); | |
682 | struct switchdev_vlan_dump *dump = | |
683 | container_of(vlan, struct switchdev_vlan_dump, vlan); | |
684 | int err = 0; | |
685 | ||
686 | if (vlan->vid_begin > vlan->vid_end) | |
687 | return -EINVAL; | |
688 | ||
689 | if (dump->filter_mask & RTEXT_FILTER_BRVLAN) { | |
690 | dump->flags = vlan->flags; | |
691 | for (dump->begin = dump->end = vlan->vid_begin; | |
692 | dump->begin <= vlan->vid_end; | |
693 | dump->begin++, dump->end++) { | |
694 | err = switchdev_port_vlan_dump_put(dump); | |
695 | if (err) | |
696 | return err; | |
697 | } | |
698 | } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) { | |
699 | if (dump->begin > vlan->vid_begin && | |
700 | dump->begin >= vlan->vid_end) { | |
701 | if ((dump->begin - 1) == vlan->vid_end && | |
702 | dump->flags == vlan->flags) { | |
703 | /* prepend */ | |
704 | dump->begin = vlan->vid_begin; | |
705 | } else { | |
706 | err = switchdev_port_vlan_dump_put(dump); | |
707 | dump->flags = vlan->flags; | |
708 | dump->begin = vlan->vid_begin; | |
709 | dump->end = vlan->vid_end; | |
710 | } | |
711 | } else if (dump->end <= vlan->vid_begin && | |
712 | dump->end < vlan->vid_end) { | |
713 | if ((dump->end + 1) == vlan->vid_begin && | |
714 | dump->flags == vlan->flags) { | |
715 | /* append */ | |
716 | dump->end = vlan->vid_end; | |
717 | } else { | |
718 | err = switchdev_port_vlan_dump_put(dump); | |
719 | dump->flags = vlan->flags; | |
720 | dump->begin = vlan->vid_begin; | |
721 | dump->end = vlan->vid_end; | |
722 | } | |
723 | } else { | |
724 | err = -EINVAL; | |
725 | } | |
726 | } | |
727 | ||
728 | return err; | |
729 | } | |
730 | ||
731 | static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, | |
732 | u32 filter_mask) | |
733 | { | |
734 | struct switchdev_vlan_dump dump = { | |
735 | .vlan.obj.orig_dev = dev, | |
736 | .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, | |
737 | .skb = skb, | |
738 | .filter_mask = filter_mask, | |
739 | }; | |
740 | int err = 0; | |
741 | ||
742 | if ((filter_mask & RTEXT_FILTER_BRVLAN) || | |
743 | (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { | |
744 | err = switchdev_port_obj_dump(dev, &dump.vlan.obj, | |
745 | switchdev_port_vlan_dump_cb); | |
746 | if (err) | |
747 | goto err_out; | |
748 | if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) | |
749 | /* last one */ | |
750 | err = switchdev_port_vlan_dump_put(&dump); | |
751 | } | |
752 | ||
753 | err_out: | |
754 | return err == -EOPNOTSUPP ? 0 : err; | |
755 | } | |
756 | ||
757 | /** | |
758 | * switchdev_port_bridge_getlink - Get bridge port attributes | |
759 | * | |
760 | * @dev: port device | |
761 | * | |
762 | * Called for SELF on rtnl_bridge_getlink to get bridge port | |
763 | * attributes. | |
764 | */ | |
765 | int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, | |
766 | struct net_device *dev, u32 filter_mask, | |
767 | int nlflags) | |
768 | { | |
769 | struct switchdev_attr attr = { | |
770 | .orig_dev = dev, | |
771 | .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, | |
772 | }; | |
773 | u16 mode = BRIDGE_MODE_UNDEF; | |
774 | u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD; | |
775 | int err; | |
776 | ||
777 | err = switchdev_port_attr_get(dev, &attr); | |
778 | if (err && err != -EOPNOTSUPP) | |
779 | return err; | |
780 | ||
781 | return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, | |
782 | attr.u.brport_flags, mask, nlflags, | |
783 | filter_mask, switchdev_port_vlan_fill); | |
784 | } | |
785 | EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); | |
786 | ||
787 | static int switchdev_port_br_setflag(struct net_device *dev, | |
788 | struct nlattr *nlattr, | |
789 | unsigned long brport_flag) | |
790 | { | |
791 | struct switchdev_attr attr = { | |
792 | .orig_dev = dev, | |
793 | .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, | |
794 | }; | |
795 | u8 flag = nla_get_u8(nlattr); | |
796 | int err; | |
797 | ||
798 | err = switchdev_port_attr_get(dev, &attr); | |
799 | if (err) | |
800 | return err; | |
801 | ||
802 | if (flag) | |
803 | attr.u.brport_flags |= brport_flag; | |
804 | else | |
805 | attr.u.brport_flags &= ~brport_flag; | |
806 | ||
807 | return switchdev_port_attr_set(dev, &attr); | |
808 | } | |
809 | ||
810 | static const struct nla_policy | |
811 | switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = { | |
812 | [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, | |
813 | [IFLA_BRPORT_COST] = { .type = NLA_U32 }, | |
814 | [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, | |
815 | [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, | |
816 | [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, | |
817 | [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, | |
818 | [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 }, | |
819 | [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, | |
820 | [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 }, | |
821 | [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, | |
822 | }; | |
823 | ||
824 | static int switchdev_port_br_setlink_protinfo(struct net_device *dev, | |
825 | struct nlattr *protinfo) | |
826 | { | |
827 | struct nlattr *attr; | |
828 | int rem; | |
829 | int err; | |
830 | ||
831 | err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX, | |
832 | switchdev_port_bridge_policy); | |
833 | if (err) | |
834 | return err; | |
835 | ||
836 | nla_for_each_nested(attr, protinfo, rem) { | |
837 | switch (nla_type(attr)) { | |
838 | case IFLA_BRPORT_LEARNING: | |
839 | err = switchdev_port_br_setflag(dev, attr, | |
840 | BR_LEARNING); | |
841 | break; | |
842 | case IFLA_BRPORT_LEARNING_SYNC: | |
843 | err = switchdev_port_br_setflag(dev, attr, | |
844 | BR_LEARNING_SYNC); | |
845 | break; | |
846 | case IFLA_BRPORT_UNICAST_FLOOD: | |
847 | err = switchdev_port_br_setflag(dev, attr, BR_FLOOD); | |
848 | break; | |
849 | default: | |
850 | err = -EOPNOTSUPP; | |
851 | break; | |
852 | } | |
853 | if (err) | |
854 | return err; | |
855 | } | |
856 | ||
857 | return 0; | |
858 | } | |
859 | ||
860 | static int switchdev_port_br_afspec(struct net_device *dev, | |
861 | struct nlattr *afspec, | |
862 | int (*f)(struct net_device *dev, | |
863 | const struct switchdev_obj *obj)) | |
864 | { | |
865 | struct nlattr *attr; | |
866 | struct bridge_vlan_info *vinfo; | |
867 | struct switchdev_obj_port_vlan vlan = { | |
868 | .obj.orig_dev = dev, | |
869 | .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, | |
870 | }; | |
871 | int rem; | |
872 | int err; | |
873 | ||
874 | nla_for_each_nested(attr, afspec, rem) { | |
875 | if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) | |
876 | continue; | |
877 | if (nla_len(attr) != sizeof(struct bridge_vlan_info)) | |
878 | return -EINVAL; | |
879 | vinfo = nla_data(attr); | |
880 | if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) | |
881 | return -EINVAL; | |
882 | vlan.flags = vinfo->flags; | |
883 | if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { | |
884 | if (vlan.vid_begin) | |
885 | return -EINVAL; | |
886 | vlan.vid_begin = vinfo->vid; | |
887 | /* don't allow range of pvids */ | |
888 | if (vlan.flags & BRIDGE_VLAN_INFO_PVID) | |
889 | return -EINVAL; | |
890 | } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { | |
891 | if (!vlan.vid_begin) | |
892 | return -EINVAL; | |
893 | vlan.vid_end = vinfo->vid; | |
894 | if (vlan.vid_end <= vlan.vid_begin) | |
895 | return -EINVAL; | |
896 | err = f(dev, &vlan.obj); | |
897 | if (err) | |
898 | return err; | |
899 | vlan.vid_begin = 0; | |
900 | } else { | |
901 | if (vlan.vid_begin) | |
902 | return -EINVAL; | |
903 | vlan.vid_begin = vinfo->vid; | |
904 | vlan.vid_end = vinfo->vid; | |
905 | err = f(dev, &vlan.obj); | |
906 | if (err) | |
907 | return err; | |
908 | vlan.vid_begin = 0; | |
909 | } | |
910 | } | |
911 | ||
912 | return 0; | |
913 | } | |
914 | ||
915 | /** | |
916 | * switchdev_port_bridge_setlink - Set bridge port attributes | |
917 | * | |
918 | * @dev: port device | |
919 | * @nlh: netlink header | |
920 | * @flags: netlink flags | |
921 | * | |
922 | * Called for SELF on rtnl_bridge_setlink to set bridge port | |
923 | * attributes. | |
924 | */ | |
925 | int switchdev_port_bridge_setlink(struct net_device *dev, | |
926 | struct nlmsghdr *nlh, u16 flags) | |
927 | { | |
928 | struct nlattr *protinfo; | |
929 | struct nlattr *afspec; | |
930 | int err = 0; | |
931 | ||
932 | protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), | |
933 | IFLA_PROTINFO); | |
934 | if (protinfo) { | |
935 | err = switchdev_port_br_setlink_protinfo(dev, protinfo); | |
936 | if (err) | |
937 | return err; | |
938 | } | |
939 | ||
940 | afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), | |
941 | IFLA_AF_SPEC); | |
942 | if (afspec) | |
943 | err = switchdev_port_br_afspec(dev, afspec, | |
944 | switchdev_port_obj_add); | |
945 | ||
946 | return err; | |
947 | } | |
948 | EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink); | |
949 | ||
950 | /** | |
951 | * switchdev_port_bridge_dellink - Set bridge port attributes | |
952 | * | |
953 | * @dev: port device | |
954 | * @nlh: netlink header | |
955 | * @flags: netlink flags | |
956 | * | |
957 | * Called for SELF on rtnl_bridge_dellink to set bridge port | |
958 | * attributes. | |
959 | */ | |
960 | int switchdev_port_bridge_dellink(struct net_device *dev, | |
961 | struct nlmsghdr *nlh, u16 flags) | |
962 | { | |
963 | struct nlattr *afspec; | |
964 | ||
965 | afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), | |
966 | IFLA_AF_SPEC); | |
967 | if (afspec) | |
968 | return switchdev_port_br_afspec(dev, afspec, | |
969 | switchdev_port_obj_del); | |
970 | ||
971 | return 0; | |
972 | } | |
973 | EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); | |
974 | ||
975 | /** | |
976 | * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port | |
977 | * | |
978 | * @ndmsg: netlink hdr | |
979 | * @nlattr: netlink attributes | |
980 | * @dev: port device | |
981 | * @addr: MAC address to add | |
982 | * @vid: VLAN to add | |
983 | * | |
984 | * Add FDB entry to switch device. | |
985 | */ | |
986 | int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], | |
987 | struct net_device *dev, const unsigned char *addr, | |
988 | u16 vid, u16 nlm_flags) | |
989 | { | |
990 | struct switchdev_obj_port_fdb fdb = { | |
991 | .obj.orig_dev = dev, | |
992 | .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, | |
993 | .vid = vid, | |
994 | }; | |
995 | ||
996 | ether_addr_copy(fdb.addr, addr); | |
997 | return switchdev_port_obj_add(dev, &fdb.obj); | |
998 | } | |
999 | EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); | |
1000 | ||
1001 | /** | |
1002 | * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port | |
1003 | * | |
1004 | * @ndmsg: netlink hdr | |
1005 | * @nlattr: netlink attributes | |
1006 | * @dev: port device | |
1007 | * @addr: MAC address to delete | |
1008 | * @vid: VLAN to delete | |
1009 | * | |
1010 | * Delete FDB entry from switch device. | |
1011 | */ | |
1012 | int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], | |
1013 | struct net_device *dev, const unsigned char *addr, | |
1014 | u16 vid) | |
1015 | { | |
1016 | struct switchdev_obj_port_fdb fdb = { | |
1017 | .obj.orig_dev = dev, | |
1018 | .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, | |
1019 | .vid = vid, | |
1020 | }; | |
1021 | ||
1022 | ether_addr_copy(fdb.addr, addr); | |
1023 | return switchdev_port_obj_del(dev, &fdb.obj); | |
1024 | } | |
1025 | EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); | |
1026 | ||
1027 | struct switchdev_fdb_dump { | |
1028 | struct switchdev_obj_port_fdb fdb; | |
1029 | struct net_device *dev; | |
1030 | struct sk_buff *skb; | |
1031 | struct netlink_callback *cb; | |
1032 | int idx; | |
1033 | }; | |
1034 | ||
1035 | static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj) | |
1036 | { | |
1037 | struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj); | |
1038 | struct switchdev_fdb_dump *dump = | |
1039 | container_of(fdb, struct switchdev_fdb_dump, fdb); | |
1040 | u32 portid = NETLINK_CB(dump->cb->skb).portid; | |
1041 | u32 seq = dump->cb->nlh->nlmsg_seq; | |
1042 | struct nlmsghdr *nlh; | |
1043 | struct ndmsg *ndm; | |
1044 | ||
1045 | if (dump->idx < dump->cb->args[0]) | |
1046 | goto skip; | |
1047 | ||
1048 | nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, | |
1049 | sizeof(*ndm), NLM_F_MULTI); | |
1050 | if (!nlh) | |
1051 | return -EMSGSIZE; | |
1052 | ||
1053 | ndm = nlmsg_data(nlh); | |
1054 | ndm->ndm_family = AF_BRIDGE; | |
1055 | ndm->ndm_pad1 = 0; | |
1056 | ndm->ndm_pad2 = 0; | |
1057 | ndm->ndm_flags = NTF_SELF; | |
1058 | ndm->ndm_type = 0; | |
1059 | ndm->ndm_ifindex = dump->dev->ifindex; | |
1060 | ndm->ndm_state = fdb->ndm_state; | |
1061 | ||
1062 | if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr)) | |
1063 | goto nla_put_failure; | |
1064 | ||
1065 | if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid)) | |
1066 | goto nla_put_failure; | |
1067 | ||
1068 | nlmsg_end(dump->skb, nlh); | |
1069 | ||
1070 | skip: | |
1071 | dump->idx++; | |
1072 | return 0; | |
1073 | ||
1074 | nla_put_failure: | |
1075 | nlmsg_cancel(dump->skb, nlh); | |
1076 | return -EMSGSIZE; | |
1077 | } | |
1078 | ||
1079 | /** | |
1080 | * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries | |
1081 | * | |
1082 | * @skb: netlink skb | |
1083 | * @cb: netlink callback | |
1084 | * @dev: port device | |
1085 | * @filter_dev: filter device | |
1086 | * @idx: | |
1087 | * | |
1088 | * Dump FDB entries from switch device. | |
1089 | */ | |
1090 | int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, | |
1091 | struct net_device *dev, | |
1092 | struct net_device *filter_dev, int idx) | |
1093 | { | |
1094 | struct switchdev_fdb_dump dump = { | |
1095 | .fdb.obj.orig_dev = dev, | |
1096 | .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, | |
1097 | .dev = dev, | |
1098 | .skb = skb, | |
1099 | .cb = cb, | |
1100 | .idx = idx, | |
1101 | }; | |
1102 | int err; | |
1103 | ||
1104 | err = switchdev_port_obj_dump(dev, &dump.fdb.obj, | |
1105 | switchdev_port_fdb_dump_cb); | |
1106 | cb->args[1] = err; | |
1107 | return dump.idx; | |
1108 | } | |
1109 | EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); | |
1110 | ||
1111 | static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) | |
1112 | { | |
1113 | const struct switchdev_ops *ops = dev->switchdev_ops; | |
1114 | struct net_device *lower_dev; | |
1115 | struct net_device *port_dev; | |
1116 | struct list_head *iter; | |
1117 | ||
1118 | /* Recusively search down until we find a sw port dev. | |
1119 | * (A sw port dev supports switchdev_port_attr_get). | |
1120 | */ | |
1121 | ||
1122 | if (ops && ops->switchdev_port_attr_get) | |
1123 | return dev; | |
1124 | ||
1125 | netdev_for_each_lower_dev(dev, lower_dev, iter) { | |
1126 | port_dev = switchdev_get_lowest_dev(lower_dev); | |
1127 | if (port_dev) | |
1128 | return port_dev; | |
1129 | } | |
1130 | ||
1131 | return NULL; | |
1132 | } | |
1133 | ||
1134 | static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) | |
1135 | { | |
1136 | struct switchdev_attr attr = { | |
1137 | .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, | |
1138 | }; | |
1139 | struct switchdev_attr prev_attr; | |
1140 | struct net_device *dev = NULL; | |
1141 | int nhsel; | |
1142 | ||
1143 | ASSERT_RTNL(); | |
1144 | ||
1145 | /* For this route, all nexthop devs must be on the same switch. */ | |
1146 | ||
1147 | for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { | |
1148 | const struct fib_nh *nh = &fi->fib_nh[nhsel]; | |
1149 | ||
1150 | if (!nh->nh_dev) | |
1151 | return NULL; | |
1152 | ||
1153 | dev = switchdev_get_lowest_dev(nh->nh_dev); | |
1154 | if (!dev) | |
1155 | return NULL; | |
1156 | ||
1157 | attr.orig_dev = dev; | |
1158 | if (switchdev_port_attr_get(dev, &attr)) | |
1159 | return NULL; | |
1160 | ||
1161 | if (nhsel > 0 && | |
1162 | !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) | |
1163 | return NULL; | |
1164 | ||
1165 | prev_attr = attr; | |
1166 | } | |
1167 | ||
1168 | return dev; | |
1169 | } | |
1170 | ||
1171 | /** | |
1172 | * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry | |
1173 | * | |
1174 | * @dst: route's IPv4 destination address | |
1175 | * @dst_len: destination address length (prefix length) | |
1176 | * @fi: route FIB info structure | |
1177 | * @tos: route TOS | |
1178 | * @type: route type | |
1179 | * @nlflags: netlink flags passed in (NLM_F_*) | |
1180 | * @tb_id: route table ID | |
1181 | * | |
1182 | * Add/modify switch IPv4 route entry. | |
1183 | */ | |
1184 | int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, | |
1185 | u8 tos, u8 type, u32 nlflags, u32 tb_id) | |
1186 | { | |
1187 | struct switchdev_obj_ipv4_fib ipv4_fib = { | |
1188 | .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, | |
1189 | .dst = dst, | |
1190 | .dst_len = dst_len, | |
1191 | .tos = tos, | |
1192 | .type = type, | |
1193 | .nlflags = nlflags, | |
1194 | .tb_id = tb_id, | |
1195 | }; | |
1196 | struct net_device *dev; | |
1197 | int err = 0; | |
1198 | ||
1199 | memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); | |
1200 | ||
1201 | /* Don't offload route if using custom ip rules or if | |
1202 | * IPv4 FIB offloading has been disabled completely. | |
1203 | */ | |
1204 | ||
1205 | #ifdef CONFIG_IP_MULTIPLE_TABLES | |
1206 | if (fi->fib_net->ipv4.fib_has_custom_rules) | |
1207 | return 0; | |
1208 | #endif | |
1209 | ||
1210 | if (fi->fib_net->ipv4.fib_offload_disabled) | |
1211 | return 0; | |
1212 | ||
1213 | dev = switchdev_get_dev_by_nhs(fi); | |
1214 | if (!dev) | |
1215 | return 0; | |
1216 | ||
1217 | ipv4_fib.obj.orig_dev = dev; | |
1218 | err = switchdev_port_obj_add(dev, &ipv4_fib.obj); | |
1219 | if (!err) | |
1220 | fi->fib_flags |= RTNH_F_OFFLOAD; | |
1221 | ||
1222 | return err == -EOPNOTSUPP ? 0 : err; | |
1223 | } | |
1224 | EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); | |
1225 | ||
1226 | /** | |
1227 | * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch | |
1228 | * | |
1229 | * @dst: route's IPv4 destination address | |
1230 | * @dst_len: destination address length (prefix length) | |
1231 | * @fi: route FIB info structure | |
1232 | * @tos: route TOS | |
1233 | * @type: route type | |
1234 | * @tb_id: route table ID | |
1235 | * | |
1236 | * Delete IPv4 route entry from switch device. | |
1237 | */ | |
1238 | int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, | |
1239 | u8 tos, u8 type, u32 tb_id) | |
1240 | { | |
1241 | struct switchdev_obj_ipv4_fib ipv4_fib = { | |
1242 | .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, | |
1243 | .dst = dst, | |
1244 | .dst_len = dst_len, | |
1245 | .tos = tos, | |
1246 | .type = type, | |
1247 | .nlflags = 0, | |
1248 | .tb_id = tb_id, | |
1249 | }; | |
1250 | struct net_device *dev; | |
1251 | int err = 0; | |
1252 | ||
1253 | memcpy(&ipv4_fib.fi, fi, sizeof(ipv4_fib.fi)); | |
1254 | ||
1255 | if (!(fi->fib_flags & RTNH_F_OFFLOAD)) | |
1256 | return 0; | |
1257 | ||
1258 | dev = switchdev_get_dev_by_nhs(fi); | |
1259 | if (!dev) | |
1260 | return 0; | |
1261 | ||
1262 | ipv4_fib.obj.orig_dev = dev; | |
1263 | err = switchdev_port_obj_del(dev, &ipv4_fib.obj); | |
1264 | if (!err) | |
1265 | fi->fib_flags &= ~RTNH_F_OFFLOAD; | |
1266 | ||
1267 | return err == -EOPNOTSUPP ? 0 : err; | |
1268 | } | |
1269 | EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); | |
1270 | ||
1271 | /** | |
1272 | * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation | |
1273 | * | |
1274 | * @fi: route FIB info structure | |
1275 | */ | |
1276 | void switchdev_fib_ipv4_abort(struct fib_info *fi) | |
1277 | { | |
1278 | /* There was a problem installing this route to the offload | |
1279 | * device. For now, until we come up with more refined | |
1280 | * policy handling, abruptly end IPv4 fib offloading for | |
1281 | * for entire net by flushing offload device(s) of all | |
1282 | * IPv4 routes, and mark IPv4 fib offloading broken from | |
1283 | * this point forward. | |
1284 | */ | |
1285 | ||
1286 | fib_flush_external(fi->fib_net); | |
1287 | fi->fib_net->ipv4.fib_offload_disabled = true; | |
1288 | } | |
1289 | EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); | |
1290 | ||
1291 | static bool switchdev_port_same_parent_id(struct net_device *a, | |
1292 | struct net_device *b) | |
1293 | { | |
1294 | struct switchdev_attr a_attr = { | |
1295 | .orig_dev = a, | |
1296 | .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, | |
1297 | .flags = SWITCHDEV_F_NO_RECURSE, | |
1298 | }; | |
1299 | struct switchdev_attr b_attr = { | |
1300 | .orig_dev = b, | |
1301 | .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, | |
1302 | .flags = SWITCHDEV_F_NO_RECURSE, | |
1303 | }; | |
1304 | ||
1305 | if (switchdev_port_attr_get(a, &a_attr) || | |
1306 | switchdev_port_attr_get(b, &b_attr)) | |
1307 | return false; | |
1308 | ||
1309 | return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); | |
1310 | } | |
1311 | ||
1312 | static u32 switchdev_port_fwd_mark_get(struct net_device *dev, | |
1313 | struct net_device *group_dev) | |
1314 | { | |
1315 | struct net_device *lower_dev; | |
1316 | struct list_head *iter; | |
1317 | ||
1318 | netdev_for_each_lower_dev(group_dev, lower_dev, iter) { | |
1319 | if (lower_dev == dev) | |
1320 | continue; | |
1321 | if (switchdev_port_same_parent_id(dev, lower_dev)) | |
1322 | return lower_dev->offload_fwd_mark; | |
1323 | return switchdev_port_fwd_mark_get(dev, lower_dev); | |
1324 | } | |
1325 | ||
1326 | return dev->ifindex; | |
1327 | } | |
1328 | ||
1329 | static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, | |
1330 | u32 old_mark, u32 *reset_mark) | |
1331 | { | |
1332 | struct net_device *lower_dev; | |
1333 | struct list_head *iter; | |
1334 | ||
1335 | netdev_for_each_lower_dev(group_dev, lower_dev, iter) { | |
1336 | if (lower_dev->offload_fwd_mark == old_mark) { | |
1337 | if (!*reset_mark) | |
1338 | *reset_mark = lower_dev->ifindex; | |
1339 | lower_dev->offload_fwd_mark = *reset_mark; | |
1340 | } | |
1341 | switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); | |
1342 | } | |
1343 | } | |
1344 | ||
1345 | /** | |
1346 | * switchdev_port_fwd_mark_set - Set port offload forwarding mark | |
1347 | * | |
1348 | * @dev: port device | |
1349 | * @group_dev: containing device | |
1350 | * @joining: true if dev is joining group; false if leaving group | |
1351 | * | |
1352 | * An ungrouped port's offload mark is just its ifindex. A grouped | |
1353 | * port's (member of a bridge, for example) offload mark is the ifindex | |
1354 | * of one of the ports in the group with the same parent (switch) ID. | |
1355 | * Ports on the same device in the same group will have the same mark. | |
1356 | * | |
1357 | * Example: | |
1358 | * | |
1359 | * br0 ifindex=9 | |
1360 | * sw1p1 ifindex=2 mark=2 | |
1361 | * sw1p2 ifindex=3 mark=2 | |
1362 | * sw2p1 ifindex=4 mark=5 | |
1363 | * sw2p2 ifindex=5 mark=5 | |
1364 | * | |
1365 | * If sw2p2 leaves the bridge, we'll have: | |
1366 | * | |
1367 | * br0 ifindex=9 | |
1368 | * sw1p1 ifindex=2 mark=2 | |
1369 | * sw1p2 ifindex=3 mark=2 | |
1370 | * sw2p1 ifindex=4 mark=4 | |
1371 | * sw2p2 ifindex=5 mark=5 | |
1372 | */ | |
1373 | void switchdev_port_fwd_mark_set(struct net_device *dev, | |
1374 | struct net_device *group_dev, | |
1375 | bool joining) | |
1376 | { | |
1377 | u32 mark = dev->ifindex; | |
1378 | u32 reset_mark = 0; | |
1379 | ||
1380 | if (group_dev) { | |
1381 | ASSERT_RTNL(); | |
1382 | if (joining) | |
1383 | mark = switchdev_port_fwd_mark_get(dev, group_dev); | |
1384 | else if (dev->offload_fwd_mark == mark) | |
1385 | /* Ohoh, this port was the mark reference port, | |
1386 | * but it's leaving the group, so reset the | |
1387 | * mark for the remaining ports in the group. | |
1388 | */ | |
1389 | switchdev_port_fwd_mark_reset(group_dev, mark, | |
1390 | &reset_mark); | |
1391 | } | |
1392 | ||
1393 | dev->offload_fwd_mark = mark; | |
1394 | } | |
1395 | EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set); |