]> git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_nhg.c
Merge pull request #12798 from donaldsharp/rib_match_multicast
[mirror_frr.git] / zebra / zebra_nhg.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* Zebra Nexthop Group Code.
3 * Copyright (C) 2019 Cumulus Networks, Inc.
4 * Donald Sharp
5 * Stephen Worley
6 */
7 #include <zebra.h>
8
9 #include "lib/nexthop.h"
10 #include "lib/nexthop_group_private.h"
11 #include "lib/routemap.h"
12 #include "lib/mpls.h"
13 #include "lib/jhash.h"
14 #include "lib/debug.h"
15 #include "lib/lib_errors.h"
16
17 #include "zebra/connected.h"
18 #include "zebra/debug.h"
19 #include "zebra/zebra_router.h"
20 #include "zebra/zebra_nhg_private.h"
21 #include "zebra/zebra_rnh.h"
22 #include "zebra/zebra_routemap.h"
23 #include "zebra/zebra_srte.h"
24 #include "zebra/zserv.h"
25 #include "zebra/rt.h"
26 #include "zebra_errors.h"
27 #include "zebra_dplane.h"
28 #include "zebra/interface.h"
29 #include "zebra/zapi_msg.h"
30 #include "zebra/rib.h"
31 #include "zebra/zebra_vxlan.h"
32
33 DEFINE_MTYPE_STATIC(ZEBRA, NHG, "Nexthop Group Entry");
34 DEFINE_MTYPE_STATIC(ZEBRA, NHG_CONNECTED, "Nexthop Group Connected");
35 DEFINE_MTYPE_STATIC(ZEBRA, NHG_CTX, "Nexthop Group Context");
36
37 /* Map backup nexthop indices between two nhes */
38 struct backup_nh_map_s {
39 int map_count;
40
41 struct {
42 uint8_t orig_idx;
43 uint8_t new_idx;
44 } map[MULTIPATH_NUM];
45 };
46
/*
 * Last NHG id handed out by nhg_get_next_id(); kept in sync with the
 * kernel.
 */
uint32_t id_counter;

/* Behaviour knobs, controlled through the UI */
static bool g_nexthops_enabled = true;
static bool proto_nexthops_only;
static bool use_recursive_backups = true;
54
55 static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi,
56 int type, bool from_dplane);
57 static void depends_add(struct nhg_connected_tree_head *head,
58 struct nhg_hash_entry *depend);
59 static struct nhg_hash_entry *
60 depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh,
61 afi_t afi, int type, bool from_dplane);
62 static struct nhg_hash_entry *
63 depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id);
64 static void depends_decrement_free(struct nhg_connected_tree_head *head);
65
66 static struct nhg_backup_info *
67 nhg_backup_copy(const struct nhg_backup_info *orig);
68
69 /* Helper function for getting the next allocatable ID */
70 static uint32_t nhg_get_next_id(void)
71 {
72 while (1) {
73 id_counter++;
74
75 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
76 zlog_debug("%s: ID %u checking", __func__, id_counter);
77
78 if (id_counter == ZEBRA_NHG_PROTO_LOWER) {
79 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
80 zlog_debug("%s: ID counter wrapped", __func__);
81
82 id_counter = 0;
83 continue;
84 }
85
86 if (zebra_nhg_lookup_id(id_counter)) {
87 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
88 zlog_debug("%s: ID already exists", __func__);
89
90 continue;
91 }
92
93 break;
94 }
95
96 return id_counter;
97 }
98
99 static void nhg_connected_free(struct nhg_connected *dep)
100 {
101 XFREE(MTYPE_NHG_CONNECTED, dep);
102 }
103
104 static struct nhg_connected *nhg_connected_new(struct nhg_hash_entry *nhe)
105 {
106 struct nhg_connected *new = NULL;
107
108 new = XCALLOC(MTYPE_NHG_CONNECTED, sizeof(struct nhg_connected));
109 new->nhe = nhe;
110
111 return new;
112 }
113
114 void nhg_connected_tree_free(struct nhg_connected_tree_head *head)
115 {
116 struct nhg_connected *rb_node_dep = NULL;
117
118 if (!nhg_connected_tree_is_empty(head)) {
119 frr_each_safe(nhg_connected_tree, head, rb_node_dep) {
120 nhg_connected_tree_del(head, rb_node_dep);
121 nhg_connected_free(rb_node_dep);
122 }
123 }
124 }
125
/* True when the tree holds no nodes */
bool nhg_connected_tree_is_empty(const struct nhg_connected_tree_head *head)
{
	return nhg_connected_tree_count(head) == 0;
}
130
/* Return the first node of the tree, as reported by the _first() accessor */
struct nhg_connected *
nhg_connected_tree_root(struct nhg_connected_tree_head *head)
{
	return nhg_connected_tree_first(head);
}
136
137 struct nhg_hash_entry *
138 nhg_connected_tree_del_nhe(struct nhg_connected_tree_head *head,
139 struct nhg_hash_entry *depend)
140 {
141 struct nhg_connected lookup = {};
142 struct nhg_connected *remove = NULL;
143 struct nhg_hash_entry *removed_nhe;
144
145 lookup.nhe = depend;
146
147 /* Lookup to find the element, then remove it */
148 remove = nhg_connected_tree_find(head, &lookup);
149 if (remove)
150 /* Re-returning here just in case this API changes..
151 * the _del list api's are a bit undefined at the moment.
152 *
153 * So hopefully returning here will make it fail if the api
154 * changes to something different than currently expected.
155 */
156 remove = nhg_connected_tree_del(head, remove);
157
158 /* If the entry was sucessfully removed, free the 'connected` struct */
159 if (remove) {
160 removed_nhe = remove->nhe;
161 nhg_connected_free(remove);
162 return removed_nhe;
163 }
164
165 return NULL;
166 }
167
168 /* Assuming UNIQUE RB tree. If this changes, assumptions here about
169 * insertion need to change.
170 */
171 struct nhg_hash_entry *
172 nhg_connected_tree_add_nhe(struct nhg_connected_tree_head *head,
173 struct nhg_hash_entry *depend)
174 {
175 struct nhg_connected *new = NULL;
176
177 new = nhg_connected_new(depend);
178
179 /* On success, NULL will be returned from the
180 * RB code.
181 */
182 if (new && (nhg_connected_tree_add(head, new) == NULL))
183 return NULL;
184
185 /* If it wasn't successful, it must be a duplicate. We enforce the
186 * unique property for the `nhg_connected` tree.
187 */
188 nhg_connected_free(new);
189
190 return depend;
191 }
192
193 static void
194 nhg_connected_tree_decrement_ref(struct nhg_connected_tree_head *head)
195 {
196 struct nhg_connected *rb_node_dep = NULL;
197
198 frr_each_safe(nhg_connected_tree, head, rb_node_dep) {
199 zebra_nhg_decrement_ref(rb_node_dep->nhe);
200 }
201 }
202
203 static void
204 nhg_connected_tree_increment_ref(struct nhg_connected_tree_head *head)
205 {
206 struct nhg_connected *rb_node_dep = NULL;
207
208 frr_each(nhg_connected_tree, head, rb_node_dep) {
209 zebra_nhg_increment_ref(rb_node_dep->nhe);
210 }
211 }
212
213 struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe)
214 {
215 if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_RECURSIVE)
216 && !zebra_nhg_depends_is_empty(nhe)) {
217 nhe = nhg_connected_tree_root(&nhe->nhg_depends)->nhe;
218 return zebra_nhg_resolve(nhe);
219 }
220
221 return nhe;
222 }
223
224 unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe)
225 {
226 return nhg_connected_tree_count(&nhe->nhg_depends);
227 }
228
229 bool zebra_nhg_depends_is_empty(const struct nhg_hash_entry *nhe)
230 {
231 return nhg_connected_tree_is_empty(&nhe->nhg_depends);
232 }
233
234 static void zebra_nhg_depends_del(struct nhg_hash_entry *from,
235 struct nhg_hash_entry *depend)
236 {
237 nhg_connected_tree_del_nhe(&from->nhg_depends, depend);
238 }
239
240 static void zebra_nhg_depends_init(struct nhg_hash_entry *nhe)
241 {
242 nhg_connected_tree_init(&nhe->nhg_depends);
243 }
244
245 unsigned int zebra_nhg_dependents_count(const struct nhg_hash_entry *nhe)
246 {
247 return nhg_connected_tree_count(&nhe->nhg_dependents);
248 }
249
250
251 bool zebra_nhg_dependents_is_empty(const struct nhg_hash_entry *nhe)
252 {
253 return nhg_connected_tree_is_empty(&nhe->nhg_dependents);
254 }
255
256 static void zebra_nhg_dependents_del(struct nhg_hash_entry *from,
257 struct nhg_hash_entry *dependent)
258 {
259 nhg_connected_tree_del_nhe(&from->nhg_dependents, dependent);
260 }
261
262 static void zebra_nhg_dependents_add(struct nhg_hash_entry *to,
263 struct nhg_hash_entry *dependent)
264 {
265 nhg_connected_tree_add_nhe(&to->nhg_dependents, dependent);
266 }
267
268 static void zebra_nhg_dependents_init(struct nhg_hash_entry *nhe)
269 {
270 nhg_connected_tree_init(&nhe->nhg_dependents);
271 }
272
273 /* Release this nhe from anything depending on it */
274 static void zebra_nhg_dependents_release(struct nhg_hash_entry *nhe)
275 {
276 struct nhg_connected *rb_node_dep = NULL;
277
278 frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) {
279 zebra_nhg_depends_del(rb_node_dep->nhe, nhe);
280 /* recheck validity of the dependent */
281 zebra_nhg_check_valid(rb_node_dep->nhe);
282 }
283 }
284
285 /* Release this nhe from anything that it depends on */
286 static void zebra_nhg_depends_release(struct nhg_hash_entry *nhe)
287 {
288 if (!zebra_nhg_depends_is_empty(nhe)) {
289 struct nhg_connected *rb_node_dep = NULL;
290
291 frr_each_safe(nhg_connected_tree, &nhe->nhg_depends,
292 rb_node_dep) {
293 zebra_nhg_dependents_del(rb_node_dep->nhe, nhe);
294 }
295 }
296 }
297
298
299 struct nhg_hash_entry *zebra_nhg_lookup_id(uint32_t id)
300 {
301 struct nhg_hash_entry lookup = {};
302
303 lookup.id = id;
304 return hash_lookup(zrouter.nhgs_id, &lookup);
305 }
306
307 static int zebra_nhg_insert_id(struct nhg_hash_entry *nhe)
308 {
309 if (hash_lookup(zrouter.nhgs_id, nhe)) {
310 flog_err(
311 EC_ZEBRA_NHG_TABLE_INSERT_FAILED,
312 "Failed inserting NHG %pNG into the ID hash table, entry already exists",
313 nhe);
314 return -1;
315 }
316
317 (void)hash_get(zrouter.nhgs_id, nhe, hash_alloc_intern);
318
319 return 0;
320 }
321
322 static void zebra_nhg_set_if(struct nhg_hash_entry *nhe, struct interface *ifp)
323 {
324 nhe->ifp = ifp;
325 if_nhg_dependents_add(ifp, nhe);
326 }
327
328 static void
329 zebra_nhg_connect_depends(struct nhg_hash_entry *nhe,
330 struct nhg_connected_tree_head *nhg_depends)
331 {
332 struct nhg_connected *rb_node_dep = NULL;
333
334 /* This has been allocated higher above in the stack. Could probably
335 * re-allocate and free the old stuff but just using the same memory
336 * for now. Otherwise, their might be a time trade-off for repeated
337 * alloc/frees as startup.
338 */
339 nhe->nhg_depends = *nhg_depends;
340
341 /* Attach backpointer to anything that it depends on */
342 zebra_nhg_dependents_init(nhe);
343 if (!zebra_nhg_depends_is_empty(nhe)) {
344 frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
345 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
346 zlog_debug("%s: nhe %p (%pNG), dep %p (%pNG)",
347 __func__, nhe, nhe, rb_node_dep->nhe,
348 rb_node_dep->nhe);
349
350 zebra_nhg_dependents_add(rb_node_dep->nhe, nhe);
351 }
352 }
353 }
354
355 /* Init an nhe, for use in a hash lookup for example */
356 void zebra_nhe_init(struct nhg_hash_entry *nhe, afi_t afi,
357 const struct nexthop *nh)
358 {
359 memset(nhe, 0, sizeof(struct nhg_hash_entry));
360 nhe->vrf_id = VRF_DEFAULT;
361 nhe->type = ZEBRA_ROUTE_NHG;
362 nhe->afi = AFI_UNSPEC;
363
364 /* There are some special rules that apply to groups representing
365 * a single nexthop.
366 */
367 if (nh && (nh->next == NULL)) {
368 switch (nh->type) {
369 case NEXTHOP_TYPE_IFINDEX:
370 case NEXTHOP_TYPE_BLACKHOLE:
371 /*
372 * This switch case handles setting the afi different
373 * for ipv4/v6 routes. Ifindex/blackhole nexthop
374 * objects cannot be ambiguous, they must be Address
375 * Family specific. If we get here, we will either use
376 * the AF of the route, or the one we got passed from
377 * here from the kernel.
378 */
379 nhe->afi = afi;
380 break;
381 case NEXTHOP_TYPE_IPV4_IFINDEX:
382 case NEXTHOP_TYPE_IPV4:
383 nhe->afi = AFI_IP;
384 break;
385 case NEXTHOP_TYPE_IPV6_IFINDEX:
386 case NEXTHOP_TYPE_IPV6:
387 nhe->afi = AFI_IP6;
388 break;
389 }
390 }
391 }
392
393 struct nhg_hash_entry *zebra_nhg_alloc(void)
394 {
395 struct nhg_hash_entry *nhe;
396
397 nhe = XCALLOC(MTYPE_NHG, sizeof(struct nhg_hash_entry));
398
399 return nhe;
400 }
401
402 /*
403 * Allocate new nhe and make shallow copy of 'orig'; no
404 * recursive info is copied.
405 */
406 struct nhg_hash_entry *zebra_nhe_copy(const struct nhg_hash_entry *orig,
407 uint32_t id)
408 {
409 struct nhg_hash_entry *nhe;
410
411 nhe = zebra_nhg_alloc();
412
413 nhe->id = id;
414
415 nexthop_group_copy(&(nhe->nhg), &(orig->nhg));
416
417 nhe->vrf_id = orig->vrf_id;
418 nhe->afi = orig->afi;
419 nhe->type = orig->type ? orig->type : ZEBRA_ROUTE_NHG;
420 nhe->refcnt = 0;
421 nhe->dplane_ref = zebra_router_get_next_sequence();
422
423 /* Copy backup info also, if present */
424 if (orig->backup_info)
425 nhe->backup_info = nhg_backup_copy(orig->backup_info);
426
427 return nhe;
428 }
429
430 /* Allocation via hash handler */
431 static void *zebra_nhg_hash_alloc(void *arg)
432 {
433 struct nhg_hash_entry *nhe = NULL;
434 struct nhg_hash_entry *copy = arg;
435
436 nhe = zebra_nhe_copy(copy, copy->id);
437
438 /* Mark duplicate nexthops in a group at creation time. */
439 nexthop_group_mark_duplicates(&(nhe->nhg));
440
441 /*
442 * Add the ifp now if it's not a group or recursive and has ifindex.
443 *
444 * A proto-owned ID is always a group.
445 */
446 if (!PROTO_OWNED(nhe) && nhe->nhg.nexthop && !nhe->nhg.nexthop->next
447 && !nhe->nhg.nexthop->resolved && nhe->nhg.nexthop->ifindex) {
448 struct interface *ifp = NULL;
449
450 ifp = if_lookup_by_index(nhe->nhg.nexthop->ifindex,
451 nhe->nhg.nexthop->vrf_id);
452 if (ifp)
453 zebra_nhg_set_if(nhe, ifp);
454 else {
455 if (IS_ZEBRA_DEBUG_NHG)
456 zlog_debug(
457 "Failed to lookup an interface with ifindex=%d in vrf=%u for NHE %pNG",
458 nhe->nhg.nexthop->ifindex,
459 nhe->nhg.nexthop->vrf_id, nhe);
460 }
461 }
462
463 return nhe;
464 }
465
466 uint32_t zebra_nhg_hash_key(const void *arg)
467 {
468 const struct nhg_hash_entry *nhe = arg;
469 uint32_t key = 0x5a351234;
470 uint32_t primary = 0;
471 uint32_t backup = 0;
472
473 primary = nexthop_group_hash(&(nhe->nhg));
474 if (nhe->backup_info)
475 backup = nexthop_group_hash(&(nhe->backup_info->nhe->nhg));
476
477 key = jhash_3words(primary, backup, nhe->type, key);
478
479 key = jhash_2words(nhe->vrf_id, nhe->afi, key);
480
481 return key;
482 }
483
484 uint32_t zebra_nhg_id_key(const void *arg)
485 {
486 const struct nhg_hash_entry *nhe = arg;
487
488 return nhe->id;
489 }
490
491 /* Helper with common nhg/nhe nexthop comparison logic */
492 static bool nhg_compare_nexthops(const struct nexthop *nh1,
493 const struct nexthop *nh2)
494 {
495 assert(nh1 != NULL && nh2 != NULL);
496
497 /*
498 * We have to check the active flag of each individual one,
499 * not just the overall active_num. This solves the special case
500 * issue of a route with a nexthop group with one nexthop
501 * resolving to itself and thus marking it inactive. If we
502 * have two different routes each wanting to mark a different
503 * nexthop inactive, they need to hash to two different groups.
504 *
505 * If we just hashed on num_active, they would hash the same
506 * which is incorrect.
507 *
508 * ex)
509 * 1.1.1.0/24
510 * -> 1.1.1.1 dummy1 (inactive)
511 * -> 1.1.2.1 dummy2
512 *
513 * 1.1.2.0/24
514 * -> 1.1.1.1 dummy1
515 * -> 1.1.2.1 dummy2 (inactive)
516 *
517 * Without checking each individual one, they would hash to
518 * the same group and both have 1.1.1.1 dummy1 marked inactive.
519 *
520 */
521 if (CHECK_FLAG(nh1->flags, NEXTHOP_FLAG_ACTIVE)
522 != CHECK_FLAG(nh2->flags, NEXTHOP_FLAG_ACTIVE))
523 return false;
524
525 if (!nexthop_same(nh1, nh2))
526 return false;
527
528 return true;
529 }
530
531 bool zebra_nhg_hash_equal(const void *arg1, const void *arg2)
532 {
533 const struct nhg_hash_entry *nhe1 = arg1;
534 const struct nhg_hash_entry *nhe2 = arg2;
535 struct nexthop *nexthop1;
536 struct nexthop *nexthop2;
537
538 /* No matter what if they equal IDs, assume equal */
539 if (nhe1->id && nhe2->id && (nhe1->id == nhe2->id))
540 return true;
541
542 if (nhe1->type != nhe2->type)
543 return false;
544
545 if (nhe1->vrf_id != nhe2->vrf_id)
546 return false;
547
548 if (nhe1->afi != nhe2->afi)
549 return false;
550
551 if (nhe1->nhg.nhgr.buckets != nhe2->nhg.nhgr.buckets)
552 return false;
553
554 if (nhe1->nhg.nhgr.idle_timer != nhe2->nhg.nhgr.idle_timer)
555 return false;
556
557 if (nhe1->nhg.nhgr.unbalanced_timer != nhe2->nhg.nhgr.unbalanced_timer)
558 return false;
559
560 /* Nexthops should be in-order, so we simply compare them in-place */
561 for (nexthop1 = nhe1->nhg.nexthop, nexthop2 = nhe2->nhg.nexthop;
562 nexthop1 && nexthop2;
563 nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
564
565 if (!nhg_compare_nexthops(nexthop1, nexthop2))
566 return false;
567 }
568
569 /* Check for unequal list lengths */
570 if (nexthop1 || nexthop2)
571 return false;
572
573 /* If there's no backup info, comparison is done. */
574 if ((nhe1->backup_info == NULL) && (nhe2->backup_info == NULL))
575 return true;
576
577 /* Compare backup info also - test the easy things first */
578 if (nhe1->backup_info && (nhe2->backup_info == NULL))
579 return false;
580 if (nhe2->backup_info && (nhe1->backup_info == NULL))
581 return false;
582
583 /* Compare number of backups before actually comparing any */
584 for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop,
585 nexthop2 = nhe2->backup_info->nhe->nhg.nexthop;
586 nexthop1 && nexthop2;
587 nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
588 ;
589 }
590
591 /* Did we find the end of one list before the other? */
592 if (nexthop1 || nexthop2)
593 return false;
594
595 /* Have to compare the backup nexthops */
596 for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop,
597 nexthop2 = nhe2->backup_info->nhe->nhg.nexthop;
598 nexthop1 && nexthop2;
599 nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
600
601 if (!nhg_compare_nexthops(nexthop1, nexthop2))
602 return false;
603 }
604
605 return true;
606 }
607
608 bool zebra_nhg_hash_id_equal(const void *arg1, const void *arg2)
609 {
610 const struct nhg_hash_entry *nhe1 = arg1;
611 const struct nhg_hash_entry *nhe2 = arg2;
612
613 return nhe1->id == nhe2->id;
614 }
615
616 static int zebra_nhg_process_grp(struct nexthop_group *nhg,
617 struct nhg_connected_tree_head *depends,
618 struct nh_grp *grp, uint8_t count,
619 struct nhg_resilience *resilience)
620 {
621 nhg_connected_tree_init(depends);
622
623 for (int i = 0; i < count; i++) {
624 struct nhg_hash_entry *depend = NULL;
625 /* We do not care about nexthop_grp.weight at
626 * this time. But we should figure out
627 * how to adapt this to our code in
628 * the future.
629 */
630 depend = depends_find_id_add(depends, grp[i].id);
631
632 if (!depend) {
633 flog_err(
634 EC_ZEBRA_NHG_SYNC,
635 "Received Nexthop Group from the kernel with a dependent Nexthop ID (%u) which we do not have in our table",
636 grp[i].id);
637 return -1;
638 }
639
640 /*
641 * If this is a nexthop with its own group
642 * dependencies, add them as well. Not sure its
643 * even possible to have a group within a group
644 * in the kernel.
645 */
646
647 copy_nexthops(&nhg->nexthop, depend->nhg.nexthop, NULL);
648 }
649
650 if (resilience)
651 nhg->nhgr = *resilience;
652
653 return 0;
654 }
655
656 static void handle_recursive_depend(struct nhg_connected_tree_head *nhg_depends,
657 struct nexthop *nh, afi_t afi, int type)
658 {
659 struct nhg_hash_entry *depend = NULL;
660 struct nexthop_group resolved_ng = {};
661
662 resolved_ng.nexthop = nh;
663
664 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
665 zlog_debug("%s: head %p, nh %pNHv",
666 __func__, nhg_depends, nh);
667
668 depend = zebra_nhg_rib_find(0, &resolved_ng, afi, type);
669
670 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
671 zlog_debug("%s: nh %pNHv => %p (%u)",
672 __func__, nh, depend,
673 depend ? depend->id : 0);
674
675 if (depend)
676 depends_add(nhg_depends, depend);
677 }
678
679 /*
680 * Lookup an nhe in the global hash, using data from another nhe. If 'lookup'
681 * has an id value, that's used. Create a new global/shared nhe if not found.
682 */
683 static bool zebra_nhe_find(struct nhg_hash_entry **nhe, /* return value */
684 struct nhg_hash_entry *lookup,
685 struct nhg_connected_tree_head *nhg_depends,
686 afi_t afi, bool from_dplane)
687 {
688 bool created = false;
689 bool recursive = false;
690 struct nhg_hash_entry *newnhe, *backup_nhe;
691 struct nexthop *nh = NULL;
692
693 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
694 zlog_debug(
695 "%s: id %u, lookup %p, vrf %d, type %d, depends %p%s",
696 __func__, lookup->id, lookup, lookup->vrf_id,
697 lookup->type, nhg_depends,
698 (from_dplane ? " (from dplane)" : ""));
699
700 if (lookup->id)
701 (*nhe) = zebra_nhg_lookup_id(lookup->id);
702 else
703 (*nhe) = hash_lookup(zrouter.nhgs, lookup);
704
705 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
706 zlog_debug("%s: lookup => %p (%pNG)", __func__, *nhe, *nhe);
707
708 /* If we found an existing object, we're done */
709 if (*nhe)
710 goto done;
711
712 /* We're going to create/insert a new nhe:
713 * assign the next global id value if necessary.
714 */
715 if (lookup->id == 0)
716 lookup->id = nhg_get_next_id();
717
718 if (!from_dplane && lookup->id < ZEBRA_NHG_PROTO_LOWER) {
719 /*
720 * This is a zebra hashed/owned NHG.
721 *
722 * It goes in HASH and ID table.
723 */
724 newnhe = hash_get(zrouter.nhgs, lookup, zebra_nhg_hash_alloc);
725 zebra_nhg_insert_id(newnhe);
726 } else {
727 /*
728 * This is upperproto owned NHG or one we read in from dataplane
729 * and should not be hashed to.
730 *
731 * It goes in ID table.
732 */
733 newnhe =
734 hash_get(zrouter.nhgs_id, lookup, zebra_nhg_hash_alloc);
735 }
736
737 created = true;
738
739 /* Mail back the new object */
740 *nhe = newnhe;
741
742 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
743 zlog_debug("%s: => created %p (%pNG)", __func__, newnhe,
744 newnhe);
745
746 /* Only hash/lookup the depends if the first lookup
747 * fails to find something. This should hopefully save a
748 * lot of cycles for larger ecmp sizes.
749 */
750 if (nhg_depends) {
751 /* If you don't want to hash on each nexthop in the
752 * nexthop group struct you can pass the depends
753 * directly. Kernel-side we do this since it just looks
754 * them up via IDs.
755 */
756 zebra_nhg_connect_depends(newnhe, nhg_depends);
757 goto done;
758 }
759
760 /* Prepare dependency relationships if this is not a
761 * singleton nexthop. There are two cases: a single
762 * recursive nexthop, where we need a relationship to the
763 * resolving nexthop; or a group of nexthops, where we need
764 * relationships with the corresponding singletons.
765 */
766 zebra_nhg_depends_init(newnhe);
767
768 nh = newnhe->nhg.nexthop;
769
770 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE))
771 SET_FLAG(newnhe->flags, NEXTHOP_GROUP_VALID);
772
773 if (nh->next == NULL && newnhe->id < ZEBRA_NHG_PROTO_LOWER) {
774 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) {
775 /* Single recursive nexthop */
776 handle_recursive_depend(&newnhe->nhg_depends,
777 nh->resolved, afi,
778 newnhe->type);
779 recursive = true;
780 }
781 } else {
782 /* Proto-owned are groups by default */
783 /* List of nexthops */
784 for (nh = newnhe->nhg.nexthop; nh; nh = nh->next) {
785 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
786 zlog_debug("%s: depends NH %pNHv %s",
787 __func__, nh,
788 CHECK_FLAG(nh->flags,
789 NEXTHOP_FLAG_RECURSIVE) ?
790 "(R)" : "");
791
792 depends_find_add(&newnhe->nhg_depends, nh, afi,
793 newnhe->type, from_dplane);
794 }
795 }
796
797 if (recursive)
798 SET_FLAG(newnhe->flags, NEXTHOP_GROUP_RECURSIVE);
799
800 /* Attach dependent backpointers to singletons */
801 zebra_nhg_connect_depends(newnhe, &newnhe->nhg_depends);
802
803 /**
804 * Backup Nexthops
805 */
806
807 if (zebra_nhg_get_backup_nhg(newnhe) == NULL ||
808 zebra_nhg_get_backup_nhg(newnhe)->nexthop == NULL)
809 goto done;
810
811 /* If there are backup nexthops, add them to the backup
812 * depends tree. The rules here are a little different.
813 */
814 recursive = false;
815 backup_nhe = newnhe->backup_info->nhe;
816
817 nh = backup_nhe->nhg.nexthop;
818
819 /* Singleton recursive NH */
820 if (nh->next == NULL &&
821 CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) {
822 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
823 zlog_debug("%s: backup depend NH %pNHv (R)",
824 __func__, nh);
825
826 /* Single recursive nexthop */
827 handle_recursive_depend(&backup_nhe->nhg_depends, nh->resolved,
828 afi, backup_nhe->type);
829 recursive = true;
830 } else {
831 /* One or more backup NHs */
832 for (; nh; nh = nh->next) {
833 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
834 zlog_debug("%s: backup depend NH %pNHv %s",
835 __func__, nh,
836 CHECK_FLAG(nh->flags,
837 NEXTHOP_FLAG_RECURSIVE) ?
838 "(R)" : "");
839
840 depends_find_add(&backup_nhe->nhg_depends, nh, afi,
841 backup_nhe->type, from_dplane);
842 }
843 }
844
845 if (recursive)
846 SET_FLAG(backup_nhe->flags, NEXTHOP_GROUP_RECURSIVE);
847
848 done:
849 /* Reset time since last update */
850 (*nhe)->uptime = monotime(NULL);
851
852 return created;
853 }
854
855 /*
856 * Lookup or create an nhe, based on an nhg or an nhe id.
857 */
858 static bool zebra_nhg_find(struct nhg_hash_entry **nhe, uint32_t id,
859 struct nexthop_group *nhg,
860 struct nhg_connected_tree_head *nhg_depends,
861 vrf_id_t vrf_id, afi_t afi, int type,
862 bool from_dplane)
863 {
864 struct nhg_hash_entry lookup = {};
865 bool created = false;
866
867 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
868 zlog_debug("%s: id %u, nhg %p, vrf %d, type %d, depends %p",
869 __func__, id, nhg, vrf_id, type,
870 nhg_depends);
871
872 /* Use a temporary nhe and call into the superset/common code */
873 lookup.id = id;
874 lookup.type = type ? type : ZEBRA_ROUTE_NHG;
875 lookup.nhg = *nhg;
876
877 lookup.vrf_id = vrf_id;
878 if (nhg_depends || lookup.nhg.nexthop->next) {
879 /* Groups can have all vrfs and AF's in them */
880 lookup.afi = AFI_UNSPEC;
881 } else {
882 switch (lookup.nhg.nexthop->type) {
883 case (NEXTHOP_TYPE_IFINDEX):
884 case (NEXTHOP_TYPE_BLACKHOLE):
885 /*
886 * This switch case handles setting the afi different
887 * for ipv4/v6 routes. Ifindex/blackhole nexthop
888 * objects cannot be ambiguous, they must be Address
889 * Family specific. If we get here, we will either use
890 * the AF of the route, or the one we got passed from
891 * here from the kernel.
892 */
893 lookup.afi = afi;
894 break;
895 case (NEXTHOP_TYPE_IPV4_IFINDEX):
896 case (NEXTHOP_TYPE_IPV4):
897 lookup.afi = AFI_IP;
898 break;
899 case (NEXTHOP_TYPE_IPV6_IFINDEX):
900 case (NEXTHOP_TYPE_IPV6):
901 lookup.afi = AFI_IP6;
902 break;
903 }
904 }
905
906 created = zebra_nhe_find(nhe, &lookup, nhg_depends, afi, from_dplane);
907
908 return created;
909 }
910
911 /* Find/create a single nexthop */
912 static struct nhg_hash_entry *zebra_nhg_find_nexthop(uint32_t id,
913 struct nexthop *nh,
914 afi_t afi, int type,
915 bool from_dplane)
916 {
917 struct nhg_hash_entry *nhe = NULL;
918 struct nexthop_group nhg = {};
919 vrf_id_t vrf_id = !vrf_is_backend_netns() ? VRF_DEFAULT : nh->vrf_id;
920
921 nexthop_group_add_sorted(&nhg, nh);
922
923 zebra_nhg_find(&nhe, id, &nhg, NULL, vrf_id, afi, type, from_dplane);
924
925 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
926 zlog_debug("%s: nh %pNHv => %p (%pNG)", __func__, nh, nhe, nhe);
927
928 return nhe;
929 }
930
931 static uint32_t nhg_ctx_get_id(const struct nhg_ctx *ctx)
932 {
933 return ctx->id;
934 }
935
936 static void nhg_ctx_set_status(struct nhg_ctx *ctx, enum nhg_ctx_status status)
937 {
938 ctx->status = status;
939 }
940
941 static enum nhg_ctx_status nhg_ctx_get_status(const struct nhg_ctx *ctx)
942 {
943 return ctx->status;
944 }
945
946 static void nhg_ctx_set_op(struct nhg_ctx *ctx, enum nhg_ctx_op_e op)
947 {
948 ctx->op = op;
949 }
950
951 static enum nhg_ctx_op_e nhg_ctx_get_op(const struct nhg_ctx *ctx)
952 {
953 return ctx->op;
954 }
955
956 static vrf_id_t nhg_ctx_get_vrf_id(const struct nhg_ctx *ctx)
957 {
958 return ctx->vrf_id;
959 }
960
961 static int nhg_ctx_get_type(const struct nhg_ctx *ctx)
962 {
963 return ctx->type;
964 }
965
966 static int nhg_ctx_get_afi(const struct nhg_ctx *ctx)
967 {
968 return ctx->afi;
969 }
970
971 static struct nexthop *nhg_ctx_get_nh(struct nhg_ctx *ctx)
972 {
973 return &ctx->u.nh;
974 }
975
976 static uint8_t nhg_ctx_get_count(const struct nhg_ctx *ctx)
977 {
978 return ctx->count;
979 }
980
981 static struct nh_grp *nhg_ctx_get_grp(struct nhg_ctx *ctx)
982 {
983 return ctx->u.grp;
984 }
985
986 static struct nhg_resilience *nhg_ctx_get_resilience(struct nhg_ctx *ctx)
987 {
988 return &ctx->resilience;
989 }
990
991 static struct nhg_ctx *nhg_ctx_new(void)
992 {
993 struct nhg_ctx *new;
994
995 new = XCALLOC(MTYPE_NHG_CTX, sizeof(struct nhg_ctx));
996
997 return new;
998 }
999
1000 void nhg_ctx_free(struct nhg_ctx **ctx)
1001 {
1002 struct nexthop *nh;
1003
1004 if (ctx == NULL)
1005 return;
1006
1007 assert((*ctx) != NULL);
1008
1009 if (nhg_ctx_get_count(*ctx))
1010 goto done;
1011
1012 nh = nhg_ctx_get_nh(*ctx);
1013
1014 nexthop_del_labels(nh);
1015 nexthop_del_srv6_seg6local(nh);
1016 nexthop_del_srv6_seg6(nh);
1017
1018 done:
1019 XFREE(MTYPE_NHG_CTX, *ctx);
1020 }
1021
1022 static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh,
1023 struct nh_grp *grp, vrf_id_t vrf_id,
1024 afi_t afi, int type, uint8_t count,
1025 struct nhg_resilience *resilience)
1026 {
1027 struct nhg_ctx *ctx = NULL;
1028
1029 ctx = nhg_ctx_new();
1030
1031 ctx->id = id;
1032 ctx->vrf_id = vrf_id;
1033 ctx->afi = afi;
1034 ctx->type = type;
1035 ctx->count = count;
1036
1037 if (resilience)
1038 ctx->resilience = *resilience;
1039
1040 if (count)
1041 /* Copy over the array */
1042 memcpy(&ctx->u.grp, grp, count * sizeof(struct nh_grp));
1043 else if (nh)
1044 ctx->u.nh = *nh;
1045
1046 return ctx;
1047 }
1048
1049 static void zebra_nhg_set_valid(struct nhg_hash_entry *nhe)
1050 {
1051 struct nhg_connected *rb_node_dep;
1052
1053 SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
1054
1055 frr_each(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep)
1056 zebra_nhg_set_valid(rb_node_dep->nhe);
1057 }
1058
1059 static void zebra_nhg_set_invalid(struct nhg_hash_entry *nhe)
1060 {
1061 struct nhg_connected *rb_node_dep;
1062
1063 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
1064
1065 /* If we're in shutdown, this interface event needs to clean
1066 * up installed NHGs, so don't clear that flag directly.
1067 */
1068 if (!zebra_router_in_shutdown())
1069 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
1070
1071 /* Update validity of nexthops depending on it */
1072 frr_each(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep)
1073 zebra_nhg_check_valid(rb_node_dep->nhe);
1074 }
1075
1076 void zebra_nhg_check_valid(struct nhg_hash_entry *nhe)
1077 {
1078 struct nhg_connected *rb_node_dep = NULL;
1079 bool valid = false;
1080
1081 /* If anthing else in the group is valid, the group is valid */
1082 frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
1083 if (CHECK_FLAG(rb_node_dep->nhe->flags, NEXTHOP_GROUP_VALID)) {
1084 valid = true;
1085 goto done;
1086 }
1087 }
1088
1089 done:
1090 if (valid)
1091 zebra_nhg_set_valid(nhe);
1092 else
1093 zebra_nhg_set_invalid(nhe);
1094 }
1095
1096 static void zebra_nhg_release_all_deps(struct nhg_hash_entry *nhe)
1097 {
1098 /* Remove it from any lists it may be on */
1099 zebra_nhg_depends_release(nhe);
1100 zebra_nhg_dependents_release(nhe);
1101 if (nhe->ifp)
1102 if_nhg_dependents_del(nhe->ifp, nhe);
1103 }
1104
1105 static void zebra_nhg_release(struct nhg_hash_entry *nhe)
1106 {
1107 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1108 zlog_debug("%s: nhe %p (%pNG)", __func__, nhe, nhe);
1109
1110 zebra_nhg_release_all_deps(nhe);
1111
1112 /*
1113 * If its not zebra owned, we didn't store it here and have to be
1114 * sure we don't clear one thats actually being used.
1115 */
1116 if (nhe->id < ZEBRA_NHG_PROTO_LOWER)
1117 hash_release(zrouter.nhgs, nhe);
1118
1119 hash_release(zrouter.nhgs_id, nhe);
1120 }
1121
/* After an uninstall: release the nhe from all tables, then free it */
static void zebra_nhg_handle_uninstall(struct nhg_hash_entry *nhe)
{
	zebra_nhg_release(nhe);
	zebra_nhg_free(nhe);
}
1127
1128 static void zebra_nhg_handle_install(struct nhg_hash_entry *nhe)
1129 {
1130 /* Update validity of groups depending on it */
1131 struct nhg_connected *rb_node_dep;
1132
1133 frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep)
1134 zebra_nhg_set_valid(rb_node_dep->nhe);
1135 }
1136
1137 /*
1138 * The kernel/other program has changed the state of a nexthop object we are
1139 * using.
1140 */
1141 static void zebra_nhg_handle_kernel_state_change(struct nhg_hash_entry *nhe,
1142 bool is_delete)
1143 {
1144 if (nhe->refcnt) {
1145 flog_err(
1146 EC_ZEBRA_NHG_SYNC,
1147 "Kernel %s a nexthop group with ID (%pNG) that we are still using for a route, sending it back down",
1148 (is_delete ? "deleted" : "updated"), nhe);
1149
1150 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
1151 zebra_nhg_install_kernel(nhe);
1152 } else
1153 zebra_nhg_handle_uninstall(nhe);
1154 }
1155
1156 static int nhg_ctx_process_new(struct nhg_ctx *ctx)
1157 {
1158 struct nexthop_group *nhg = NULL;
1159 struct nhg_connected_tree_head nhg_depends = {};
1160 struct nhg_hash_entry *lookup = NULL;
1161 struct nhg_hash_entry *nhe = NULL;
1162
1163 uint32_t id = nhg_ctx_get_id(ctx);
1164 uint8_t count = nhg_ctx_get_count(ctx);
1165 vrf_id_t vrf_id = nhg_ctx_get_vrf_id(ctx);
1166 int type = nhg_ctx_get_type(ctx);
1167 afi_t afi = nhg_ctx_get_afi(ctx);
1168
1169 lookup = zebra_nhg_lookup_id(id);
1170
1171 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1172 zlog_debug("%s: id %u, count %d, lookup => %p",
1173 __func__, id, count, lookup);
1174
1175 if (lookup) {
1176 /* This is already present in our table, hence an update
1177 * that we did not initate.
1178 */
1179 zebra_nhg_handle_kernel_state_change(lookup, false);
1180 return 0;
1181 }
1182
1183 if (nhg_ctx_get_count(ctx)) {
1184 nhg = nexthop_group_new();
1185 if (zebra_nhg_process_grp(nhg, &nhg_depends,
1186 nhg_ctx_get_grp(ctx), count,
1187 nhg_ctx_get_resilience(ctx))) {
1188 depends_decrement_free(&nhg_depends);
1189 nexthop_group_delete(&nhg);
1190 return -ENOENT;
1191 }
1192
1193 if (!zebra_nhg_find(&nhe, id, nhg, &nhg_depends, vrf_id, afi,
1194 type, true))
1195 depends_decrement_free(&nhg_depends);
1196
1197 /* These got copied over in zebra_nhg_alloc() */
1198 nexthop_group_delete(&nhg);
1199 } else
1200 nhe = zebra_nhg_find_nexthop(id, nhg_ctx_get_nh(ctx), afi, type,
1201 true);
1202
1203 if (!nhe) {
1204 flog_err(
1205 EC_ZEBRA_TABLE_LOOKUP_FAILED,
1206 "Zebra failed to find or create a nexthop hash entry for ID (%u)",
1207 id);
1208 return -1;
1209 }
1210
1211 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1212 zlog_debug("%s: nhe %p (%pNG) is new", __func__, nhe, nhe);
1213
1214 /*
1215 * If daemon nhg from the kernel, add a refcnt here to indicate the
1216 * daemon owns it.
1217 */
1218 if (PROTO_OWNED(nhe))
1219 zebra_nhg_increment_ref(nhe);
1220
1221 SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
1222 SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
1223
1224 return 0;
1225 }
1226
1227 static int nhg_ctx_process_del(struct nhg_ctx *ctx)
1228 {
1229 struct nhg_hash_entry *nhe = NULL;
1230 uint32_t id = nhg_ctx_get_id(ctx);
1231
1232 nhe = zebra_nhg_lookup_id(id);
1233
1234 if (!nhe) {
1235 flog_warn(
1236 EC_ZEBRA_BAD_NHG_MESSAGE,
1237 "Kernel delete message received for nexthop group ID (%u) that we do not have in our ID table",
1238 id);
1239 return -1;
1240 }
1241
1242 zebra_nhg_handle_kernel_state_change(nhe, true);
1243
1244 return 0;
1245 }
1246
/*
 * Finish with a nexthop-group context. Takes a pointer to the caller's
 * ctx pointer and passes it straight to nhg_ctx_free().
 */
static void nhg_ctx_fini(struct nhg_ctx **ctx)
{
	/*
	 * Just freeing for now, maybe do something more in the future
	 * based on flag.
	 */

	nhg_ctx_free(ctx);
}
1256
1257 static int queue_add(struct nhg_ctx *ctx)
1258 {
1259 /* If its queued or already processed do nothing */
1260 if (nhg_ctx_get_status(ctx) == NHG_CTX_QUEUED)
1261 return 0;
1262
1263 if (rib_queue_nhg_ctx_add(ctx)) {
1264 nhg_ctx_set_status(ctx, NHG_CTX_FAILURE);
1265 return -1;
1266 }
1267
1268 nhg_ctx_set_status(ctx, NHG_CTX_QUEUED);
1269
1270 return 0;
1271 }
1272
1273 int nhg_ctx_process(struct nhg_ctx *ctx)
1274 {
1275 int ret = 0;
1276
1277 switch (nhg_ctx_get_op(ctx)) {
1278 case NHG_CTX_OP_NEW:
1279 ret = nhg_ctx_process_new(ctx);
1280 if (nhg_ctx_get_count(ctx) && ret == -ENOENT
1281 && nhg_ctx_get_status(ctx) != NHG_CTX_REQUEUED) {
1282 /**
1283 * We have entered a situation where we are
1284 * processing a group from the kernel
1285 * that has a contained nexthop which
1286 * we have not yet processed.
1287 *
1288 * Re-enqueue this ctx to be handled exactly one
1289 * more time (indicated by the flag).
1290 *
1291 * By the time we get back to it, we
1292 * should have processed its depends.
1293 */
1294 nhg_ctx_set_status(ctx, NHG_CTX_NONE);
1295 if (queue_add(ctx) == 0) {
1296 nhg_ctx_set_status(ctx, NHG_CTX_REQUEUED);
1297 return 0;
1298 }
1299 }
1300 break;
1301 case NHG_CTX_OP_DEL:
1302 ret = nhg_ctx_process_del(ctx);
1303 case NHG_CTX_OP_NONE:
1304 break;
1305 }
1306
1307 nhg_ctx_set_status(ctx, (ret ? NHG_CTX_FAILURE : NHG_CTX_SUCCESS));
1308
1309 nhg_ctx_fini(&ctx);
1310
1311 return ret;
1312 }
1313
1314 /* Kernel-side, you either get a single new nexthop or a array of ID's */
1315 int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, struct nh_grp *grp,
1316 uint8_t count, vrf_id_t vrf_id, afi_t afi, int type,
1317 int startup, struct nhg_resilience *nhgr)
1318 {
1319 struct nhg_ctx *ctx = NULL;
1320
1321 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1322 zlog_debug("%s: nh %pNHv, id %u, count %d",
1323 __func__, nh, id, (int)count);
1324
1325 if (id > id_counter && id < ZEBRA_NHG_PROTO_LOWER)
1326 /* Increase our counter so we don't try to create
1327 * an ID that already exists
1328 */
1329 id_counter = id;
1330
1331 ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count, nhgr);
1332 nhg_ctx_set_op(ctx, NHG_CTX_OP_NEW);
1333
1334 /* Under statup conditions, we need to handle them immediately
1335 * like we do for routes. Otherwise, we are going to get a route
1336 * with a nhe_id that we have not handled.
1337 */
1338 if (startup)
1339 return nhg_ctx_process(ctx);
1340
1341 if (queue_add(ctx)) {
1342 nhg_ctx_fini(&ctx);
1343 return -1;
1344 }
1345
1346 return 0;
1347 }
1348
1349 /* Kernel-side, received delete message */
1350 int zebra_nhg_kernel_del(uint32_t id, vrf_id_t vrf_id)
1351 {
1352 struct nhg_ctx *ctx = NULL;
1353
1354 ctx = nhg_ctx_init(id, NULL, NULL, vrf_id, 0, 0, 0, NULL);
1355
1356 nhg_ctx_set_op(ctx, NHG_CTX_OP_DEL);
1357
1358 if (queue_add(ctx)) {
1359 nhg_ctx_fini(&ctx);
1360 return -1;
1361 }
1362
1363 return 0;
1364 }
1365
1366 /* Some dependency helper functions */
1367 static struct nhg_hash_entry *depends_find_recursive(const struct nexthop *nh,
1368 afi_t afi, int type)
1369 {
1370 struct nhg_hash_entry *nhe;
1371 struct nexthop *lookup = NULL;
1372
1373 lookup = nexthop_dup(nh, NULL);
1374
1375 nhe = zebra_nhg_find_nexthop(0, lookup, afi, type, false);
1376
1377 nexthops_free(lookup);
1378
1379 return nhe;
1380 }
1381
1382 static struct nhg_hash_entry *depends_find_singleton(const struct nexthop *nh,
1383 afi_t afi, int type,
1384 bool from_dplane)
1385 {
1386 struct nhg_hash_entry *nhe;
1387 struct nexthop lookup = {};
1388
1389 /* Capture a snapshot of this single nh; it might be part of a list,
1390 * so we need to make a standalone copy.
1391 */
1392 nexthop_copy_no_recurse(&lookup, nh, NULL);
1393
1394 nhe = zebra_nhg_find_nexthop(0, &lookup, afi, type, from_dplane);
1395
1396 /* The copy may have allocated labels; free them if necessary. */
1397 nexthop_del_labels(&lookup);
1398 nexthop_del_srv6_seg6local(&lookup);
1399 nexthop_del_srv6_seg6(&lookup);
1400
1401 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1402 zlog_debug("%s: nh %pNHv => %p (%pNG)", __func__, nh, nhe, nhe);
1403
1404 return nhe;
1405 }
1406
1407 static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi,
1408 int type, bool from_dplane)
1409 {
1410 struct nhg_hash_entry *nhe = NULL;
1411
1412 if (!nh)
1413 goto done;
1414
1415 /* We are separating these functions out to increase handling speed
1416 * in the non-recursive case (by not alloc/freeing)
1417 */
1418 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE))
1419 nhe = depends_find_recursive(nh, afi, type);
1420 else
1421 nhe = depends_find_singleton(nh, afi, type, from_dplane);
1422
1423
1424 if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
1425 zlog_debug("%s: nh %pNHv %s => %p (%pNG)", __func__, nh,
1426 CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE) ? "(R)"
1427 : "",
1428 nhe, nhe);
1429 }
1430
1431 done:
1432 return nhe;
1433 }
1434
1435 static void depends_add(struct nhg_connected_tree_head *head,
1436 struct nhg_hash_entry *depend)
1437 {
1438 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1439 zlog_debug("%s: head %p nh %pNHv",
1440 __func__, head, depend->nhg.nexthop);
1441
1442 /* If NULL is returned, it was successfully added and
1443 * needs to have its refcnt incremented.
1444 *
1445 * Else the NHE is already present in the tree and doesn't
1446 * need to increment the refcnt.
1447 */
1448 if (nhg_connected_tree_add_nhe(head, depend) == NULL)
1449 zebra_nhg_increment_ref(depend);
1450 }
1451
1452 static struct nhg_hash_entry *
1453 depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh,
1454 afi_t afi, int type, bool from_dplane)
1455 {
1456 struct nhg_hash_entry *depend = NULL;
1457
1458 depend = depends_find(nh, afi, type, from_dplane);
1459
1460 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1461 zlog_debug("%s: nh %pNHv => %p",
1462 __func__, nh, depend);
1463
1464 if (depend)
1465 depends_add(head, depend);
1466
1467 return depend;
1468 }
1469
/*
 * Look up an nhe by ID and, when it exists, add it to the given depends
 * tree. Returns the nhe, or NULL when the ID is unknown.
 */
static struct nhg_hash_entry *
depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id)
{
	struct nhg_hash_entry *found = zebra_nhg_lookup_id(id);

	if (found == NULL)
		return NULL;

	depends_add(head, found);

	return found;
}
1482
/*
 * Tear down a depends tree: first decrement the refs held through the
 * tree, then free the tree itself.
 */
static void depends_decrement_free(struct nhg_connected_tree_head *head)
{
	nhg_connected_tree_decrement_ref(head);
	nhg_connected_tree_free(head);
}
1488
1489 /* Find an nhe based on a list of nexthops */
1490 struct nhg_hash_entry *zebra_nhg_rib_find(uint32_t id,
1491 struct nexthop_group *nhg,
1492 afi_t rt_afi, int type)
1493 {
1494 struct nhg_hash_entry *nhe = NULL;
1495 vrf_id_t vrf_id;
1496
1497 /*
1498 * CLANG SA is complaining that nexthop may be NULL
1499 * Make it happy but this is ridonc
1500 */
1501 assert(nhg->nexthop);
1502 vrf_id = !vrf_is_backend_netns() ? VRF_DEFAULT : nhg->nexthop->vrf_id;
1503
1504 zebra_nhg_find(&nhe, id, nhg, NULL, vrf_id, rt_afi, type, false);
1505
1506 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1507 zlog_debug("%s: => nhe %p (%pNG)", __func__, nhe, nhe);
1508
1509 return nhe;
1510 }
1511
1512 /* Find an nhe based on a route's nhe */
1513 struct nhg_hash_entry *
1514 zebra_nhg_rib_find_nhe(struct nhg_hash_entry *rt_nhe, afi_t rt_afi)
1515 {
1516 struct nhg_hash_entry *nhe = NULL;
1517
1518 if (!(rt_nhe && rt_nhe->nhg.nexthop)) {
1519 flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED,
1520 "No nexthop passed to %s", __func__);
1521 return NULL;
1522 }
1523
1524 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1525 zlog_debug("%s: rt_nhe %p (%pNG)", __func__, rt_nhe, rt_nhe);
1526
1527 zebra_nhe_find(&nhe, rt_nhe, NULL, rt_afi, false);
1528
1529 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1530 zlog_debug("%s: => nhe %p (%pNG)", __func__, nhe, nhe);
1531
1532 return nhe;
1533 }
1534
1535 /*
1536 * Allocate backup nexthop info object. Typically these are embedded in
1537 * nhg_hash_entry objects.
1538 */
1539 struct nhg_backup_info *zebra_nhg_backup_alloc(void)
1540 {
1541 struct nhg_backup_info *p;
1542
1543 p = XCALLOC(MTYPE_NHG, sizeof(struct nhg_backup_info));
1544
1545 p->nhe = zebra_nhg_alloc();
1546
1547 /* Identify the embedded group used to hold the list of backups */
1548 SET_FLAG(p->nhe->flags, NEXTHOP_GROUP_BACKUP);
1549
1550 return p;
1551 }
1552
1553 /*
1554 * Free backup nexthop info object, deal with any embedded allocations
1555 */
1556 void zebra_nhg_backup_free(struct nhg_backup_info **p)
1557 {
1558 if (p && *p) {
1559 if ((*p)->nhe)
1560 zebra_nhg_free((*p)->nhe);
1561
1562 XFREE(MTYPE_NHG, (*p));
1563 }
1564 }
1565
1566 /* Accessor for backup nexthop group */
1567 struct nexthop_group *zebra_nhg_get_backup_nhg(struct nhg_hash_entry *nhe)
1568 {
1569 struct nexthop_group *p = NULL;
1570
1571 if (nhe) {
1572 if (nhe->backup_info && nhe->backup_info->nhe)
1573 p = &(nhe->backup_info->nhe->nhg);
1574 }
1575
1576 return p;
1577 }
1578
1579 /*
1580 * Helper to return a copy of a backup_info - note that this is a shallow
1581 * copy, meant to be used when creating a new nhe from info passed in with
1582 * a route e.g.
1583 */
1584 static struct nhg_backup_info *
1585 nhg_backup_copy(const struct nhg_backup_info *orig)
1586 {
1587 struct nhg_backup_info *b;
1588
1589 b = zebra_nhg_backup_alloc();
1590
1591 /* Copy list of nexthops */
1592 nexthop_group_copy(&(b->nhe->nhg), &(orig->nhe->nhg));
1593
1594 return b;
1595 }
1596
/*
 * Release everything an nhe owns, without freeing the nhe itself: its
 * nexthop list, its backup info, and its depends/dependents trees.
 */
static void zebra_nhg_free_members(struct nhg_hash_entry *nhe)
{
	nexthops_free(nhe->nhg.nexthop);

	zebra_nhg_backup_free(&nhe->backup_info);

	/* Decrement to remove connection ref */
	nhg_connected_tree_decrement_ref(&nhe->nhg_depends);
	nhg_connected_tree_free(&nhe->nhg_depends);
	nhg_connected_tree_free(&nhe->nhg_dependents);
}
1608
1609 void zebra_nhg_free(struct nhg_hash_entry *nhe)
1610 {
1611 if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
1612 /* Group or singleton? */
1613 if (nhe->nhg.nexthop && nhe->nhg.nexthop->next)
1614 zlog_debug("%s: nhe %p (%pNG), refcnt %d", __func__,
1615 nhe, nhe, nhe->refcnt);
1616 else
1617 zlog_debug("%s: nhe %p (%pNG), refcnt %d, NH %pNHv",
1618 __func__, nhe, nhe, nhe->refcnt,
1619 nhe->nhg.nexthop);
1620 }
1621
1622 THREAD_OFF(nhe->timer);
1623
1624 zebra_nhg_free_members(nhe);
1625
1626 XFREE(MTYPE_NHG, nhe);
1627 }
1628
1629 /*
1630 * Let's just drop the memory associated with each item
1631 */
1632 void zebra_nhg_hash_free(void *p)
1633 {
1634 struct nhg_hash_entry *nhe = p;
1635
1636 if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
1637 /* Group or singleton? */
1638 if (nhe->nhg.nexthop && nhe->nhg.nexthop->next)
1639 zlog_debug("%s: nhe %p (%u), refcnt %d", __func__, nhe,
1640 nhe->id, nhe->refcnt);
1641 else
1642 zlog_debug("%s: nhe %p (%pNG), refcnt %d, NH %pNHv",
1643 __func__, nhe, nhe, nhe->refcnt,
1644 nhe->nhg.nexthop);
1645 }
1646
1647 THREAD_OFF(nhe->timer);
1648
1649 nexthops_free(nhe->nhg.nexthop);
1650
1651 XFREE(MTYPE_NHG, nhe);
1652 }
1653
1654 /*
1655 * On cleanup there are nexthop groups that have not
1656 * been resolved at all( a nhe->id of 0 ). As such
1657 * zebra needs to clean up the memory associated with
1658 * those entries.
1659 */
1660 void zebra_nhg_hash_free_zero_id(struct hash_bucket *b, void *arg)
1661 {
1662 struct nhg_hash_entry *nhe = b->data;
1663 struct nhg_connected *dep;
1664
1665 while ((dep = nhg_connected_tree_pop(&nhe->nhg_depends))) {
1666 if (dep->nhe->id == 0)
1667 zebra_nhg_hash_free(dep->nhe);
1668
1669 nhg_connected_free(dep);
1670 }
1671
1672 while ((dep = nhg_connected_tree_pop(&nhe->nhg_dependents)))
1673 nhg_connected_free(dep);
1674
1675 if (nhe->backup_info && nhe->backup_info->nhe->id == 0) {
1676 while ((dep = nhg_connected_tree_pop(
1677 &nhe->backup_info->nhe->nhg_depends)))
1678 nhg_connected_free(dep);
1679
1680 zebra_nhg_hash_free(nhe->backup_info->nhe);
1681
1682 XFREE(MTYPE_NHG, nhe->backup_info);
1683 }
1684 }
1685
1686 static void zebra_nhg_timer(struct thread *thread)
1687 {
1688 struct nhg_hash_entry *nhe = THREAD_ARG(thread);
1689
1690 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1691 zlog_debug("Nexthop Timer for nhe: %pNG", nhe);
1692
1693 if (nhe->refcnt == 1)
1694 zebra_nhg_decrement_ref(nhe);
1695 }
1696
1697 void zebra_nhg_decrement_ref(struct nhg_hash_entry *nhe)
1698 {
1699 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1700 zlog_debug("%s: nhe %p (%pNG) %d => %d", __func__, nhe, nhe,
1701 nhe->refcnt, nhe->refcnt - 1);
1702
1703 nhe->refcnt--;
1704
1705 if (!zebra_router_in_shutdown() && nhe->refcnt <= 0 &&
1706 CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED) &&
1707 !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND)) {
1708 nhe->refcnt = 1;
1709 SET_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND);
1710 thread_add_timer(zrouter.master, zebra_nhg_timer, nhe,
1711 zrouter.nhg_keep, &nhe->timer);
1712 return;
1713 }
1714
1715 if (!zebra_nhg_depends_is_empty(nhe))
1716 nhg_connected_tree_decrement_ref(&nhe->nhg_depends);
1717
1718 if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0)
1719 zebra_nhg_uninstall_kernel(nhe);
1720 }
1721
1722 void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe)
1723 {
1724 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1725 zlog_debug("%s: nhe %p (%pNG) %d => %d", __func__, nhe, nhe,
1726 nhe->refcnt, nhe->refcnt + 1);
1727
1728 nhe->refcnt++;
1729
1730 if (thread_is_scheduled(nhe->timer)) {
1731 THREAD_OFF(nhe->timer);
1732 nhe->refcnt--;
1733 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND);
1734 }
1735
1736 if (!zebra_nhg_depends_is_empty(nhe))
1737 nhg_connected_tree_increment_ref(&nhe->nhg_depends);
1738 }
1739
1740 static struct nexthop *nexthop_set_resolved(afi_t afi,
1741 const struct nexthop *newhop,
1742 struct nexthop *nexthop,
1743 struct zebra_sr_policy *policy)
1744 {
1745 struct nexthop *resolved_hop;
1746 uint8_t num_labels = 0;
1747 mpls_label_t labels[MPLS_MAX_LABELS];
1748 enum lsp_types_t label_type = ZEBRA_LSP_NONE;
1749 int i = 0;
1750
1751 resolved_hop = nexthop_new();
1752 SET_FLAG(resolved_hop->flags, NEXTHOP_FLAG_ACTIVE);
1753
1754 resolved_hop->vrf_id = nexthop->vrf_id;
1755 switch (newhop->type) {
1756 case NEXTHOP_TYPE_IPV4:
1757 case NEXTHOP_TYPE_IPV4_IFINDEX:
1758 /* If the resolving route specifies a gateway, use it */
1759 resolved_hop->type = newhop->type;
1760 resolved_hop->gate.ipv4 = newhop->gate.ipv4;
1761
1762 if (newhop->ifindex) {
1763 resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
1764 resolved_hop->ifindex = newhop->ifindex;
1765 }
1766 break;
1767 case NEXTHOP_TYPE_IPV6:
1768 case NEXTHOP_TYPE_IPV6_IFINDEX:
1769 resolved_hop->type = newhop->type;
1770 resolved_hop->gate.ipv6 = newhop->gate.ipv6;
1771
1772 if (newhop->ifindex) {
1773 resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
1774 resolved_hop->ifindex = newhop->ifindex;
1775 }
1776 break;
1777 case NEXTHOP_TYPE_IFINDEX:
1778 /* If the resolving route is an interface route,
1779 * it means the gateway we are looking up is connected
1780 * to that interface. (The actual network is _not_ onlink).
1781 * Therefore, the resolved route should have the original
1782 * gateway as nexthop as it is directly connected.
1783 *
1784 * On Linux, we have to set the onlink netlink flag because
1785 * otherwise, the kernel won't accept the route.
1786 */
1787 resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;
1788 if (afi == AFI_IP) {
1789 resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
1790 resolved_hop->gate.ipv4 = nexthop->gate.ipv4;
1791 } else if (afi == AFI_IP6) {
1792 resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
1793 resolved_hop->gate.ipv6 = nexthop->gate.ipv6;
1794 }
1795 resolved_hop->ifindex = newhop->ifindex;
1796 break;
1797 case NEXTHOP_TYPE_BLACKHOLE:
1798 resolved_hop->type = NEXTHOP_TYPE_BLACKHOLE;
1799 resolved_hop->bh_type = newhop->bh_type;
1800 break;
1801 }
1802
1803 if (newhop->flags & NEXTHOP_FLAG_ONLINK)
1804 resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;
1805
1806 /* Copy labels of the resolved route and the parent resolving to it */
1807 if (policy) {
1808 int label_num = 0;
1809
1810 /*
1811 * Don't push the first SID if the corresponding action in the
1812 * LFIB is POP.
1813 */
1814 if (!newhop->nh_label || !newhop->nh_label->num_labels
1815 || newhop->nh_label->label[0] == MPLS_LABEL_IMPLICIT_NULL)
1816 label_num = 1;
1817
1818 for (; label_num < policy->segment_list.label_num; label_num++)
1819 labels[num_labels++] =
1820 policy->segment_list.labels[label_num];
1821 label_type = policy->segment_list.type;
1822 } else if (newhop->nh_label) {
1823 for (i = 0; i < newhop->nh_label->num_labels; i++) {
1824 /* Be a bit picky about overrunning the local array */
1825 if (num_labels >= MPLS_MAX_LABELS) {
1826 if (IS_ZEBRA_DEBUG_NHG || IS_ZEBRA_DEBUG_RIB)
1827 zlog_debug("%s: too many labels in newhop %pNHv",
1828 __func__, newhop);
1829 break;
1830 }
1831 labels[num_labels++] = newhop->nh_label->label[i];
1832 }
1833 /* Use the "outer" type */
1834 label_type = newhop->nh_label_type;
1835 }
1836
1837 if (nexthop->nh_label) {
1838 for (i = 0; i < nexthop->nh_label->num_labels; i++) {
1839 /* Be a bit picky about overrunning the local array */
1840 if (num_labels >= MPLS_MAX_LABELS) {
1841 if (IS_ZEBRA_DEBUG_NHG || IS_ZEBRA_DEBUG_RIB)
1842 zlog_debug("%s: too many labels in nexthop %pNHv",
1843 __func__, nexthop);
1844 break;
1845 }
1846 labels[num_labels++] = nexthop->nh_label->label[i];
1847 }
1848
1849 /* If the parent has labels, use its type if
1850 * we don't already have one.
1851 */
1852 if (label_type == ZEBRA_LSP_NONE)
1853 label_type = nexthop->nh_label_type;
1854 }
1855
1856 if (num_labels)
1857 nexthop_add_labels(resolved_hop, label_type, num_labels,
1858 labels);
1859
1860 if (nexthop->nh_srv6) {
1861 nexthop_add_srv6_seg6local(resolved_hop,
1862 nexthop->nh_srv6->seg6local_action,
1863 &nexthop->nh_srv6->seg6local_ctx);
1864 nexthop_add_srv6_seg6(resolved_hop,
1865 &nexthop->nh_srv6->seg6_segs);
1866 }
1867
1868 resolved_hop->rparent = nexthop;
1869 _nexthop_add(&nexthop->resolved, resolved_hop);
1870
1871 return resolved_hop;
1872 }
1873
1874 /* Checks if nexthop we are trying to resolve to is valid */
1875 static bool nexthop_valid_resolve(const struct nexthop *nexthop,
1876 const struct nexthop *resolved)
1877 {
1878 /* Can't resolve to a recursive nexthop */
1879 if (CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_RECURSIVE))
1880 return false;
1881
1882 /* Must be ACTIVE */
1883 if (!CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_ACTIVE))
1884 return false;
1885
1886 /* Must not be duplicate */
1887 if (CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_DUPLICATE))
1888 return false;
1889
1890 switch (nexthop->type) {
1891 case NEXTHOP_TYPE_IPV4_IFINDEX:
1892 case NEXTHOP_TYPE_IPV6_IFINDEX:
1893 /* If the nexthop we are resolving to does not match the
1894 * ifindex for the nexthop the route wanted, its not valid.
1895 */
1896 if (nexthop->ifindex != resolved->ifindex)
1897 return false;
1898 break;
1899 case NEXTHOP_TYPE_IPV4:
1900 case NEXTHOP_TYPE_IPV6:
1901 case NEXTHOP_TYPE_IFINDEX:
1902 case NEXTHOP_TYPE_BLACKHOLE:
1903 break;
1904 }
1905
1906 return true;
1907 }
1908
1909 /*
1910 * Downstream VNI and Single VXlan device check.
1911 *
1912 * If it has nexthop VNI labels at this point it must be D-VNI allocated
1913 * and all the nexthops have to be on an SVD.
1914 *
1915 * If SVD is not available, mark as inactive.
1916 */
1917 static bool nexthop_set_evpn_dvni_svd(vrf_id_t re_vrf_id,
1918 struct nexthop *nexthop)
1919 {
1920 if (!is_vrf_l3vni_svd_backed(re_vrf_id)) {
1921 if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
1922 struct vrf *vrf = vrf_lookup_by_id(re_vrf_id);
1923
1924 zlog_debug(
1925 "nexthop %pNHv D-VNI but route's vrf %s(%u) doesn't use SVD",
1926 nexthop, VRF_LOGNAME(vrf), re_vrf_id);
1927 }
1928
1929 return false;
1930 }
1931
1932 nexthop->ifindex = get_l3vni_vxlan_ifindex(re_vrf_id);
1933 nexthop->vrf_id = 0;
1934
1935 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1936 zlog_debug("nexthop %pNHv using SVD", nexthop);
1937
1938 return true;
1939 }
1940
1941 /*
1942 * Given a nexthop we need to properly recursively resolve
1943 * the route. As such, do a table lookup to find and match
1944 * if at all possible. Set the nexthop->ifindex and resolved_id
1945 * as appropriate
1946 */
1947 static int resolve_backup_nexthops(const struct nexthop *nexthop,
1948 const struct nhg_hash_entry *nhe,
1949 struct nexthop *resolved,
1950 struct nhg_hash_entry *resolve_nhe,
1951 struct backup_nh_map_s *map)
1952 {
1953 int i, j, idx;
1954 const struct nexthop *bnh;
1955 struct nexthop *nh, *newnh;
1956 mpls_label_t labels[MPLS_MAX_LABELS];
1957 uint8_t num_labels;
1958
1959 assert(nexthop->backup_num <= NEXTHOP_MAX_BACKUPS);
1960
1961 /* Locate backups from the original nexthop's backup index and nhe */
1962 for (i = 0; i < nexthop->backup_num; i++) {
1963 idx = nexthop->backup_idx[i];
1964
1965 /* Do we already know about this particular backup? */
1966 for (j = 0; j < map->map_count; j++) {
1967 if (map->map[j].orig_idx == idx)
1968 break;
1969 }
1970
1971 if (j < map->map_count) {
1972 resolved->backup_idx[resolved->backup_num] =
1973 map->map[j].new_idx;
1974 resolved->backup_num++;
1975
1976 SET_FLAG(resolved->flags, NEXTHOP_FLAG_HAS_BACKUP);
1977
1978 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
1979 zlog_debug("%s: found map idx orig %d, new %d",
1980 __func__, map->map[j].orig_idx,
1981 map->map[j].new_idx);
1982
1983 continue;
1984 }
1985
1986 /* We can't handle any new map entries at this point. */
1987 if (map->map_count == MULTIPATH_NUM)
1988 break;
1989
1990 /* Need to create/copy a new backup */
1991 bnh = nhe->backup_info->nhe->nhg.nexthop;
1992 for (j = 0; j < idx; j++) {
1993 if (bnh == NULL)
1994 break;
1995 bnh = bnh->next;
1996 }
1997
1998 /* Whoops - bad index in the nexthop? */
1999 if (bnh == NULL)
2000 continue;
2001
2002 if (resolve_nhe->backup_info == NULL)
2003 resolve_nhe->backup_info = zebra_nhg_backup_alloc();
2004
2005 /* Update backup info in the resolving nexthop and its nhe */
2006 newnh = nexthop_dup_no_recurse(bnh, NULL);
2007
2008 /* We may need some special handling for mpls labels: the new
2009 * backup needs to carry the recursive nexthop's labels,
2010 * if any: they may be vrf labels e.g.
2011 * The original/inner labels are in the stack of 'resolve_nhe',
2012 * if that is longer than the stack in 'nexthop'.
2013 */
2014 if (newnh->nh_label && resolved->nh_label &&
2015 nexthop->nh_label) {
2016 if (resolved->nh_label->num_labels >
2017 nexthop->nh_label->num_labels) {
2018 /* Prepare new label stack */
2019 num_labels = 0;
2020 for (j = 0; j < newnh->nh_label->num_labels;
2021 j++) {
2022 labels[j] = newnh->nh_label->label[j];
2023 num_labels++;
2024 }
2025
2026 /* Include inner labels */
2027 for (j = nexthop->nh_label->num_labels;
2028 j < resolved->nh_label->num_labels;
2029 j++) {
2030 labels[num_labels] =
2031 resolved->nh_label->label[j];
2032 num_labels++;
2033 }
2034
2035 /* Replace existing label stack in the backup */
2036 nexthop_del_labels(newnh);
2037 nexthop_add_labels(newnh, bnh->nh_label_type,
2038 num_labels, labels);
2039 }
2040 }
2041
2042 /* Need to compute the new backup index in the new
2043 * backup list, and add to map struct.
2044 */
2045 j = 0;
2046 nh = resolve_nhe->backup_info->nhe->nhg.nexthop;
2047 if (nh) {
2048 while (nh->next) {
2049 nh = nh->next;
2050 j++;
2051 }
2052
2053 nh->next = newnh;
2054 j++;
2055
2056 } else /* First one */
2057 resolve_nhe->backup_info->nhe->nhg.nexthop = newnh;
2058
2059 /* Capture index */
2060 resolved->backup_idx[resolved->backup_num] = j;
2061 resolved->backup_num++;
2062
2063 SET_FLAG(resolved->flags, NEXTHOP_FLAG_HAS_BACKUP);
2064
2065 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2066 zlog_debug("%s: added idx orig %d, new %d",
2067 __func__, idx, j);
2068
2069 /* Update map/cache */
2070 map->map[map->map_count].orig_idx = idx;
2071 map->map[map->map_count].new_idx = j;
2072 map->map_count++;
2073 }
2074
2075 return 0;
2076 }
2077
2078 /*
2079 * So this nexthop resolution has decided that a connected route
2080 * is the correct choice. At this point in time if FRR has multiple
2081 * connected routes that all point to the same prefix one will be
2082 * selected, *but* the particular interface may not be the one
2083 * that the nexthop points at. Let's look at all the available
2084 * connected routes on this node and if any of them auto match
2085 * the routes nexthops ifindex that is good enough for a match
2086 *
2087 * This code is depending on the fact that a nexthop->ifindex is 0
2088 * if it is not known, if this assumption changes, yummy!
2089 * Additionally a ifindx of 0 means figure it out for us.
2090 */
2091 static struct route_entry *
2092 zebra_nhg_connected_ifindex(struct route_node *rn, struct route_entry *match,
2093 int32_t curr_ifindex)
2094 {
2095 struct nexthop *newhop = match->nhe->nhg.nexthop;
2096 struct route_entry *re;
2097
2098 assert(newhop); /* What a kick in the patooey */
2099
2100 if (curr_ifindex == 0)
2101 return match;
2102
2103 if (curr_ifindex == newhop->ifindex)
2104 return match;
2105
2106 /*
2107 * At this point we know that this route is matching a connected
2108 * but there are possibly a bunch of connected routes that are
2109 * alive that should be considered as well. So let's iterate over
2110 * all the re's and see if they are connected as well and maybe one
2111 * of those ifindexes match as well.
2112 */
2113 RNODE_FOREACH_RE (rn, re) {
2114 if (re->type != ZEBRA_ROUTE_CONNECT)
2115 continue;
2116
2117 if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
2118 continue;
2119
2120 /*
2121 * zebra has a connected route that is not removed
2122 * let's test if it is good
2123 */
2124 newhop = re->nhe->nhg.nexthop;
2125 assert(newhop);
2126 if (curr_ifindex == newhop->ifindex)
2127 return re;
2128 }
2129
2130 return match;
2131 }
2132
2133 /*
2134 * Given a nexthop we need to properly recursively resolve,
2135 * do a table lookup to find and match if at all possible.
2136 * Set the nexthop->ifindex and resolution info as appropriate.
2137 */
2138 static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
2139 const struct prefix *top, int type, uint32_t flags,
2140 uint32_t *pmtu, vrf_id_t vrf_id)
2141 {
2142 struct prefix p;
2143 struct route_table *table;
2144 struct route_node *rn;
2145 struct route_entry *match = NULL;
2146 int resolved;
2147 struct zebra_nhlfe *nhlfe;
2148 struct nexthop *newhop;
2149 struct interface *ifp;
2150 rib_dest_t *dest;
2151 struct zebra_vrf *zvrf;
2152 struct in_addr local_ipv4;
2153 struct in_addr *ipv4;
2154 afi_t afi = AFI_IP;
2155
2156 /* Reset some nexthop attributes that we'll recompute if necessary */
2157 if ((nexthop->type == NEXTHOP_TYPE_IPV4)
2158 || (nexthop->type == NEXTHOP_TYPE_IPV6))
2159 nexthop->ifindex = 0;
2160
2161 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
2162 nexthops_free(nexthop->resolved);
2163 nexthop->resolved = NULL;
2164
2165 /*
2166 * Set afi based on nexthop type.
2167 * Some nexthop types get special handling, possibly skipping
2168 * the normal processing.
2169 */
2170 switch (nexthop->type) {
2171 case NEXTHOP_TYPE_IFINDEX:
2172
2173 ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
2174 /*
2175 * If the interface exists and its operative or its a kernel
2176 * route and interface is up, its active. We trust kernel routes
2177 * to be good.
2178 */
2179 if (ifp && (if_is_operative(ifp)))
2180 return 1;
2181 else
2182 return 0;
2183 break;
2184
2185 case NEXTHOP_TYPE_IPV6_IFINDEX:
2186 afi = AFI_IP6;
2187
2188 if (IN6_IS_ADDR_LINKLOCAL(&nexthop->gate.ipv6)) {
2189 ifp = if_lookup_by_index(nexthop->ifindex,
2190 nexthop->vrf_id);
2191 if (ifp && if_is_operative(ifp))
2192 return 1;
2193 else
2194 return 0;
2195 }
2196 break;
2197
2198 case NEXTHOP_TYPE_IPV4:
2199 case NEXTHOP_TYPE_IPV4_IFINDEX:
2200 afi = AFI_IP;
2201 break;
2202 case NEXTHOP_TYPE_IPV6:
2203 afi = AFI_IP6;
2204 break;
2205
2206 case NEXTHOP_TYPE_BLACKHOLE:
2207 return 1;
2208 }
2209
2210 /*
2211 * If the nexthop has been marked as 'onlink' we just need to make
2212 * sure the nexthop's interface is known and is operational.
2213 */
2214 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) {
2215 /* DVNI/SVD Checks for EVPN routes */
2216 if (nexthop->nh_label &&
2217 nexthop->nh_label_type == ZEBRA_LSP_EVPN &&
2218 !nexthop_set_evpn_dvni_svd(vrf_id, nexthop))
2219 return 0;
2220
2221 ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
2222 if (!ifp) {
2223 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2224 zlog_debug("nexthop %pNHv marked onlink but nhif %u doesn't exist",
2225 nexthop, nexthop->ifindex);
2226 return 0;
2227 }
2228 if (!if_is_operative(ifp)) {
2229 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2230 zlog_debug("nexthop %pNHv marked onlink but nhif %s is not operational",
2231 nexthop, ifp->name);
2232 return 0;
2233 }
2234 return 1;
2235 }
2236
2237 if (top &&
2238 ((top->family == AF_INET && top->prefixlen == IPV4_MAX_BITLEN &&
2239 nexthop->gate.ipv4.s_addr == top->u.prefix4.s_addr) ||
2240 (top->family == AF_INET6 && top->prefixlen == IPV6_MAX_BITLEN &&
2241 memcmp(&nexthop->gate.ipv6, &top->u.prefix6, IPV6_MAX_BYTELEN) ==
2242 0)) &&
2243 nexthop->vrf_id == vrf_id) {
2244 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2245 zlog_debug(
2246 " :%s: Attempting to install a max prefixlength route through itself",
2247 __func__);
2248 return 0;
2249 }
2250
2251 /* Validation for ipv4 mapped ipv6 nexthop. */
2252 if (IS_MAPPED_IPV6(&nexthop->gate.ipv6)) {
2253 afi = AFI_IP;
2254 ipv4 = &local_ipv4;
2255 ipv4_mapped_ipv6_to_ipv4(&nexthop->gate.ipv6, ipv4);
2256 } else {
2257 ipv4 = &nexthop->gate.ipv4;
2258 }
2259
2260 /* Processing for nexthops with SR 'color' attribute, using
2261 * the corresponding SR policy object.
2262 */
2263 if (nexthop->srte_color) {
2264 struct ipaddr endpoint = {0};
2265 struct zebra_sr_policy *policy;
2266
2267 switch (afi) {
2268 case AFI_IP:
2269 endpoint.ipa_type = IPADDR_V4;
2270 endpoint.ipaddr_v4 = *ipv4;
2271 break;
2272 case AFI_IP6:
2273 endpoint.ipa_type = IPADDR_V6;
2274 endpoint.ipaddr_v6 = nexthop->gate.ipv6;
2275 break;
2276 case AFI_UNSPEC:
2277 case AFI_L2VPN:
2278 case AFI_MAX:
2279 flog_err(EC_LIB_DEVELOPMENT,
2280 "%s: unknown address-family: %u", __func__,
2281 afi);
2282 exit(1);
2283 }
2284
2285 policy = zebra_sr_policy_find(nexthop->srte_color, &endpoint);
2286 if (policy && policy->status == ZEBRA_SR_POLICY_UP) {
2287 resolved = 0;
2288 frr_each_safe (nhlfe_list, &policy->lsp->nhlfe_list,
2289 nhlfe) {
2290 if (!CHECK_FLAG(nhlfe->flags,
2291 NHLFE_FLAG_SELECTED)
2292 || CHECK_FLAG(nhlfe->flags,
2293 NHLFE_FLAG_DELETED))
2294 continue;
2295 SET_FLAG(nexthop->flags,
2296 NEXTHOP_FLAG_RECURSIVE);
2297 nexthop_set_resolved(afi, nhlfe->nexthop,
2298 nexthop, policy);
2299 resolved = 1;
2300 }
2301 if (resolved)
2302 return 1;
2303 }
2304 }
2305
2306 /* Make lookup prefix. */
2307 memset(&p, 0, sizeof(struct prefix));
2308 switch (afi) {
2309 case AFI_IP:
2310 p.family = AF_INET;
2311 p.prefixlen = IPV4_MAX_BITLEN;
2312 p.u.prefix4 = *ipv4;
2313 break;
2314 case AFI_IP6:
2315 p.family = AF_INET6;
2316 p.prefixlen = IPV6_MAX_BITLEN;
2317 p.u.prefix6 = nexthop->gate.ipv6;
2318 break;
2319 case AFI_UNSPEC:
2320 case AFI_L2VPN:
2321 case AFI_MAX:
2322 assert(afi != AFI_IP && afi != AFI_IP6);
2323 break;
2324 }
2325 /* Lookup table. */
2326 table = zebra_vrf_table(afi, SAFI_UNICAST, nexthop->vrf_id);
2327 /* get zvrf */
2328 zvrf = zebra_vrf_lookup_by_id(nexthop->vrf_id);
2329 if (!table || !zvrf) {
2330 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2331 zlog_debug(" %s: Table not found", __func__);
2332 return 0;
2333 }
2334
2335 rn = route_node_match(table, (struct prefix *)&p);
2336 while (rn) {
2337 route_unlock_node(rn);
2338
2339 /* Lookup should halt if we've matched against ourselves ('top',
2340 * if specified) - i.e., we cannot have a nexthop NH1 is
2341 * resolved by a route NH1. The exception is if the route is a
2342 * host route.
2343 */
2344 if (prefix_same(&rn->p, top))
2345 if (((afi == AFI_IP)
2346 && (rn->p.prefixlen != IPV4_MAX_BITLEN))
2347 || ((afi == AFI_IP6)
2348 && (rn->p.prefixlen != IPV6_MAX_BITLEN))) {
2349 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2350 zlog_debug(
2351 " %s: Matched against ourself and prefix length is not max bit length",
2352 __func__);
2353 return 0;
2354 }
2355
2356 /* Pick up selected route. */
2357 /* However, do not resolve over default route unless explicitly
2358 * allowed.
2359 */
2360 if (is_default_prefix(&rn->p)
2361 && !rnh_resolve_via_default(zvrf, p.family)) {
2362 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2363 zlog_debug(
2364 " :%s: Resolved against default route",
2365 __func__);
2366 return 0;
2367 }
2368
2369 dest = rib_dest_from_rnode(rn);
2370 if (dest && dest->selected_fib
2371 && !CHECK_FLAG(dest->selected_fib->status,
2372 ROUTE_ENTRY_REMOVED)
2373 && dest->selected_fib->type != ZEBRA_ROUTE_TABLE)
2374 match = dest->selected_fib;
2375
2376 /* If there is no selected route or matched route is EGP, go up
2377 * tree.
2378 */
2379 if (!match) {
2380 do {
2381 rn = rn->parent;
2382 } while (rn && rn->info == NULL);
2383 if (rn)
2384 route_lock_node(rn);
2385
2386 continue;
2387 }
2388
2389 if ((match->type == ZEBRA_ROUTE_CONNECT) ||
2390 (RIB_SYSTEM_ROUTE(match) && RSYSTEM_ROUTE(type))) {
2391 match = zebra_nhg_connected_ifindex(rn, match,
2392 nexthop->ifindex);
2393
2394 newhop = match->nhe->nhg.nexthop;
2395 if (nexthop->type == NEXTHOP_TYPE_IPV4 ||
2396 nexthop->type == NEXTHOP_TYPE_IPV6)
2397 nexthop->ifindex = newhop->ifindex;
2398 else if (nexthop->ifindex != newhop->ifindex) {
2399 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2400 zlog_debug(
2401 "%s: %pNHv given ifindex does not match nexthops ifindex found: %pNHv",
2402 __func__, nexthop, newhop);
2403 /*
2404 * NEXTHOP_TYPE_*_IFINDEX but ifindex
2405 * doesn't match what we found.
2406 */
2407 return 0;
2408 }
2409
2410 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2411 zlog_debug(
2412 "%s: CONNECT match %p (%pNG), newhop %pNHv",
2413 __func__, match, match->nhe, newhop);
2414
2415 return 1;
2416 } else if (CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
2417 struct nexthop_group *nhg;
2418 struct nexthop *resolver;
2419 struct backup_nh_map_s map = {};
2420
2421 resolved = 0;
2422
2423 /*
2424 * Only useful if installed or being Route Replacing
2425 * Why Being Route Replaced as well?
2426 * Imagine a route A and route B( that depends on A )
2427 * for recursive resolution and A already exists in the
2428 * zebra rib. If zebra receives the routes
2429 * for resolution at aproximately the same time in the [
2430 * B, A ] order on the workQ. If this happens then
2431 * normal route resolution will happen and B will be
2432 * resolved successfully and then A will be resolved
2433 * successfully. Now imagine the reversed order [A, B].
2434 * A will be resolved and then scheduled for installed
2435 * (Thus not having the ROUTE_ENTRY_INSTALLED flag ). B
2436 * will then get resolved and fail to be installed
2437 * because the original below test. Let's `loosen` this
2438 * up a tiny bit and allow the
2439 * ROUTE_ENTRY_ROUTE_REPLACING flag ( that is set when a
2440 * Route Replace operation is being initiated on A now )
2441 * to now satisfy this situation. This will allow
2442 * either order in the workQ to work properly.
2443 */
2444 if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED) &&
2445 !CHECK_FLAG(match->status,
2446 ROUTE_ENTRY_ROUTE_REPLACING)) {
2447 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2448 zlog_debug(
2449 "%s: match %p (%pNG) not installed or being Route Replaced",
2450 __func__, match, match->nhe);
2451
2452 goto done_with_match;
2453 }
2454
2455 /* Examine installed nexthops; note that there
2456 * may not be any installed primary nexthops if
2457 * only backups are installed.
2458 */
2459 nhg = rib_get_fib_nhg(match);
2460 for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
2461 if (!nexthop_valid_resolve(nexthop, newhop))
2462 continue;
2463
2464 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2465 zlog_debug(
2466 "%s: RECURSIVE match %p (%pNG), newhop %pNHv",
2467 __func__, match, match->nhe,
2468 newhop);
2469
2470 SET_FLAG(nexthop->flags,
2471 NEXTHOP_FLAG_RECURSIVE);
2472 resolver = nexthop_set_resolved(afi, newhop,
2473 nexthop, NULL);
2474 resolved = 1;
2475
2476 /* If there are backup nexthops, capture
2477 * that info with the resolving nexthop.
2478 */
2479 if (resolver && newhop->backup_num > 0) {
2480 resolve_backup_nexthops(newhop,
2481 match->nhe,
2482 resolver, nhe,
2483 &map);
2484 }
2485 }
2486
2487 /* Examine installed backup nexthops, if any. There
2488 * are only installed backups *if* there is a
2489 * dedicated fib list. The UI can also control use
2490 * of backups for resolution.
2491 */
2492 nhg = rib_get_fib_backup_nhg(match);
2493 if (!use_recursive_backups ||
2494 nhg == NULL || nhg->nexthop == NULL)
2495 goto done_with_match;
2496
2497 for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
2498 if (!nexthop_valid_resolve(nexthop, newhop))
2499 continue;
2500
2501 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2502 zlog_debug(
2503 "%s: RECURSIVE match backup %p (%pNG), newhop %pNHv",
2504 __func__, match, match->nhe,
2505 newhop);
2506
2507 SET_FLAG(nexthop->flags,
2508 NEXTHOP_FLAG_RECURSIVE);
2509 nexthop_set_resolved(afi, newhop, nexthop,
2510 NULL);
2511 resolved = 1;
2512 }
2513
2514 done_with_match:
2515 /* Capture resolving mtu */
2516 if (resolved) {
2517 if (pmtu)
2518 *pmtu = match->mtu;
2519
2520 } else if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2521 zlog_debug(
2522 " %s: Recursion failed to find",
2523 __func__);
2524
2525 return resolved;
2526 } else {
2527 if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
2528 zlog_debug(
2529 " %s: Route Type %s has not turned on recursion",
2530 __func__, zebra_route_string(type));
2531 if (type == ZEBRA_ROUTE_BGP
2532 && !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP))
2533 zlog_debug(
2534 " EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\"");
2535 }
2536 return 0;
2537 }
2538 }
2539 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2540 zlog_debug(" %s: Nexthop did not lookup in table",
2541 __func__);
2542 return 0;
2543 }
2544
2545 /* This function verifies reachability of one given nexthop, which can be
2546 * numbered or unnumbered, IPv4 or IPv6. The result is unconditionally stored
2547 * in nexthop->flags field. The nexthop->ifindex will be updated
2548 * appropriately as well.
2549 *
2550 * An existing route map can turn an otherwise active nexthop into inactive,
2551 * but not vice versa.
2552 *
2553 * The return value is the final value of 'ACTIVE' flag.
2554 */
2555 static unsigned nexthop_active_check(struct route_node *rn,
2556 struct route_entry *re,
2557 struct nexthop *nexthop,
2558 struct nhg_hash_entry *nhe)
2559 {
2560 route_map_result_t ret = RMAP_PERMITMATCH;
2561 afi_t family;
2562 const struct prefix *p, *src_p;
2563 struct zebra_vrf *zvrf;
2564 uint32_t mtu = 0;
2565 vrf_id_t vrf_id;
2566
2567 srcdest_rnode_prefixes(rn, &p, &src_p);
2568
2569 if (rn->p.family == AF_INET)
2570 family = AFI_IP;
2571 else if (rn->p.family == AF_INET6)
2572 family = AFI_IP6;
2573 else
2574 family = AF_UNSPEC;
2575
2576 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2577 zlog_debug("%s: re %p, nexthop %pNHv", __func__, re, nexthop);
2578
2579 /*
2580 * If this is a kernel route, then if the interface is *up* then
2581 * by golly gee whiz it's a good route.
2582 */
2583 if (re->type == ZEBRA_ROUTE_KERNEL || re->type == ZEBRA_ROUTE_SYSTEM) {
2584 struct interface *ifp;
2585
2586 ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
2587
2588 if (ifp && (if_is_operative(ifp) || if_is_up(ifp))) {
2589 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2590 goto skip_check;
2591 }
2592 }
2593
2594 vrf_id = zvrf_id(rib_dest_vrf(rib_dest_from_rnode(rn)));
2595 switch (nexthop->type) {
2596 case NEXTHOP_TYPE_IFINDEX:
2597 if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
2598 &mtu, vrf_id))
2599 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2600 else
2601 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2602 break;
2603 case NEXTHOP_TYPE_IPV4:
2604 case NEXTHOP_TYPE_IPV4_IFINDEX:
2605 family = AFI_IP;
2606 if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
2607 &mtu, vrf_id))
2608 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2609 else
2610 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2611 break;
2612 case NEXTHOP_TYPE_IPV6:
2613 family = AFI_IP6;
2614 if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
2615 &mtu, vrf_id))
2616 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2617 else
2618 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2619 break;
2620 case NEXTHOP_TYPE_IPV6_IFINDEX:
2621 /* RFC 5549, v4 prefix with v6 NH */
2622 if (rn->p.family != AF_INET)
2623 family = AFI_IP6;
2624
2625 if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
2626 &mtu, vrf_id))
2627 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2628 else
2629 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2630 break;
2631 case NEXTHOP_TYPE_BLACKHOLE:
2632 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2633 break;
2634 default:
2635 break;
2636 }
2637
2638 skip_check:
2639
2640 if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) {
2641 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2642 zlog_debug(" %s: Unable to find active nexthop",
2643 __func__);
2644 return 0;
2645 }
2646
2647 /* Capture recursive nexthop mtu.
2648 * TODO -- the code used to just reset the re's value to zero
2649 * for each nexthop, and then jam any resolving route's mtu value in,
2650 * whether or not that was zero, or lt/gt any existing value? The
2651 * way this is used appears to be as a floor value, so let's try
2652 * using it that way here.
2653 */
2654 if (mtu > 0) {
2655 if (re->nexthop_mtu == 0 || re->nexthop_mtu > mtu)
2656 re->nexthop_mtu = mtu;
2657 }
2658
2659 /* XXX: What exactly do those checks do? Do we support
2660 * e.g. IPv4 routes with IPv6 nexthops or vice versa?
2661 */
2662 if (RIB_SYSTEM_ROUTE(re) || (family == AFI_IP && p->family != AF_INET)
2663 || (family == AFI_IP6 && p->family != AF_INET6))
2664 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2665
2666 /* The original code didn't determine the family correctly
2667 * e.g. for NEXTHOP_TYPE_IFINDEX. Retrieve the correct afi
2668 * from the rib_table_info in those cases.
2669 * Possibly it may be better to use only the rib_table_info
2670 * in every case.
2671 */
2672 if (family == 0) {
2673 struct rib_table_info *info;
2674
2675 info = srcdest_rnode_table_info(rn);
2676 family = info->afi;
2677 }
2678
2679 memset(&nexthop->rmap_src.ipv6, 0, sizeof(union g_addr));
2680
2681 zvrf = zebra_vrf_lookup_by_id(re->vrf_id);
2682 if (!zvrf) {
2683 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2684 zlog_debug(" %s: zvrf is NULL", __func__);
2685 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2686 }
2687
2688 /* It'll get set if required inside */
2689 ret = zebra_route_map_check(family, re->type, re->instance, p, nexthop,
2690 zvrf, re->tag);
2691 if (ret == RMAP_DENYMATCH) {
2692 if (IS_ZEBRA_DEBUG_RIB) {
2693 zlog_debug(
2694 "%u:%pRN: Filtering out with NH %pNHv due to route map",
2695 re->vrf_id, rn, nexthop);
2696 }
2697 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2698 }
2699 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2700 }
2701
2702 /* Helper function called after resolution to walk nhg rb trees
2703 * and toggle the NEXTHOP_GROUP_VALID flag if the nexthop
2704 * is active on singleton NHEs.
2705 */
2706 static bool zebra_nhg_set_valid_if_active(struct nhg_hash_entry *nhe)
2707 {
2708 struct nhg_connected *rb_node_dep = NULL;
2709 bool valid = false;
2710
2711 if (!zebra_nhg_depends_is_empty(nhe)) {
2712 /* Is at least one depend valid? */
2713 frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
2714 if (zebra_nhg_set_valid_if_active(rb_node_dep->nhe))
2715 valid = true;
2716 }
2717
2718 goto done;
2719 }
2720
2721 /* should be fully resolved singleton at this point */
2722 if (CHECK_FLAG(nhe->nhg.nexthop->flags, NEXTHOP_FLAG_ACTIVE))
2723 valid = true;
2724
2725 done:
2726 if (valid)
2727 SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
2728
2729 return valid;
2730 }
2731
2732 /* Checks if the first nexthop is EVPN. If not, early return.
2733 *
2734 * This is used to determine if there is a mismatch between l3VNI
2735 * of the route's vrf and the nexthops in use's VNI labels.
2736 *
2737 * If there is a mismatch, we keep the labels as these MUST be DVNI nexthops.
2738 *
2739 * IF there is no mismatch, we remove the labels and handle the routes as
2740 * we have traditionally with evpn.
2741 */
2742 static bool nexthop_list_set_evpn_dvni(struct route_entry *re,
2743 struct nexthop_group *nhg)
2744 {
2745 struct nexthop *nexthop;
2746 vni_t re_vrf_vni;
2747 vni_t nh_vni;
2748 bool use_dvni = false;
2749
2750 nexthop = nhg->nexthop;
2751
2752 if (!nexthop->nh_label || nexthop->nh_label_type != ZEBRA_LSP_EVPN)
2753 return false;
2754
2755 re_vrf_vni = get_l3vni_vni(re->vrf_id);
2756
2757 for (; nexthop; nexthop = nexthop->next) {
2758 if (!nexthop->nh_label ||
2759 nexthop->nh_label_type != ZEBRA_LSP_EVPN)
2760 continue;
2761
2762 nh_vni = label2vni(&nexthop->nh_label->label[0]);
2763
2764 if (nh_vni != re_vrf_vni)
2765 use_dvni = true;
2766 }
2767
2768 /* Using traditional way, no VNI encap - remove labels */
2769 if (!use_dvni) {
2770 for (nexthop = nhg->nexthop; nexthop; nexthop = nexthop->next)
2771 nexthop_del_labels(nexthop);
2772 }
2773
2774 return use_dvni;
2775 }
2776
2777 /*
2778 * Process a list of nexthops, given an nhe, determining
2779 * whether each one is ACTIVE/installable at this time.
2780 */
2781 static uint32_t nexthop_list_active_update(struct route_node *rn,
2782 struct route_entry *re,
2783 struct nhg_hash_entry *nhe,
2784 bool is_backup)
2785 {
2786 union g_addr prev_src;
2787 unsigned int prev_active, new_active;
2788 ifindex_t prev_index;
2789 uint32_t counter = 0;
2790 struct nexthop *nexthop;
2791 struct nexthop_group *nhg = &nhe->nhg;
2792 bool vni_removed = false;
2793
2794 nexthop = nhg->nexthop;
2795
2796 /* Init recursive nh mtu */
2797 re->nexthop_mtu = 0;
2798
2799 /* Handler for dvni evpn nexthops. Has to be done at nhg level */
2800 vni_removed = !nexthop_list_set_evpn_dvni(re, nhg);
2801
2802 /* Process nexthops one-by-one */
2803 for ( ; nexthop; nexthop = nexthop->next) {
2804
2805 /* No protocol daemon provides src and so we're skipping
2806 * tracking it
2807 */
2808 prev_src = nexthop->rmap_src;
2809 prev_active = CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2810 prev_index = nexthop->ifindex;
2811
2812 /* Include the containing nhe for primary nexthops: if there's
2813 * recursive resolution, we capture the backup info also.
2814 */
2815 new_active =
2816 nexthop_active_check(rn, re, nexthop,
2817 (is_backup ? NULL : nhe));
2818
2819 /*
2820 * We need to respect the multipath_num here
2821 * as that what we should be able to install from
2822 * a multipath perspective should not be a data plane
2823 * decision point.
2824 */
2825 if (new_active && counter >= zrouter.multipath_num) {
2826 struct nexthop *nh;
2827
2828 /* Set it and its resolved nexthop as inactive. */
2829 for (nh = nexthop; nh; nh = nh->resolved)
2830 UNSET_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE);
2831
2832 new_active = 0;
2833 }
2834
2835 if (new_active)
2836 counter++;
2837
2838 /* Check for changes to the nexthop - set ROUTE_ENTRY_CHANGED */
2839 if (prev_active != new_active ||
2840 prev_index != nexthop->ifindex ||
2841 ((nexthop->type >= NEXTHOP_TYPE_IFINDEX &&
2842 nexthop->type < NEXTHOP_TYPE_IPV6) &&
2843 prev_src.ipv4.s_addr != nexthop->rmap_src.ipv4.s_addr) ||
2844 ((nexthop->type >= NEXTHOP_TYPE_IPV6 &&
2845 nexthop->type < NEXTHOP_TYPE_BLACKHOLE) &&
2846 !(IPV6_ADDR_SAME(&prev_src.ipv6,
2847 &nexthop->rmap_src.ipv6))) ||
2848 CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED) ||
2849 vni_removed)
2850 SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
2851 }
2852
2853 return counter;
2854 }
2855
2856
2857 static uint32_t proto_nhg_nexthop_active_update(struct nexthop_group *nhg)
2858 {
2859 struct nexthop *nh;
2860 uint32_t curr_active = 0;
2861
2862 /* Assume all active for now */
2863
2864 for (nh = nhg->nexthop; nh; nh = nh->next) {
2865 SET_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE);
2866 curr_active++;
2867 }
2868
2869 return curr_active;
2870 }
2871
2872 /*
2873 * Iterate over all nexthops of the given RIB entry and refresh their
2874 * ACTIVE flag. If any nexthop is found to toggle the ACTIVE flag,
2875 * the whole re structure is flagged with ROUTE_ENTRY_CHANGED.
2876 *
2877 * Return value is the new number of active nexthops.
2878 */
2879 int nexthop_active_update(struct route_node *rn, struct route_entry *re)
2880 {
2881 struct nhg_hash_entry *curr_nhe;
2882 uint32_t curr_active = 0, backup_active = 0;
2883
2884 if (PROTO_OWNED(re->nhe))
2885 return proto_nhg_nexthop_active_update(&re->nhe->nhg);
2886
2887 afi_t rt_afi = family2afi(rn->p.family);
2888
2889 UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
2890
2891 /* Make a local copy of the existing nhe, so we don't work on/modify
2892 * the shared nhe.
2893 */
2894 curr_nhe = zebra_nhe_copy(re->nhe, re->nhe->id);
2895
2896 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2897 zlog_debug("%s: re %p nhe %p (%pNG), curr_nhe %p", __func__, re,
2898 re->nhe, re->nhe, curr_nhe);
2899
2900 /* Clear the existing id, if any: this will avoid any confusion
2901 * if the id exists, and will also force the creation
2902 * of a new nhe reflecting the changes we may make in this local copy.
2903 */
2904 curr_nhe->id = 0;
2905
2906 /* Process nexthops */
2907 curr_active = nexthop_list_active_update(rn, re, curr_nhe, false);
2908
2909 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2910 zlog_debug("%s: re %p curr_active %u", __func__, re,
2911 curr_active);
2912
2913 /* If there are no backup nexthops, we are done */
2914 if (zebra_nhg_get_backup_nhg(curr_nhe) == NULL)
2915 goto backups_done;
2916
2917 backup_active = nexthop_list_active_update(
2918 rn, re, curr_nhe->backup_info->nhe, true /*is_backup*/);
2919
2920 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2921 zlog_debug("%s: re %p backup_active %u", __func__, re,
2922 backup_active);
2923
2924 backups_done:
2925
2926 /*
2927 * Ref or create an nhe that matches the current state of the
2928 * nexthop(s).
2929 */
2930 if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)) {
2931 struct nhg_hash_entry *new_nhe = NULL;
2932
2933 new_nhe = zebra_nhg_rib_find_nhe(curr_nhe, rt_afi);
2934
2935 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2936 zlog_debug(
2937 "%s: re %p CHANGED: nhe %p (%pNG) => new_nhe %p (%pNG)",
2938 __func__, re, re->nhe, re->nhe, new_nhe,
2939 new_nhe);
2940
2941 route_entry_update_nhe(re, new_nhe);
2942 }
2943
2944
2945 /* Walk the NHE depends tree and toggle NEXTHOP_GROUP_VALID
2946 * flag where appropriate.
2947 */
2948 if (curr_active)
2949 zebra_nhg_set_valid_if_active(re->nhe);
2950
2951 /*
2952 * Do not need the old / copied nhe anymore since it
2953 * was either copied over into a new nhe or not
2954 * used at all.
2955 */
2956 zebra_nhg_free(curr_nhe);
2957 return curr_active;
2958 }
2959
2960 /* Recursively construct a grp array of fully resolved IDs.
2961 *
2962 * This function allows us to account for groups within groups,
2963 * by converting them into a flat array of IDs.
2964 *
2965 * nh_grp is modified at every level of recursion to append
2966 * to it the next unique, fully resolved ID from the entire tree.
2967 *
2968 *
2969 * Note:
2970 * I'm pretty sure we only allow ONE level of group within group currently.
2971 * But making this recursive just in case that ever changes.
2972 */
2973 static uint8_t zebra_nhg_nhe2grp_internal(struct nh_grp *grp,
2974 uint8_t curr_index,
2975 struct nhg_hash_entry *nhe,
2976 int max_num)
2977 {
2978 struct nhg_connected *rb_node_dep = NULL;
2979 struct nhg_hash_entry *depend = NULL;
2980 uint8_t i = curr_index;
2981
2982 frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
2983 bool duplicate = false;
2984
2985 if (i >= max_num)
2986 goto done;
2987
2988 depend = rb_node_dep->nhe;
2989
2990 /*
2991 * If its recursive, use its resolved nhe in the group
2992 */
2993 if (CHECK_FLAG(depend->flags, NEXTHOP_GROUP_RECURSIVE)) {
2994 depend = zebra_nhg_resolve(depend);
2995 if (!depend) {
2996 flog_err(
2997 EC_ZEBRA_NHG_FIB_UPDATE,
2998 "Failed to recursively resolve Nexthop Hash Entry in the group id=%pNG",
2999 nhe);
3000 continue;
3001 }
3002 }
3003
3004 if (!zebra_nhg_depends_is_empty(depend)) {
3005 /* This is a group within a group */
3006 i = zebra_nhg_nhe2grp_internal(grp, i, depend, max_num);
3007 } else {
3008 if (!CHECK_FLAG(depend->flags, NEXTHOP_GROUP_VALID)) {
3009 if (IS_ZEBRA_DEBUG_RIB_DETAILED
3010 || IS_ZEBRA_DEBUG_NHG)
3011 zlog_debug(
3012 "%s: Nexthop ID (%u) not valid, not appending to dataplane install group",
3013 __func__, depend->id);
3014 continue;
3015 }
3016
3017 /* If the nexthop not installed/queued for install don't
3018 * put in the ID array.
3019 */
3020 if (!(CHECK_FLAG(depend->flags, NEXTHOP_GROUP_INSTALLED)
3021 || CHECK_FLAG(depend->flags,
3022 NEXTHOP_GROUP_QUEUED))) {
3023 if (IS_ZEBRA_DEBUG_RIB_DETAILED
3024 || IS_ZEBRA_DEBUG_NHG)
3025 zlog_debug(
3026 "%s: Nexthop ID (%u) not installed or queued for install, not appending to dataplane install group",
3027 __func__, depend->id);
3028 continue;
3029 }
3030
3031 /* Check for duplicate IDs, ignore if found. */
3032 for (int j = 0; j < i; j++) {
3033 if (depend->id == grp[j].id) {
3034 duplicate = true;
3035 break;
3036 }
3037 }
3038
3039 if (duplicate) {
3040 if (IS_ZEBRA_DEBUG_RIB_DETAILED
3041 || IS_ZEBRA_DEBUG_NHG)
3042 zlog_debug(
3043 "%s: Nexthop ID (%u) is duplicate, not appending to dataplane install group",
3044 __func__, depend->id);
3045 continue;
3046 }
3047
3048 grp[i].id = depend->id;
3049 grp[i].weight = depend->nhg.nexthop->weight;
3050 i++;
3051 }
3052 }
3053
3054 if (nhe->backup_info == NULL || nhe->backup_info->nhe == NULL)
3055 goto done;
3056
3057 /* TODO -- For now, we are not trying to use or install any
3058 * backup info in this nexthop-id path: we aren't prepared
3059 * to use the backups here yet. We're just debugging what we find.
3060 */
3061 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
3062 zlog_debug("%s: skipping backup nhe", __func__);
3063
3064 done:
3065 return i;
3066 }
3067
/*
 * Fill 'grp' (capacity 'max_num') with the flattened, fully resolved IDs
 * of 'nhe', starting from slot zero.  Returns the number of slots filled.
 */
uint8_t zebra_nhg_nhe2grp(struct nh_grp *grp, struct nhg_hash_entry *nhe,
			  int max_num)
{
	uint8_t filled;

	/* The recursive worker does everything; we just start it at 0 */
	filled = zebra_nhg_nhe2grp_internal(grp, 0, nhe, max_num);

	return filled;
}
3075
3076 void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe)
3077 {
3078 struct nhg_connected *rb_node_dep = NULL;
3079
3080 /* Resolve it first */
3081 nhe = zebra_nhg_resolve(nhe);
3082
3083 /* Make sure all depends are installed/queued */
3084 frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
3085 zebra_nhg_install_kernel(rb_node_dep->nhe);
3086 }
3087
3088 if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_VALID)
3089 && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)
3090 && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED)) {
3091 /* Change its type to us since we are installing it */
3092 if (!ZEBRA_NHG_CREATED(nhe))
3093 nhe->type = ZEBRA_ROUTE_NHG;
3094
3095 int ret = dplane_nexthop_add(nhe);
3096
3097 switch (ret) {
3098 case ZEBRA_DPLANE_REQUEST_QUEUED:
3099 SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
3100 break;
3101 case ZEBRA_DPLANE_REQUEST_FAILURE:
3102 flog_err(
3103 EC_ZEBRA_DP_INSTALL_FAIL,
3104 "Failed to install Nexthop ID (%pNG) into the kernel",
3105 nhe);
3106 break;
3107 case ZEBRA_DPLANE_REQUEST_SUCCESS:
3108 SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
3109 zebra_nhg_handle_install(nhe);
3110 break;
3111 }
3112 }
3113 }
3114
3115 void zebra_nhg_uninstall_kernel(struct nhg_hash_entry *nhe)
3116 {
3117 if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)) {
3118 int ret = dplane_nexthop_delete(nhe);
3119
3120 switch (ret) {
3121 case ZEBRA_DPLANE_REQUEST_QUEUED:
3122 SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
3123 break;
3124 case ZEBRA_DPLANE_REQUEST_FAILURE:
3125 flog_err(
3126 EC_ZEBRA_DP_DELETE_FAIL,
3127 "Failed to uninstall Nexthop ID (%pNG) from the kernel",
3128 nhe);
3129 break;
3130 case ZEBRA_DPLANE_REQUEST_SUCCESS:
3131 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
3132 break;
3133 }
3134 }
3135
3136 zebra_nhg_handle_uninstall(nhe);
3137 }
3138
3139 void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx)
3140 {
3141 enum dplane_op_e op;
3142 enum zebra_dplane_result status;
3143 uint32_t id = 0;
3144 struct nhg_hash_entry *nhe = NULL;
3145
3146 op = dplane_ctx_get_op(ctx);
3147 status = dplane_ctx_get_status(ctx);
3148
3149 id = dplane_ctx_get_nhe_id(ctx);
3150
3151 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL || IS_ZEBRA_DEBUG_NHG_DETAIL)
3152 zlog_debug(
3153 "Nexthop dplane ctx %p, op %s, nexthop ID (%u), result %s",
3154 ctx, dplane_op2str(op), id, dplane_res2str(status));
3155
3156 switch (op) {
3157 case DPLANE_OP_NH_DELETE:
3158 if (status != ZEBRA_DPLANE_REQUEST_SUCCESS)
3159 flog_err(
3160 EC_ZEBRA_DP_DELETE_FAIL,
3161 "Failed to uninstall Nexthop ID (%u) from the kernel",
3162 id);
3163
3164 /* We already free'd the data, nothing to do */
3165 break;
3166 case DPLANE_OP_NH_INSTALL:
3167 case DPLANE_OP_NH_UPDATE:
3168 nhe = zebra_nhg_lookup_id(id);
3169
3170 if (!nhe) {
3171 if (IS_ZEBRA_DEBUG_NHG)
3172 zlog_debug(
3173 "%s operation preformed on Nexthop ID (%u) in the kernel, that we no longer have in our table",
3174 dplane_op2str(op), id);
3175
3176 break;
3177 }
3178
3179 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
3180 if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) {
3181 SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
3182 SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
3183 zebra_nhg_handle_install(nhe);
3184
3185 /* If daemon nhg, send it an update */
3186 if (PROTO_OWNED(nhe))
3187 zsend_nhg_notify(nhe->type, nhe->zapi_instance,
3188 nhe->zapi_session, nhe->id,
3189 ZAPI_NHG_INSTALLED);
3190 } else {
3191 /* If daemon nhg, send it an update */
3192 if (PROTO_OWNED(nhe))
3193 zsend_nhg_notify(nhe->type, nhe->zapi_instance,
3194 nhe->zapi_session, nhe->id,
3195 ZAPI_NHG_FAIL_INSTALL);
3196
3197 if (!(zebra_nhg_proto_nexthops_only() &&
3198 !PROTO_OWNED(nhe)))
3199 flog_err(
3200 EC_ZEBRA_DP_INSTALL_FAIL,
3201 "Failed to install Nexthop (%pNG) into the kernel",
3202 nhe);
3203 }
3204 break;
3205
3206 case DPLANE_OP_ROUTE_INSTALL:
3207 case DPLANE_OP_ROUTE_UPDATE:
3208 case DPLANE_OP_ROUTE_DELETE:
3209 case DPLANE_OP_ROUTE_NOTIFY:
3210 case DPLANE_OP_LSP_INSTALL:
3211 case DPLANE_OP_LSP_UPDATE:
3212 case DPLANE_OP_LSP_DELETE:
3213 case DPLANE_OP_LSP_NOTIFY:
3214 case DPLANE_OP_PW_INSTALL:
3215 case DPLANE_OP_PW_UNINSTALL:
3216 case DPLANE_OP_SYS_ROUTE_ADD:
3217 case DPLANE_OP_SYS_ROUTE_DELETE:
3218 case DPLANE_OP_ADDR_INSTALL:
3219 case DPLANE_OP_ADDR_UNINSTALL:
3220 case DPLANE_OP_MAC_INSTALL:
3221 case DPLANE_OP_MAC_DELETE:
3222 case DPLANE_OP_NEIGH_INSTALL:
3223 case DPLANE_OP_NEIGH_UPDATE:
3224 case DPLANE_OP_NEIGH_DELETE:
3225 case DPLANE_OP_NEIGH_IP_INSTALL:
3226 case DPLANE_OP_NEIGH_IP_DELETE:
3227 case DPLANE_OP_VTEP_ADD:
3228 case DPLANE_OP_VTEP_DELETE:
3229 case DPLANE_OP_RULE_ADD:
3230 case DPLANE_OP_RULE_DELETE:
3231 case DPLANE_OP_RULE_UPDATE:
3232 case DPLANE_OP_NEIGH_DISCOVER:
3233 case DPLANE_OP_BR_PORT_UPDATE:
3234 case DPLANE_OP_NONE:
3235 case DPLANE_OP_IPTABLE_ADD:
3236 case DPLANE_OP_IPTABLE_DELETE:
3237 case DPLANE_OP_IPSET_ADD:
3238 case DPLANE_OP_IPSET_DELETE:
3239 case DPLANE_OP_IPSET_ENTRY_ADD:
3240 case DPLANE_OP_IPSET_ENTRY_DELETE:
3241 case DPLANE_OP_NEIGH_TABLE_UPDATE:
3242 case DPLANE_OP_GRE_SET:
3243 case DPLANE_OP_INTF_ADDR_ADD:
3244 case DPLANE_OP_INTF_ADDR_DEL:
3245 case DPLANE_OP_INTF_NETCONFIG:
3246 case DPLANE_OP_INTF_INSTALL:
3247 case DPLANE_OP_INTF_UPDATE:
3248 case DPLANE_OP_INTF_DELETE:
3249 case DPLANE_OP_TC_QDISC_INSTALL:
3250 case DPLANE_OP_TC_QDISC_UNINSTALL:
3251 case DPLANE_OP_TC_CLASS_ADD:
3252 case DPLANE_OP_TC_CLASS_DELETE:
3253 case DPLANE_OP_TC_CLASS_UPDATE:
3254 case DPLANE_OP_TC_FILTER_ADD:
3255 case DPLANE_OP_TC_FILTER_DELETE:
3256 case DPLANE_OP_TC_FILTER_UPDATE:
3257 break;
3258 }
3259 }
3260
3261 static int zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg)
3262 {
3263 struct nhg_hash_entry *nhe = NULL;
3264
3265 nhe = (struct nhg_hash_entry *)bucket->data;
3266
3267 /*
3268 * same logic as with routes.
3269 *
3270 * If older than startup time, we know we read them in from the
3271 * kernel and have not gotten and update for them since startup
3272 * from an upper level proto.
3273 */
3274 if (zrouter.startup_time < nhe->uptime)
3275 return HASHWALK_CONTINUE;
3276
3277 /*
3278 * If it's proto-owned and not being used by a route, remove it since
3279 * we haven't gotten an update about it from the proto since startup.
3280 * This means that either the config for it was removed or the daemon
3281 * didn't get started. This handles graceful restart & retain scenario.
3282 */
3283 if (PROTO_OWNED(nhe) && nhe->refcnt == 1) {
3284 zebra_nhg_decrement_ref(nhe);
3285 return HASHWALK_ABORT;
3286 }
3287
3288 /*
3289 * If its being ref'd by routes, just let it be uninstalled via a route
3290 * removal.
3291 */
3292 if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) {
3293 zebra_nhg_uninstall_kernel(nhe);
3294 return HASHWALK_ABORT;
3295 }
3296
3297 return HASHWALK_CONTINUE;
3298 }
3299
3300 void zebra_nhg_sweep_table(struct hash *hash)
3301 {
3302 uint32_t count;
3303
3304 /*
3305 * Yes this is extremely odd. Effectively nhg's have
3306 * other nexthop groups that depend on them and when you
3307 * remove them, you can have other entries blown up.
3308 * our hash code does not work with deleting multiple
3309 * entries at a time and will possibly cause crashes
3310 * So what to do? Whenever zebra_nhg_sweep_entry
3311 * deletes an entry it will return HASHWALK_ABORT,
3312 * cause that deletion might have triggered more.
3313 * then we can just keep sweeping this table
3314 * until nothing more is found to do.
3315 */
3316 do {
3317 count = hashcount(hash);
3318 hash_walk(hash, zebra_nhg_sweep_entry, NULL);
3319 } while (count != hashcount(hash));
3320 }
3321
3322 static void zebra_nhg_mark_keep_entry(struct hash_bucket *bucket, void *arg)
3323 {
3324 struct nhg_hash_entry *nhe = bucket->data;
3325
3326 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
3327 }
3328
3329 /*
3330 * When we are shutting down and we have retain mode enabled
3331 * in zebra the process is to mark each vrf that it's
3332 * routes should not be deleted. The problem with that
3333 * is that shutdown actually free's up memory which
3334 * causes the nexthop group's ref counts to go to zero
3335 * we need a way to subtly tell the system to not remove
3336 * the nexthop groups from the kernel at the same time.
3337 * The easiest just looks like that we should not mark
3338 * the nhg's as installed any more and when the ref count
3339 * goes to zero we'll attempt to delete and do nothing
3340 */
3341 void zebra_nhg_mark_keep(void)
3342 {
3343 hash_iterate(zrouter.nhgs_id, zebra_nhg_mark_keep_entry, NULL);
3344 }
3345
3346 /* Global control to disable use of kernel nexthops, if available. We can't
3347 * force the kernel to support nexthop ids, of course, but we can disable
3348 * zebra's use of them, for testing e.g. By default, if the kernel supports
3349 * nexthop ids, zebra uses them.
3350 */
3351 void zebra_nhg_enable_kernel_nexthops(bool set)
3352 {
3353 g_nexthops_enabled = set;
3354 }
3355
3356 bool zebra_nhg_kernel_nexthops_enabled(void)
3357 {
3358 return g_nexthops_enabled;
3359 }
3360
3361 /* Global control for use of activated backups for recursive resolution. */
3362 void zebra_nhg_set_recursive_use_backups(bool set)
3363 {
3364 use_recursive_backups = set;
3365 }
3366
3367 bool zebra_nhg_recursive_use_backups(void)
3368 {
3369 return use_recursive_backups;
3370 }
3371
3372 /*
3373 * Global control to only use kernel nexthops for protocol created NHGs.
3374 * There are some use cases where you may not want zebra to implicitly
3375 * create kernel nexthops for all routes and only create them for NHGs
3376 * passed down by upper level protos.
3377 *
3378 * Default is off.
3379 */
3380 void zebra_nhg_set_proto_nexthops_only(bool set)
3381 {
3382 proto_nexthops_only = set;
3383 }
3384
3385 bool zebra_nhg_proto_nexthops_only(void)
3386 {
3387 return proto_nexthops_only;
3388 }
3389
3390 /* Add NHE from upper level proto */
3391 struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type,
3392 uint16_t instance, uint32_t session,
3393 struct nexthop_group *nhg, afi_t afi)
3394 {
3395 struct nhg_hash_entry lookup;
3396 struct nhg_hash_entry *new, *old;
3397 struct nhg_connected *rb_node_dep = NULL;
3398 struct nexthop *newhop;
3399 bool replace = false;
3400
3401 if (!nhg->nexthop) {
3402 if (IS_ZEBRA_DEBUG_NHG)
3403 zlog_debug("%s: id %u, no nexthops passed to add",
3404 __func__, id);
3405 return NULL;
3406 }
3407
3408
3409 /* Set nexthop list as active, since they wont go through rib
3410 * processing.
3411 *
3412 * Assuming valid/onlink for now.
3413 *
3414 * Once resolution is figured out, we won't need this!
3415 */
3416 for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
3417 if (CHECK_FLAG(newhop->flags, NEXTHOP_FLAG_HAS_BACKUP)) {
3418 if (IS_ZEBRA_DEBUG_NHG)
3419 zlog_debug(
3420 "%s: id %u, backup nexthops not supported",
3421 __func__, id);
3422 return NULL;
3423 }
3424
3425 if (newhop->type == NEXTHOP_TYPE_BLACKHOLE) {
3426 if (IS_ZEBRA_DEBUG_NHG)
3427 zlog_debug(
3428 "%s: id %u, blackhole nexthop not supported",
3429 __func__, id);
3430 return NULL;
3431 }
3432
3433 if (newhop->type == NEXTHOP_TYPE_IFINDEX) {
3434 if (IS_ZEBRA_DEBUG_NHG)
3435 zlog_debug(
3436 "%s: id %u, nexthop without gateway not supported",
3437 __func__, id);
3438 return NULL;
3439 }
3440
3441 if (!newhop->ifindex) {
3442 if (IS_ZEBRA_DEBUG_NHG)
3443 zlog_debug(
3444 "%s: id %u, nexthop without ifindex is not supported",
3445 __func__, id);
3446 return NULL;
3447 }
3448 SET_FLAG(newhop->flags, NEXTHOP_FLAG_ACTIVE);
3449 }
3450
3451 zebra_nhe_init(&lookup, afi, nhg->nexthop);
3452 lookup.nhg.nexthop = nhg->nexthop;
3453 lookup.nhg.nhgr = nhg->nhgr;
3454 lookup.id = id;
3455 lookup.type = type;
3456
3457 old = zebra_nhg_lookup_id(id);
3458
3459 if (old) {
3460 /*
3461 * This is a replace, just release NHE from ID for now, The
3462 * depends/dependents may still be used in the replacement so
3463 * we don't touch them other than to remove their refs to their
3464 * old parent.
3465 */
3466 replace = true;
3467 hash_release(zrouter.nhgs_id, old);
3468
3469 /* Free all the things */
3470 zebra_nhg_release_all_deps(old);
3471 }
3472
3473 new = zebra_nhg_rib_find_nhe(&lookup, afi);
3474
3475 zebra_nhg_increment_ref(new);
3476
3477 /* Capture zapi client info */
3478 new->zapi_instance = instance;
3479 new->zapi_session = session;
3480
3481 zebra_nhg_set_valid_if_active(new);
3482
3483 zebra_nhg_install_kernel(new);
3484
3485 if (old) {
3486 /*
3487 * Check to handle recving DEL while routes still in use then
3488 * a replace.
3489 *
3490 * In this case we would have decremented the refcnt already
3491 * but set the FLAG here. Go ahead and increment once to fix
3492 * the misordering we have been sent.
3493 */
3494 if (CHECK_FLAG(old->flags, NEXTHOP_GROUP_PROTO_RELEASED))
3495 zebra_nhg_increment_ref(old);
3496
3497 rib_handle_nhg_replace(old, new);
3498
3499 /* We have to decrement its singletons
3500 * because some might not exist in NEW.
3501 */
3502 if (!zebra_nhg_depends_is_empty(old)) {
3503 frr_each (nhg_connected_tree, &old->nhg_depends,
3504 rb_node_dep)
3505 zebra_nhg_decrement_ref(rb_node_dep->nhe);
3506 }
3507
3508 /* Dont call the dec API, we dont want to uninstall the ID */
3509 old->refcnt = 0;
3510 THREAD_OFF(old->timer);
3511 zebra_nhg_free(old);
3512 old = NULL;
3513 }
3514
3515 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
3516 zlog_debug("%s: %s nhe %p (%u), vrf %d, type %s", __func__,
3517 (replace ? "replaced" : "added"), new, new->id,
3518 new->vrf_id, zebra_route_string(new->type));
3519
3520 return new;
3521 }
3522
3523 /* Delete NHE from upper level proto, caller must decrement ref */
3524 struct nhg_hash_entry *zebra_nhg_proto_del(uint32_t id, int type)
3525 {
3526 struct nhg_hash_entry *nhe;
3527
3528 nhe = zebra_nhg_lookup_id(id);
3529
3530 if (!nhe) {
3531 if (IS_ZEBRA_DEBUG_NHG)
3532 zlog_debug("%s: id %u, lookup failed", __func__, id);
3533
3534 return NULL;
3535 }
3536
3537 if (type != nhe->type) {
3538 if (IS_ZEBRA_DEBUG_NHG)
3539 zlog_debug(
3540 "%s: id %u, type %s mismatch, sent by %s, ignoring",
3541 __func__, id, zebra_route_string(nhe->type),
3542 zebra_route_string(type));
3543 return NULL;
3544 }
3545
3546 if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED)) {
3547 if (IS_ZEBRA_DEBUG_NHG)
3548 zlog_debug("%s: id %u, already released", __func__, id);
3549
3550 return NULL;
3551 }
3552
3553 SET_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED);
3554
3555 if (nhe->refcnt > 1) {
3556 if (IS_ZEBRA_DEBUG_NHG)
3557 zlog_debug(
3558 "%s: %pNG, still being used by routes refcnt %u",
3559 __func__, nhe, nhe->refcnt);
3560 return nhe;
3561 }
3562
3563 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
3564 zlog_debug("%s: deleted nhe %p (%pNG), vrf %d, type %s",
3565 __func__, nhe, nhe, nhe->vrf_id,
3566 zebra_route_string(nhe->type));
3567
3568 return nhe;
3569 }
3570
/* Walk context used while collecting the NHGs of one proto type */
struct nhg_score_proto_iter {
	/* Type an entry must have to be collected */
	int type;
	/* Entries collected so far */
	struct list *found;
};
3575
3576 static void zebra_nhg_score_proto_entry(struct hash_bucket *bucket, void *arg)
3577 {
3578 struct nhg_hash_entry *nhe;
3579 struct nhg_score_proto_iter *iter;
3580
3581 nhe = (struct nhg_hash_entry *)bucket->data;
3582 iter = arg;
3583
3584 /* Needs to match type and outside zebra ID space */
3585 if (nhe->type == iter->type && PROTO_OWNED(nhe)) {
3586 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
3587 zlog_debug(
3588 "%s: found nhe %p (%pNG), vrf %d, type %s after client disconnect",
3589 __func__, nhe, nhe, nhe->vrf_id,
3590 zebra_route_string(nhe->type));
3591
3592 /* Add to removal list */
3593 listnode_add(iter->found, nhe);
3594 }
3595 }
3596
3597 /* Remove specific by proto NHGs */
3598 unsigned long zebra_nhg_score_proto(int type)
3599 {
3600 struct nhg_hash_entry *nhe;
3601 struct nhg_score_proto_iter iter = {};
3602 struct listnode *ln;
3603 unsigned long count;
3604
3605 iter.type = type;
3606 iter.found = list_new();
3607
3608 /* Find matching entries to remove */
3609 hash_iterate(zrouter.nhgs_id, zebra_nhg_score_proto_entry, &iter);
3610
3611 /* Now remove them */
3612 for (ALL_LIST_ELEMENTS_RO(iter.found, ln, nhe)) {
3613 /*
3614 * This should be the last ref if we remove client routes too,
3615 * and thus should remove and free them.
3616 */
3617 zebra_nhg_decrement_ref(nhe);
3618 }
3619
3620 count = iter.found->count;
3621 list_delete(&iter.found);
3622
3623 return count;
3624 }
3625
3626 printfrr_ext_autoreg_p("NG", printfrr_nhghe);
3627 static ssize_t printfrr_nhghe(struct fbuf *buf, struct printfrr_eargs *ea,
3628 const void *ptr)
3629 {
3630 const struct nhg_hash_entry *nhe = ptr;
3631 const struct nhg_connected *dep;
3632 ssize_t ret = 0;
3633
3634 if (!nhe)
3635 return bputs(buf, "[NULL]");
3636
3637 ret += bprintfrr(buf, "%u[", nhe->id);
3638 if (nhe->ifp)
3639 ret += printfrr_nhs(buf, nhe->nhg.nexthop);
3640 else {
3641 int count = zebra_nhg_depends_count(nhe);
3642
3643 frr_each (nhg_connected_tree_const, &nhe->nhg_depends, dep) {
3644 ret += bprintfrr(buf, "%u", dep->nhe->id);
3645 if (count > 1)
3646 ret += bputs(buf, "/");
3647 count--;
3648 }
3649 }
3650
3651 ret += bputs(buf, "]");
3652 return ret;
3653 }