1 /* Zebra Nexthop Group Code.
2 * Copyright (C) 2019 Cumulus Networks, Inc.
3 * Donald Sharp
4 * Stephen Worley
5 *
6 * This file is part of FRR.
7 *
8 * FRR is free software; you can redistribute it and/or modify it
9 * under the terms of the GNU General Public License as published by the
10 * Free Software Foundation; either version 2, or (at your option) any
11 * later version.
12 *
13 * FRR is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with FRR; see the file COPYING. If not, write to the Free
20 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 * 02111-1307, USA.
22 */
23 #include <zebra.h>
24
25 #include "lib/nexthop.h"
26 #include "lib/nexthop_group_private.h"
27 #include "lib/routemap.h"
28 #include "lib/mpls.h"
29 #include "lib/jhash.h"
30 #include "lib/debug.h"
31 #include "lib/lib_errors.h"
32
33 #include "zebra/connected.h"
34 #include "zebra/debug.h"
35 #include "zebra/zebra_router.h"
36 #include "zebra/zebra_nhg_private.h"
37 #include "zebra/zebra_rnh.h"
38 #include "zebra/zebra_routemap.h"
39 #include "zebra/zebra_srte.h"
40 #include "zebra/zserv.h"
41 #include "zebra/rt.h"
42 #include "zebra_errors.h"
43 #include "zebra_dplane.h"
44 #include "zebra/interface.h"
45 #include "zebra/zapi_msg.h"
46 #include "zebra/rib.h"
47 #include "zebra/zebra_vxlan.h"
48
49 DEFINE_MTYPE_STATIC(ZEBRA, NHG, "Nexthop Group Entry");
50 DEFINE_MTYPE_STATIC(ZEBRA, NHG_CONNECTED, "Nexthop Group Connected");
51 DEFINE_MTYPE_STATIC(ZEBRA, NHG_CTX, "Nexthop Group Context");
52
53 /* Map backup nexthop indices between two nhes */
54 struct backup_nh_map_s {
55 int map_count;
56
57 struct {
58 uint8_t orig_idx;
59 uint8_t new_idx;
60 } map[MULTIPATH_NUM];
61 };
62
63 /* id counter to keep in sync with kernel */
64 uint32_t id_counter;
65
66 /* Controlled through ui */
67 static bool g_nexthops_enabled = true;
68 static bool proto_nexthops_only;
69 static bool use_recursive_backups = true;
70
71 static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi,
72 int type, bool from_dplane);
73 static void depends_add(struct nhg_connected_tree_head *head,
74 struct nhg_hash_entry *depend);
75 static struct nhg_hash_entry *
76 depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh,
77 afi_t afi, int type, bool from_dplane);
78 static struct nhg_hash_entry *
79 depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id);
80 static void depends_decrement_free(struct nhg_connected_tree_head *head);
81
82 static struct nhg_backup_info *
83 nhg_backup_copy(const struct nhg_backup_info *orig);
84
85 /* Helper function for getting the next allocatable ID */
86 static uint32_t nhg_get_next_id(void)
87 {
88 while (1) {
89 id_counter++;
90
91 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
92 zlog_debug("%s: ID %u checking", __func__, id_counter);
93
94 if (id_counter == ZEBRA_NHG_PROTO_LOWER) {
95 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
96 zlog_debug("%s: ID counter wrapped", __func__);
97
98 id_counter = 0;
99 continue;
100 }
101
102 if (zebra_nhg_lookup_id(id_counter)) {
103 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
104 zlog_debug("%s: ID already exists", __func__);
105
106 continue;
107 }
108
109 break;
110 }
111
112 return id_counter;
113 }
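/* Usage sketch (illustrative only): zebra-assigned IDs always stay below
 * ZEBRA_NHG_PROTO_LOWER, since the counter wraps back around when it
 * reaches that boundary; IDs at or above it are reserved for protocol
 * daemons.
 *
 *	uint32_t new_id = nhg_get_next_id();
 *
 *	assert(new_id != 0 && new_id < ZEBRA_NHG_PROTO_LOWER);
 */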
114
115 static void nhg_connected_free(struct nhg_connected *dep)
116 {
117 XFREE(MTYPE_NHG_CONNECTED, dep);
118 }
119
120 static struct nhg_connected *nhg_connected_new(struct nhg_hash_entry *nhe)
121 {
122 struct nhg_connected *new = NULL;
123
124 new = XCALLOC(MTYPE_NHG_CONNECTED, sizeof(struct nhg_connected));
125 new->nhe = nhe;
126
127 return new;
128 }
129
130 void nhg_connected_tree_free(struct nhg_connected_tree_head *head)
131 {
132 struct nhg_connected *rb_node_dep = NULL;
133
134 if (!nhg_connected_tree_is_empty(head)) {
135 frr_each_safe(nhg_connected_tree, head, rb_node_dep) {
136 nhg_connected_tree_del(head, rb_node_dep);
137 nhg_connected_free(rb_node_dep);
138 }
139 }
140 }
141
142 bool nhg_connected_tree_is_empty(const struct nhg_connected_tree_head *head)
143 {
144 return nhg_connected_tree_count(head) ? false : true;
145 }
146
147 struct nhg_connected *
148 nhg_connected_tree_root(struct nhg_connected_tree_head *head)
149 {
150 return nhg_connected_tree_first(head);
151 }
152
153 struct nhg_hash_entry *
154 nhg_connected_tree_del_nhe(struct nhg_connected_tree_head *head,
155 struct nhg_hash_entry *depend)
156 {
157 struct nhg_connected lookup = {};
158 struct nhg_connected *remove = NULL;
159 struct nhg_hash_entry *removed_nhe;
160
161 lookup.nhe = depend;
162
163 /* Lookup to find the element, then remove it */
164 remove = nhg_connected_tree_find(head, &lookup);
165 if (remove)
166 /* Re-returning here just in case this API changes...
167 * the _del list APIs are a bit undefined at the moment.
168 *
169 * So hopefully returning here will make it fail if the API
170 * changes to something different than currently expected.
171 */
172 remove = nhg_connected_tree_del(head, remove);
173
174 /* If the entry was successfully removed, free the `connected` struct */
175 if (remove) {
176 removed_nhe = remove->nhe;
177 nhg_connected_free(remove);
178 return removed_nhe;
179 }
180
181 return NULL;
182 }
183
184 /* Assuming UNIQUE RB tree. If this changes, assumptions here about
185 * insertion need to change.
186 */
187 struct nhg_hash_entry *
188 nhg_connected_tree_add_nhe(struct nhg_connected_tree_head *head,
189 struct nhg_hash_entry *depend)
190 {
191 struct nhg_connected *new = NULL;
192
193 new = nhg_connected_new(depend);
194
195 /* On success, NULL will be returned from the
196 * RB code.
197 */
198 if (new && (nhg_connected_tree_add(head, new) == NULL))
199 return NULL;
200
201 /* If it wasn't successful, it must be a duplicate. We enforce the
202 * unique property for the `nhg_connected` tree.
203 */
204 nhg_connected_free(new);
205
206 return depend;
207 }
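/* Caller convention, as a sketch: a NULL return means the entry was newly
 * inserted into the tree (and the caller typically bumps its refcnt),
 * while a non-NULL return means it was already present:
 *
 *	if (nhg_connected_tree_add_nhe(head, depend) == NULL)
 *		zebra_nhg_increment_ref(depend);
 */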
208
209 static void
210 nhg_connected_tree_decrement_ref(struct nhg_connected_tree_head *head)
211 {
212 struct nhg_connected *rb_node_dep = NULL;
213
214 frr_each_safe(nhg_connected_tree, head, rb_node_dep) {
215 zebra_nhg_decrement_ref(rb_node_dep->nhe);
216 }
217 }
218
219 static void
220 nhg_connected_tree_increment_ref(struct nhg_connected_tree_head *head)
221 {
222 struct nhg_connected *rb_node_dep = NULL;
223
224 frr_each(nhg_connected_tree, head, rb_node_dep) {
225 zebra_nhg_increment_ref(rb_node_dep->nhe);
226 }
227 }
228
229 struct nhg_hash_entry *zebra_nhg_resolve(struct nhg_hash_entry *nhe)
230 {
231 if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_RECURSIVE)
232 && !zebra_nhg_depends_is_empty(nhe)) {
233 nhe = nhg_connected_tree_root(&nhe->nhg_depends)->nhe;
234 return zebra_nhg_resolve(nhe);
235 }
236
237 return nhe;
238 }
239
240 unsigned int zebra_nhg_depends_count(const struct nhg_hash_entry *nhe)
241 {
242 return nhg_connected_tree_count(&nhe->nhg_depends);
243 }
244
245 bool zebra_nhg_depends_is_empty(const struct nhg_hash_entry *nhe)
246 {
247 return nhg_connected_tree_is_empty(&nhe->nhg_depends);
248 }
249
250 static void zebra_nhg_depends_del(struct nhg_hash_entry *from,
251 struct nhg_hash_entry *depend)
252 {
253 nhg_connected_tree_del_nhe(&from->nhg_depends, depend);
254 }
255
256 static void zebra_nhg_depends_init(struct nhg_hash_entry *nhe)
257 {
258 nhg_connected_tree_init(&nhe->nhg_depends);
259 }
260
261 unsigned int zebra_nhg_dependents_count(const struct nhg_hash_entry *nhe)
262 {
263 return nhg_connected_tree_count(&nhe->nhg_dependents);
264 }
265
266
267 bool zebra_nhg_dependents_is_empty(const struct nhg_hash_entry *nhe)
268 {
269 return nhg_connected_tree_is_empty(&nhe->nhg_dependents);
270 }
271
272 static void zebra_nhg_dependents_del(struct nhg_hash_entry *from,
273 struct nhg_hash_entry *dependent)
274 {
275 nhg_connected_tree_del_nhe(&from->nhg_dependents, dependent);
276 }
277
278 static void zebra_nhg_dependents_add(struct nhg_hash_entry *to,
279 struct nhg_hash_entry *dependent)
280 {
281 nhg_connected_tree_add_nhe(&to->nhg_dependents, dependent);
282 }
283
284 static void zebra_nhg_dependents_init(struct nhg_hash_entry *nhe)
285 {
286 nhg_connected_tree_init(&nhe->nhg_dependents);
287 }
288
289 /* Release this nhe from anything depending on it */
290 static void zebra_nhg_dependents_release(struct nhg_hash_entry *nhe)
291 {
292 struct nhg_connected *rb_node_dep = NULL;
293
294 frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep) {
295 zebra_nhg_depends_del(rb_node_dep->nhe, nhe);
296 /* recheck validity of the dependent */
297 zebra_nhg_check_valid(rb_node_dep->nhe);
298 }
299 }
300
301 /* Release this nhe from anything that it depends on */
302 static void zebra_nhg_depends_release(struct nhg_hash_entry *nhe)
303 {
304 if (!zebra_nhg_depends_is_empty(nhe)) {
305 struct nhg_connected *rb_node_dep = NULL;
306
307 frr_each_safe(nhg_connected_tree, &nhe->nhg_depends,
308 rb_node_dep) {
309 zebra_nhg_dependents_del(rb_node_dep->nhe, nhe);
310 }
311 }
312 }
313
314
315 struct nhg_hash_entry *zebra_nhg_lookup_id(uint32_t id)
316 {
317 struct nhg_hash_entry lookup = {};
318
319 lookup.id = id;
320 return hash_lookup(zrouter.nhgs_id, &lookup);
321 }
322
323 static int zebra_nhg_insert_id(struct nhg_hash_entry *nhe)
324 {
325 if (hash_lookup(zrouter.nhgs_id, nhe)) {
326 flog_err(
327 EC_ZEBRA_NHG_TABLE_INSERT_FAILED,
328 "Failed inserting NHG %pNG into the ID hash table, entry already exists",
329 nhe);
330 return -1;
331 }
332
333 (void)hash_get(zrouter.nhgs_id, nhe, hash_alloc_intern);
334
335 return 0;
336 }
337
338 static void zebra_nhg_set_if(struct nhg_hash_entry *nhe, struct interface *ifp)
339 {
340 nhe->ifp = ifp;
341 if_nhg_dependents_add(ifp, nhe);
342 }
343
344 static void
345 zebra_nhg_connect_depends(struct nhg_hash_entry *nhe,
346 struct nhg_connected_tree_head *nhg_depends)
347 {
348 struct nhg_connected *rb_node_dep = NULL;
349
350 /* This has been allocated higher above in the stack. Could probably
351 * re-allocate and free the old stuff but just using the same memory
352 * for now. Otherwise, there might be a time trade-off for repeated
353 * alloc/frees at startup.
354 */
355 nhe->nhg_depends = *nhg_depends;
356
357 /* Attach backpointer to anything that it depends on */
358 zebra_nhg_dependents_init(nhe);
359 if (!zebra_nhg_depends_is_empty(nhe)) {
360 frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
361 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
362 zlog_debug("%s: nhe %p (%pNG), dep %p (%pNG)",
363 __func__, nhe, nhe, rb_node_dep->nhe,
364 rb_node_dep->nhe);
365
366 zebra_nhg_dependents_add(rb_node_dep->nhe, nhe);
367 }
368 }
369 }
370
371 /* Init an nhe, for use in a hash lookup for example */
372 void zebra_nhe_init(struct nhg_hash_entry *nhe, afi_t afi,
373 const struct nexthop *nh)
374 {
375 memset(nhe, 0, sizeof(struct nhg_hash_entry));
376 nhe->vrf_id = VRF_DEFAULT;
377 nhe->type = ZEBRA_ROUTE_NHG;
378 nhe->afi = AFI_UNSPEC;
379
380 /* There are some special rules that apply to groups representing
381 * a single nexthop.
382 */
383 if (nh && (nh->next == NULL)) {
384 switch (nh->type) {
385 case NEXTHOP_TYPE_IFINDEX:
386 case NEXTHOP_TYPE_BLACKHOLE:
387 /*
388 * This switch case handles setting the afi different
389 * for ipv4/v6 routes. Ifindex/blackhole nexthop
390 * objects cannot be ambiguous, they must be Address
391 * Family specific. If we get here, we will either use
392 * the AF of the route, or the one we were passed in
393 * from the kernel.
394 */
395 nhe->afi = afi;
396 break;
397 case NEXTHOP_TYPE_IPV4_IFINDEX:
398 case NEXTHOP_TYPE_IPV4:
399 nhe->afi = AFI_IP;
400 break;
401 case NEXTHOP_TYPE_IPV6_IFINDEX:
402 case NEXTHOP_TYPE_IPV6:
403 nhe->afi = AFI_IP6;
404 break;
405 }
406 }
407 }
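/* Typical lookup usage, sketched with illustrative local names: build a
 * temporary entry on the stack, attach the nexthop list, and use it as
 * the key into the NHG hash:
 *
 *	struct nhg_hash_entry lookup, *nhe;
 *
 *	zebra_nhe_init(&lookup, afi, nhg->nexthop);
 *	lookup.nhg.nexthop = nhg->nexthop;
 *	nhe = hash_lookup(zrouter.nhgs, &lookup);
 */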
408
409 struct nhg_hash_entry *zebra_nhg_alloc(void)
410 {
411 struct nhg_hash_entry *nhe;
412
413 nhe = XCALLOC(MTYPE_NHG, sizeof(struct nhg_hash_entry));
414
415 return nhe;
416 }
417
418 /*
419 * Allocate new nhe and make shallow copy of 'orig'; no
420 * recursive info is copied.
421 */
422 struct nhg_hash_entry *zebra_nhe_copy(const struct nhg_hash_entry *orig,
423 uint32_t id)
424 {
425 struct nhg_hash_entry *nhe;
426
427 nhe = zebra_nhg_alloc();
428
429 nhe->id = id;
430
431 nexthop_group_copy(&(nhe->nhg), &(orig->nhg));
432
433 nhe->vrf_id = orig->vrf_id;
434 nhe->afi = orig->afi;
435 nhe->type = orig->type ? orig->type : ZEBRA_ROUTE_NHG;
436 nhe->refcnt = 0;
437 nhe->dplane_ref = zebra_router_get_next_sequence();
438
439 /* Copy backup info also, if present */
440 if (orig->backup_info)
441 nhe->backup_info = nhg_backup_copy(orig->backup_info);
442
443 return nhe;
444 }
445
446 /* Allocation via hash handler */
447 static void *zebra_nhg_hash_alloc(void *arg)
448 {
449 struct nhg_hash_entry *nhe = NULL;
450 struct nhg_hash_entry *copy = arg;
451
452 nhe = zebra_nhe_copy(copy, copy->id);
453
454 /* Mark duplicate nexthops in a group at creation time. */
455 nexthop_group_mark_duplicates(&(nhe->nhg));
456
457 /*
458 * Add the ifp now if it's not a group or recursive and has ifindex.
459 *
460 * A proto-owned ID is always a group.
461 */
462 if (!PROTO_OWNED(nhe) && nhe->nhg.nexthop && !nhe->nhg.nexthop->next
463 && !nhe->nhg.nexthop->resolved && nhe->nhg.nexthop->ifindex) {
464 struct interface *ifp = NULL;
465
466 ifp = if_lookup_by_index(nhe->nhg.nexthop->ifindex,
467 nhe->nhg.nexthop->vrf_id);
468 if (ifp)
469 zebra_nhg_set_if(nhe, ifp);
470 else {
471 if (IS_ZEBRA_DEBUG_NHG)
472 zlog_debug(
473 "Failed to lookup an interface with ifindex=%d in vrf=%u for NHE %pNG",
474 nhe->nhg.nexthop->ifindex,
475 nhe->nhg.nexthop->vrf_id, nhe);
476 }
477 }
478
479 return nhe;
480 }
481
482 uint32_t zebra_nhg_hash_key(const void *arg)
483 {
484 const struct nhg_hash_entry *nhe = arg;
485 uint32_t key = 0x5a351234;
486 uint32_t primary = 0;
487 uint32_t backup = 0;
488
489 primary = nexthop_group_hash(&(nhe->nhg));
490 if (nhe->backup_info)
491 backup = nexthop_group_hash(&(nhe->backup_info->nhe->nhg));
492
493 key = jhash_3words(primary, backup, nhe->type, key);
494
495 key = jhash_2words(nhe->vrf_id, nhe->afi, key);
496
497 return key;
498 }
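/* In short, the key mixes (primary nexthops, backup nexthops, type,
 * vrf_id, afi):
 *
 *	key = jhash_2words(vrf_id, afi,
 *			   jhash_3words(primary, backup, type, 0x5a351234));
 *
 * The resilience parameters (buckets/timers) are not part of the key;
 * entries differing only in resilience land in the same bucket and are
 * told apart by zebra_nhg_hash_equal() below.
 */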
499
500 uint32_t zebra_nhg_id_key(const void *arg)
501 {
502 const struct nhg_hash_entry *nhe = arg;
503
504 return nhe->id;
505 }
506
507 /* Helper with common nhg/nhe nexthop comparison logic */
508 static bool nhg_compare_nexthops(const struct nexthop *nh1,
509 const struct nexthop *nh2)
510 {
511 assert(nh1 != NULL && nh2 != NULL);
512
513 /*
514 * We have to check the active flag of each individual one,
515 * not just the overall active_num. This solves the special case
516 * issue of a route with a nexthop group with one nexthop
517 * resolving to itself and thus marking it inactive. If we
518 * have two different routes each wanting to mark a different
519 * nexthop inactive, they need to hash to two different groups.
520 *
521 * If we just hashed on num_active, they would hash the same
522 * which is incorrect.
523 *
524 * ex)
525 * 1.1.1.0/24
526 * -> 1.1.1.1 dummy1 (inactive)
527 * -> 1.1.2.1 dummy2
528 *
529 * 1.1.2.0/24
530 * -> 1.1.1.1 dummy1
531 * -> 1.1.2.1 dummy2 (inactive)
532 *
533 * Without checking each individual one, they would hash to
534 * the same group and both have 1.1.1.1 dummy1 marked inactive.
535 *
536 */
537 if (CHECK_FLAG(nh1->flags, NEXTHOP_FLAG_ACTIVE)
538 != CHECK_FLAG(nh2->flags, NEXTHOP_FLAG_ACTIVE))
539 return false;
540
541 if (!nexthop_same(nh1, nh2))
542 return false;
543
544 return true;
545 }
546
547 bool zebra_nhg_hash_equal(const void *arg1, const void *arg2)
548 {
549 const struct nhg_hash_entry *nhe1 = arg1;
550 const struct nhg_hash_entry *nhe2 = arg2;
551 struct nexthop *nexthop1;
552 struct nexthop *nexthop2;
553
554 /* If both entries have IDs and they match, assume they are equal, no matter what */
555 if (nhe1->id && nhe2->id && (nhe1->id == nhe2->id))
556 return true;
557
558 if (nhe1->type != nhe2->type)
559 return false;
560
561 if (nhe1->vrf_id != nhe2->vrf_id)
562 return false;
563
564 if (nhe1->afi != nhe2->afi)
565 return false;
566
567 if (nhe1->nhg.nhgr.buckets != nhe2->nhg.nhgr.buckets)
568 return false;
569
570 if (nhe1->nhg.nhgr.idle_timer != nhe2->nhg.nhgr.idle_timer)
571 return false;
572
573 if (nhe1->nhg.nhgr.unbalanced_timer != nhe2->nhg.nhgr.unbalanced_timer)
574 return false;
575
576 /* Nexthops should be in-order, so we simply compare them in-place */
577 for (nexthop1 = nhe1->nhg.nexthop, nexthop2 = nhe2->nhg.nexthop;
578 nexthop1 && nexthop2;
579 nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
580
581 if (!nhg_compare_nexthops(nexthop1, nexthop2))
582 return false;
583 }
584
585 /* Check for unequal list lengths */
586 if (nexthop1 || nexthop2)
587 return false;
588
589 /* If there's no backup info, comparison is done. */
590 if ((nhe1->backup_info == NULL) && (nhe2->backup_info == NULL))
591 return true;
592
593 /* Compare backup info also - test the easy things first */
594 if (nhe1->backup_info && (nhe2->backup_info == NULL))
595 return false;
596 if (nhe2->backup_info && (nhe1->backup_info == NULL))
597 return false;
598
599 /* Walk both backup lists first, just to compare their lengths */
600 for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop,
601 nexthop2 = nhe2->backup_info->nhe->nhg.nexthop;
602 nexthop1 && nexthop2;
603 nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
604 ;
605 }
606
607 /* Did we find the end of one list before the other? */
608 if (nexthop1 || nexthop2)
609 return false;
610
611 /* Have to compare the backup nexthops */
612 for (nexthop1 = nhe1->backup_info->nhe->nhg.nexthop,
613 nexthop2 = nhe2->backup_info->nhe->nhg.nexthop;
614 nexthop1 && nexthop2;
615 nexthop1 = nexthop1->next, nexthop2 = nexthop2->next) {
616
617 if (!nhg_compare_nexthops(nexthop1, nexthop2))
618 return false;
619 }
620
621 return true;
622 }
623
624 bool zebra_nhg_hash_id_equal(const void *arg1, const void *arg2)
625 {
626 const struct nhg_hash_entry *nhe1 = arg1;
627 const struct nhg_hash_entry *nhe2 = arg2;
628
629 return nhe1->id == nhe2->id;
630 }
631
632 static int zebra_nhg_process_grp(struct nexthop_group *nhg,
633 struct nhg_connected_tree_head *depends,
634 struct nh_grp *grp, uint8_t count,
635 struct nhg_resilience *resilience)
636 {
637 nhg_connected_tree_init(depends);
638
639 for (int i = 0; i < count; i++) {
640 struct nhg_hash_entry *depend = NULL;
641 /* We do not care about nexthop_grp.weight at
642 * this time. But we should figure out
643 * how to adapt this to our code in
644 * the future.
645 */
646 depend = depends_find_id_add(depends, grp[i].id);
647
648 if (!depend) {
649 flog_err(
650 EC_ZEBRA_NHG_SYNC,
651 "Received Nexthop Group from the kernel with a dependent Nexthop ID (%u) which we do not have in our table",
652 grp[i].id);
653 return -1;
654 }
655
656 /*
657 * If this is a nexthop with its own group
658 * dependencies, add them as well. Not sure it's
659 * even possible to have a group within a group
660 * in the kernel.
661 */
662
663 copy_nexthops(&nhg->nexthop, depend->nhg.nexthop, NULL);
664 }
665
666 if (resilience)
667 nhg->nhgr = *resilience;
668
669 return 0;
670 }
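/* Concrete example (the IDs are hypothetical): a kernel group { id 10,
 * id 12 } becomes an NHE whose nhg_depends tree holds the already-known
 * singleton NHEs 10 and 12, and whose own nexthop list is the flattened
 * concatenation of their nexthops.
 */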
671
672 static void handle_recursive_depend(struct nhg_connected_tree_head *nhg_depends,
673 struct nexthop *nh, afi_t afi, int type)
674 {
675 struct nhg_hash_entry *depend = NULL;
676 struct nexthop_group resolved_ng = {};
677
678 resolved_ng.nexthop = nh;
679
680 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
681 zlog_debug("%s: head %p, nh %pNHv",
682 __func__, nhg_depends, nh);
683
684 depend = zebra_nhg_rib_find(0, &resolved_ng, afi, type);
685
686 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
687 zlog_debug("%s: nh %pNHv => %p (%u)",
688 __func__, nh, depend,
689 depend ? depend->id : 0);
690
691 if (depend)
692 depends_add(nhg_depends, depend);
693 }
694
695 /*
696 * Lookup an nhe in the global hash, using data from another nhe. If 'lookup'
697 * has an id value, that's used. Create a new global/shared nhe if not found.
698 */
699 static bool zebra_nhe_find(struct nhg_hash_entry **nhe, /* return value */
700 struct nhg_hash_entry *lookup,
701 struct nhg_connected_tree_head *nhg_depends,
702 afi_t afi, bool from_dplane)
703 {
704 bool created = false;
705 bool recursive = false;
706 struct nhg_hash_entry *newnhe, *backup_nhe;
707 struct nexthop *nh = NULL;
708
709 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
710 zlog_debug(
711 "%s: id %u, lookup %p, vrf %d, type %d, depends %p%s",
712 __func__, lookup->id, lookup, lookup->vrf_id,
713 lookup->type, nhg_depends,
714 (from_dplane ? " (from dplane)" : ""));
715
716 if (lookup->id)
717 (*nhe) = zebra_nhg_lookup_id(lookup->id);
718 else
719 (*nhe) = hash_lookup(zrouter.nhgs, lookup);
720
721 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
722 zlog_debug("%s: lookup => %p (%pNG)", __func__, *nhe, *nhe);
723
724 /* If we found an existing object, we're done */
725 if (*nhe)
726 goto done;
727
728 /* We're going to create/insert a new nhe:
729 * assign the next global id value if necessary.
730 */
731 if (lookup->id == 0)
732 lookup->id = nhg_get_next_id();
733
734 if (!from_dplane && lookup->id < ZEBRA_NHG_PROTO_LOWER) {
735 /*
736 * This is a zebra hashed/owned NHG.
737 *
738 * It goes in HASH and ID table.
739 */
740 newnhe = hash_get(zrouter.nhgs, lookup, zebra_nhg_hash_alloc);
741 zebra_nhg_insert_id(newnhe);
742 } else {
743 /*
744 * This is upperproto owned NHG or one we read in from dataplane
745 * and should not be hashed to.
746 *
747 * It goes in ID table.
748 */
749 newnhe =
750 hash_get(zrouter.nhgs_id, lookup, zebra_nhg_hash_alloc);
751 }
752
753 created = true;
754
755 /* Mail back the new object */
756 *nhe = newnhe;
757
758 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
759 zlog_debug("%s: => created %p (%pNG)", __func__, newnhe,
760 newnhe);
761
762 /* Only hash/lookup the depends if the first lookup
763 * fails to find something. This should hopefully save a
764 * lot of cycles for larger ecmp sizes.
765 */
766 if (nhg_depends) {
767 /* If you don't want to hash on each nexthop in the
768 * nexthop group struct you can pass the depends
769 * directly. Kernel-side we do this since it just looks
770 * them up via IDs.
771 */
772 zebra_nhg_connect_depends(newnhe, nhg_depends);
773 goto done;
774 }
775
776 /* Prepare dependency relationships if this is not a
777 * singleton nexthop. There are two cases: a single
778 * recursive nexthop, where we need a relationship to the
779 * resolving nexthop; or a group of nexthops, where we need
780 * relationships with the corresponding singletons.
781 */
782 zebra_nhg_depends_init(newnhe);
783
784 nh = newnhe->nhg.nexthop;
785
786 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE))
787 SET_FLAG(newnhe->flags, NEXTHOP_GROUP_VALID);
788
789 if (nh->next == NULL && newnhe->id < ZEBRA_NHG_PROTO_LOWER) {
790 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) {
791 /* Single recursive nexthop */
792 handle_recursive_depend(&newnhe->nhg_depends,
793 nh->resolved, afi,
794 newnhe->type);
795 recursive = true;
796 }
797 } else {
798 /* Proto-owned are groups by default */
799 /* List of nexthops */
800 for (nh = newnhe->nhg.nexthop; nh; nh = nh->next) {
801 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
802 zlog_debug("%s: depends NH %pNHv %s",
803 __func__, nh,
804 CHECK_FLAG(nh->flags,
805 NEXTHOP_FLAG_RECURSIVE) ?
806 "(R)" : "");
807
808 depends_find_add(&newnhe->nhg_depends, nh, afi,
809 newnhe->type, from_dplane);
810 }
811 }
812
813 if (recursive)
814 SET_FLAG(newnhe->flags, NEXTHOP_GROUP_RECURSIVE);
815
816 /* Attach dependent backpointers to singletons */
817 zebra_nhg_connect_depends(newnhe, &newnhe->nhg_depends);
818
819 /**
820 * Backup Nexthops
821 */
822
823 if (zebra_nhg_get_backup_nhg(newnhe) == NULL ||
824 zebra_nhg_get_backup_nhg(newnhe)->nexthop == NULL)
825 goto done;
826
827 /* If there are backup nexthops, add them to the backup
828 * depends tree. The rules here are a little different.
829 */
830 recursive = false;
831 backup_nhe = newnhe->backup_info->nhe;
832
833 nh = backup_nhe->nhg.nexthop;
834
835 /* Singleton recursive NH */
836 if (nh->next == NULL &&
837 CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE)) {
838 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
839 zlog_debug("%s: backup depend NH %pNHv (R)",
840 __func__, nh);
841
842 /* Single recursive nexthop */
843 handle_recursive_depend(&backup_nhe->nhg_depends, nh->resolved,
844 afi, backup_nhe->type);
845 recursive = true;
846 } else {
847 /* One or more backup NHs */
848 for (; nh; nh = nh->next) {
849 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
850 zlog_debug("%s: backup depend NH %pNHv %s",
851 __func__, nh,
852 CHECK_FLAG(nh->flags,
853 NEXTHOP_FLAG_RECURSIVE) ?
854 "(R)" : "");
855
856 depends_find_add(&backup_nhe->nhg_depends, nh, afi,
857 backup_nhe->type, from_dplane);
858 }
859 }
860
861 if (recursive)
862 SET_FLAG(backup_nhe->flags, NEXTHOP_GROUP_RECURSIVE);
863
864 done:
865 /* Reset time since last update */
866 (*nhe)->uptime = monotime(NULL);
867
868 return created;
869 }
870
871 /*
872 * Lookup or create an nhe, based on an nhg or an nhe id.
873 */
874 static bool zebra_nhg_find(struct nhg_hash_entry **nhe, uint32_t id,
875 struct nexthop_group *nhg,
876 struct nhg_connected_tree_head *nhg_depends,
877 vrf_id_t vrf_id, afi_t afi, int type,
878 bool from_dplane)
879 {
880 struct nhg_hash_entry lookup = {};
881 bool created = false;
882
883 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
884 zlog_debug("%s: id %u, nhg %p, vrf %d, type %d, depends %p",
885 __func__, id, nhg, vrf_id, type,
886 nhg_depends);
887
888 /* Use a temporary nhe and call into the superset/common code */
889 lookup.id = id;
890 lookup.type = type ? type : ZEBRA_ROUTE_NHG;
891 lookup.nhg = *nhg;
892
893 lookup.vrf_id = vrf_id;
894 if (nhg_depends || lookup.nhg.nexthop->next) {
895 /* Groups can have all vrfs and AF's in them */
896 lookup.afi = AFI_UNSPEC;
897 } else {
898 switch (lookup.nhg.nexthop->type) {
899 case (NEXTHOP_TYPE_IFINDEX):
900 case (NEXTHOP_TYPE_BLACKHOLE):
901 /*
902 * This switch case handles setting the afi different
903 * for ipv4/v6 routes. Ifindex/blackhole nexthop
904 * objects cannot be ambiguous, they must be Address
905 * Family specific. If we get here, we will either use
906 * the AF of the route, or the one we were passed in
907 * from the kernel.
908 */
909 lookup.afi = afi;
910 break;
911 case (NEXTHOP_TYPE_IPV4_IFINDEX):
912 case (NEXTHOP_TYPE_IPV4):
913 lookup.afi = AFI_IP;
914 break;
915 case (NEXTHOP_TYPE_IPV6_IFINDEX):
916 case (NEXTHOP_TYPE_IPV6):
917 lookup.afi = AFI_IP6;
918 break;
919 }
920 }
921
922 created = zebra_nhe_find(nhe, &lookup, nhg_depends, afi, from_dplane);
923
924 return created;
925 }
926
927 /* Find/create a single nexthop */
928 static struct nhg_hash_entry *zebra_nhg_find_nexthop(uint32_t id,
929 struct nexthop *nh,
930 afi_t afi, int type,
931 bool from_dplane)
932 {
933 struct nhg_hash_entry *nhe = NULL;
934 struct nexthop_group nhg = {};
935 vrf_id_t vrf_id = !vrf_is_backend_netns() ? VRF_DEFAULT : nh->vrf_id;
936
937 nexthop_group_add_sorted(&nhg, nh);
938
939 zebra_nhg_find(&nhe, id, &nhg, NULL, vrf_id, afi, type, from_dplane);
940
941 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
942 zlog_debug("%s: nh %pNHv => %p (%pNG)", __func__, nh, nhe, nhe);
943
944 return nhe;
945 }
946
947 static uint32_t nhg_ctx_get_id(const struct nhg_ctx *ctx)
948 {
949 return ctx->id;
950 }
951
952 static void nhg_ctx_set_status(struct nhg_ctx *ctx, enum nhg_ctx_status status)
953 {
954 ctx->status = status;
955 }
956
957 static enum nhg_ctx_status nhg_ctx_get_status(const struct nhg_ctx *ctx)
958 {
959 return ctx->status;
960 }
961
962 static void nhg_ctx_set_op(struct nhg_ctx *ctx, enum nhg_ctx_op_e op)
963 {
964 ctx->op = op;
965 }
966
967 static enum nhg_ctx_op_e nhg_ctx_get_op(const struct nhg_ctx *ctx)
968 {
969 return ctx->op;
970 }
971
972 static vrf_id_t nhg_ctx_get_vrf_id(const struct nhg_ctx *ctx)
973 {
974 return ctx->vrf_id;
975 }
976
977 static int nhg_ctx_get_type(const struct nhg_ctx *ctx)
978 {
979 return ctx->type;
980 }
981
982 static int nhg_ctx_get_afi(const struct nhg_ctx *ctx)
983 {
984 return ctx->afi;
985 }
986
987 static struct nexthop *nhg_ctx_get_nh(struct nhg_ctx *ctx)
988 {
989 return &ctx->u.nh;
990 }
991
992 static uint8_t nhg_ctx_get_count(const struct nhg_ctx *ctx)
993 {
994 return ctx->count;
995 }
996
997 static struct nh_grp *nhg_ctx_get_grp(struct nhg_ctx *ctx)
998 {
999 return ctx->u.grp;
1000 }
1001
1002 static struct nhg_resilience *nhg_ctx_get_resilience(struct nhg_ctx *ctx)
1003 {
1004 return &ctx->resilience;
1005 }
1006
1007 static struct nhg_ctx *nhg_ctx_new(void)
1008 {
1009 struct nhg_ctx *new;
1010
1011 new = XCALLOC(MTYPE_NHG_CTX, sizeof(struct nhg_ctx));
1012
1013 return new;
1014 }
1015
1016 void nhg_ctx_free(struct nhg_ctx **ctx)
1017 {
1018 struct nexthop *nh;
1019
1020 if (ctx == NULL)
1021 return;
1022
1023 assert((*ctx) != NULL);
1024
1025 if (nhg_ctx_get_count(*ctx))
1026 goto done;
1027
1028 nh = nhg_ctx_get_nh(*ctx);
1029
1030 nexthop_del_labels(nh);
1031 nexthop_del_srv6_seg6local(nh);
1032 nexthop_del_srv6_seg6(nh);
1033
1034 done:
1035 XFREE(MTYPE_NHG_CTX, *ctx);
1036 }
1037
1038 static struct nhg_ctx *nhg_ctx_init(uint32_t id, struct nexthop *nh,
1039 struct nh_grp *grp, vrf_id_t vrf_id,
1040 afi_t afi, int type, uint8_t count,
1041 struct nhg_resilience *resilience)
1042 {
1043 struct nhg_ctx *ctx = NULL;
1044
1045 ctx = nhg_ctx_new();
1046
1047 ctx->id = id;
1048 ctx->vrf_id = vrf_id;
1049 ctx->afi = afi;
1050 ctx->type = type;
1051 ctx->count = count;
1052
1053 if (resilience)
1054 ctx->resilience = *resilience;
1055
1056 if (count)
1057 /* Copy over the array */
1058 memcpy(&ctx->u.grp, grp, count * sizeof(struct nh_grp));
1059 else if (nh)
1060 ctx->u.nh = *nh;
1061
1062 return ctx;
1063 }
1064
1065 static void zebra_nhg_set_valid(struct nhg_hash_entry *nhe)
1066 {
1067 struct nhg_connected *rb_node_dep;
1068
1069 SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
1070
1071 frr_each(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep)
1072 zebra_nhg_set_valid(rb_node_dep->nhe);
1073 }
1074
1075 static void zebra_nhg_set_invalid(struct nhg_hash_entry *nhe)
1076 {
1077 struct nhg_connected *rb_node_dep;
1078
1079 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
1080
1081 /* If we're in shutdown, this interface event needs to clean
1082 * up installed NHGs, so don't clear that flag directly.
1083 */
1084 if (!zebra_router_in_shutdown())
1085 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
1086
1087 /* Update validity of nexthops depending on it */
1088 frr_each(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep)
1089 zebra_nhg_check_valid(rb_node_dep->nhe);
1090 }
1091
1092 void zebra_nhg_check_valid(struct nhg_hash_entry *nhe)
1093 {
1094 struct nhg_connected *rb_node_dep = NULL;
1095 bool valid = false;
1096
1097 /* If anything else in the group is valid, the group is valid */
1098 frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
1099 if (CHECK_FLAG(rb_node_dep->nhe->flags, NEXTHOP_GROUP_VALID)) {
1100 valid = true;
1101 goto done;
1102 }
1103 }
1104
1105 done:
1106 if (valid)
1107 zebra_nhg_set_valid(nhe);
1108 else
1109 zebra_nhg_set_invalid(nhe);
1110 }
1111
1112 static void zebra_nhg_release_all_deps(struct nhg_hash_entry *nhe)
1113 {
1114 /* Remove it from any lists it may be on */
1115 zebra_nhg_depends_release(nhe);
1116 zebra_nhg_dependents_release(nhe);
1117 if (nhe->ifp)
1118 if_nhg_dependents_del(nhe->ifp, nhe);
1119 }
1120
1121 static void zebra_nhg_release(struct nhg_hash_entry *nhe)
1122 {
1123 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1124 zlog_debug("%s: nhe %p (%pNG)", __func__, nhe, nhe);
1125
1126 zebra_nhg_release_all_deps(nhe);
1127
1128 /*
1129 * If it's not zebra owned, we didn't store it here and have to be
1130 * sure we don't clear one that's actually being used.
1131 */
1132 if (nhe->id < ZEBRA_NHG_PROTO_LOWER)
1133 hash_release(zrouter.nhgs, nhe);
1134
1135 hash_release(zrouter.nhgs_id, nhe);
1136 }
1137
1138 static void zebra_nhg_handle_uninstall(struct nhg_hash_entry *nhe)
1139 {
1140 zebra_nhg_release(nhe);
1141 zebra_nhg_free(nhe);
1142 }
1143
1144 static void zebra_nhg_handle_install(struct nhg_hash_entry *nhe)
1145 {
1146 /* Update validity of groups depending on it */
1147 struct nhg_connected *rb_node_dep;
1148
1149 frr_each_safe(nhg_connected_tree, &nhe->nhg_dependents, rb_node_dep)
1150 zebra_nhg_set_valid(rb_node_dep->nhe);
1151 }
1152
1153 /*
1154 * The kernel/other program has changed the state of a nexthop object we are
1155 * using.
1156 */
1157 static void zebra_nhg_handle_kernel_state_change(struct nhg_hash_entry *nhe,
1158 bool is_delete)
1159 {
1160 if (nhe->refcnt) {
1161 flog_err(
1162 EC_ZEBRA_NHG_SYNC,
1163 "Kernel %s a nexthop group with ID (%pNG) that we are still using for a route, sending it back down",
1164 (is_delete ? "deleted" : "updated"), nhe);
1165
1166 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
1167 zebra_nhg_install_kernel(nhe);
1168 } else
1169 zebra_nhg_handle_uninstall(nhe);
1170 }
1171
1172 static int nhg_ctx_process_new(struct nhg_ctx *ctx)
1173 {
1174 struct nexthop_group *nhg = NULL;
1175 struct nhg_connected_tree_head nhg_depends = {};
1176 struct nhg_hash_entry *lookup = NULL;
1177 struct nhg_hash_entry *nhe = NULL;
1178
1179 uint32_t id = nhg_ctx_get_id(ctx);
1180 uint8_t count = nhg_ctx_get_count(ctx);
1181 vrf_id_t vrf_id = nhg_ctx_get_vrf_id(ctx);
1182 int type = nhg_ctx_get_type(ctx);
1183 afi_t afi = nhg_ctx_get_afi(ctx);
1184
1185 lookup = zebra_nhg_lookup_id(id);
1186
1187 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1188 zlog_debug("%s: id %u, count %d, lookup => %p",
1189 __func__, id, count, lookup);
1190
1191 if (lookup) {
1192 /* This is already present in our table, hence an update
1193 * that we did not initiate.
1194 */
1195 zebra_nhg_handle_kernel_state_change(lookup, false);
1196 return 0;
1197 }
1198
1199 if (nhg_ctx_get_count(ctx)) {
1200 nhg = nexthop_group_new();
1201 if (zebra_nhg_process_grp(nhg, &nhg_depends,
1202 nhg_ctx_get_grp(ctx), count,
1203 nhg_ctx_get_resilience(ctx))) {
1204 depends_decrement_free(&nhg_depends);
1205 nexthop_group_delete(&nhg);
1206 return -ENOENT;
1207 }
1208
1209 if (!zebra_nhg_find(&nhe, id, nhg, &nhg_depends, vrf_id, afi,
1210 type, true))
1211 depends_decrement_free(&nhg_depends);
1212
1213 /* These got copied over in zebra_nhg_alloc() */
1214 nexthop_group_delete(&nhg);
1215 } else
1216 nhe = zebra_nhg_find_nexthop(id, nhg_ctx_get_nh(ctx), afi, type,
1217 true);
1218
1219 if (!nhe) {
1220 flog_err(
1221 EC_ZEBRA_TABLE_LOOKUP_FAILED,
1222 "Zebra failed to find or create a nexthop hash entry for ID (%u)",
1223 id);
1224 return -1;
1225 }
1226
1227 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1228 zlog_debug("%s: nhe %p (%pNG) is new", __func__, nhe, nhe);
1229
1230 /*
1231 * If this is a daemon-owned nhg read in from the kernel, add a refcnt
1232 * here to indicate the daemon owns it.
1233 */
1234 if (PROTO_OWNED(nhe))
1235 zebra_nhg_increment_ref(nhe);
1236
1237 SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
1238 SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
1239
1240 return 0;
1241 }
1242
1243 static int nhg_ctx_process_del(struct nhg_ctx *ctx)
1244 {
1245 struct nhg_hash_entry *nhe = NULL;
1246 uint32_t id = nhg_ctx_get_id(ctx);
1247
1248 nhe = zebra_nhg_lookup_id(id);
1249
1250 if (!nhe) {
1251 flog_warn(
1252 EC_ZEBRA_BAD_NHG_MESSAGE,
1253 "Kernel delete message received for nexthop group ID (%u) that we do not have in our ID table",
1254 id);
1255 return -1;
1256 }
1257
1258 zebra_nhg_handle_kernel_state_change(nhe, true);
1259
1260 return 0;
1261 }
1262
1263 static void nhg_ctx_fini(struct nhg_ctx **ctx)
1264 {
1265 /*
1266 * Just freeing for now, maybe do something more in the future
1267 * based on flag.
1268 */
1269
1270 nhg_ctx_free(ctx);
1271 }
1272
1273 static int queue_add(struct nhg_ctx *ctx)
1274 {
1275 /* If its queued or already processed do nothing */
1276 if (nhg_ctx_get_status(ctx) == NHG_CTX_QUEUED)
1277 return 0;
1278
1279 if (rib_queue_nhg_ctx_add(ctx)) {
1280 nhg_ctx_set_status(ctx, NHG_CTX_FAILURE);
1281 return -1;
1282 }
1283
1284 nhg_ctx_set_status(ctx, NHG_CTX_QUEUED);
1285
1286 return 0;
1287 }
1288
1289 int nhg_ctx_process(struct nhg_ctx *ctx)
1290 {
1291 int ret = 0;
1292
1293 switch (nhg_ctx_get_op(ctx)) {
1294 case NHG_CTX_OP_NEW:
1295 ret = nhg_ctx_process_new(ctx);
1296 if (nhg_ctx_get_count(ctx) && ret == -ENOENT
1297 && nhg_ctx_get_status(ctx) != NHG_CTX_REQUEUED) {
1298 /**
1299 * We have entered a situation where we are
1300 * processing a group from the kernel
1301 * that has a contained nexthop which
1302 * we have not yet processed.
1303 *
1304 * Re-enqueue this ctx to be handled exactly one
1305 * more time (indicated by the flag).
1306 *
1307 * By the time we get back to it, we
1308 * should have processed its depends.
1309 */
1310 nhg_ctx_set_status(ctx, NHG_CTX_NONE);
1311 if (queue_add(ctx) == 0) {
1312 nhg_ctx_set_status(ctx, NHG_CTX_REQUEUED);
1313 return 0;
1314 }
1315 }
1316 break;
1317 case NHG_CTX_OP_DEL:
1318 ret = nhg_ctx_process_del(ctx);
1319 case NHG_CTX_OP_NONE:
1320 break;
1321 }
1322
1323 nhg_ctx_set_status(ctx, (ret ? NHG_CTX_FAILURE : NHG_CTX_SUCCESS));
1324
1325 nhg_ctx_fini(&ctx);
1326
1327 return ret;
1328 }
1329
1330 /* Kernel-side, you either get a single new nexthop or an array of IDs */
1331 int zebra_nhg_kernel_find(uint32_t id, struct nexthop *nh, struct nh_grp *grp,
1332 uint8_t count, vrf_id_t vrf_id, afi_t afi, int type,
1333 int startup, struct nhg_resilience *nhgr)
1334 {
1335 struct nhg_ctx *ctx = NULL;
1336
1337 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1338 zlog_debug("%s: nh %pNHv, id %u, count %d",
1339 __func__, nh, id, (int)count);
1340
1341 if (id > id_counter && id < ZEBRA_NHG_PROTO_LOWER)
1342 /* Increase our counter so we don't try to create
1343 * an ID that already exists
1344 */
1345 id_counter = id;
1346
1347 ctx = nhg_ctx_init(id, nh, grp, vrf_id, afi, type, count, nhgr);
1348 nhg_ctx_set_op(ctx, NHG_CTX_OP_NEW);
1349
1350 /* Under startup conditions, we need to handle them immediately
1351 * like we do for routes. Otherwise, we are going to get a route
1352 * with a nhe_id that we have not handled.
1353 */
1354 if (startup)
1355 return nhg_ctx_process(ctx);
1356
1357 if (queue_add(ctx)) {
1358 nhg_ctx_fini(&ctx);
1359 return -1;
1360 }
1361
1362 return 0;
1363 }
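/* Processing flow, sketched (the netlink trigger is the usual Linux case,
 * mentioned here for orientation only):
 *
 *	kernel nexthop notification (e.g. RTM_NEWNEXTHOP)
 *	    -> zebra_nhg_kernel_find()
 *	        -> nhg_ctx_init() + queue_add()   // deferred via rib_queue_nhg_ctx_add()
 *	            -> nhg_ctx_process()          // later, off the work queue
 *	                -> nhg_ctx_process_new()  // find/create the NHE
 *
 * At startup the ctx is processed synchronously instead, as noted above.
 */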
1364
1365 /* Kernel-side, received delete message */
1366 int zebra_nhg_kernel_del(uint32_t id, vrf_id_t vrf_id)
1367 {
1368 struct nhg_ctx *ctx = NULL;
1369
1370 ctx = nhg_ctx_init(id, NULL, NULL, vrf_id, 0, 0, 0, NULL);
1371
1372 nhg_ctx_set_op(ctx, NHG_CTX_OP_DEL);
1373
1374 if (queue_add(ctx)) {
1375 nhg_ctx_fini(&ctx);
1376 return -1;
1377 }
1378
1379 return 0;
1380 }
1381
1382 /* Some dependency helper functions */
1383 static struct nhg_hash_entry *depends_find_recursive(const struct nexthop *nh,
1384 afi_t afi, int type)
1385 {
1386 struct nhg_hash_entry *nhe;
1387 struct nexthop *lookup = NULL;
1388
1389 lookup = nexthop_dup(nh, NULL);
1390
1391 nhe = zebra_nhg_find_nexthop(0, lookup, afi, type, false);
1392
1393 nexthops_free(lookup);
1394
1395 return nhe;
1396 }
1397
1398 static struct nhg_hash_entry *depends_find_singleton(const struct nexthop *nh,
1399 afi_t afi, int type,
1400 bool from_dplane)
1401 {
1402 struct nhg_hash_entry *nhe;
1403 struct nexthop lookup = {};
1404
1405 /* Capture a snapshot of this single nh; it might be part of a list,
1406 * so we need to make a standalone copy.
1407 */
1408 nexthop_copy_no_recurse(&lookup, nh, NULL);
1409
1410 nhe = zebra_nhg_find_nexthop(0, &lookup, afi, type, from_dplane);
1411
1412 /* The copy may have allocated labels; free them if necessary. */
1413 nexthop_del_labels(&lookup);
1414 nexthop_del_srv6_seg6local(&lookup);
1415 nexthop_del_srv6_seg6(&lookup);
1416
1417 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1418 zlog_debug("%s: nh %pNHv => %p (%pNG)", __func__, nh, nhe, nhe);
1419
1420 return nhe;
1421 }
1422
1423 static struct nhg_hash_entry *depends_find(const struct nexthop *nh, afi_t afi,
1424 int type, bool from_dplane)
1425 {
1426 struct nhg_hash_entry *nhe = NULL;
1427
1428 if (!nh)
1429 goto done;
1430
1431 /* We are separating these functions out to increase handling speed
1432 * in the non-recursive case (by not alloc/freeing)
1433 */
1434 if (CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE))
1435 nhe = depends_find_recursive(nh, afi, type);
1436 else
1437 nhe = depends_find_singleton(nh, afi, type, from_dplane);
1438
1439
1440 if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
1441 zlog_debug("%s: nh %pNHv %s => %p (%pNG)", __func__, nh,
1442 CHECK_FLAG(nh->flags, NEXTHOP_FLAG_RECURSIVE) ? "(R)"
1443 : "",
1444 nhe, nhe);
1445 }
1446
1447 done:
1448 return nhe;
1449 }
1450
1451 static void depends_add(struct nhg_connected_tree_head *head,
1452 struct nhg_hash_entry *depend)
1453 {
1454 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1455 zlog_debug("%s: head %p nh %pNHv",
1456 __func__, head, depend->nhg.nexthop);
1457
1458 /* If NULL is returned, it was successfully added and
1459 * needs to have its refcnt incremented.
1460 *
1461 * Else the NHE is already present in the tree and doesn't
1462 * need to increment the refcnt.
1463 */
1464 if (nhg_connected_tree_add_nhe(head, depend) == NULL)
1465 zebra_nhg_increment_ref(depend);
1466 }
1467
1468 static struct nhg_hash_entry *
1469 depends_find_add(struct nhg_connected_tree_head *head, struct nexthop *nh,
1470 afi_t afi, int type, bool from_dplane)
1471 {
1472 struct nhg_hash_entry *depend = NULL;
1473
1474 depend = depends_find(nh, afi, type, from_dplane);
1475
1476 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1477 zlog_debug("%s: nh %pNHv => %p",
1478 __func__, nh, depend);
1479
1480 if (depend)
1481 depends_add(head, depend);
1482
1483 return depend;
1484 }
1485
1486 static struct nhg_hash_entry *
1487 depends_find_id_add(struct nhg_connected_tree_head *head, uint32_t id)
1488 {
1489 struct nhg_hash_entry *depend = NULL;
1490
1491 depend = zebra_nhg_lookup_id(id);
1492
1493 if (depend)
1494 depends_add(head, depend);
1495
1496 return depend;
1497 }
1498
1499 static void depends_decrement_free(struct nhg_connected_tree_head *head)
1500 {
1501 nhg_connected_tree_decrement_ref(head);
1502 nhg_connected_tree_free(head);
1503 }
1504
1505 /* Find an nhe based on a list of nexthops */
1506 struct nhg_hash_entry *zebra_nhg_rib_find(uint32_t id,
1507 struct nexthop_group *nhg,
1508 afi_t rt_afi, int type)
1509 {
1510 struct nhg_hash_entry *nhe = NULL;
1511 vrf_id_t vrf_id;
1512
1513 /*
1514 * CLANG SA is complaining that nexthop may be NULL.
1515 * Make it happy, even though it cannot be NULL here.
1516 */
1517 assert(nhg->nexthop);
1518 vrf_id = !vrf_is_backend_netns() ? VRF_DEFAULT : nhg->nexthop->vrf_id;
1519
1520 zebra_nhg_find(&nhe, id, nhg, NULL, vrf_id, rt_afi, type, false);
1521
1522 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1523 zlog_debug("%s: => nhe %p (%pNG)", __func__, nhe, nhe);
1524
1525 return nhe;
1526 }
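/* Usage note: handle_recursive_depend() above is one in-file example of
 * this pattern; it wraps a resolved nexthop list in a temporary
 * nexthop_group and calls zebra_nhg_rib_find(0, &resolved_ng, afi, type),
 * letting the ID be assigned automatically.
 */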
1527
1528 /* Find an nhe based on a route's nhe */
1529 struct nhg_hash_entry *
1530 zebra_nhg_rib_find_nhe(struct nhg_hash_entry *rt_nhe, afi_t rt_afi)
1531 {
1532 struct nhg_hash_entry *nhe = NULL;
1533
1534 if (!(rt_nhe && rt_nhe->nhg.nexthop)) {
1535 flog_err(EC_ZEBRA_TABLE_LOOKUP_FAILED,
1536 "No nexthop passed to %s", __func__);
1537 return NULL;
1538 }
1539
1540 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1541 zlog_debug("%s: rt_nhe %p (%pNG)", __func__, rt_nhe, rt_nhe);
1542
1543 zebra_nhe_find(&nhe, rt_nhe, NULL, rt_afi, false);
1544
1545 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1546 zlog_debug("%s: => nhe %p (%pNG)", __func__, nhe, nhe);
1547
1548 return nhe;
1549 }
1550
1551 /*
1552 * Allocate backup nexthop info object. Typically these are embedded in
1553 * nhg_hash_entry objects.
1554 */
1555 struct nhg_backup_info *zebra_nhg_backup_alloc(void)
1556 {
1557 struct nhg_backup_info *p;
1558
1559 p = XCALLOC(MTYPE_NHG, sizeof(struct nhg_backup_info));
1560
1561 p->nhe = zebra_nhg_alloc();
1562
1563 /* Identify the embedded group used to hold the list of backups */
1564 SET_FLAG(p->nhe->flags, NEXTHOP_GROUP_BACKUP);
1565
1566 return p;
1567 }
1568
1569 /*
1570 * Free backup nexthop info object, deal with any embedded allocations
1571 */
1572 void zebra_nhg_backup_free(struct nhg_backup_info **p)
1573 {
1574 if (p && *p) {
1575 if ((*p)->nhe)
1576 zebra_nhg_free((*p)->nhe);
1577
1578 XFREE(MTYPE_NHG, (*p));
1579 }
1580 }
1581
1582 /* Accessor for backup nexthop group */
1583 struct nexthop_group *zebra_nhg_get_backup_nhg(struct nhg_hash_entry *nhe)
1584 {
1585 struct nexthop_group *p = NULL;
1586
1587 if (nhe) {
1588 if (nhe->backup_info && nhe->backup_info->nhe)
1589 p = &(nhe->backup_info->nhe->nhg);
1590 }
1591
1592 return p;
1593 }
1594
1595 /*
1596 * Helper to return a copy of a backup_info - note that this is a shallow
1597 * copy, meant to be used when creating a new nhe from info passed in with
1598 * a route, for example.
1599 */
1600 static struct nhg_backup_info *
1601 nhg_backup_copy(const struct nhg_backup_info *orig)
1602 {
1603 struct nhg_backup_info *b;
1604
1605 b = zebra_nhg_backup_alloc();
1606
1607 /* Copy list of nexthops */
1608 nexthop_group_copy(&(b->nhe->nhg), &(orig->nhe->nhg));
1609
1610 return b;
1611 }
1612
1613 static void zebra_nhg_free_members(struct nhg_hash_entry *nhe)
1614 {
1615 nexthops_free(nhe->nhg.nexthop);
1616
1617 zebra_nhg_backup_free(&nhe->backup_info);
1618
1619 /* Decrement to remove connection ref */
1620 nhg_connected_tree_decrement_ref(&nhe->nhg_depends);
1621 nhg_connected_tree_free(&nhe->nhg_depends);
1622 nhg_connected_tree_free(&nhe->nhg_dependents);
1623 }
1624
1625 void zebra_nhg_free(struct nhg_hash_entry *nhe)
1626 {
1627 if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
1628 /* Group or singleton? */
1629 if (nhe->nhg.nexthop && nhe->nhg.nexthop->next)
1630 zlog_debug("%s: nhe %p (%pNG), refcnt %d", __func__,
1631 nhe, nhe, nhe->refcnt);
1632 else
1633 zlog_debug("%s: nhe %p (%pNG), refcnt %d, NH %pNHv",
1634 __func__, nhe, nhe, nhe->refcnt,
1635 nhe->nhg.nexthop);
1636 }
1637
1638 THREAD_OFF(nhe->timer);
1639
1640 zebra_nhg_free_members(nhe);
1641
1642 XFREE(MTYPE_NHG, nhe);
1643 }
1644
1645 /*
1646 * Let's just drop the memory associated with each item
1647 */
1648 void zebra_nhg_hash_free(void *p)
1649 {
1650 struct nhg_hash_entry *nhe = p;
1651
1652 if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
1653 /* Group or singleton? */
1654 if (nhe->nhg.nexthop && nhe->nhg.nexthop->next)
1655 zlog_debug("%s: nhe %p (%u), refcnt %d", __func__, nhe,
1656 nhe->id, nhe->refcnt);
1657 else
1658 zlog_debug("%s: nhe %p (%pNG), refcnt %d, NH %pNHv",
1659 __func__, nhe, nhe, nhe->refcnt,
1660 nhe->nhg.nexthop);
1661 }
1662
1663 THREAD_OFF(nhe->timer);
1664
1665 nexthops_free(nhe->nhg.nexthop);
1666
1667 XFREE(MTYPE_NHG, nhe);
1668 }
1669
1670 /*
1671 * On cleanup there are nexthop groups that have not
1672 * been resolved at all (an nhe->id of 0). As such
1673 * zebra needs to clean up the memory associated with
1674 * those entries.
1675 */
1676 void zebra_nhg_hash_free_zero_id(struct hash_bucket *b, void *arg)
1677 {
1678 struct nhg_hash_entry *nhe = b->data;
1679 struct nhg_connected *dep;
1680
1681 while ((dep = nhg_connected_tree_pop(&nhe->nhg_depends))) {
1682 if (dep->nhe->id == 0)
1683 zebra_nhg_hash_free(dep->nhe);
1684
1685 nhg_connected_free(dep);
1686 }
1687
1688 while ((dep = nhg_connected_tree_pop(&nhe->nhg_dependents)))
1689 nhg_connected_free(dep);
1690
1691 if (nhe->backup_info && nhe->backup_info->nhe->id == 0) {
1692 while ((dep = nhg_connected_tree_pop(
1693 &nhe->backup_info->nhe->nhg_depends)))
1694 nhg_connected_free(dep);
1695
1696 zebra_nhg_hash_free(nhe->backup_info->nhe);
1697
1698 XFREE(MTYPE_NHG, nhe->backup_info);
1699 }
1700 }
1701
1702 static void zebra_nhg_timer(struct thread *thread)
1703 {
1704 struct nhg_hash_entry *nhe = THREAD_ARG(thread);
1705
1706 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1707 zlog_debug("Nexthop Timer for nhe: %pNG", nhe);
1708
1709 if (nhe->refcnt == 1)
1710 zebra_nhg_decrement_ref(nhe);
1711 }
1712
1713 void zebra_nhg_decrement_ref(struct nhg_hash_entry *nhe)
1714 {
1715 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1716 zlog_debug("%s: nhe %p (%pNG) %d => %d", __func__, nhe, nhe,
1717 nhe->refcnt, nhe->refcnt - 1);
1718
1719 nhe->refcnt--;
1720
1721 if (!zebra_router_in_shutdown() && nhe->refcnt <= 0 &&
1722 CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED) &&
1723 !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND)) {
1724 nhe->refcnt = 1;
1725 SET_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND);
1726 thread_add_timer(zrouter.master, zebra_nhg_timer, nhe,
1727 zrouter.nhg_keep, &nhe->timer);
1728 return;
1729 }
1730
1731 if (!zebra_nhg_depends_is_empty(nhe))
1732 nhg_connected_tree_decrement_ref(&nhe->nhg_depends);
1733
1734 if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0)
1735 zebra_nhg_uninstall_kernel(nhe);
1736 }
1737
1738 void zebra_nhg_increment_ref(struct nhg_hash_entry *nhe)
1739 {
1740 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1741 zlog_debug("%s: nhe %p (%pNG) %d => %d", __func__, nhe, nhe,
1742 nhe->refcnt, nhe->refcnt + 1);
1743
1744 nhe->refcnt++;
1745
1746 if (thread_is_scheduled(nhe->timer)) {
1747 THREAD_OFF(nhe->timer);
1748 nhe->refcnt--;
1749 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_KEEP_AROUND);
1750 }
1751
1752 if (!zebra_nhg_depends_is_empty(nhe))
1753 nhg_connected_tree_increment_ref(&nhe->nhg_depends);
1754 }
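/* Lifecycle sketch for the keep-around logic above: when an installed
 * NHE's refcnt drops to 0 (outside of shutdown), it is parked at refcnt 1
 * with NEXTHOP_GROUP_KEEP_AROUND set and a zrouter.nhg_keep timer running.
 * If something reuses the NHE before the timer fires,
 * zebra_nhg_increment_ref() cancels the timer and removes the artificial
 * reference; otherwise zebra_nhg_timer() drops the last reference and the
 * NHE is uninstalled from the dataplane.
 */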
1755
1756 static struct nexthop *nexthop_set_resolved(afi_t afi,
1757 const struct nexthop *newhop,
1758 struct nexthop *nexthop,
1759 struct zebra_sr_policy *policy)
1760 {
1761 struct nexthop *resolved_hop;
1762 uint8_t num_labels = 0;
1763 mpls_label_t labels[MPLS_MAX_LABELS];
1764 enum lsp_types_t label_type = ZEBRA_LSP_NONE;
1765 int i = 0;
1766
1767 resolved_hop = nexthop_new();
1768 SET_FLAG(resolved_hop->flags, NEXTHOP_FLAG_ACTIVE);
1769
1770 resolved_hop->vrf_id = nexthop->vrf_id;
1771 switch (newhop->type) {
1772 case NEXTHOP_TYPE_IPV4:
1773 case NEXTHOP_TYPE_IPV4_IFINDEX:
1774 /* If the resolving route specifies a gateway, use it */
1775 resolved_hop->type = newhop->type;
1776 resolved_hop->gate.ipv4 = newhop->gate.ipv4;
1777
1778 if (newhop->ifindex) {
1779 resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
1780 resolved_hop->ifindex = newhop->ifindex;
1781 }
1782 break;
1783 case NEXTHOP_TYPE_IPV6:
1784 case NEXTHOP_TYPE_IPV6_IFINDEX:
1785 resolved_hop->type = newhop->type;
1786 resolved_hop->gate.ipv6 = newhop->gate.ipv6;
1787
1788 if (newhop->ifindex) {
1789 resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
1790 resolved_hop->ifindex = newhop->ifindex;
1791 }
1792 break;
1793 case NEXTHOP_TYPE_IFINDEX:
1794 /* If the resolving route is an interface route,
1795 * it means the gateway we are looking up is connected
1796 * to that interface. (The actual network is _not_ onlink).
1797 * Therefore, the resolved route should have the original
1798 * gateway as nexthop as it is directly connected.
1799 *
1800 * On Linux, we have to set the onlink netlink flag because
1801 * otherwise, the kernel won't accept the route.
1802 */
1803 resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;
1804 if (afi == AFI_IP) {
1805 resolved_hop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
1806 resolved_hop->gate.ipv4 = nexthop->gate.ipv4;
1807 } else if (afi == AFI_IP6) {
1808 resolved_hop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
1809 resolved_hop->gate.ipv6 = nexthop->gate.ipv6;
1810 }
1811 resolved_hop->ifindex = newhop->ifindex;
1812 break;
1813 case NEXTHOP_TYPE_BLACKHOLE:
1814 resolved_hop->type = NEXTHOP_TYPE_BLACKHOLE;
1815 resolved_hop->bh_type = newhop->bh_type;
1816 break;
1817 }
1818
1819 if (newhop->flags & NEXTHOP_FLAG_ONLINK)
1820 resolved_hop->flags |= NEXTHOP_FLAG_ONLINK;
1821
1822 /* Copy labels of the resolved route and the parent resolving to it */
1823 if (policy) {
1824 int label_num = 0;
1825
1826 /*
1827 * Don't push the first SID if the corresponding action in the
1828 * LFIB is POP.
1829 */
1830 if (!newhop->nh_label || !newhop->nh_label->num_labels
1831 || newhop->nh_label->label[0] == MPLS_LABEL_IMPLICIT_NULL)
1832 label_num = 1;
1833
1834 for (; label_num < policy->segment_list.label_num; label_num++)
1835 labels[num_labels++] =
1836 policy->segment_list.labels[label_num];
1837 label_type = policy->segment_list.type;
1838 } else if (newhop->nh_label) {
1839 for (i = 0; i < newhop->nh_label->num_labels; i++) {
1840 /* Be a bit picky about overrunning the local array */
1841 if (num_labels >= MPLS_MAX_LABELS) {
1842 if (IS_ZEBRA_DEBUG_NHG || IS_ZEBRA_DEBUG_RIB)
1843 zlog_debug("%s: too many labels in newhop %pNHv",
1844 __func__, newhop);
1845 break;
1846 }
1847 labels[num_labels++] = newhop->nh_label->label[i];
1848 }
1849 /* Use the "outer" type */
1850 label_type = newhop->nh_label_type;
1851 }
1852
1853 if (nexthop->nh_label) {
1854 for (i = 0; i < nexthop->nh_label->num_labels; i++) {
1855 /* Be a bit picky about overrunning the local array */
1856 if (num_labels >= MPLS_MAX_LABELS) {
1857 if (IS_ZEBRA_DEBUG_NHG || IS_ZEBRA_DEBUG_RIB)
1858 zlog_debug("%s: too many labels in nexthop %pNHv",
1859 __func__, nexthop);
1860 break;
1861 }
1862 labels[num_labels++] = nexthop->nh_label->label[i];
1863 }
1864
1865 /* If the parent has labels, use its type if
1866 * we don't already have one.
1867 */
1868 if (label_type == ZEBRA_LSP_NONE)
1869 label_type = nexthop->nh_label_type;
1870 }
1871
1872 if (num_labels)
1873 nexthop_add_labels(resolved_hop, label_type, num_labels,
1874 labels);
1875
1876 if (nexthop->nh_srv6) {
1877 nexthop_add_srv6_seg6local(resolved_hop,
1878 nexthop->nh_srv6->seg6local_action,
1879 &nexthop->nh_srv6->seg6local_ctx);
1880 nexthop_add_srv6_seg6(resolved_hop,
1881 &nexthop->nh_srv6->seg6_segs);
1882 }
1883
1884 resolved_hop->rparent = nexthop;
1885 _nexthop_add(&nexthop->resolved, resolved_hop);
1886
1887 return resolved_hop;
1888 }
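/* Summary of the label handling above: the resolved hop's label stack is
 * built by first taking the labels supplied by the resolving route
 * ('newhop') or by the SR-TE policy, then appending the recursive
 * nexthop's own labels, with the total capped at MPLS_MAX_LABELS.
 */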
1889
1890 /* Checks if nexthop we are trying to resolve to is valid */
1891 static bool nexthop_valid_resolve(const struct nexthop *nexthop,
1892 const struct nexthop *resolved)
1893 {
1894 /* Can't resolve to a recursive nexthop */
1895 if (CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_RECURSIVE))
1896 return false;
1897
1898 /* Must be ACTIVE */
1899 if (!CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_ACTIVE))
1900 return false;
1901
1902 /* Must not be duplicate */
1903 if (CHECK_FLAG(resolved->flags, NEXTHOP_FLAG_DUPLICATE))
1904 return false;
1905
1906 switch (nexthop->type) {
1907 case NEXTHOP_TYPE_IPV4_IFINDEX:
1908 case NEXTHOP_TYPE_IPV6_IFINDEX:
1909 /* If the nexthop we are resolving to does not match the
1910 * ifindex for the nexthop the route wanted, its not valid.
1911 */
1912 if (nexthop->ifindex != resolved->ifindex)
1913 return false;
1914 break;
1915 case NEXTHOP_TYPE_IPV4:
1916 case NEXTHOP_TYPE_IPV6:
1917 case NEXTHOP_TYPE_IFINDEX:
1918 case NEXTHOP_TYPE_BLACKHOLE:
1919 break;
1920 }
1921
1922 return true;
1923 }
1924
1925 /*
1926 * Downstream VNI and Single VXLAN device check.
1927 *
1928 * If it has nexthop VNI labels at this point it must be D-VNI allocated
1929 * and all the nexthops have to be on an SVD.
1930 *
1931 * If SVD is not available, mark as inactive.
1932 */
1933 static bool nexthop_set_evpn_dvni_svd(vrf_id_t re_vrf_id,
1934 struct nexthop *nexthop)
1935 {
1936 if (!is_vrf_l3vni_svd_backed(re_vrf_id)) {
1937 if (IS_ZEBRA_DEBUG_NHG_DETAIL) {
1938 struct vrf *vrf = vrf_lookup_by_id(re_vrf_id);
1939
1940 zlog_debug(
1941 "nexthop %pNHv D-VNI but route's vrf %s(%u) doesn't use SVD",
1942 nexthop, VRF_LOGNAME(vrf), re_vrf_id);
1943 }
1944
1945 return false;
1946 }
1947
1948 nexthop->ifindex = get_l3vni_vxlan_ifindex(re_vrf_id);
1949 nexthop->vrf_id = 0;
1950
1951 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
1952 zlog_debug("nexthop %pNHv using SVD", nexthop);
1953
1954 return true;
1955 }
1956
1957 /*
1958 * Given a nexthop we need to properly recursively resolve
1959 * the route. As such, do a table lookup to find and match
1960 * if at all possible. Set the nexthop->ifindex and resolved_id
1961 * as appropriate
1962 */
1963 static int resolve_backup_nexthops(const struct nexthop *nexthop,
1964 const struct nhg_hash_entry *nhe,
1965 struct nexthop *resolved,
1966 struct nhg_hash_entry *resolve_nhe,
1967 struct backup_nh_map_s *map)
1968 {
1969 int i, j, idx;
1970 const struct nexthop *bnh;
1971 struct nexthop *nh, *newnh;
1972 mpls_label_t labels[MPLS_MAX_LABELS];
1973 uint8_t num_labels;
1974
1975 assert(nexthop->backup_num <= NEXTHOP_MAX_BACKUPS);
1976
1977 /* Locate backups from the original nexthop's backup index and nhe */
1978 for (i = 0; i < nexthop->backup_num; i++) {
1979 idx = nexthop->backup_idx[i];
1980
1981 /* Do we already know about this particular backup? */
1982 for (j = 0; j < map->map_count; j++) {
1983 if (map->map[j].orig_idx == idx)
1984 break;
1985 }
1986
1987 if (j < map->map_count) {
1988 resolved->backup_idx[resolved->backup_num] =
1989 map->map[j].new_idx;
1990 resolved->backup_num++;
1991
1992 SET_FLAG(resolved->flags, NEXTHOP_FLAG_HAS_BACKUP);
1993
1994 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
1995 zlog_debug("%s: found map idx orig %d, new %d",
1996 __func__, map->map[j].orig_idx,
1997 map->map[j].new_idx);
1998
1999 continue;
2000 }
2001
2002 /* We can't handle any new map entries at this point. */
2003 if (map->map_count == MULTIPATH_NUM)
2004 break;
2005
2006 /* Need to create/copy a new backup */
2007 bnh = nhe->backup_info->nhe->nhg.nexthop;
2008 for (j = 0; j < idx; j++) {
2009 if (bnh == NULL)
2010 break;
2011 bnh = bnh->next;
2012 }
2013
2014 /* Whoops - bad index in the nexthop? */
2015 if (bnh == NULL)
2016 continue;
2017
2018 if (resolve_nhe->backup_info == NULL)
2019 resolve_nhe->backup_info = zebra_nhg_backup_alloc();
2020
2021 /* Update backup info in the resolving nexthop and its nhe */
2022 newnh = nexthop_dup_no_recurse(bnh, NULL);
2023
2024 /* We may need some special handling for MPLS labels: the new
2025 * backup needs to carry the recursive nexthop's labels,
2026 * if any - they may be VRF labels, for example.
2027 * The original/inner labels are in the stack of 'resolved',
2028 * if that is longer than the stack in 'nexthop'.
2029 */
2030 if (newnh->nh_label && resolved->nh_label &&
2031 nexthop->nh_label) {
2032 if (resolved->nh_label->num_labels >
2033 nexthop->nh_label->num_labels) {
2034 /* Prepare new label stack */
2035 num_labels = 0;
2036 for (j = 0; j < newnh->nh_label->num_labels;
2037 j++) {
2038 labels[j] = newnh->nh_label->label[j];
2039 num_labels++;
2040 }
2041
2042 /* Include inner labels */
2043 for (j = nexthop->nh_label->num_labels;
2044 j < resolved->nh_label->num_labels;
2045 j++) {
2046 labels[num_labels] =
2047 resolved->nh_label->label[j];
2048 num_labels++;
2049 }
2050
2051 /* Replace existing label stack in the backup */
2052 nexthop_del_labels(newnh);
2053 nexthop_add_labels(newnh, bnh->nh_label_type,
2054 num_labels, labels);
2055 }
2056 }
2057
2058 /* Need to compute the new backup index in the new
2059 * backup list, and add to map struct.
2060 */
2061 j = 0;
2062 nh = resolve_nhe->backup_info->nhe->nhg.nexthop;
2063 if (nh) {
2064 while (nh->next) {
2065 nh = nh->next;
2066 j++;
2067 }
2068
2069 nh->next = newnh;
2070 j++;
2071
2072 } else /* First one */
2073 resolve_nhe->backup_info->nhe->nhg.nexthop = newnh;
2074
2075 /* Capture index */
2076 resolved->backup_idx[resolved->backup_num] = j;
2077 resolved->backup_num++;
2078
2079 SET_FLAG(resolved->flags, NEXTHOP_FLAG_HAS_BACKUP);
2080
2081 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2082 zlog_debug("%s: added idx orig %d, new %d",
2083 __func__, idx, j);
2084
2085 /* Update map/cache */
2086 map->map[map->map_count].orig_idx = idx;
2087 map->map[map->map_count].new_idx = j;
2088 map->map_count++;
2089 }
2090
2091 return 0;
2092 }
2093
2094 /*
2095 * So this nexthop resolution has decided that a connected route
2096 * is the correct choice. At this point, if FRR has multiple
2097 * connected routes that all point to the same prefix, one will be
2098 * selected, *but* the particular interface may not be the one
2099 * that the nexthop points at. Let's look at all the available
2100 * connected routes on this node; if any of them matches the
2101 * route's nexthop ifindex, that is good enough for a match.
2102 *
2103 * This code depends on the fact that a nexthop->ifindex is 0
2104 * if it is not known; if this assumption changes, yummy!
2105 * Additionally an ifindex of 0 means figure it out for us.
2106 */
2107 static struct route_entry *
2108 zebra_nhg_connected_ifindex(struct route_node *rn, struct route_entry *match,
2109 int32_t curr_ifindex)
2110 {
2111 struct nexthop *newhop = match->nhe->nhg.nexthop;
2112 struct route_entry *re;
2113
2114 assert(newhop); /* What a kick in the patooey */
2115
2116 if (curr_ifindex == 0)
2117 return match;
2118
2119 if (curr_ifindex == newhop->ifindex)
2120 return match;
2121
2122 /*
2123 * At this point we know that this route is matching a connected
2124 * but there are possibly a bunch of connected routes that are
2125 * alive that should be considered as well. So let's iterate over
2126 * all the re's and see if they are connected as well and maybe one
2127 * of those ifindexes match as well.
2128 */
2129 RNODE_FOREACH_RE (rn, re) {
2130 if (re->type != ZEBRA_ROUTE_CONNECT)
2131 continue;
2132
2133 if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
2134 continue;
2135
2136 /*
2137 * zebra has a connected route that is not removed;
2138 * let's test if it is good.
2139 */
2140 newhop = re->nhe->nhg.nexthop;
2141 assert(newhop);
2142 if (curr_ifindex == newhop->ifindex)
2143 return re;
2144 }
2145
2146 return match;
2147 }
2148
2149 /*
2150 * Given a nexthop we need to properly recursively resolve,
2151 * do a table lookup to find and match if at all possible.
2152 * Set the nexthop->ifindex and resolution info as appropriate.
2153 */
2154 static int nexthop_active(struct nexthop *nexthop, struct nhg_hash_entry *nhe,
2155 const struct prefix *top, int type, uint32_t flags,
2156 uint32_t *pmtu, vrf_id_t vrf_id)
2157 {
2158 struct prefix p;
2159 struct route_table *table;
2160 struct route_node *rn;
2161 struct route_entry *match = NULL;
2162 int resolved;
2163 struct zebra_nhlfe *nhlfe;
2164 struct nexthop *newhop;
2165 struct interface *ifp;
2166 rib_dest_t *dest;
2167 struct zebra_vrf *zvrf;
2168 struct in_addr local_ipv4;
2169 struct in_addr *ipv4;
2170 afi_t afi = AFI_IP;
2171
2172 /* Reset some nexthop attributes that we'll recompute if necessary */
2173 if ((nexthop->type == NEXTHOP_TYPE_IPV4)
2174 || (nexthop->type == NEXTHOP_TYPE_IPV6))
2175 nexthop->ifindex = 0;
2176
2177 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_RECURSIVE);
2178 nexthops_free(nexthop->resolved);
2179 nexthop->resolved = NULL;
2180
2181 /*
2182 * Set afi based on nexthop type.
2183 * Some nexthop types get special handling, possibly skipping
2184 * the normal processing.
2185 */
2186 switch (nexthop->type) {
2187 case NEXTHOP_TYPE_IFINDEX:
2188
2189 ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
2190 /*
2191 * If the interface exists and it's operative, or it's a kernel
2192 * route and the interface is up, it's active. We trust kernel routes
2193 * to be good.
2194 */
2195 if (ifp && (if_is_operative(ifp)))
2196 return 1;
2197 else
2198 return 0;
2199 break;
2200
2201 case NEXTHOP_TYPE_IPV6_IFINDEX:
2202 afi = AFI_IP6;
2203
2204 if (IN6_IS_ADDR_LINKLOCAL(&nexthop->gate.ipv6)) {
2205 ifp = if_lookup_by_index(nexthop->ifindex,
2206 nexthop->vrf_id);
2207 if (ifp && if_is_operative(ifp))
2208 return 1;
2209 else
2210 return 0;
2211 }
2212 break;
2213
2214 case NEXTHOP_TYPE_IPV4:
2215 case NEXTHOP_TYPE_IPV4_IFINDEX:
2216 afi = AFI_IP;
2217 break;
2218 case NEXTHOP_TYPE_IPV6:
2219 afi = AFI_IP6;
2220 break;
2221
2222 case NEXTHOP_TYPE_BLACKHOLE:
2223 return 1;
2224 }
2225
2226 /*
2227 * If the nexthop has been marked as 'onlink' we just need to make
2228 * sure the nexthop's interface is known and is operational.
2229 */
2230 if (CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ONLINK)) {
2231 /* DVNI/SVD Checks for EVPN routes */
2232 if (nexthop->nh_label &&
2233 nexthop->nh_label_type == ZEBRA_LSP_EVPN &&
2234 !nexthop_set_evpn_dvni_svd(vrf_id, nexthop))
2235 return 0;
2236
2237 ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
2238 if (!ifp) {
2239 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2240 zlog_debug("nexthop %pNHv marked onlink but nhif %u doesn't exist",
2241 nexthop, nexthop->ifindex);
2242 return 0;
2243 }
2244 if (!if_is_operative(ifp)) {
2245 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2246 zlog_debug("nexthop %pNHv marked onlink but nhif %s is not operational",
2247 nexthop, ifp->name);
2248 return 0;
2249 }
2250 return 1;
2251 }
2252
2253 if (top &&
2254 ((top->family == AF_INET && top->prefixlen == IPV4_MAX_BITLEN &&
2255 nexthop->gate.ipv4.s_addr == top->u.prefix4.s_addr) ||
2256 (top->family == AF_INET6 && top->prefixlen == IPV6_MAX_BITLEN &&
2257 memcmp(&nexthop->gate.ipv6, &top->u.prefix6, IPV6_MAX_BYTELEN) ==
2258 0)) &&
2259 nexthop->vrf_id == vrf_id) {
2260 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2261 zlog_debug(
2262 " :%s: Attempting to install a max prefixlength route through itself",
2263 __func__);
2264 return 0;
2265 }
2266
2267 /* Validation for ipv4 mapped ipv6 nexthop. */
2268 if (IS_MAPPED_IPV6(&nexthop->gate.ipv6)) {
2269 afi = AFI_IP;
2270 ipv4 = &local_ipv4;
2271 ipv4_mapped_ipv6_to_ipv4(&nexthop->gate.ipv6, ipv4);
2272 } else {
2273 ipv4 = &nexthop->gate.ipv4;
2274 }
2275
2276 /* Processing for nexthops with SR 'color' attribute, using
2277 * the corresponding SR policy object.
2278 */
2279 if (nexthop->srte_color) {
2280 struct ipaddr endpoint = {0};
2281 struct zebra_sr_policy *policy;
2282
2283 switch (afi) {
2284 case AFI_IP:
2285 endpoint.ipa_type = IPADDR_V4;
2286 endpoint.ipaddr_v4 = *ipv4;
2287 break;
2288 case AFI_IP6:
2289 endpoint.ipa_type = IPADDR_V6;
2290 endpoint.ipaddr_v6 = nexthop->gate.ipv6;
2291 break;
2292 case AFI_UNSPEC:
2293 case AFI_L2VPN:
2294 case AFI_MAX:
2295 flog_err(EC_LIB_DEVELOPMENT,
2296 "%s: unknown address-family: %u", __func__,
2297 afi);
2298 exit(1);
2299 }
2300
2301 policy = zebra_sr_policy_find(nexthop->srte_color, &endpoint);
2302 if (policy && policy->status == ZEBRA_SR_POLICY_UP) {
2303 resolved = 0;
2304 frr_each_safe (nhlfe_list, &policy->lsp->nhlfe_list,
2305 nhlfe) {
2306 if (!CHECK_FLAG(nhlfe->flags,
2307 NHLFE_FLAG_SELECTED)
2308 || CHECK_FLAG(nhlfe->flags,
2309 NHLFE_FLAG_DELETED))
2310 continue;
2311 SET_FLAG(nexthop->flags,
2312 NEXTHOP_FLAG_RECURSIVE);
2313 nexthop_set_resolved(afi, nhlfe->nexthop,
2314 nexthop, policy);
2315 resolved = 1;
2316 }
2317 if (resolved)
2318 return 1;
2319 }
2320 }
2321
2322 /* Make lookup prefix. */
2323 memset(&p, 0, sizeof(struct prefix));
2324 switch (afi) {
2325 case AFI_IP:
2326 p.family = AF_INET;
2327 p.prefixlen = IPV4_MAX_BITLEN;
2328 p.u.prefix4 = *ipv4;
2329 break;
2330 case AFI_IP6:
2331 p.family = AF_INET6;
2332 p.prefixlen = IPV6_MAX_BITLEN;
2333 p.u.prefix6 = nexthop->gate.ipv6;
2334 break;
2335 case AFI_UNSPEC:
2336 case AFI_L2VPN:
2337 case AFI_MAX:
2338 assert(afi != AFI_IP && afi != AFI_IP6);
2339 break;
2340 }
2341 /* Lookup table. */
2342 table = zebra_vrf_table(afi, SAFI_UNICAST, nexthop->vrf_id);
2343 /* get zvrf */
2344 zvrf = zebra_vrf_lookup_by_id(nexthop->vrf_id);
2345 if (!table || !zvrf) {
2346 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2347 zlog_debug(" %s: Table not found", __func__);
2348 return 0;
2349 }
2350
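/* Longest-prefix match; from here we may walk up the tree until we find a usable route */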
2351 rn = route_node_match(table, (struct prefix *)&p);
2352 while (rn) {
2353 route_unlock_node(rn);
2354
2355 /* Lookup should halt if we've matched against ourselves ('top',
2356 * if specified) - i.e., we cannot have a nexthop NH1 that is
2357 * resolved by a route NH1. The exception is if the route is a
2358 * host route.
2359 */
2360 if (prefix_same(&rn->p, top))
2361 if (((afi == AFI_IP)
2362 && (rn->p.prefixlen != IPV4_MAX_BITLEN))
2363 || ((afi == AFI_IP6)
2364 && (rn->p.prefixlen != IPV6_MAX_BITLEN))) {
2365 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2366 zlog_debug(
2367 " %s: Matched against ourself and prefix length is not max bit length",
2368 __func__);
2369 return 0;
2370 }
2371
2372 /* Pick up selected route. */
2373 /* However, do not resolve over default route unless explicitly
2374 * allowed.
2375 */
2376 if (is_default_prefix(&rn->p)
2377 && !rnh_resolve_via_default(zvrf, p.family)) {
2378 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2379 zlog_debug(
2380 " :%s: Resolved against default route",
2381 __func__);
2382 return 0;
2383 }
2384
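/* Only consider the currently selected fib entry; skip removed entries and 'table' routes */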
2385 dest = rib_dest_from_rnode(rn);
2386 if (dest && dest->selected_fib
2387 && !CHECK_FLAG(dest->selected_fib->status,
2388 ROUTE_ENTRY_REMOVED)
2389 && dest->selected_fib->type != ZEBRA_ROUTE_TABLE)
2390 match = dest->selected_fib;
2391
2392 /* If there is no selected route or the matched route is EGP, go up
2393 * the tree.
2394 */
2395 if (!match) {
2396 do {
2397 rn = rn->parent;
2398 } while (rn && rn->info == NULL);
2399 if (rn)
2400 route_lock_node(rn);
2401
2402 continue;
2403 }
2404
2405 if ((match->type == ZEBRA_ROUTE_CONNECT) ||
2406 (RIB_SYSTEM_ROUTE(match) && RSYSTEM_ROUTE(type))) {
2407 match = zebra_nhg_connected_ifindex(rn, match,
2408 nexthop->ifindex);
2409
2410 newhop = match->nhe->nhg.nexthop;
2411 if (nexthop->type == NEXTHOP_TYPE_IPV4 ||
2412 nexthop->type == NEXTHOP_TYPE_IPV6)
2413 nexthop->ifindex = newhop->ifindex;
2414 else if (nexthop->ifindex != newhop->ifindex) {
2415 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2416 zlog_debug(
2417 "%s: %pNHv given ifindex does not match nexthops ifindex found: %pNHv",
2418 __func__, nexthop, newhop);
2419 /*
2420 * NEXTHOP_TYPE_*_IFINDEX but ifindex
2421 * doesn't match what we found.
2422 */
2423 return 0;
2424 }
2425
2426 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2427 zlog_debug(
2428 "%s: CONNECT match %p (%pNG), newhop %pNHv",
2429 __func__, match, match->nhe, newhop);
2430
2431 return 1;
2432 } else if (CHECK_FLAG(flags, ZEBRA_FLAG_ALLOW_RECURSION)) {
2433 struct nexthop_group *nhg;
2434 struct nexthop *resolver;
2435 struct backup_nh_map_s map = {};
2436
2437 resolved = 0;
2438
2439 /*
2440 * Only useful if installed or being Route Replaced.
2441 * Why being Route Replaced as well?
2442 * Imagine a route A and a route B (that depends on A)
2443 * for recursive resolution, where A already exists in the
2444 * zebra rib. Suppose zebra receives the routes
2445 * for resolution at approximately the same time in the
2446 * [ B, A ] order on the workQ. If this happens then
2447 * normal route resolution will happen and B will be
2448 * resolved successfully and then A will be resolved
2449 * successfully. Now imagine the reversed order [ A, B ].
2450 * A will be resolved and then scheduled for install
2451 * (thus not yet having the ROUTE_ENTRY_INSTALLED flag). B
2452 * will then get resolved and fail to be installed
2453 * because of the test below as originally written. Let's
2454 * loosen this up a tiny bit and allow the
2455 * ROUTE_ENTRY_ROUTE_REPLACING flag (which is set when a
2456 * Route Replace operation is being initiated on A)
2457 * to also satisfy this situation. This will allow
2458 * either order in the workQ to work properly.
2459 */
2460 if (!CHECK_FLAG(match->status, ROUTE_ENTRY_INSTALLED) &&
2461 !CHECK_FLAG(match->status,
2462 ROUTE_ENTRY_ROUTE_REPLACING)) {
2463 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2464 zlog_debug(
2465 "%s: match %p (%pNG) not installed or being Route Replaced",
2466 __func__, match, match->nhe);
2467
2468 goto done_with_match;
2469 }
2470
2471 /* Examine installed nexthops; note that there
2472 * may not be any installed primary nexthops if
2473 * only backups are installed.
2474 */
2475 nhg = rib_get_fib_nhg(match);
2476 for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
2477 if (!nexthop_valid_resolve(nexthop, newhop))
2478 continue;
2479
2480 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2481 zlog_debug(
2482 "%s: RECURSIVE match %p (%pNG), newhop %pNHv",
2483 __func__, match, match->nhe,
2484 newhop);
2485
2486 SET_FLAG(nexthop->flags,
2487 NEXTHOP_FLAG_RECURSIVE);
2488 resolver = nexthop_set_resolved(afi, newhop,
2489 nexthop, NULL);
2490 resolved = 1;
2491
2492 /* If there are backup nexthops, capture
2493 * that info with the resolving nexthop.
2494 */
2495 if (resolver && newhop->backup_num > 0) {
2496 resolve_backup_nexthops(newhop,
2497 match->nhe,
2498 resolver, nhe,
2499 &map);
2500 }
2501 }
2502
2503 /* Examine installed backup nexthops, if any. There
2504 * are only installed backups *if* there is a
2505 * dedicated fib list. The UI can also control use
2506 * of backups for resolution.
2507 */
2508 nhg = rib_get_fib_backup_nhg(match);
2509 if (!use_recursive_backups ||
2510 nhg == NULL || nhg->nexthop == NULL)
2511 goto done_with_match;
2512
2513 for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
2514 if (!nexthop_valid_resolve(nexthop, newhop))
2515 continue;
2516
2517 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2518 zlog_debug(
2519 "%s: RECURSIVE match backup %p (%pNG), newhop %pNHv",
2520 __func__, match, match->nhe,
2521 newhop);
2522
2523 SET_FLAG(nexthop->flags,
2524 NEXTHOP_FLAG_RECURSIVE);
2525 nexthop_set_resolved(afi, newhop, nexthop,
2526 NULL);
2527 resolved = 1;
2528 }
2529
2530 done_with_match:
2531 /* Capture resolving mtu */
2532 if (resolved) {
2533 if (pmtu)
2534 *pmtu = match->mtu;
2535
2536 } else if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2537 zlog_debug(
2538 " %s: Recursion failed to find",
2539 __func__);
2540
2541 return resolved;
2542 } else {
2543 if (IS_ZEBRA_DEBUG_RIB_DETAILED) {
2544 zlog_debug(
2545 " %s: Route Type %s has not turned on recursion",
2546 __func__, zebra_route_string(type));
2547 if (type == ZEBRA_ROUTE_BGP
2548 && !CHECK_FLAG(flags, ZEBRA_FLAG_IBGP))
2549 zlog_debug(
2550 " EBGP: see \"disable-ebgp-connected-route-check\" or \"disable-connected-check\"");
2551 }
2552 return 0;
2553 }
2554 }
2555 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2556 zlog_debug(" %s: Nexthop did not lookup in table",
2557 __func__);
2558 return 0;
2559 }
2560
2561 /* This function verifies reachability of one given nexthop, which can be
2562 * numbered or unnumbered, IPv4 or IPv6. The result is unconditionally stored
2563 * in the nexthop->flags field. The nexthop->ifindex will be updated
2564 * appropriately as well.
2565 *
2566 * An existing route map can turn an otherwise active nexthop into an
2567 * inactive one, but not vice versa.
2568 *
2569 * The return value is the final value of the 'ACTIVE' flag.
2570 */
2571 static unsigned nexthop_active_check(struct route_node *rn,
2572 struct route_entry *re,
2573 struct nexthop *nexthop,
2574 struct nhg_hash_entry *nhe)
2575 {
2576 route_map_result_t ret = RMAP_PERMITMATCH;
2577 afi_t family;
2578 const struct prefix *p, *src_p;
2579 struct zebra_vrf *zvrf;
2580 uint32_t mtu = 0;
2581 vrf_id_t vrf_id;
2582
2583 srcdest_rnode_prefixes(rn, &p, &src_p);
2584
2585 if (rn->p.family == AF_INET)
2586 family = AFI_IP;
2587 else if (rn->p.family == AF_INET6)
2588 family = AFI_IP6;
2589 else
2590 family = AF_UNSPEC;
2591
2592 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2593 zlog_debug("%s: re %p, nexthop %pNHv", __func__, re, nexthop);
2594
2595 /*
2596 * If this is a kernel route and the interface is *up*, then
2597 * by golly gee whiz it's a good route.
2598 */
2599 if (re->type == ZEBRA_ROUTE_KERNEL || re->type == ZEBRA_ROUTE_SYSTEM) {
2600 struct interface *ifp;
2601
2602 ifp = if_lookup_by_index(nexthop->ifindex, nexthop->vrf_id);
2603
2604 if (ifp && (if_is_operative(ifp) || if_is_up(ifp))) {
2605 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2606 goto skip_check;
2607 }
2608 }
2609
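/* Resolve in the context of the vrf that owns this route node */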
2610 vrf_id = zvrf_id(rib_dest_vrf(rib_dest_from_rnode(rn)));
2611 switch (nexthop->type) {
2612 case NEXTHOP_TYPE_IFINDEX:
2613 if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
2614 &mtu, vrf_id))
2615 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2616 else
2617 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2618 break;
2619 case NEXTHOP_TYPE_IPV4:
2620 case NEXTHOP_TYPE_IPV4_IFINDEX:
2621 family = AFI_IP;
2622 if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
2623 &mtu, vrf_id))
2624 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2625 else
2626 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2627 break;
2628 case NEXTHOP_TYPE_IPV6:
2629 family = AFI_IP6;
2630 if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
2631 &mtu, vrf_id))
2632 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2633 else
2634 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2635 break;
2636 case NEXTHOP_TYPE_IPV6_IFINDEX:
2637 /* RFC 5549, v4 prefix with v6 NH */
2638 if (rn->p.family != AF_INET)
2639 family = AFI_IP6;
2640
2641 if (nexthop_active(nexthop, nhe, &rn->p, re->type, re->flags,
2642 &mtu, vrf_id))
2643 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2644 else
2645 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2646 break;
2647 case NEXTHOP_TYPE_BLACKHOLE:
2648 SET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2649 break;
2650 default:
2651 break;
2652 }
2653
2654 skip_check:
2655
2656 if (!CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE)) {
2657 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2658 zlog_debug(" %s: Unable to find active nexthop",
2659 __func__);
2660 return 0;
2661 }
2662
2663 /* Capture recursive nexthop mtu.
2664 * TODO -- the code used to just reset the re's value to zero
2665 * for each nexthop, and then jam any resolving route's mtu value in,
2666 * whether or not that was zero, or lt/gt any existing value. The
2667 * way this is used appears to be as a floor value, so let's try
2668 * using it that way here.
2669 */
2670 if (mtu > 0) {
2671 if (re->nexthop_mtu == 0 || re->nexthop_mtu > mtu)
2672 re->nexthop_mtu = mtu;
2673 }
2674
2675 /* XXX: What exactly do those checks do? Do we support
2676 * e.g. IPv4 routes with IPv6 nexthops or vice versa?
2677 */
2678 if (RIB_SYSTEM_ROUTE(re) || (family == AFI_IP && p->family != AF_INET)
2679 || (family == AFI_IP6 && p->family != AF_INET6))
2680 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2681
2682 /* The original code didn't determine the family correctly
2683 * e.g. for NEXTHOP_TYPE_IFINDEX. Retrieve the correct afi
2684 * from the rib_table_info in those cases.
2685 * Possibly it may be better to use only the rib_table_info
2686 * in every case.
2687 */
2688 if (family == 0) {
2689 struct rib_table_info *info;
2690
2691 info = srcdest_rnode_table_info(rn);
2692 family = info->afi;
2693 }
2694
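/* Clear any stale route-map source address before running the route-map below */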
2695 memset(&nexthop->rmap_src.ipv6, 0, sizeof(union g_addr));
2696
2697 zvrf = zebra_vrf_lookup_by_id(re->vrf_id);
2698 if (!zvrf) {
2699 if (IS_ZEBRA_DEBUG_RIB_DETAILED)
2700 zlog_debug(" %s: zvrf is NULL", __func__);
2701 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2702 }
2703
2704 /* It'll get set if required inside */
2705 ret = zebra_route_map_check(family, re->type, re->instance, p, nexthop,
2706 zvrf, re->tag);
2707 if (ret == RMAP_DENYMATCH) {
2708 if (IS_ZEBRA_DEBUG_RIB) {
2709 zlog_debug(
2710 "%u:%pRN: Filtering out with NH %pNHv due to route map",
2711 re->vrf_id, rn, nexthop);
2712 }
2713 UNSET_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2714 }
2715 return CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2716 }
2717
2718 /* Helper function called after resolution to walk nhg rb trees
2719 * and toggle the NEXTHOP_GROUP_VALID flag if the nexthop
2720 * is active on singleton NHEs.
2721 */
2722 static bool zebra_nhg_set_valid_if_active(struct nhg_hash_entry *nhe)
2723 {
2724 struct nhg_connected *rb_node_dep = NULL;
2725 bool valid = false;
2726
2727 if (!zebra_nhg_depends_is_empty(nhe)) {
2728 /* Is at least one depend valid? */
2729 frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
2730 if (zebra_nhg_set_valid_if_active(rb_node_dep->nhe))
2731 valid = true;
2732 }
2733
2734 goto done;
2735 }
2736
2737 /* should be fully resolved singleton at this point */
2738 if (CHECK_FLAG(nhe->nhg.nexthop->flags, NEXTHOP_FLAG_ACTIVE))
2739 valid = true;
2740
2741 done:
2742 if (valid)
2743 SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
2744
2745 return valid;
2746 }
2747
2748 /* Checks if the first nexthop is EVPN. If not, returns early.
2749 *
2750 * This is used to determine if there is a mismatch between the l3VNI
2751 * of the route's vrf and the VNI labels of the nexthops in use.
2752 *
2753 * If there is a mismatch, we keep the labels as these MUST be DVNI nexthops.
2754 *
2755 * If there is no mismatch, we remove the labels and handle the routes as
2756 * we traditionally have with evpn.
2757 */
2758 static bool nexthop_list_set_evpn_dvni(struct route_entry *re,
2759 struct nexthop_group *nhg)
2760 {
2761 struct nexthop *nexthop;
2762 vni_t re_vrf_vni;
2763 vni_t nh_vni;
2764 bool use_dvni = false;
2765
2766 nexthop = nhg->nexthop;
2767
2768 if (!nexthop->nh_label || nexthop->nh_label_type != ZEBRA_LSP_EVPN)
2769 return false;
2770
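/* Compare each nexthop's VNI label with the route's vrf L3VNI; any mismatch means D-VNI is in use */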
2771 re_vrf_vni = get_l3vni_vni(re->vrf_id);
2772
2773 for (; nexthop; nexthop = nexthop->next) {
2774 nh_vni = label2vni(&nexthop->nh_label->label[0]);
2775
2776 if (nh_vni != re_vrf_vni)
2777 use_dvni = true;
2778 }
2779
2780 /* Using traditional way, no VNI encap - remove labels */
2781 if (!use_dvni) {
2782 for (nexthop = nhg->nexthop; nexthop; nexthop = nexthop->next)
2783 nexthop_del_labels(nexthop);
2784 }
2785
2786 return use_dvni;
2787 }
2788
2789 /*
2790 * Process a list of nexthops, given an nhe, determining
2791 * whether each one is ACTIVE/installable at this time.
2792 */
2793 static uint32_t nexthop_list_active_update(struct route_node *rn,
2794 struct route_entry *re,
2795 struct nhg_hash_entry *nhe,
2796 bool is_backup)
2797 {
2798 union g_addr prev_src;
2799 unsigned int prev_active, new_active;
2800 ifindex_t prev_index;
2801 uint32_t counter = 0;
2802 struct nexthop *nexthop;
2803 struct nexthop_group *nhg = &nhe->nhg;
2804 bool vni_removed = false;
2805
2806 nexthop = nhg->nexthop;
2807
2808 /* Init recursive nh mtu */
2809 re->nexthop_mtu = 0;
2810
2811 /* Handler for dvni evpn nexthops. Has to be done at nhg level */
2812 vni_removed = !nexthop_list_set_evpn_dvni(re, nhg);
2813
2814 /* Process nexthops one-by-one */
2815 for ( ; nexthop; nexthop = nexthop->next) {
2816
2817 /* No protocol daemon provides src and so we're skipping
2818 * tracking it
2819 */
2820 prev_src = nexthop->rmap_src;
2821 prev_active = CHECK_FLAG(nexthop->flags, NEXTHOP_FLAG_ACTIVE);
2822 prev_index = nexthop->ifindex;
2823
2824 /* Include the containing nhe for primary nexthops: if there's
2825 * recursive resolution, we capture the backup info also.
2826 */
2827 new_active =
2828 nexthop_active_check(rn, re, nexthop,
2829 (is_backup ? NULL : nhe));
2830
2831 /*
2832 * We need to respect the multipath_num here
2833 * as what we should be able to install from
2834 * a multipath perspective should not be a data plane
2835 * decision point.
2836 */
2837 if (new_active && counter >= zrouter.multipath_num) {
2838 struct nexthop *nh;
2839
2840 /* Set it and its resolved nexthop as inactive. */
2841 for (nh = nexthop; nh; nh = nh->resolved)
2842 UNSET_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE);
2843
2844 new_active = 0;
2845 }
2846
2847 if (new_active)
2848 counter++;
2849
2850 /* Check for changes to the nexthop - set ROUTE_ENTRY_CHANGED */
2851 if (prev_active != new_active || prev_index != nexthop->ifindex
2852 || ((nexthop->type >= NEXTHOP_TYPE_IFINDEX
2853 && nexthop->type < NEXTHOP_TYPE_IPV6)
2854 && prev_src.ipv4.s_addr
2855 != nexthop->rmap_src.ipv4.s_addr)
2856 || ((nexthop->type >= NEXTHOP_TYPE_IPV6
2857 && nexthop->type < NEXTHOP_TYPE_BLACKHOLE)
2858 && !(IPV6_ADDR_SAME(&prev_src.ipv6,
2859 &nexthop->rmap_src.ipv6)))
2860 || CHECK_FLAG(re->status, ROUTE_ENTRY_LABELS_CHANGED)
2861 || vni_removed)
2862 SET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
2863 }
2864
2865 return counter;
2866 }
2867
2868
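/* Proto-owned NHGs are not resolved by zebra; mark every nexthop in the group active as-is. */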
2869 static uint32_t proto_nhg_nexthop_active_update(struct nexthop_group *nhg)
2870 {
2871 struct nexthop *nh;
2872 uint32_t curr_active = 0;
2873
2874 /* Assume all active for now */
2875
2876 for (nh = nhg->nexthop; nh; nh = nh->next) {
2877 SET_FLAG(nh->flags, NEXTHOP_FLAG_ACTIVE);
2878 curr_active++;
2879 }
2880
2881 return curr_active;
2882 }
2883
2884 /*
2885 * Iterate over all nexthops of the given RIB entry and refresh their
2886 * ACTIVE flag. If any nexthop is found to toggle the ACTIVE flag,
2887 * the whole re structure is flagged with ROUTE_ENTRY_CHANGED.
2888 *
2889 * Return value is the new number of active nexthops.
2890 */
2891 int nexthop_active_update(struct route_node *rn, struct route_entry *re)
2892 {
2893 struct nhg_hash_entry *curr_nhe;
2894 uint32_t curr_active = 0, backup_active = 0;
2895
2896 if (PROTO_OWNED(re->nhe))
2897 return proto_nhg_nexthop_active_update(&re->nhe->nhg);
2898
2899 afi_t rt_afi = family2afi(rn->p.family);
2900
2901 UNSET_FLAG(re->status, ROUTE_ENTRY_CHANGED);
2902
2903 /* Make a local copy of the existing nhe, so we don't work on/modify
2904 * the shared nhe.
2905 */
2906 curr_nhe = zebra_nhe_copy(re->nhe, re->nhe->id);
2907
2908 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2909 zlog_debug("%s: re %p nhe %p (%pNG), curr_nhe %p", __func__, re,
2910 re->nhe, re->nhe, curr_nhe);
2911
2912 /* Clear the existing id, if any: this will avoid any confusion
2913 * if the id exists, and will also force the creation
2914 * of a new nhe reflecting the changes we may make in this local copy.
2915 */
2916 curr_nhe->id = 0;
2917
2918 /* Process nexthops */
2919 curr_active = nexthop_list_active_update(rn, re, curr_nhe, false);
2920
2921 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2922 zlog_debug("%s: re %p curr_active %u", __func__, re,
2923 curr_active);
2924
2925 /* If there are no backup nexthops, we are done */
2926 if (zebra_nhg_get_backup_nhg(curr_nhe) == NULL)
2927 goto backups_done;
2928
2929 backup_active = nexthop_list_active_update(
2930 rn, re, curr_nhe->backup_info->nhe, true /*is_backup*/);
2931
2932 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2933 zlog_debug("%s: re %p backup_active %u", __func__, re,
2934 backup_active);
2935
2936 backups_done:
2937
2938 /*
2939 * Ref or create an nhe that matches the current state of the
2940 * nexthop(s).
2941 */
2942 if (CHECK_FLAG(re->status, ROUTE_ENTRY_CHANGED)) {
2943 struct nhg_hash_entry *new_nhe = NULL;
2944
2945 new_nhe = zebra_nhg_rib_find_nhe(curr_nhe, rt_afi);
2946
2947 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
2948 zlog_debug(
2949 "%s: re %p CHANGED: nhe %p (%pNG) => new_nhe %p (%pNG)",
2950 __func__, re, re->nhe, re->nhe, new_nhe,
2951 new_nhe);
2952
2953 route_entry_update_nhe(re, new_nhe);
2954 }
2955
2956
2957 /* Walk the NHE depends tree and toggle NEXTHOP_GROUP_VALID
2958 * flag where appropriate.
2959 */
2960 if (curr_active)
2961 zebra_nhg_set_valid_if_active(re->nhe);
2962
2963 /*
2964 * Do not need the old / copied nhe anymore since it
2965 * was either copied over into a new nhe or not
2966 * used at all.
2967 */
2968 zebra_nhg_free(curr_nhe);
2969 return curr_active;
2970 }
2971
2972 /* Recursively construct a grp array of fully resolved IDs.
2973 *
2974 * This function allows us to account for groups within groups,
2975 * by converting them into a flat array of IDs.
2976 *
2977 * nh_grp is modified at every level of recursion to append
2978 * to it the next unique, fully resolved ID from the entire tree.
2979 *
2980 *
2981 * Note:
2982 * I'm pretty sure we only allow ONE level of group within group currently.
2983 * But making this recursive just in case that ever changes.
2984 */
2985 static uint8_t zebra_nhg_nhe2grp_internal(struct nh_grp *grp,
2986 uint8_t curr_index,
2987 struct nhg_hash_entry *nhe,
2988 int max_num)
2989 {
2990 struct nhg_connected *rb_node_dep = NULL;
2991 struct nhg_hash_entry *depend = NULL;
2992 uint8_t i = curr_index;
2993
2994 frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
2995 bool duplicate = false;
2996
2997 if (i >= max_num)
2998 goto done;
2999
3000 depend = rb_node_dep->nhe;
3001
3002 /*
3003 * If it's recursive, use its resolved nhe in the group
3004 */
3005 if (CHECK_FLAG(depend->flags, NEXTHOP_GROUP_RECURSIVE)) {
3006 depend = zebra_nhg_resolve(depend);
3007 if (!depend) {
3008 flog_err(
3009 EC_ZEBRA_NHG_FIB_UPDATE,
3010 "Failed to recursively resolve Nexthop Hash Entry in the group id=%pNG",
3011 nhe);
3012 continue;
3013 }
3014 }
3015
3016 if (!zebra_nhg_depends_is_empty(depend)) {
3017 /* This is a group within a group */
3018 i = zebra_nhg_nhe2grp_internal(grp, i, depend, max_num);
3019 } else {
3020 if (!CHECK_FLAG(depend->flags, NEXTHOP_GROUP_VALID)) {
3021 if (IS_ZEBRA_DEBUG_RIB_DETAILED
3022 || IS_ZEBRA_DEBUG_NHG)
3023 zlog_debug(
3024 "%s: Nexthop ID (%u) not valid, not appending to dataplane install group",
3025 __func__, depend->id);
3026 continue;
3027 }
3028
3029 /* If the nexthop is not installed/queued for install, don't
3030 * put it in the ID array.
3031 */
3032 if (!(CHECK_FLAG(depend->flags, NEXTHOP_GROUP_INSTALLED)
3033 || CHECK_FLAG(depend->flags,
3034 NEXTHOP_GROUP_QUEUED))) {
3035 if (IS_ZEBRA_DEBUG_RIB_DETAILED
3036 || IS_ZEBRA_DEBUG_NHG)
3037 zlog_debug(
3038 "%s: Nexthop ID (%u) not installed or queued for install, not appending to dataplane install group",
3039 __func__, depend->id);
3040 continue;
3041 }
3042
3043 /* Check for duplicate IDs, ignore if found. */
3044 for (int j = 0; j < i; j++) {
3045 if (depend->id == grp[j].id) {
3046 duplicate = true;
3047 break;
3048 }
3049 }
3050
3051 if (duplicate) {
3052 if (IS_ZEBRA_DEBUG_RIB_DETAILED
3053 || IS_ZEBRA_DEBUG_NHG)
3054 zlog_debug(
3055 "%s: Nexthop ID (%u) is duplicate, not appending to dataplane install group",
3056 __func__, depend->id);
3057 continue;
3058 }
3059
3060 grp[i].id = depend->id;
3061 grp[i].weight = depend->nhg.nexthop->weight;
3062 i++;
3063 }
3064 }
3065
3066 if (nhe->backup_info == NULL || nhe->backup_info->nhe == NULL)
3067 goto done;
3068
3069 /* TODO -- For now, we are not trying to use or install any
3070 * backup info in this nexthop-id path: we aren't prepared
3071 * to use the backups here yet. We're just debugging what we find.
3072 */
3073 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
3074 zlog_debug("%s: skipping backup nhe", __func__);
3075
3076 done:
3077 return i;
3078 }
3079
3080 /* Convert a nhe into a group array */
3081 uint8_t zebra_nhg_nhe2grp(struct nh_grp *grp, struct nhg_hash_entry *nhe,
3082 int max_num)
3083 {
3084 /* Call into the recursive function */
3085 return zebra_nhg_nhe2grp_internal(grp, 0, nhe, max_num);
3086 }
3087
3088 void zebra_nhg_install_kernel(struct nhg_hash_entry *nhe)
3089 {
3090 struct nhg_connected *rb_node_dep = NULL;
3091
3092 /* Resolve it first */
3093 nhe = zebra_nhg_resolve(nhe);
3094
3095 /* Make sure all depends are installed/queued */
3096 frr_each(nhg_connected_tree, &nhe->nhg_depends, rb_node_dep) {
3097 zebra_nhg_install_kernel(rb_node_dep->nhe);
3098 }
3099
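/* Only send valid NHEs that are not already installed or queued to the dataplane */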
3100 if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_VALID)
3101 && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)
3102 && !CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED)) {
3103 /* Change its type to us since we are installing it */
3104 if (!ZEBRA_NHG_CREATED(nhe))
3105 nhe->type = ZEBRA_ROUTE_NHG;
3106
3107 int ret = dplane_nexthop_add(nhe);
3108
3109 switch (ret) {
3110 case ZEBRA_DPLANE_REQUEST_QUEUED:
3111 SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
3112 break;
3113 case ZEBRA_DPLANE_REQUEST_FAILURE:
3114 flog_err(
3115 EC_ZEBRA_DP_INSTALL_FAIL,
3116 "Failed to install Nexthop ID (%pNG) into the kernel",
3117 nhe);
3118 break;
3119 case ZEBRA_DPLANE_REQUEST_SUCCESS:
3120 SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
3121 zebra_nhg_handle_install(nhe);
3122 break;
3123 }
3124 }
3125 }
3126
3127 void zebra_nhg_uninstall_kernel(struct nhg_hash_entry *nhe)
3128 {
3129 if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED)) {
3130 int ret = dplane_nexthop_delete(nhe);
3131
3132 switch (ret) {
3133 case ZEBRA_DPLANE_REQUEST_QUEUED:
3134 SET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
3135 break;
3136 case ZEBRA_DPLANE_REQUEST_FAILURE:
3137 flog_err(
3138 EC_ZEBRA_DP_DELETE_FAIL,
3139 "Failed to uninstall Nexthop ID (%pNG) from the kernel",
3140 nhe);
3141 break;
3142 case ZEBRA_DPLANE_REQUEST_SUCCESS:
3143 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
3144 break;
3145 }
3146 }
3147
3148 zebra_nhg_handle_uninstall(nhe);
3149 }
3150
3151 void zebra_nhg_dplane_result(struct zebra_dplane_ctx *ctx)
3152 {
3153 enum dplane_op_e op;
3154 enum zebra_dplane_result status;
3155 uint32_t id = 0;
3156 struct nhg_hash_entry *nhe = NULL;
3157
3158 op = dplane_ctx_get_op(ctx);
3159 status = dplane_ctx_get_status(ctx);
3160
3161 id = dplane_ctx_get_nhe_id(ctx);
3162
3163 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL || IS_ZEBRA_DEBUG_NHG_DETAIL)
3164 zlog_debug(
3165 "Nexthop dplane ctx %p, op %s, nexthop ID (%u), result %s",
3166 ctx, dplane_op2str(op), id, dplane_res2str(status));
3167
3168 switch (op) {
3169 case DPLANE_OP_NH_DELETE:
3170 if (status != ZEBRA_DPLANE_REQUEST_SUCCESS)
3171 flog_err(
3172 EC_ZEBRA_DP_DELETE_FAIL,
3173 "Failed to uninstall Nexthop ID (%u) from the kernel",
3174 id);
3175
3176 /* We already freed the data, nothing to do */
3177 break;
3178 case DPLANE_OP_NH_INSTALL:
3179 case DPLANE_OP_NH_UPDATE:
3180 nhe = zebra_nhg_lookup_id(id);
3181
3182 if (!nhe) {
3183 if (IS_ZEBRA_DEBUG_NHG)
3184 zlog_debug(
3185 "%s operation preformed on Nexthop ID (%u) in the kernel, that we no longer have in our table",
3186 dplane_op2str(op), id);
3187
3188 break;
3189 }
3190
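/* Dataplane has responded: clear the queued flag, then handle success or failure below */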
3191 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_QUEUED);
3192 if (status == ZEBRA_DPLANE_REQUEST_SUCCESS) {
3193 SET_FLAG(nhe->flags, NEXTHOP_GROUP_VALID);
3194 SET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
3195 zebra_nhg_handle_install(nhe);
3196
3197 /* If daemon nhg, send it an update */
3198 if (PROTO_OWNED(nhe))
3199 zsend_nhg_notify(nhe->type, nhe->zapi_instance,
3200 nhe->zapi_session, nhe->id,
3201 ZAPI_NHG_INSTALLED);
3202 } else {
3203 /* If daemon nhg, send it an update */
3204 if (PROTO_OWNED(nhe))
3205 zsend_nhg_notify(nhe->type, nhe->zapi_instance,
3206 nhe->zapi_session, nhe->id,
3207 ZAPI_NHG_FAIL_INSTALL);
3208
3209 if (!(zebra_nhg_proto_nexthops_only() &&
3210 !PROTO_OWNED(nhe)))
3211 flog_err(
3212 EC_ZEBRA_DP_INSTALL_FAIL,
3213 "Failed to install Nexthop (%pNG) into the kernel",
3214 nhe);
3215 }
3216 break;
3217
3218 case DPLANE_OP_ROUTE_INSTALL:
3219 case DPLANE_OP_ROUTE_UPDATE:
3220 case DPLANE_OP_ROUTE_DELETE:
3221 case DPLANE_OP_ROUTE_NOTIFY:
3222 case DPLANE_OP_LSP_INSTALL:
3223 case DPLANE_OP_LSP_UPDATE:
3224 case DPLANE_OP_LSP_DELETE:
3225 case DPLANE_OP_LSP_NOTIFY:
3226 case DPLANE_OP_PW_INSTALL:
3227 case DPLANE_OP_PW_UNINSTALL:
3228 case DPLANE_OP_SYS_ROUTE_ADD:
3229 case DPLANE_OP_SYS_ROUTE_DELETE:
3230 case DPLANE_OP_ADDR_INSTALL:
3231 case DPLANE_OP_ADDR_UNINSTALL:
3232 case DPLANE_OP_MAC_INSTALL:
3233 case DPLANE_OP_MAC_DELETE:
3234 case DPLANE_OP_NEIGH_INSTALL:
3235 case DPLANE_OP_NEIGH_UPDATE:
3236 case DPLANE_OP_NEIGH_DELETE:
3237 case DPLANE_OP_NEIGH_IP_INSTALL:
3238 case DPLANE_OP_NEIGH_IP_DELETE:
3239 case DPLANE_OP_VTEP_ADD:
3240 case DPLANE_OP_VTEP_DELETE:
3241 case DPLANE_OP_RULE_ADD:
3242 case DPLANE_OP_RULE_DELETE:
3243 case DPLANE_OP_RULE_UPDATE:
3244 case DPLANE_OP_NEIGH_DISCOVER:
3245 case DPLANE_OP_BR_PORT_UPDATE:
3246 case DPLANE_OP_NONE:
3247 case DPLANE_OP_IPTABLE_ADD:
3248 case DPLANE_OP_IPTABLE_DELETE:
3249 case DPLANE_OP_IPSET_ADD:
3250 case DPLANE_OP_IPSET_DELETE:
3251 case DPLANE_OP_IPSET_ENTRY_ADD:
3252 case DPLANE_OP_IPSET_ENTRY_DELETE:
3253 case DPLANE_OP_NEIGH_TABLE_UPDATE:
3254 case DPLANE_OP_GRE_SET:
3255 case DPLANE_OP_INTF_ADDR_ADD:
3256 case DPLANE_OP_INTF_ADDR_DEL:
3257 case DPLANE_OP_INTF_NETCONFIG:
3258 case DPLANE_OP_INTF_INSTALL:
3259 case DPLANE_OP_INTF_UPDATE:
3260 case DPLANE_OP_INTF_DELETE:
3261 case DPLANE_OP_TC_QDISC_INSTALL:
3262 case DPLANE_OP_TC_QDISC_UNINSTALL:
3263 case DPLANE_OP_TC_CLASS_ADD:
3264 case DPLANE_OP_TC_CLASS_DELETE:
3265 case DPLANE_OP_TC_CLASS_UPDATE:
3266 case DPLANE_OP_TC_FILTER_ADD:
3267 case DPLANE_OP_TC_FILTER_DELETE:
3268 case DPLANE_OP_TC_FILTER_UPDATE:
3269 break;
3270 }
3271 }
3272
3273 static int zebra_nhg_sweep_entry(struct hash_bucket *bucket, void *arg)
3274 {
3275 struct nhg_hash_entry *nhe = NULL;
3276
3277 nhe = (struct nhg_hash_entry *)bucket->data;
3278
3279 /*
3280 * Same logic as with routes.
3281 *
3282 * If older than startup time, we know we read them in from the
3283 * kernel and have not gotten an update for them since startup
3284 * from an upper level proto.
3285 */
3286 if (zrouter.startup_time < nhe->uptime)
3287 return HASHWALK_CONTINUE;
3288
3289 /*
3290 * If it's proto-owned and not being used by a route, remove it since
3291 * we haven't gotten an update about it from the proto since startup.
3292 * This means that either the config for it was removed or the daemon
3293 * didn't get started. This handles graceful restart & retain scenario.
3294 */
3295 if (PROTO_OWNED(nhe) && nhe->refcnt == 1) {
3296 zebra_nhg_decrement_ref(nhe);
3297 return HASHWALK_ABORT;
3298 }
3299
3300 /*
3301 * If it's being ref'd by routes, just let it be uninstalled via a route
3302 * removal.
3303 */
3304 if (ZEBRA_NHG_CREATED(nhe) && nhe->refcnt <= 0) {
3305 zebra_nhg_uninstall_kernel(nhe);
3306 return HASHWALK_ABORT;
3307 }
3308
3309 return HASHWALK_CONTINUE;
3310 }
3311
3312 void zebra_nhg_sweep_table(struct hash *hash)
3313 {
3314 uint32_t count;
3315
3316 /*
3317 * Yes, this is extremely odd. Effectively, nhgs have
3318 * other nexthop groups that depend on them, and when you
3319 * remove them, you can have other entries blown up.
3320 * Our hash code does not work with deleting multiple
3321 * entries at a time and will possibly cause crashes.
3322 * So what to do? Whenever zebra_nhg_sweep_entry
3323 * deletes an entry it will return HASHWALK_ABORT,
3324 * because that deletion might have triggered more.
3325 * Then we can just keep sweeping this table
3326 * until nothing more is found to do.
3327 */
3328 do {
3329 count = hashcount(hash);
3330 hash_walk(hash, zebra_nhg_sweep_entry, NULL);
3331 } while (count != hashcount(hash));
3332 }
3333
3334 static void zebra_nhg_mark_keep_entry(struct hash_bucket *bucket, void *arg)
3335 {
3336 struct nhg_hash_entry *nhe = bucket->data;
3337
3338 UNSET_FLAG(nhe->flags, NEXTHOP_GROUP_INSTALLED);
3339 }
3340
3341 /*
3342 * When we are shutting down and we have retain mode enabled
3343 * in zebra, the process is to mark each vrf that its
3344 * routes should not be deleted. The problem with that
3345 * is that shutdown actually frees up memory, which
3346 * causes the nexthop groups' ref counts to go to zero,
3347 * so we need a way to subtly tell the system not to remove
3348 * the nexthop groups from the kernel at the same time.
3349 * The easiest approach looks like this: just stop marking
3350 * the nhgs as installed; then when the ref count
3351 * goes to zero we'll attempt the delete and do nothing.
3352 */
3353 void zebra_nhg_mark_keep(void)
3354 {
3355 hash_iterate(zrouter.nhgs_id, zebra_nhg_mark_keep_entry, NULL);
3356 }
3357
3358 /* Global control to disable use of kernel nexthops, if available. We can't
3359 * force the kernel to support nexthop ids, of course, but we can disable
3360 * zebra's use of them, e.g. for testing. By default, if the kernel supports
3361 * nexthop ids, zebra uses them.
3362 */
3363 void zebra_nhg_enable_kernel_nexthops(bool set)
3364 {
3365 g_nexthops_enabled = set;
3366 }
3367
3368 bool zebra_nhg_kernel_nexthops_enabled(void)
3369 {
3370 return g_nexthops_enabled;
3371 }
3372
3373 /* Global control for use of activated backups for recursive resolution. */
3374 void zebra_nhg_set_recursive_use_backups(bool set)
3375 {
3376 use_recursive_backups = set;
3377 }
3378
3379 bool zebra_nhg_recursive_use_backups(void)
3380 {
3381 return use_recursive_backups;
3382 }
3383
3384 /*
3385 * Global control to only use kernel nexthops for protocol created NHGs.
3386 * There are some use cases where you may not want zebra to implicitly
3387 * create kernel nexthops for all routes and only create them for NHGs
3388 * passed down by upper level protos.
3389 *
3390 * Default is off.
3391 */
3392 void zebra_nhg_set_proto_nexthops_only(bool set)
3393 {
3394 proto_nexthops_only = set;
3395 }
3396
3397 bool zebra_nhg_proto_nexthops_only(void)
3398 {
3399 return proto_nexthops_only;
3400 }
3401
3402 /* Add NHE from upper level proto */
3403 struct nhg_hash_entry *zebra_nhg_proto_add(uint32_t id, int type,
3404 uint16_t instance, uint32_t session,
3405 struct nexthop_group *nhg, afi_t afi)
3406 {
3407 struct nhg_hash_entry lookup;
3408 struct nhg_hash_entry *new, *old;
3409 struct nhg_connected *rb_node_dep = NULL;
3410 struct nexthop *newhop;
3411 bool replace = false;
3412
3413 if (!nhg->nexthop) {
3414 if (IS_ZEBRA_DEBUG_NHG)
3415 zlog_debug("%s: id %u, no nexthops passed to add",
3416 __func__, id);
3417 return NULL;
3418 }
3419
3420
3421 /* Set the nexthop list as active, since it won't go through rib
3422 * processing.
3423 *
3424 * Assuming valid/onlink for now.
3425 *
3426 * Once resolution is figured out, we won't need this!
3427 */
3428 for (ALL_NEXTHOPS_PTR(nhg, newhop)) {
3429 if (CHECK_FLAG(newhop->flags, NEXTHOP_FLAG_HAS_BACKUP)) {
3430 if (IS_ZEBRA_DEBUG_NHG)
3431 zlog_debug(
3432 "%s: id %u, backup nexthops not supported",
3433 __func__, id);
3434 return NULL;
3435 }
3436
3437 if (newhop->type == NEXTHOP_TYPE_BLACKHOLE) {
3438 if (IS_ZEBRA_DEBUG_NHG)
3439 zlog_debug(
3440 "%s: id %u, blackhole nexthop not supported",
3441 __func__, id);
3442 return NULL;
3443 }
3444
3445 if (newhop->type == NEXTHOP_TYPE_IFINDEX) {
3446 if (IS_ZEBRA_DEBUG_NHG)
3447 zlog_debug(
3448 "%s: id %u, nexthop without gateway not supported",
3449 __func__, id);
3450 return NULL;
3451 }
3452
3453 if (!newhop->ifindex) {
3454 if (IS_ZEBRA_DEBUG_NHG)
3455 zlog_debug(
3456 "%s: id %u, nexthop without ifindex is not supported",
3457 __func__, id);
3458 return NULL;
3459 }
3460 SET_FLAG(newhop->flags, NEXTHOP_FLAG_ACTIVE);
3461 }
3462
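/* Build a lookup key nhe from the passed-in ID, type and nexthop group */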
3463 zebra_nhe_init(&lookup, afi, nhg->nexthop);
3464 lookup.nhg.nexthop = nhg->nexthop;
3465 lookup.nhg.nhgr = nhg->nhgr;
3466 lookup.id = id;
3467 lookup.type = type;
3468
3469 old = zebra_nhg_lookup_id(id);
3470
3471 if (old) {
3472 /*
3473 * This is a replace, just release the NHE from its ID for now. The
3474 * depends/dependents may still be used in the replacement, so
3475 * we don't touch them other than to remove their refs to their
3476 * old parent.
3477 */
3478 replace = true;
3479 hash_release(zrouter.nhgs_id, old);
3480
3481 /* Free all the things */
3482 zebra_nhg_release_all_deps(old);
3483 }
3484
3485 new = zebra_nhg_rib_find_nhe(&lookup, afi);
3486
3487 zebra_nhg_increment_ref(new);
3488
3489 /* Capture zapi client info */
3490 new->zapi_instance = instance;
3491 new->zapi_session = session;
3492
3493 zebra_nhg_set_valid_if_active(new);
3494
3495 zebra_nhg_install_kernel(new);
3496
3497 if (old) {
3498 /*
3499 * Check to handle receiving a DEL while routes are still in use,
3500 * followed by a replace.
3501 *
3502 * In this case we would have decremented the refcnt already
3503 * but set the FLAG here. Go ahead and increment once to fix
3504 * the misordering we have been sent.
3505 */
3506 if (CHECK_FLAG(old->flags, NEXTHOP_GROUP_PROTO_RELEASED))
3507 zebra_nhg_increment_ref(old);
3508
3509 rib_handle_nhg_replace(old, new);
3510
3511 /* We have to decrement its singletons
3512 * because some might not exist in NEW.
3513 */
3514 if (!zebra_nhg_depends_is_empty(old)) {
3515 frr_each (nhg_connected_tree, &old->nhg_depends,
3516 rb_node_dep)
3517 zebra_nhg_decrement_ref(rb_node_dep->nhe);
3518 }
3519
3520 /* Don't call the dec API, we don't want to uninstall the ID */
3521 old->refcnt = 0;
3522 THREAD_OFF(old->timer);
3523 zebra_nhg_free(old);
3524 old = NULL;
3525 }
3526
3527 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
3528 zlog_debug("%s: %s nhe %p (%u), vrf %d, type %s", __func__,
3529 (replace ? "replaced" : "added"), new, new->id,
3530 new->vrf_id, zebra_route_string(new->type));
3531
3532 return new;
3533 }
3534
3535 /* Delete NHE from upper level proto, caller must decrement ref */
3536 struct nhg_hash_entry *zebra_nhg_proto_del(uint32_t id, int type)
3537 {
3538 struct nhg_hash_entry *nhe;
3539
3540 nhe = zebra_nhg_lookup_id(id);
3541
3542 if (!nhe) {
3543 if (IS_ZEBRA_DEBUG_NHG)
3544 zlog_debug("%s: id %u, lookup failed", __func__, id);
3545
3546 return NULL;
3547 }
3548
3549 if (type != nhe->type) {
3550 if (IS_ZEBRA_DEBUG_NHG)
3551 zlog_debug(
3552 "%s: id %u, type %s mismatch, sent by %s, ignoring",
3553 __func__, id, zebra_route_string(nhe->type),
3554 zebra_route_string(type));
3555 return NULL;
3556 }
3557
3558 if (CHECK_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED)) {
3559 if (IS_ZEBRA_DEBUG_NHG)
3560 zlog_debug("%s: id %u, already released", __func__, id);
3561
3562 return NULL;
3563 }
3564
3565 SET_FLAG(nhe->flags, NEXTHOP_GROUP_PROTO_RELEASED);
3566
3567 if (nhe->refcnt > 1) {
3568 if (IS_ZEBRA_DEBUG_NHG)
3569 zlog_debug(
3570 "%s: %pNG, still being used by routes refcnt %u",
3571 __func__, nhe, nhe->refcnt);
3572 return nhe;
3573 }
3574
3575 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
3576 zlog_debug("%s: deleted nhe %p (%pNG), vrf %d, type %s",
3577 __func__, nhe, nhe, nhe->vrf_id,
3578 zebra_route_string(nhe->type));
3579
3580 return nhe;
3581 }
3582
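/* Context for the hash walk that collects proto-owned NHEs of a given type */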
3583 struct nhg_score_proto_iter {
3584 int type;
3585 struct list *found;
3586 };
3587
3588 static void zebra_nhg_score_proto_entry(struct hash_bucket *bucket, void *arg)
3589 {
3590 struct nhg_hash_entry *nhe;
3591 struct nhg_score_proto_iter *iter;
3592
3593 nhe = (struct nhg_hash_entry *)bucket->data;
3594 iter = arg;
3595
3596 /* Needs to match the type and be outside the zebra ID space */
3597 if (nhe->type == iter->type && PROTO_OWNED(nhe)) {
3598 if (IS_ZEBRA_DEBUG_NHG_DETAIL)
3599 zlog_debug(
3600 "%s: found nhe %p (%pNG), vrf %d, type %s after client disconnect",
3601 __func__, nhe, nhe, nhe->vrf_id,
3602 zebra_route_string(nhe->type));
3603
3604 /* Add to removal list */
3605 listnode_add(iter->found, nhe);
3606 }
3607 }
3608
3609 /* Remove NHGs owned by the specified proto */
3610 unsigned long zebra_nhg_score_proto(int type)
3611 {
3612 struct nhg_hash_entry *nhe;
3613 struct nhg_score_proto_iter iter = {};
3614 struct listnode *ln;
3615 unsigned long count;
3616
3617 iter.type = type;
3618 iter.found = list_new();
3619
3620 /* Find matching entries to remove */
3621 hash_iterate(zrouter.nhgs_id, zebra_nhg_score_proto_entry, &iter);
3622
3623 /* Now remove them */
3624 for (ALL_LIST_ELEMENTS_RO(iter.found, ln, nhe)) {
3625 /*
3626 * This should be the last ref if we remove client routes too,
3627 * and thus should remove and free them.
3628 */
3629 zebra_nhg_decrement_ref(nhe);
3630 }
3631
3632 count = iter.found->count;
3633 list_delete(&iter.found);
3634
3635 return count;
3636 }
3637
3638 printfrr_ext_autoreg_p("NG", printfrr_nhghe);
3639 static ssize_t printfrr_nhghe(struct fbuf *buf, struct printfrr_eargs *ea,
3640 const void *ptr)
3641 {
3642 const struct nhg_hash_entry *nhe = ptr;
3643 const struct nhg_connected *dep;
3644 ssize_t ret = 0;
3645
3646 if (!nhe)
3647 return bputs(buf, "[NULL]");
3648
3649 ret += bprintfrr(buf, "%u[", nhe->id);
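/* An nhe with an interface is a singleton: print its nexthops; otherwise print the IDs of its depends */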
3650 if (nhe->ifp)
3651 ret += printfrr_nhs(buf, nhe->nhg.nexthop);
3652 else {
3653 int count = zebra_nhg_depends_count(nhe);
3654
3655 frr_each (nhg_connected_tree_const, &nhe->nhg_depends, dep) {
3656 ret += bprintfrr(buf, "%u", dep->nhe->id);
3657 if (count > 1)
3658 ret += bputs(buf, "/");
3659 count--;
3660 }
3661 }
3662
3663 ret += bputs(buf, "]");
3664 return ret;
3665 }