]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_nht.c
Merge pull request #13649 from donaldsharp/unlock_the_node_or_else
[mirror_frr.git] / pimd / pim_nht.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * PIM for Quagga
4 * Copyright (C) 2017 Cumulus Networks, Inc.
5 * Chirag Shah
6 */
7 #include <zebra.h>
8 #include "network.h"
9 #include "zclient.h"
10 #include "stream.h"
11 #include "nexthop.h"
12 #include "if.h"
13 #include "hash.h"
14 #include "jhash.h"
15
16 #include "lib/printfrr.h"
17
18 #include "pimd.h"
19 #include "pimd/pim_nht.h"
20 #include "pim_instance.h"
21 #include "log.h"
22 #include "pim_time.h"
23 #include "pim_oil.h"
24 #include "pim_ifchannel.h"
25 #include "pim_mroute.h"
26 #include "pim_zebra.h"
27 #include "pim_upstream.h"
28 #include "pim_join.h"
29 #include "pim_jp_agg.h"
30 #include "pim_zebra.h"
31 #include "pim_zlookup.h"
32 #include "pim_rp.h"
33 #include "pim_addr.h"
34
35 /**
36 * pim_sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
37 * command to Zebra.
38 */
39 void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient,
40 struct pim_nexthop_cache *pnc, int command)
41 {
42 struct prefix p;
43 int ret;
44
45 pim_addr_to_prefix(&p, pnc->rpf.rpf_addr);
46 ret = zclient_send_rnh(zclient, command, &p, SAFI_UNICAST, false, false,
47 pim->vrf->vrf_id);
48 if (ret == ZCLIENT_SEND_FAILURE)
49 zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
50
51 if (PIM_DEBUG_PIM_NHT)
52 zlog_debug(
53 "%s: NHT %sregistered addr %pFX(%s) with Zebra ret:%d ",
54 __func__,
55 (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p,
56 pim->vrf->name, ret);
57
58 return;
59 }
60
61 struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim,
62 struct pim_rpf *rpf)
63 {
64 struct pim_nexthop_cache *pnc = NULL;
65 struct pim_nexthop_cache lookup;
66
67 lookup.rpf.rpf_addr = rpf->rpf_addr;
68 pnc = hash_lookup(pim->rpf_hash, &lookup);
69
70 return pnc;
71 }
72
73 static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim,
74 struct pim_rpf *rpf_addr)
75 {
76 struct pim_nexthop_cache *pnc;
77 char hash_name[64];
78
79 pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE,
80 sizeof(struct pim_nexthop_cache));
81 pnc->rpf.rpf_addr = rpf_addr->rpf_addr;
82
83 pnc = hash_get(pim->rpf_hash, pnc, hash_alloc_intern);
84
85 pnc->rp_list = list_new();
86 pnc->rp_list->cmp = pim_rp_list_cmp;
87
88 snprintfrr(hash_name, sizeof(hash_name), "PNC %pPA(%s) Upstream Hash",
89 &pnc->rpf.rpf_addr, pim->vrf->name);
90 pnc->upstream_hash = hash_create_size(8192, pim_upstream_hash_key,
91 pim_upstream_equal, hash_name);
92
93 return pnc;
94 }
95
96 static struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim,
97 pim_addr addr)
98 {
99 struct pim_nexthop_cache *pnc = NULL;
100 struct pim_rpf rpf;
101 struct zclient *zclient = NULL;
102
103 zclient = pim_zebra_zclient_get();
104 memset(&rpf, 0, sizeof(rpf));
105 rpf.rpf_addr = addr;
106
107 pnc = pim_nexthop_cache_find(pim, &rpf);
108 if (!pnc) {
109 pnc = pim_nexthop_cache_add(pim, &rpf);
110 pim_sendmsg_zebra_rnh(pim, zclient, pnc,
111 ZEBRA_NEXTHOP_REGISTER);
112 if (PIM_DEBUG_PIM_NHT_DETAIL)
113 zlog_debug(
114 "%s: NHT cache and zebra notification added for %pPA(%s)",
115 __func__, &addr, pim->vrf->name);
116 }
117
118 return pnc;
119 }
120
121 /* TBD: this does several distinct things and should probably be split up.
122 * (checking state vs. returning pnc vs. adding upstream vs. adding rp)
123 */
124 int pim_find_or_track_nexthop(struct pim_instance *pim, pim_addr addr,
125 struct pim_upstream *up, struct rp_info *rp,
126 struct pim_nexthop_cache *out_pnc)
127 {
128 struct pim_nexthop_cache *pnc;
129 struct listnode *ch_node = NULL;
130
131 pnc = pim_nht_get(pim, addr);
132
133 assertf(up || rp, "addr=%pPA", &addr);
134
135 if (rp != NULL) {
136 ch_node = listnode_lookup(pnc->rp_list, rp);
137 if (ch_node == NULL)
138 listnode_add_sort(pnc->rp_list, rp);
139 }
140
141 if (up != NULL)
142 (void)hash_get(pnc->upstream_hash, up, hash_alloc_intern);
143
144 if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) {
145 if (out_pnc)
146 memcpy(out_pnc, pnc, sizeof(struct pim_nexthop_cache));
147 return 1;
148 }
149
150 return 0;
151 }
152
153 void pim_nht_bsr_add(struct pim_instance *pim, pim_addr addr)
154 {
155 struct pim_nexthop_cache *pnc;
156
157 pnc = pim_nht_get(pim, addr);
158
159 pnc->bsr_count++;
160 }
161
162 static void pim_nht_drop_maybe(struct pim_instance *pim,
163 struct pim_nexthop_cache *pnc)
164 {
165 if (PIM_DEBUG_PIM_NHT)
166 zlog_debug(
167 "%s: NHT %pPA(%s) rp_list count:%d upstream count:%ld BSR count:%u",
168 __func__, &pnc->rpf.rpf_addr, pim->vrf->name,
169 pnc->rp_list->count, pnc->upstream_hash->count,
170 pnc->bsr_count);
171
172 if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0
173 && pnc->bsr_count == 0) {
174 struct zclient *zclient = pim_zebra_zclient_get();
175
176 pim_sendmsg_zebra_rnh(pim, zclient, pnc,
177 ZEBRA_NEXTHOP_UNREGISTER);
178
179 list_delete(&pnc->rp_list);
180 hash_free(pnc->upstream_hash);
181
182 hash_release(pim->rpf_hash, pnc);
183 if (pnc->nexthop)
184 nexthops_free(pnc->nexthop);
185 XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc);
186 }
187 }
188
189 void pim_delete_tracked_nexthop(struct pim_instance *pim, pim_addr addr,
190 struct pim_upstream *up, struct rp_info *rp)
191 {
192 struct pim_nexthop_cache *pnc = NULL;
193 struct pim_nexthop_cache lookup;
194 struct pim_upstream *upstream = NULL;
195
196 /* Remove from RPF hash if it is the last entry */
197 lookup.rpf.rpf_addr = addr;
198 pnc = hash_lookup(pim->rpf_hash, &lookup);
199 if (!pnc) {
200 zlog_warn("attempting to delete nonexistent NHT entry %pPA",
201 &addr);
202 return;
203 }
204
205 if (rp) {
206 /* Release the (*, G)upstream from pnc->upstream_hash,
207 * whose Group belongs to the RP getting deleted
208 */
209 frr_each (rb_pim_upstream, &pim->upstream_head, upstream) {
210 struct prefix grp;
211 struct rp_info *trp_info;
212
213 if (!pim_addr_is_any(upstream->sg.src))
214 continue;
215
216 pim_addr_to_prefix(&grp, upstream->sg.grp);
217 trp_info = pim_rp_find_match_group(pim, &grp);
218 if (trp_info == rp)
219 hash_release(pnc->upstream_hash, upstream);
220 }
221 listnode_delete(pnc->rp_list, rp);
222 }
223
224 if (up)
225 hash_release(pnc->upstream_hash, up);
226
227 pim_nht_drop_maybe(pim, pnc);
228 }
229
230 void pim_nht_bsr_del(struct pim_instance *pim, pim_addr addr)
231 {
232 struct pim_nexthop_cache *pnc = NULL;
233 struct pim_nexthop_cache lookup;
234
235 /*
236 * Nothing to do here if the address to unregister
237 * is 0.0.0.0 as that the BSR has not been registered
238 * for tracking yet.
239 */
240 if (pim_addr_is_any(addr))
241 return;
242
243 lookup.rpf.rpf_addr = addr;
244
245 pnc = hash_lookup(pim->rpf_hash, &lookup);
246
247 if (!pnc) {
248 zlog_warn("attempting to delete nonexistent NHT BSR entry %pPA",
249 &addr);
250 return;
251 }
252
253 assertf(pnc->bsr_count > 0, "addr=%pPA", &addr);
254 pnc->bsr_count--;
255
256 pim_nht_drop_maybe(pim, pnc);
257 }
258
259 bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr,
260 struct interface *src_ifp, pim_addr src_ip)
261 {
262 struct pim_nexthop_cache *pnc = NULL;
263 struct pim_nexthop_cache lookup;
264 struct pim_neighbor *nbr = NULL;
265 struct nexthop *nh;
266 struct interface *ifp;
267
268 lookup.rpf.rpf_addr = bsr_addr;
269
270 pnc = hash_lookup(pim->rpf_hash, &lookup);
271 if (!pnc || !CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED)) {
272 /* BSM from a new freshly registered BSR - do a synchronous
273 * zebra query since otherwise we'd drop the first packet,
274 * leading to additional delay in picking up BSM data
275 */
276
277 /* FIXME: this should really be moved into a generic NHT
278 * function that does "add and get immediate result" or maybe
279 * "check cache or get immediate result." But until that can
280 * be worked in, here's a copy of the code below :(
281 */
282 struct pim_zlookup_nexthop nexthop_tab[router->multipath];
283 ifindex_t i;
284 struct interface *ifp = NULL;
285 int num_ifindex;
286
287 memset(nexthop_tab, 0, sizeof(nexthop_tab));
288 num_ifindex = zclient_lookup_nexthop(
289 pim, nexthop_tab, router->multipath, bsr_addr,
290 PIM_NEXTHOP_LOOKUP_MAX);
291
292 if (num_ifindex <= 0)
293 return false;
294
295 for (i = 0; i < num_ifindex; i++) {
296 struct pim_zlookup_nexthop *znh = &nexthop_tab[i];
297
298 /* pim_zlookup_nexthop has no ->type */
299
300 /* 1:1 match code below with znh instead of nh */
301 ifp = if_lookup_by_index(znh->ifindex,
302 pim->vrf->vrf_id);
303
304 if (!ifp || !ifp->info)
305 continue;
306
307 if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
308 return true;
309
310 nbr = pim_neighbor_find(ifp, znh->nexthop_addr, true);
311 if (!nbr)
312 continue;
313
314 return znh->ifindex == src_ifp->ifindex;
315 }
316 return false;
317 }
318
319 if (!CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID))
320 return false;
321
322 /* if we accept BSMs from more than one ECMP nexthop, this will cause
323 * BSM message "multiplication" for each ECMP hop. i.e. if you have
324 * 4-way ECMP and 4 hops you end up with 256 copies of each BSM
325 * message.
326 *
327 * so... only accept the first (IPv4) valid nexthop as source.
328 */
329
330 for (nh = pnc->nexthop; nh; nh = nh->next) {
331 pim_addr nhaddr;
332
333 switch (nh->type) {
334 #if PIM_IPV == 4
335 case NEXTHOP_TYPE_IPV4:
336 if (nh->ifindex == IFINDEX_INTERNAL)
337 continue;
338
339 /* fallthru */
340 case NEXTHOP_TYPE_IPV4_IFINDEX:
341 nhaddr = nh->gate.ipv4;
342 break;
343 case NEXTHOP_TYPE_IPV6:
344 case NEXTHOP_TYPE_IPV6_IFINDEX:
345 continue;
346 #else
347 case NEXTHOP_TYPE_IPV6:
348 if (nh->ifindex == IFINDEX_INTERNAL)
349 continue;
350
351 /* fallthru */
352 case NEXTHOP_TYPE_IPV6_IFINDEX:
353 nhaddr = nh->gate.ipv6;
354 break;
355 case NEXTHOP_TYPE_IPV4:
356 case NEXTHOP_TYPE_IPV4_IFINDEX:
357 continue;
358 #endif
359 case NEXTHOP_TYPE_IFINDEX:
360 nhaddr = bsr_addr;
361 break;
362
363 case NEXTHOP_TYPE_BLACKHOLE:
364 continue;
365 }
366
367 ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id);
368 if (!ifp || !ifp->info)
369 continue;
370
371 if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
372 return true;
373
374 /* MRIB (IGP) may be pointing at a router where PIM is down */
375
376 nbr = pim_neighbor_find(ifp, nhaddr, true);
377
378 if (!nbr)
379 continue;
380
381 return nh->ifindex == src_ifp->ifindex;
382 }
383 return false;
384 }
385
386 void pim_rp_nexthop_del(struct rp_info *rp_info)
387 {
388 rp_info->rp.source_nexthop.interface = NULL;
389 rp_info->rp.source_nexthop.mrib_nexthop_addr = PIMADDR_ANY;
390 rp_info->rp.source_nexthop.mrib_metric_preference =
391 router->infinite_assert_metric.metric_preference;
392 rp_info->rp.source_nexthop.mrib_route_metric =
393 router->infinite_assert_metric.route_metric;
394 }
395
396 /* Update RP nexthop info based on Nexthop update received from Zebra.*/
397 static void pim_update_rp_nh(struct pim_instance *pim,
398 struct pim_nexthop_cache *pnc)
399 {
400 struct listnode *node = NULL;
401 struct rp_info *rp_info = NULL;
402
403 /*Traverse RP list and update each RP Nexthop info */
404 for (ALL_LIST_ELEMENTS_RO(pnc->rp_list, node, rp_info)) {
405 if (pim_rpf_addr_is_inaddr_any(&rp_info->rp))
406 continue;
407
408 // Compute PIM RPF using cached nexthop
409 if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop,
410 rp_info->rp.rpf_addr,
411 &rp_info->group, 1))
412 pim_rp_nexthop_del(rp_info);
413 }
414 }
415
416 /* Update Upstream nexthop info based on Nexthop update received from Zebra.*/
417 static int pim_update_upstream_nh_helper(struct hash_bucket *bucket, void *arg)
418 {
419 struct pim_instance *pim = (struct pim_instance *)arg;
420 struct pim_upstream *up = (struct pim_upstream *)bucket->data;
421
422 enum pim_rpf_result rpf_result;
423 struct pim_rpf old;
424
425 old.source_nexthop.interface = up->rpf.source_nexthop.interface;
426 rpf_result = pim_rpf_update(pim, up, &old, __func__);
427
428 /* update kernel multicast forwarding cache (MFC); if the
429 * RPF nbr is now unreachable the MFC has already been updated
430 * by pim_rpf_clear
431 */
432 if (rpf_result == PIM_RPF_CHANGED)
433 pim_upstream_mroute_iif_update(up->channel_oil, __func__);
434
435 if (rpf_result == PIM_RPF_CHANGED ||
436 (rpf_result == PIM_RPF_FAILURE && old.source_nexthop.interface))
437 pim_zebra_upstream_rpf_changed(pim, up, &old);
438
439
440 if (PIM_DEBUG_PIM_NHT) {
441 zlog_debug(
442 "%s: NHT upstream %s(%s) old ifp %s new ifp %s",
443 __func__, up->sg_str, pim->vrf->name,
444 old.source_nexthop.interface ? old.source_nexthop
445 .interface->name
446 : "Unknown",
447 up->rpf.source_nexthop.interface ? up->rpf.source_nexthop
448 .interface->name
449 : "Unknown");
450 }
451
452 return HASHWALK_CONTINUE;
453 }
454
455 static int pim_update_upstream_nh(struct pim_instance *pim,
456 struct pim_nexthop_cache *pnc)
457 {
458 hash_walk(pnc->upstream_hash, pim_update_upstream_nh_helper, pim);
459
460 pim_zebra_update_all_interfaces(pim);
461
462 return 0;
463 }
464
465 static int pim_upstream_nh_if_update_helper(struct hash_bucket *bucket,
466 void *arg)
467 {
468 struct pim_nexthop_cache *pnc = bucket->data;
469 struct pnc_hash_walk_data *pwd = arg;
470 struct pim_instance *pim = pwd->pim;
471 struct interface *ifp = pwd->ifp;
472 struct nexthop *nh_node = NULL;
473 ifindex_t first_ifindex;
474
475 for (nh_node = pnc->nexthop; nh_node; nh_node = nh_node->next) {
476 first_ifindex = nh_node->ifindex;
477 if (ifp != if_lookup_by_index(first_ifindex, pim->vrf->vrf_id))
478 continue;
479
480 if (pnc->upstream_hash->count) {
481 pim_update_upstream_nh(pim, pnc);
482 break;
483 }
484 }
485
486 return HASHWALK_CONTINUE;
487 }
488
489 void pim_upstream_nh_if_update(struct pim_instance *pim, struct interface *ifp)
490 {
491 struct pnc_hash_walk_data pwd;
492
493 pwd.pim = pim;
494 pwd.ifp = ifp;
495
496 hash_walk(pim->rpf_hash, pim_upstream_nh_if_update_helper, &pwd);
497 }
498
/*
 * Hash value used for ECMP path selection.
 *
 * Returns 0 when src is NULL; otherwise the prefix hash key of src,
 * XOR-ed with the prefix hash key of grp when grp is non-NULL.
 */
uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp)
{
	uint32_t key;

	if (src == NULL)
		return 0;

	key = prefix_hash_key(src);

	return grp ? (key ^ prefix_hash_key(grp)) : key;
}
511
512 static int pim_ecmp_nexthop_search(struct pim_instance *pim,
513 struct pim_nexthop_cache *pnc,
514 struct pim_nexthop *nexthop, pim_addr src,
515 struct prefix *grp, int neighbor_needed)
516 {
517 struct pim_neighbor *nbrs[router->multipath], *nbr = NULL;
518 struct interface *ifps[router->multipath];
519 struct nexthop *nh_node = NULL;
520 ifindex_t first_ifindex;
521 struct interface *ifp = NULL;
522 uint32_t hash_val = 0, mod_val = 0;
523 uint8_t nh_iter = 0, found = 0;
524 uint32_t i, num_nbrs = 0;
525 struct pim_interface *pim_ifp;
526
527 if (!pnc || !pnc->nexthop_num || !nexthop)
528 return 0;
529
530 pim_addr nh_addr = nexthop->mrib_nexthop_addr;
531 pim_addr grp_addr = pim_addr_from_prefix(grp);
532
533 memset(&nbrs, 0, sizeof(nbrs));
534 memset(&ifps, 0, sizeof(ifps));
535
536
537 // Current Nexthop is VALID, check to stay on the current path.
538 if (nexthop->interface && nexthop->interface->info &&
539 (!pim_addr_is_any(nh_addr))) {
540 /* User configured knob to explicitly switch
541 to new path is disabled or current path
542 metric is less than nexthop update.
543 */
544
545 if (pim->ecmp_rebalance_enable == 0) {
546 uint8_t curr_route_valid = 0;
547 // Check if current nexthop is present in new updated
548 // Nexthop list.
549 // If the current nexthop is not valid, candidate to
550 // choose new Nexthop.
551 for (nh_node = pnc->nexthop; nh_node;
552 nh_node = nh_node->next) {
553 curr_route_valid = (nexthop->interface->ifindex
554 == nh_node->ifindex);
555 if (curr_route_valid)
556 break;
557 }
558
559 if (curr_route_valid &&
560 !pim_if_connected_to_source(nexthop->interface,
561 src)) {
562 nbr = pim_neighbor_find(
563 nexthop->interface,
564 nexthop->mrib_nexthop_addr, true);
565 if (!nbr
566 && !if_is_loopback(nexthop->interface)) {
567 if (PIM_DEBUG_PIM_NHT)
568 zlog_debug(
569 "%s: current nexthop does not have nbr ",
570 __func__);
571 } else {
572 /* update metric even if the upstream
573 * neighbor stays unchanged
574 */
575 nexthop->mrib_metric_preference =
576 pnc->distance;
577 nexthop->mrib_route_metric =
578 pnc->metric;
579 if (PIM_DEBUG_PIM_NHT)
580 zlog_debug(
581 "%s: (%pPA,%pPA)(%s) current nexthop %s is valid, skipping new path selection",
582 __func__, &src,
583 &grp_addr,
584 pim->vrf->name,
585 nexthop->interface->name);
586 return 1;
587 }
588 }
589 }
590 }
591
592 /*
593 * Look up all interfaces and neighbors,
594 * store for later usage
595 */
596 for (nh_node = pnc->nexthop, i = 0; nh_node;
597 nh_node = nh_node->next, i++) {
598 ifps[i] =
599 if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);
600 if (ifps[i]) {
601 #if PIM_IPV == 4
602 pim_addr nhaddr = nh_node->gate.ipv4;
603 #else
604 pim_addr nhaddr = nh_node->gate.ipv6;
605 #endif
606 nbrs[i] = pim_neighbor_find(ifps[i], nhaddr, true);
607 if (nbrs[i] || pim_if_connected_to_source(ifps[i], src))
608 num_nbrs++;
609 }
610 }
611 if (pim->ecmp_enable) {
612 struct prefix src_pfx;
613 uint32_t consider = pnc->nexthop_num;
614
615 if (neighbor_needed && num_nbrs < consider)
616 consider = num_nbrs;
617
618 if (consider == 0)
619 return 0;
620
621 // PIM ECMP flag is enable then choose ECMP path.
622 pim_addr_to_prefix(&src_pfx, src);
623 hash_val = pim_compute_ecmp_hash(&src_pfx, grp);
624 mod_val = hash_val % consider;
625 }
626
627 for (nh_node = pnc->nexthop; nh_node && (found == 0);
628 nh_node = nh_node->next) {
629 first_ifindex = nh_node->ifindex;
630 ifp = ifps[nh_iter];
631 if (!ifp) {
632 if (PIM_DEBUG_PIM_NHT)
633 zlog_debug(
634 "%s %s: could not find interface for ifindex %d (address %pPA(%s))",
635 __FILE__, __func__, first_ifindex, &src,
636 pim->vrf->name);
637 if (nh_iter == mod_val)
638 mod_val++; // Select nexthpath
639 nh_iter++;
640 continue;
641 }
642
643 pim_ifp = ifp->info;
644
645 if (!pim_ifp || !pim_ifp->pim_enable) {
646 if (PIM_DEBUG_PIM_NHT)
647 zlog_debug(
648 "%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)",
649 __func__, ifp->name, pim->vrf->name,
650 first_ifindex, &src);
651 if (nh_iter == mod_val)
652 mod_val++; // Select nexthpath
653 nh_iter++;
654 continue;
655 }
656
657 if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) {
658 nbr = nbrs[nh_iter];
659 if (!nbr && !if_is_loopback(ifp)) {
660 if (PIM_DEBUG_PIM_NHT)
661 zlog_debug(
662 "%s: pim nbr not found on input interface %s(%s)",
663 __func__, ifp->name,
664 pim->vrf->name);
665 if (nh_iter == mod_val)
666 mod_val++; // Select nexthpath
667 nh_iter++;
668 continue;
669 }
670 }
671
672 if (nh_iter == mod_val) {
673 nexthop->interface = ifp;
674 #if PIM_IPV == 4
675 nexthop->mrib_nexthop_addr = nh_node->gate.ipv4;
676 #else
677 nexthop->mrib_nexthop_addr = nh_node->gate.ipv6;
678 #endif
679 nexthop->mrib_metric_preference = pnc->distance;
680 nexthop->mrib_route_metric = pnc->metric;
681 nexthop->last_lookup = src;
682 nexthop->last_lookup_time = pim_time_monotonic_usec();
683 nexthop->nbr = nbr;
684 found = 1;
685 if (PIM_DEBUG_PIM_NHT)
686 zlog_debug(
687 "%s: (%pPA,%pPA)(%s) selected nhop interface %s addr %pPAs mod_val %u iter %d ecmp %d",
688 __func__, &src, &grp_addr,
689 pim->vrf->name, ifp->name, &nh_addr,
690 mod_val, nh_iter, pim->ecmp_enable);
691 }
692 nh_iter++;
693 }
694
695 if (found)
696 return 1;
697 else
698 return 0;
699 }
700
701 /* This API is used to parse Registered address nexthop update coming from Zebra
702 */
703 int pim_parse_nexthop_update(ZAPI_CALLBACK_ARGS)
704 {
705 struct nexthop *nexthop;
706 struct nexthop *nhlist_head = NULL;
707 struct nexthop *nhlist_tail = NULL;
708 int i;
709 struct pim_rpf rpf;
710 struct pim_nexthop_cache *pnc = NULL;
711 struct interface *ifp = NULL;
712 struct vrf *vrf = vrf_lookup_by_id(vrf_id);
713 struct pim_instance *pim;
714 struct zapi_route nhr;
715 struct prefix match;
716
717 if (!vrf)
718 return 0;
719 pim = vrf->info;
720
721 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
722 zlog_err("%s: Decode of nexthop update from zebra failed",
723 __func__);
724 return 0;
725 }
726
727 rpf.rpf_addr = pim_addr_from_prefix(&match);
728 pnc = pim_nexthop_cache_find(pim, &rpf);
729 if (!pnc) {
730 if (PIM_DEBUG_PIM_NHT)
731 zlog_debug(
732 "%s: Skipping NHT update, addr %pPA is not in local cached DB.",
733 __func__, &rpf.rpf_addr);
734 return 0;
735 }
736
737 pnc->last_update = pim_time_monotonic_usec();
738
739 if (nhr.nexthop_num) {
740 pnc->nexthop_num = 0;
741
742 for (i = 0; i < nhr.nexthop_num; i++) {
743 nexthop = nexthop_from_zapi_nexthop(&nhr.nexthops[i]);
744 switch (nexthop->type) {
745 case NEXTHOP_TYPE_IFINDEX:
746 /*
747 * Connected route (i.e. no nexthop), use
748 * RPF address from nexthop cache (i.e.
749 * destination) as PIM nexthop.
750 */
751 #if PIM_IPV == 4
752 nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
753 nexthop->gate.ipv4 = pnc->rpf.rpf_addr;
754 #else
755 nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
756 nexthop->gate.ipv6 = pnc->rpf.rpf_addr;
757 #endif
758 break;
759 #if PIM_IPV == 4
760 /* RFC5549 IPv4-over-IPv6 nexthop handling:
761 * if we get an IPv6 nexthop in IPv4 PIM, hunt down a
762 * PIM neighbor and use that instead.
763 */
764 case NEXTHOP_TYPE_IPV6_IFINDEX: {
765 struct interface *ifp1 = NULL;
766 struct pim_neighbor *nbr = NULL;
767
768 ifp1 = if_lookup_by_index(nexthop->ifindex,
769 pim->vrf->vrf_id);
770
771 if (!ifp1)
772 nbr = NULL;
773 else
774 /* FIXME: should really use nbr's
775 * secondary address list here
776 */
777 nbr = pim_neighbor_find_if(ifp1);
778
779 /* Overwrite with Nbr address as NH addr */
780 if (nbr)
781 nexthop->gate.ipv4 = nbr->source_addr;
782 else
783 // Mark nexthop address to 0 until PIM
784 // Nbr is resolved.
785 nexthop->gate.ipv4 = PIMADDR_ANY;
786
787 break;
788 }
789 #else
790 case NEXTHOP_TYPE_IPV6_IFINDEX:
791 #endif
792 case NEXTHOP_TYPE_IPV6:
793 case NEXTHOP_TYPE_IPV4:
794 case NEXTHOP_TYPE_IPV4_IFINDEX:
795 case NEXTHOP_TYPE_BLACKHOLE:
796 /* nothing to do for the other nexthop types */
797 break;
798 }
799
800 ifp = if_lookup_by_index(nexthop->ifindex,
801 pim->vrf->vrf_id);
802 if (!ifp) {
803 if (PIM_DEBUG_PIM_NHT) {
804 char buf[NEXTHOP_STRLEN];
805 zlog_debug(
806 "%s: could not find interface for ifindex %d(%s) (addr %s)",
807 __func__, nexthop->ifindex,
808 pim->vrf->name,
809 nexthop2str(nexthop, buf,
810 sizeof(buf)));
811 }
812 nexthop_free(nexthop);
813 continue;
814 }
815
816 if (PIM_DEBUG_PIM_NHT) {
817 #if PIM_IPV == 4
818 pim_addr nhaddr = nexthop->gate.ipv4;
819 #else
820 pim_addr nhaddr = nexthop->gate.ipv6;
821 #endif
822 zlog_debug(
823 "%s: NHT addr %pFX(%s) %d-nhop via %pPA(%s) type %d distance:%u metric:%u ",
824 __func__, &match, pim->vrf->name, i + 1,
825 &nhaddr, ifp->name, nexthop->type,
826 nhr.distance, nhr.metric);
827 }
828
829 if (!ifp->info) {
830 /*
831 * Though Multicast is not enabled on this
832 * Interface store it in database otheriwse we
833 * may miss this update and this will not cause
834 * any issue, because while choosing the path we
835 * are ommitting the Interfaces which are not
836 * multicast enabled
837 */
838 if (PIM_DEBUG_PIM_NHT) {
839 char buf[NEXTHOP_STRLEN];
840
841 zlog_debug(
842 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)",
843 __func__, ifp->name,
844 pim->vrf->name,
845 nexthop->ifindex,
846 nexthop2str(nexthop, buf,
847 sizeof(buf)));
848 }
849 }
850
851 if (nhlist_tail) {
852 nhlist_tail->next = nexthop;
853 nhlist_tail = nexthop;
854 } else {
855 nhlist_tail = nexthop;
856 nhlist_head = nexthop;
857 }
858
859 // Keep track of all nexthops, even PIM-disabled ones.
860 pnc->nexthop_num++;
861 }
862 /* Reset existing pnc->nexthop before assigning new list */
863 nexthops_free(pnc->nexthop);
864 pnc->nexthop = nhlist_head;
865 if (pnc->nexthop_num) {
866 pnc->flags |= PIM_NEXTHOP_VALID;
867 pnc->distance = nhr.distance;
868 pnc->metric = nhr.metric;
869 }
870 } else {
871 pnc->flags &= ~PIM_NEXTHOP_VALID;
872 pnc->nexthop_num = nhr.nexthop_num;
873 nexthops_free(pnc->nexthop);
874 pnc->nexthop = NULL;
875 }
876 SET_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED);
877
878 if (PIM_DEBUG_PIM_NHT)
879 zlog_debug(
880 "%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d",
881 __func__, &match, pim->vrf->name, nhr.nexthop_num,
882 pnc->nexthop_num, vrf_id, pnc->upstream_hash->count,
883 listcount(pnc->rp_list));
884
885 pim_rpf_set_refresh_time(pim);
886
887 if (listcount(pnc->rp_list))
888 pim_update_rp_nh(pim, pnc);
889 if (pnc->upstream_hash->count)
890 pim_update_upstream_nh(pim, pnc);
891
892 return 0;
893 }
894
895 int pim_ecmp_nexthop_lookup(struct pim_instance *pim,
896 struct pim_nexthop *nexthop, pim_addr src,
897 struct prefix *grp, int neighbor_needed)
898 {
899 struct pim_nexthop_cache *pnc;
900 struct pim_zlookup_nexthop nexthop_tab[router->multipath];
901 struct pim_neighbor *nbrs[router->multipath], *nbr = NULL;
902 struct pim_rpf rpf;
903 int num_ifindex;
904 struct interface *ifps[router->multipath], *ifp;
905 int first_ifindex;
906 int found = 0;
907 uint8_t i = 0;
908 uint32_t hash_val = 0, mod_val = 0;
909 uint32_t num_nbrs = 0;
910 struct pim_interface *pim_ifp;
911
912 if (PIM_DEBUG_PIM_NHT_DETAIL)
913 zlog_debug("%s: Looking up: %pPA(%s), last lookup time: %lld",
914 __func__, &src, pim->vrf->name,
915 nexthop->last_lookup_time);
916
917 rpf.rpf_addr = src;
918
919 pnc = pim_nexthop_cache_find(pim, &rpf);
920 if (pnc) {
921 if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED))
922 return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp,
923 neighbor_needed);
924 }
925
926 memset(nexthop_tab, 0,
927 sizeof(struct pim_zlookup_nexthop) * router->multipath);
928 num_ifindex =
929 zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, src,
930 PIM_NEXTHOP_LOOKUP_MAX);
931 if (num_ifindex < 1) {
932 if (PIM_DEBUG_PIM_NHT)
933 zlog_warn(
934 "%s: could not find nexthop ifindex for address %pPA(%s)",
935 __func__, &src, pim->vrf->name);
936 return 0;
937 }
938
939 memset(&nbrs, 0, sizeof(nbrs));
940 memset(&ifps, 0, sizeof(ifps));
941
942 /*
943 * Look up all interfaces and neighbors,
944 * store for later usage
945 */
946 for (i = 0; i < num_ifindex; i++) {
947 ifps[i] = if_lookup_by_index(nexthop_tab[i].ifindex,
948 pim->vrf->vrf_id);
949 if (ifps[i]) {
950 nbrs[i] = pim_neighbor_find(
951 ifps[i], nexthop_tab[i].nexthop_addr, true);
952
953 if (nbrs[i] || pim_if_connected_to_source(ifps[i], src))
954 num_nbrs++;
955 }
956 }
957
958 // If PIM ECMP enable then choose ECMP path.
959 if (pim->ecmp_enable) {
960 struct prefix src_pfx;
961 uint32_t consider = num_ifindex;
962
963 if (neighbor_needed && num_nbrs < consider)
964 consider = num_nbrs;
965
966 if (consider == 0)
967 return 0;
968
969 pim_addr_to_prefix(&src_pfx, src);
970 hash_val = pim_compute_ecmp_hash(&src_pfx, grp);
971 mod_val = hash_val % consider;
972 if (PIM_DEBUG_PIM_NHT_DETAIL)
973 zlog_debug("%s: hash_val %u mod_val %u", __func__,
974 hash_val, mod_val);
975 }
976
977 i = 0;
978 while (!found && (i < num_ifindex)) {
979 first_ifindex = nexthop_tab[i].ifindex;
980
981 ifp = ifps[i];
982 if (!ifp) {
983 if (PIM_DEBUG_PIM_NHT)
984 zlog_debug(
985 "%s %s: could not find interface for ifindex %d (address %pPA(%s))",
986 __FILE__, __func__, first_ifindex, &src,
987 pim->vrf->name);
988 if (i == mod_val)
989 mod_val++;
990 i++;
991 continue;
992 }
993
994 pim_ifp = ifp->info;
995
996 if (!pim_ifp || !pim_ifp->pim_enable) {
997 if (PIM_DEBUG_PIM_NHT)
998 zlog_debug(
999 "%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)",
1000 __func__, ifp->name, pim->vrf->name,
1001 first_ifindex, &src);
1002 if (i == mod_val)
1003 mod_val++;
1004 i++;
1005 continue;
1006 }
1007 if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) {
1008 nbr = nbrs[i];
1009 if (PIM_DEBUG_PIM_NHT_DETAIL)
1010 zlog_debug("ifp name: %s(%s), pim nbr: %p",
1011 ifp->name, pim->vrf->name, nbr);
1012 if (!nbr && !if_is_loopback(ifp)) {
1013 if (i == mod_val)
1014 mod_val++;
1015 if (PIM_DEBUG_PIM_NHT)
1016 zlog_debug(
1017 "%s: NBR (%pPA) not found on input interface %s(%s) (RPF for source %pPA)",
1018 __func__,
1019 &nexthop_tab[i].nexthop_addr,
1020 ifp->name, pim->vrf->name,
1021 &src);
1022 i++;
1023 continue;
1024 }
1025 }
1026
1027 if (i == mod_val) {
1028 if (PIM_DEBUG_PIM_NHT)
1029 zlog_debug(
1030 "%s: found nhop %pPA for addr %pPA interface %s(%s) metric %d dist %d",
1031 __func__, &nexthop_tab[i].nexthop_addr,
1032 &src, ifp->name, pim->vrf->name,
1033 nexthop_tab[i].route_metric,
1034 nexthop_tab[i].protocol_distance);
1035 /* update nexthop data */
1036 nexthop->interface = ifp;
1037 nexthop->mrib_nexthop_addr =
1038 nexthop_tab[i].nexthop_addr;
1039 nexthop->mrib_metric_preference =
1040 nexthop_tab[i].protocol_distance;
1041 nexthop->mrib_route_metric =
1042 nexthop_tab[i].route_metric;
1043 nexthop->last_lookup = src;
1044 nexthop->last_lookup_time = pim_time_monotonic_usec();
1045 nexthop->nbr = nbr;
1046 found = 1;
1047 }
1048 i++;
1049 }
1050
1051 if (found)
1052 return 1;
1053 else
1054 return 0;
1055 }
1056
1057 int pim_ecmp_fib_lookup_if_vif_index(struct pim_instance *pim, pim_addr src,
1058 struct prefix *grp)
1059 {
1060 struct pim_nexthop nhop;
1061 int vif_index;
1062 ifindex_t ifindex;
1063
1064 memset(&nhop, 0, sizeof(nhop));
1065 if (!pim_ecmp_nexthop_lookup(pim, &nhop, src, grp, 1)) {
1066 if (PIM_DEBUG_PIM_NHT)
1067 zlog_debug(
1068 "%s: could not find nexthop ifindex for address %pPA(%s)",
1069 __func__, &src, pim->vrf->name);
1070 return -1;
1071 }
1072
1073 ifindex = nhop.interface->ifindex;
1074 if (PIM_DEBUG_PIM_NHT)
1075 zlog_debug(
1076 "%s: found nexthop ifindex=%d (interface %s(%s)) for address %pPA",
1077 __func__, ifindex,
1078 ifindex2ifname(ifindex, pim->vrf->vrf_id),
1079 pim->vrf->name, &src);
1080
1081 vif_index = pim_if_find_vifindex_by_ifindex(pim, ifindex);
1082
1083 if (vif_index < 0) {
1084 if (PIM_DEBUG_PIM_NHT) {
1085 zlog_debug(
1086 "%s: low vif_index=%d(%s) < 1 nexthop for address %pPA",
1087 __func__, vif_index, pim->vrf->name, &src);
1088 }
1089 return -2;
1090 }
1091
1092 return vif_index;
1093 }