]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_nht.c
cb0d8c5c9231a857f3d7130fa50070bdcceb8b52
[mirror_frr.git] / pimd / pim_nht.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * PIM for Quagga
4 * Copyright (C) 2017 Cumulus Networks, Inc.
5 * Chirag Shah
6 */
7 #include <zebra.h>
8 #include "network.h"
9 #include "zclient.h"
10 #include "stream.h"
11 #include "nexthop.h"
12 #include "if.h"
13 #include "hash.h"
14 #include "jhash.h"
15
16 #include "lib/printfrr.h"
17
18 #include "pimd.h"
19 #include "pimd/pim_nht.h"
20 #include "pim_instance.h"
21 #include "log.h"
22 #include "pim_time.h"
23 #include "pim_oil.h"
24 #include "pim_ifchannel.h"
25 #include "pim_mroute.h"
26 #include "pim_zebra.h"
27 #include "pim_upstream.h"
28 #include "pim_join.h"
29 #include "pim_jp_agg.h"
30 #include "pim_zebra.h"
31 #include "pim_zlookup.h"
32 #include "pim_rp.h"
33 #include "pim_addr.h"
34
35 /**
36 * pim_sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
37 * command to Zebra.
38 */
39 void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient,
40 struct pim_nexthop_cache *pnc, int command)
41 {
42 struct prefix p;
43 int ret;
44
45 pim_addr_to_prefix(&p, pnc->rpf.rpf_addr);
46 ret = zclient_send_rnh(zclient, command, &p, SAFI_UNICAST, false, false,
47 pim->vrf->vrf_id);
48 if (ret == ZCLIENT_SEND_FAILURE)
49 zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
50
51 if (PIM_DEBUG_PIM_NHT)
52 zlog_debug(
53 "%s: NHT %sregistered addr %pFX(%s) with Zebra ret:%d ",
54 __func__,
55 (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p,
56 pim->vrf->name, ret);
57
58 return;
59 }
60
61 struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim,
62 struct pim_rpf *rpf)
63 {
64 struct pim_nexthop_cache *pnc = NULL;
65 struct pim_nexthop_cache lookup;
66
67 lookup.rpf.rpf_addr = rpf->rpf_addr;
68 pnc = hash_lookup(pim->rpf_hash, &lookup);
69
70 return pnc;
71 }
72
73 static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim,
74 struct pim_rpf *rpf_addr)
75 {
76 struct pim_nexthop_cache *pnc;
77 char hash_name[64];
78
79 pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE,
80 sizeof(struct pim_nexthop_cache));
81 pnc->rpf.rpf_addr = rpf_addr->rpf_addr;
82
83 pnc = hash_get(pim->rpf_hash, pnc, hash_alloc_intern);
84
85 pnc->rp_list = list_new();
86 pnc->rp_list->cmp = pim_rp_list_cmp;
87
88 snprintfrr(hash_name, sizeof(hash_name), "PNC %pPA(%s) Upstream Hash",
89 &pnc->rpf.rpf_addr, pim->vrf->name);
90 pnc->upstream_hash = hash_create_size(8192, pim_upstream_hash_key,
91 pim_upstream_equal, hash_name);
92
93 return pnc;
94 }
95
96 static struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim,
97 pim_addr addr)
98 {
99 struct pim_nexthop_cache *pnc = NULL;
100 struct pim_rpf rpf;
101 struct zclient *zclient = NULL;
102
103 zclient = pim_zebra_zclient_get();
104 memset(&rpf, 0, sizeof(rpf));
105 rpf.rpf_addr = addr;
106
107 pnc = pim_nexthop_cache_find(pim, &rpf);
108 if (!pnc) {
109 pnc = pim_nexthop_cache_add(pim, &rpf);
110 pim_sendmsg_zebra_rnh(pim, zclient, pnc,
111 ZEBRA_NEXTHOP_REGISTER);
112 if (PIM_DEBUG_PIM_NHT_DETAIL)
113 zlog_debug(
114 "%s: NHT cache and zebra notification added for %pPA(%s)",
115 __func__, &addr, pim->vrf->name);
116 }
117
118 return pnc;
119 }
120
121 /* TBD: this does several distinct things and should probably be split up.
122 * (checking state vs. returning pnc vs. adding upstream vs. adding rp)
123 */
124 int pim_find_or_track_nexthop(struct pim_instance *pim, pim_addr addr,
125 struct pim_upstream *up, struct rp_info *rp,
126 struct pim_nexthop_cache *out_pnc)
127 {
128 struct pim_nexthop_cache *pnc;
129 struct listnode *ch_node = NULL;
130
131 pnc = pim_nht_get(pim, addr);
132
133 assertf(up || rp, "addr=%pPA", &addr);
134
135 if (rp != NULL) {
136 ch_node = listnode_lookup(pnc->rp_list, rp);
137 if (ch_node == NULL)
138 listnode_add_sort(pnc->rp_list, rp);
139 }
140
141 if (up != NULL)
142 (void)hash_get(pnc->upstream_hash, up, hash_alloc_intern);
143
144 if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) {
145 if (out_pnc)
146 memcpy(out_pnc, pnc, sizeof(struct pim_nexthop_cache));
147 return 1;
148 }
149
150 return 0;
151 }
152
153 void pim_nht_bsr_add(struct pim_instance *pim, pim_addr addr)
154 {
155 struct pim_nexthop_cache *pnc;
156
157 pnc = pim_nht_get(pim, addr);
158
159 pnc->bsr_count++;
160 }
161
162 static void pim_nht_drop_maybe(struct pim_instance *pim,
163 struct pim_nexthop_cache *pnc)
164 {
165 if (PIM_DEBUG_PIM_NHT)
166 zlog_debug(
167 "%s: NHT %pPA(%s) rp_list count:%d upstream count:%ld BSR count:%u",
168 __func__, &pnc->rpf.rpf_addr, pim->vrf->name,
169 pnc->rp_list->count, pnc->upstream_hash->count,
170 pnc->bsr_count);
171
172 if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0
173 && pnc->bsr_count == 0) {
174 struct zclient *zclient = pim_zebra_zclient_get();
175
176 pim_sendmsg_zebra_rnh(pim, zclient, pnc,
177 ZEBRA_NEXTHOP_UNREGISTER);
178
179 list_delete(&pnc->rp_list);
180 hash_free(pnc->upstream_hash);
181
182 hash_release(pim->rpf_hash, pnc);
183 if (pnc->nexthop)
184 nexthops_free(pnc->nexthop);
185 XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc);
186 }
187 }
188
189 void pim_delete_tracked_nexthop(struct pim_instance *pim, pim_addr addr,
190 struct pim_upstream *up, struct rp_info *rp)
191 {
192 struct pim_nexthop_cache *pnc = NULL;
193 struct pim_nexthop_cache lookup;
194 struct pim_upstream *upstream = NULL;
195
196 /* Remove from RPF hash if it is the last entry */
197 lookup.rpf.rpf_addr = addr;
198 pnc = hash_lookup(pim->rpf_hash, &lookup);
199 if (!pnc) {
200 zlog_warn("attempting to delete nonexistent NHT entry %pPA",
201 &addr);
202 return;
203 }
204
205 if (rp) {
206 /* Release the (*, G)upstream from pnc->upstream_hash,
207 * whose Group belongs to the RP getting deleted
208 */
209 frr_each (rb_pim_upstream, &pim->upstream_head, upstream) {
210 struct prefix grp;
211 struct rp_info *trp_info;
212
213 if (!pim_addr_is_any(upstream->sg.src))
214 continue;
215
216 pim_addr_to_prefix(&grp, upstream->sg.grp);
217 trp_info = pim_rp_find_match_group(pim, &grp);
218 if (trp_info == rp)
219 hash_release(pnc->upstream_hash, upstream);
220 }
221 listnode_delete(pnc->rp_list, rp);
222 }
223
224 if (up)
225 hash_release(pnc->upstream_hash, up);
226
227 pim_nht_drop_maybe(pim, pnc);
228 }
229
230 void pim_nht_bsr_del(struct pim_instance *pim, pim_addr addr)
231 {
232 struct pim_nexthop_cache *pnc = NULL;
233 struct pim_nexthop_cache lookup;
234
235 /*
236 * Nothing to do here if the address to unregister
237 * is 0.0.0.0 as that the BSR has not been registered
238 * for tracking yet.
239 */
240 if (pim_addr_is_any(addr))
241 return;
242
243 lookup.rpf.rpf_addr = addr;
244
245 pnc = hash_lookup(pim->rpf_hash, &lookup);
246
247 if (!pnc) {
248 zlog_warn("attempting to delete nonexistent NHT BSR entry %pPA",
249 &addr);
250 return;
251 }
252
253 assertf(pnc->bsr_count > 0, "addr=%pPA", &addr);
254 pnc->bsr_count--;
255
256 pim_nht_drop_maybe(pim, pnc);
257 }
258
259 bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr,
260 struct interface *src_ifp, pim_addr src_ip)
261 {
262 struct pim_nexthop_cache *pnc = NULL;
263 struct pim_nexthop_cache lookup;
264 struct pim_neighbor *nbr = NULL;
265 struct nexthop *nh;
266 struct interface *ifp;
267
268 lookup.rpf.rpf_addr = bsr_addr;
269
270 pnc = hash_lookup(pim->rpf_hash, &lookup);
271 if (!pnc || !CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED)) {
272 /* BSM from a new freshly registered BSR - do a synchronous
273 * zebra query since otherwise we'd drop the first packet,
274 * leading to additional delay in picking up BSM data
275 */
276
277 /* FIXME: this should really be moved into a generic NHT
278 * function that does "add and get immediate result" or maybe
279 * "check cache or get immediate result." But until that can
280 * be worked in, here's a copy of the code below :(
281 */
282 struct pim_zlookup_nexthop nexthop_tab[router->multipath];
283 ifindex_t i;
284 struct interface *ifp = NULL;
285 int num_ifindex;
286
287 memset(nexthop_tab, 0, sizeof(nexthop_tab));
288 num_ifindex = zclient_lookup_nexthop(
289 pim, nexthop_tab, router->multipath, bsr_addr,
290 PIM_NEXTHOP_LOOKUP_MAX);
291
292 if (num_ifindex <= 0)
293 return false;
294
295 for (i = 0; i < num_ifindex; i++) {
296 struct pim_zlookup_nexthop *znh = &nexthop_tab[i];
297
298 /* pim_zlookup_nexthop has no ->type */
299
300 /* 1:1 match code below with znh instead of nh */
301 ifp = if_lookup_by_index(znh->ifindex,
302 pim->vrf->vrf_id);
303
304 if (!ifp || !ifp->info)
305 continue;
306
307 if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
308 return true;
309
310 nbr = pim_neighbor_find(ifp, znh->nexthop_addr, true);
311 if (!nbr)
312 continue;
313
314 return znh->ifindex == src_ifp->ifindex;
315 }
316 return false;
317 }
318
319 if (!CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID))
320 return false;
321
322 /* if we accept BSMs from more than one ECMP nexthop, this will cause
323 * BSM message "multiplication" for each ECMP hop. i.e. if you have
324 * 4-way ECMP and 4 hops you end up with 256 copies of each BSM
325 * message.
326 *
327 * so... only accept the first (IPv4) valid nexthop as source.
328 */
329
330 for (nh = pnc->nexthop; nh; nh = nh->next) {
331 pim_addr nhaddr;
332
333 switch (nh->type) {
334 #if PIM_IPV == 4
335 case NEXTHOP_TYPE_IPV4:
336 if (nh->ifindex == IFINDEX_INTERNAL)
337 continue;
338
339 /* fallthru */
340 case NEXTHOP_TYPE_IPV4_IFINDEX:
341 nhaddr = nh->gate.ipv4;
342 break;
343 case NEXTHOP_TYPE_IPV6:
344 case NEXTHOP_TYPE_IPV6_IFINDEX:
345 continue;
346 #else
347 case NEXTHOP_TYPE_IPV6:
348 if (nh->ifindex == IFINDEX_INTERNAL)
349 continue;
350
351 /* fallthru */
352 case NEXTHOP_TYPE_IPV6_IFINDEX:
353 nhaddr = nh->gate.ipv6;
354 break;
355 case NEXTHOP_TYPE_IPV4:
356 case NEXTHOP_TYPE_IPV4_IFINDEX:
357 continue;
358 #endif
359 case NEXTHOP_TYPE_IFINDEX:
360 nhaddr = bsr_addr;
361 break;
362
363 case NEXTHOP_TYPE_BLACKHOLE:
364 continue;
365 }
366
367 ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id);
368 if (!ifp || !ifp->info)
369 continue;
370
371 if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
372 return true;
373
374 /* MRIB (IGP) may be pointing at a router where PIM is down */
375
376 nbr = pim_neighbor_find(ifp, nhaddr, true);
377
378 if (!nbr)
379 continue;
380
381 return nh->ifindex == src_ifp->ifindex;
382 }
383 return false;
384 }
385
386 void pim_rp_nexthop_del(struct rp_info *rp_info)
387 {
388 rp_info->rp.source_nexthop.interface = NULL;
389 rp_info->rp.source_nexthop.mrib_nexthop_addr = PIMADDR_ANY;
390 rp_info->rp.source_nexthop.mrib_metric_preference =
391 router->infinite_assert_metric.metric_preference;
392 rp_info->rp.source_nexthop.mrib_route_metric =
393 router->infinite_assert_metric.route_metric;
394 }
395
396 /* Update RP nexthop info based on Nexthop update received from Zebra.*/
397 static void pim_update_rp_nh(struct pim_instance *pim,
398 struct pim_nexthop_cache *pnc)
399 {
400 struct listnode *node = NULL;
401 struct rp_info *rp_info = NULL;
402
403 /*Traverse RP list and update each RP Nexthop info */
404 for (ALL_LIST_ELEMENTS_RO(pnc->rp_list, node, rp_info)) {
405 if (pim_rpf_addr_is_inaddr_any(&rp_info->rp))
406 continue;
407
408 // Compute PIM RPF using cached nexthop
409 if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop,
410 rp_info->rp.rpf_addr,
411 &rp_info->group, 1))
412 pim_rp_nexthop_del(rp_info);
413 }
414 }
415
416 /* Update Upstream nexthop info based on Nexthop update received from Zebra.*/
417 static int pim_update_upstream_nh_helper(struct hash_bucket *bucket, void *arg)
418 {
419 struct pim_instance *pim = (struct pim_instance *)arg;
420 struct pim_upstream *up = (struct pim_upstream *)bucket->data;
421
422 enum pim_rpf_result rpf_result;
423 struct pim_rpf old;
424
425 old.source_nexthop.interface = up->rpf.source_nexthop.interface;
426 rpf_result = pim_rpf_update(pim, up, &old, __func__);
427
428 /* update kernel multicast forwarding cache (MFC); if the
429 * RPF nbr is now unreachable the MFC has already been updated
430 * by pim_rpf_clear
431 */
432 if (rpf_result == PIM_RPF_CHANGED)
433 pim_upstream_mroute_iif_update(up->channel_oil, __func__);
434
435 if (rpf_result == PIM_RPF_CHANGED ||
436 (rpf_result == PIM_RPF_FAILURE && old.source_nexthop.interface))
437 pim_zebra_upstream_rpf_changed(pim, up, &old);
438
439
440 if (PIM_DEBUG_PIM_NHT) {
441 zlog_debug(
442 "%s: NHT upstream %s(%s) old ifp %s new ifp %s",
443 __func__, up->sg_str, pim->vrf->name,
444 old.source_nexthop.interface ? old.source_nexthop
445 .interface->name
446 : "Unknown",
447 up->rpf.source_nexthop.interface ? up->rpf.source_nexthop
448 .interface->name
449 : "Unknown");
450 }
451
452 return HASHWALK_CONTINUE;
453 }
454
455 static int pim_update_upstream_nh(struct pim_instance *pim,
456 struct pim_nexthop_cache *pnc)
457 {
458 hash_walk(pnc->upstream_hash, pim_update_upstream_nh_helper, pim);
459
460 pim_zebra_update_all_interfaces(pim);
461
462 return 0;
463 }
464
465 static int pim_upstream_nh_if_update_helper(struct hash_bucket *bucket,
466 void *arg)
467 {
468 struct pim_nexthop_cache *pnc = bucket->data;
469 struct pnc_hash_walk_data *pwd = arg;
470 struct pim_instance *pim = pwd->pim;
471 struct interface *ifp = pwd->ifp;
472 struct nexthop *nh_node = NULL;
473 ifindex_t first_ifindex;
474
475 for (nh_node = pnc->nexthop; nh_node; nh_node = nh_node->next) {
476 first_ifindex = nh_node->ifindex;
477 if (ifp != if_lookup_by_index(first_ifindex, pim->vrf->vrf_id))
478 continue;
479
480 if (pnc->upstream_hash->count) {
481 pim_update_upstream_nh(pim, pnc);
482 break;
483 }
484 }
485
486 return HASHWALK_CONTINUE;
487 }
488
489 void pim_upstream_nh_if_update(struct pim_instance *pim, struct interface *ifp)
490 {
491 struct pnc_hash_walk_data pwd;
492
493 pwd.pim = pim;
494 pwd.ifp = ifp;
495
496 hash_walk(pim->rpf_hash, pim_upstream_nh_if_update_helper, &pwd);
497 }
498
/*
 * Hash value used to spread (source, group) pairs over ECMP nexthops.
 *
 * Returns 0 when src is NULL, the hash key of src when grp is NULL, and the
 * XOR of both prefix hash keys otherwise.
 */
uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp)
{
	uint32_t key;

	if (src == NULL)
		return 0;

	key = prefix_hash_key(src);

	return grp ? (key ^ prefix_hash_key(grp)) : key;
}
511
512 static int pim_ecmp_nexthop_search(struct pim_instance *pim,
513 struct pim_nexthop_cache *pnc,
514 struct pim_nexthop *nexthop, pim_addr src,
515 struct prefix *grp, int neighbor_needed)
516 {
517 struct pim_neighbor *nbrs[router->multipath], *nbr = NULL;
518 struct interface *ifps[router->multipath];
519 struct nexthop *nh_node = NULL;
520 ifindex_t first_ifindex;
521 struct interface *ifp = NULL;
522 uint32_t hash_val = 0, mod_val = 0;
523 uint8_t nh_iter = 0, found = 0;
524 uint32_t i, num_nbrs = 0;
525 struct pim_interface *pim_ifp;
526
527 if (!pnc || !pnc->nexthop_num || !nexthop)
528 return 0;
529
530 pim_addr nh_addr = nexthop->mrib_nexthop_addr;
531 pim_addr grp_addr = pim_addr_from_prefix(grp);
532
533 memset(&nbrs, 0, sizeof(nbrs));
534 memset(&ifps, 0, sizeof(ifps));
535
536
537 // Current Nexthop is VALID, check to stay on the current path.
538 if (nexthop->interface && nexthop->interface->info &&
539 (!pim_addr_is_any(nh_addr))) {
540 /* User configured knob to explicitly switch
541 to new path is disabled or current path
542 metric is less than nexthop update.
543 */
544
545 if (pim->ecmp_rebalance_enable == 0) {
546 uint8_t curr_route_valid = 0;
547 // Check if current nexthop is present in new updated
548 // Nexthop list.
549 // If the current nexthop is not valid, candidate to
550 // choose new Nexthop.
551 for (nh_node = pnc->nexthop; nh_node;
552 nh_node = nh_node->next) {
553 curr_route_valid = (nexthop->interface->ifindex
554 == nh_node->ifindex);
555 if (curr_route_valid)
556 break;
557 }
558
559 if (curr_route_valid &&
560 !pim_if_connected_to_source(nexthop->interface,
561 src)) {
562 nbr = pim_neighbor_find(
563 nexthop->interface,
564 nexthop->mrib_nexthop_addr, true);
565 if (!nbr
566 && !if_is_loopback(nexthop->interface)) {
567 if (PIM_DEBUG_PIM_NHT)
568 zlog_debug(
569 "%s: current nexthop does not have nbr ",
570 __func__);
571 } else {
572 /* update metric even if the upstream
573 * neighbor stays unchanged
574 */
575 nexthop->mrib_metric_preference =
576 pnc->distance;
577 nexthop->mrib_route_metric =
578 pnc->metric;
579 if (PIM_DEBUG_PIM_NHT)
580 zlog_debug(
581 "%s: (%pPA,%pPA)(%s) current nexthop %s is valid, skipping new path selection",
582 __func__, &src,
583 &grp_addr,
584 pim->vrf->name,
585 nexthop->interface->name);
586 return 1;
587 }
588 }
589 }
590 }
591
592 /*
593 * Look up all interfaces and neighbors,
594 * store for later usage
595 */
596 for (nh_node = pnc->nexthop, i = 0; nh_node;
597 nh_node = nh_node->next, i++) {
598 ifps[i] =
599 if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);
600 if (ifps[i]) {
601 #if PIM_IPV == 4
602 pim_addr nhaddr = nh_node->gate.ipv4;
603 #else
604 pim_addr nhaddr = nh_node->gate.ipv6;
605 #endif
606 nbrs[i] = pim_neighbor_find(ifps[i], nhaddr, true);
607 if (nbrs[i] || pim_if_connected_to_source(ifps[i], src))
608 num_nbrs++;
609 }
610 }
611 if (pim->ecmp_enable) {
612 struct prefix src_pfx;
613 uint32_t consider = pnc->nexthop_num;
614
615 if (neighbor_needed && num_nbrs < consider)
616 consider = num_nbrs;
617
618 if (consider == 0)
619 return 0;
620
621 // PIM ECMP flag is enable then choose ECMP path.
622 pim_addr_to_prefix(&src_pfx, src);
623 hash_val = pim_compute_ecmp_hash(&src_pfx, grp);
624 mod_val = hash_val % consider;
625 }
626
627 for (nh_node = pnc->nexthop; nh_node && (found == 0);
628 nh_node = nh_node->next) {
629 first_ifindex = nh_node->ifindex;
630 ifp = ifps[nh_iter];
631 if (!ifp) {
632 if (PIM_DEBUG_PIM_NHT)
633 zlog_debug(
634 "%s %s: could not find interface for ifindex %d (address %pPA(%s))",
635 __FILE__, __func__, first_ifindex, &src,
636 pim->vrf->name);
637 if (nh_iter == mod_val)
638 mod_val++; // Select nexthpath
639 nh_iter++;
640 continue;
641 }
642
643 pim_ifp = ifp->info;
644
645 if (!pim_ifp || !pim_ifp->pim_enable) {
646 if (PIM_DEBUG_PIM_NHT)
647 zlog_debug(
648 "%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)",
649 __func__, ifp->name, pim->vrf->name,
650 first_ifindex, &src);
651 if (nh_iter == mod_val)
652 mod_val++; // Select nexthpath
653 nh_iter++;
654 continue;
655 }
656
657 if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) {
658 nbr = nbrs[nh_iter];
659 if (!nbr && !if_is_loopback(ifp)) {
660 if (PIM_DEBUG_PIM_NHT)
661 zlog_debug(
662 "%s: pim nbr not found on input interface %s(%s)",
663 __func__, ifp->name,
664 pim->vrf->name);
665 if (nh_iter == mod_val)
666 mod_val++; // Select nexthpath
667 nh_iter++;
668 continue;
669 }
670 }
671
672 if (nh_iter == mod_val) {
673 nexthop->interface = ifp;
674 #if PIM_IPV == 4
675 nexthop->mrib_nexthop_addr = nh_node->gate.ipv4;
676 #else
677 nexthop->mrib_nexthop_addr = nh_node->gate.ipv6;
678 #endif
679 nexthop->mrib_metric_preference = pnc->distance;
680 nexthop->mrib_route_metric = pnc->metric;
681 nexthop->last_lookup = src;
682 nexthop->last_lookup_time = pim_time_monotonic_usec();
683 nexthop->nbr = nbr;
684 found = 1;
685 if (PIM_DEBUG_PIM_NHT)
686 zlog_debug(
687 "%s: (%pPA,%pPA)(%s) selected nhop interface %s addr %pPAs mod_val %u iter %d ecmp %d",
688 __func__, &src, &grp_addr,
689 pim->vrf->name, ifp->name, &nh_addr,
690 mod_val, nh_iter, pim->ecmp_enable);
691 }
692 nh_iter++;
693 }
694
695 if (found)
696 return 1;
697 else
698 return 0;
699 }
700
701 /* This API is used to parse Registered address nexthop update coming from Zebra
702 */
703 int pim_parse_nexthop_update(ZAPI_CALLBACK_ARGS)
704 {
705 struct nexthop *nexthop;
706 struct nexthop *nhlist_head = NULL;
707 struct nexthop *nhlist_tail = NULL;
708 int i;
709 struct pim_rpf rpf;
710 struct pim_nexthop_cache *pnc = NULL;
711 struct interface *ifp = NULL;
712 struct vrf *vrf = vrf_lookup_by_id(vrf_id);
713 struct pim_instance *pim;
714 struct zapi_route nhr;
715 struct prefix match;
716
717 if (!vrf)
718 return 0;
719 pim = vrf->info;
720
721 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
722 zlog_err("%s: Decode of nexthop update from zebra failed",
723 __func__);
724 return 0;
725 }
726
727 rpf.rpf_addr = pim_addr_from_prefix(&match);
728 pnc = pim_nexthop_cache_find(pim, &rpf);
729 if (!pnc) {
730 if (PIM_DEBUG_PIM_NHT)
731 zlog_debug(
732 "%s: Skipping NHT update, addr %pPA is not in local cached DB.",
733 __func__, &rpf.rpf_addr);
734 return 0;
735 }
736
737 pnc->last_update = pim_time_monotonic_usec();
738
739 if (nhr.nexthop_num) {
740 pnc->nexthop_num = 0; // Only increment for pim enabled rpf.
741
742 for (i = 0; i < nhr.nexthop_num; i++) {
743 nexthop = nexthop_from_zapi_nexthop(&nhr.nexthops[i]);
744 switch (nexthop->type) {
745 case NEXTHOP_TYPE_IFINDEX:
746 /*
747 * Connected route (i.e. no nexthop), use
748 * RPF address from nexthop cache (i.e.
749 * destination) as PIM nexthop.
750 */
751 #if PIM_IPV == 4
752 nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
753 nexthop->gate.ipv4 = pnc->rpf.rpf_addr;
754 #else
755 nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
756 nexthop->gate.ipv6 = pnc->rpf.rpf_addr;
757 #endif
758 break;
759 #if PIM_IPV == 4
760 /* RFC5549 IPv4-over-IPv6 nexthop handling:
761 * if we get an IPv6 nexthop in IPv4 PIM, hunt down a
762 * PIM neighbor and use that instead.
763 */
764 case NEXTHOP_TYPE_IPV6_IFINDEX: {
765 struct interface *ifp1 = NULL;
766 struct pim_neighbor *nbr = NULL;
767
768 ifp1 = if_lookup_by_index(nexthop->ifindex,
769 pim->vrf->vrf_id);
770
771 if (!ifp1)
772 nbr = NULL;
773 else
774 /* FIXME: should really use nbr's
775 * secondary address list here
776 */
777 nbr = pim_neighbor_find_if(ifp1);
778
779 /* Overwrite with Nbr address as NH addr */
780 if (nbr)
781 nexthop->gate.ipv4 = nbr->source_addr;
782 else
783 // Mark nexthop address to 0 until PIM
784 // Nbr is resolved.
785 nexthop->gate.ipv4 = PIMADDR_ANY;
786
787 break;
788 }
789 #else
790 case NEXTHOP_TYPE_IPV6_IFINDEX:
791 #endif
792 case NEXTHOP_TYPE_IPV6:
793 case NEXTHOP_TYPE_IPV4:
794 case NEXTHOP_TYPE_IPV4_IFINDEX:
795 case NEXTHOP_TYPE_BLACKHOLE:
796 /* nothing to do for the other nexthop types */
797 break;
798 }
799
800 ifp = if_lookup_by_index(nexthop->ifindex,
801 pim->vrf->vrf_id);
802 if (!ifp) {
803 if (PIM_DEBUG_PIM_NHT) {
804 char buf[NEXTHOP_STRLEN];
805 zlog_debug(
806 "%s: could not find interface for ifindex %d(%s) (addr %s)",
807 __func__, nexthop->ifindex,
808 pim->vrf->name,
809 nexthop2str(nexthop, buf,
810 sizeof(buf)));
811 }
812 nexthop_free(nexthop);
813 continue;
814 }
815
816 if (PIM_DEBUG_PIM_NHT) {
817 #if PIM_IPV == 4
818 pim_addr nhaddr = nexthop->gate.ipv4;
819 #else
820 pim_addr nhaddr = nexthop->gate.ipv6;
821 #endif
822 zlog_debug(
823 "%s: NHT addr %pFX(%s) %d-nhop via %pPA(%s) type %d distance:%u metric:%u ",
824 __func__, &match, pim->vrf->name, i + 1,
825 &nhaddr, ifp->name, nexthop->type,
826 nhr.distance, nhr.metric);
827 }
828
829 if (!ifp->info) {
830 /*
831 * Though Multicast is not enabled on this
832 * Interface store it in database otheriwse we
833 * may miss this update and this will not cause
834 * any issue, because while choosing the path we
835 * are ommitting the Interfaces which are not
836 * multicast enabled
837 */
838 if (PIM_DEBUG_PIM_NHT) {
839 char buf[NEXTHOP_STRLEN];
840
841 zlog_debug(
842 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)",
843 __func__, ifp->name,
844 pim->vrf->name,
845 nexthop->ifindex,
846 nexthop2str(nexthop, buf,
847 sizeof(buf)));
848 }
849 }
850
851 if (nhlist_tail) {
852 nhlist_tail->next = nexthop;
853 nhlist_tail = nexthop;
854 } else {
855 nhlist_tail = nexthop;
856 nhlist_head = nexthop;
857 }
858 // Only keep track of nexthops which are PIM enabled.
859 pnc->nexthop_num++;
860 }
861 /* Reset existing pnc->nexthop before assigning new list */
862 nexthops_free(pnc->nexthop);
863 pnc->nexthop = nhlist_head;
864 if (pnc->nexthop_num) {
865 pnc->flags |= PIM_NEXTHOP_VALID;
866 pnc->distance = nhr.distance;
867 pnc->metric = nhr.metric;
868 }
869 } else {
870 pnc->flags &= ~PIM_NEXTHOP_VALID;
871 pnc->nexthop_num = nhr.nexthop_num;
872 nexthops_free(pnc->nexthop);
873 pnc->nexthop = NULL;
874 }
875 SET_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED);
876
877 if (PIM_DEBUG_PIM_NHT)
878 zlog_debug(
879 "%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d",
880 __func__, &match, pim->vrf->name, nhr.nexthop_num,
881 pnc->nexthop_num, vrf_id, pnc->upstream_hash->count,
882 listcount(pnc->rp_list));
883
884 pim_rpf_set_refresh_time(pim);
885
886 if (listcount(pnc->rp_list))
887 pim_update_rp_nh(pim, pnc);
888 if (pnc->upstream_hash->count)
889 pim_update_upstream_nh(pim, pnc);
890
891 return 0;
892 }
893
894 int pim_ecmp_nexthop_lookup(struct pim_instance *pim,
895 struct pim_nexthop *nexthop, pim_addr src,
896 struct prefix *grp, int neighbor_needed)
897 {
898 struct pim_nexthop_cache *pnc;
899 struct pim_zlookup_nexthop nexthop_tab[router->multipath];
900 struct pim_neighbor *nbrs[router->multipath], *nbr = NULL;
901 struct pim_rpf rpf;
902 int num_ifindex;
903 struct interface *ifps[router->multipath], *ifp;
904 int first_ifindex;
905 int found = 0;
906 uint8_t i = 0;
907 uint32_t hash_val = 0, mod_val = 0;
908 uint32_t num_nbrs = 0;
909 struct pim_interface *pim_ifp;
910
911 if (PIM_DEBUG_PIM_NHT_DETAIL)
912 zlog_debug("%s: Looking up: %pPA(%s), last lookup time: %lld",
913 __func__, &src, pim->vrf->name,
914 nexthop->last_lookup_time);
915
916 rpf.rpf_addr = src;
917
918 pnc = pim_nexthop_cache_find(pim, &rpf);
919 if (pnc) {
920 if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED))
921 return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp,
922 neighbor_needed);
923 }
924
925 memset(nexthop_tab, 0,
926 sizeof(struct pim_zlookup_nexthop) * router->multipath);
927 num_ifindex =
928 zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, src,
929 PIM_NEXTHOP_LOOKUP_MAX);
930 if (num_ifindex < 1) {
931 if (PIM_DEBUG_PIM_NHT)
932 zlog_warn(
933 "%s: could not find nexthop ifindex for address %pPA(%s)",
934 __func__, &src, pim->vrf->name);
935 return 0;
936 }
937
938 memset(&nbrs, 0, sizeof(nbrs));
939 memset(&ifps, 0, sizeof(ifps));
940
941 /*
942 * Look up all interfaces and neighbors,
943 * store for later usage
944 */
945 for (i = 0; i < num_ifindex; i++) {
946 ifps[i] = if_lookup_by_index(nexthop_tab[i].ifindex,
947 pim->vrf->vrf_id);
948 if (ifps[i]) {
949 nbrs[i] = pim_neighbor_find(
950 ifps[i], nexthop_tab[i].nexthop_addr, true);
951
952 if (nbrs[i] || pim_if_connected_to_source(ifps[i], src))
953 num_nbrs++;
954 }
955 }
956
957 // If PIM ECMP enable then choose ECMP path.
958 if (pim->ecmp_enable) {
959 struct prefix src_pfx;
960 uint32_t consider = num_ifindex;
961
962 if (neighbor_needed && num_nbrs < consider)
963 consider = num_nbrs;
964
965 if (consider == 0)
966 return 0;
967
968 pim_addr_to_prefix(&src_pfx, src);
969 hash_val = pim_compute_ecmp_hash(&src_pfx, grp);
970 mod_val = hash_val % consider;
971 if (PIM_DEBUG_PIM_NHT_DETAIL)
972 zlog_debug("%s: hash_val %u mod_val %u", __func__,
973 hash_val, mod_val);
974 }
975
976 i = 0;
977 while (!found && (i < num_ifindex)) {
978 first_ifindex = nexthop_tab[i].ifindex;
979
980 ifp = ifps[i];
981 if (!ifp) {
982 if (PIM_DEBUG_PIM_NHT)
983 zlog_debug(
984 "%s %s: could not find interface for ifindex %d (address %pPA(%s))",
985 __FILE__, __func__, first_ifindex, &src,
986 pim->vrf->name);
987 if (i == mod_val)
988 mod_val++;
989 i++;
990 continue;
991 }
992
993 pim_ifp = ifp->info;
994
995 if (!pim_ifp || !pim_ifp->pim_enable) {
996 if (PIM_DEBUG_PIM_NHT)
997 zlog_debug(
998 "%s: pim not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)",
999 __func__, ifp->name, pim->vrf->name,
1000 first_ifindex, &src);
1001 if (i == mod_val)
1002 mod_val++;
1003 i++;
1004 continue;
1005 }
1006 if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) {
1007 nbr = nbrs[i];
1008 if (PIM_DEBUG_PIM_NHT_DETAIL)
1009 zlog_debug("ifp name: %s(%s), pim nbr: %p",
1010 ifp->name, pim->vrf->name, nbr);
1011 if (!nbr && !if_is_loopback(ifp)) {
1012 if (i == mod_val)
1013 mod_val++;
1014 if (PIM_DEBUG_PIM_NHT)
1015 zlog_debug(
1016 "%s: NBR (%pPA) not found on input interface %s(%s) (RPF for source %pPA)",
1017 __func__,
1018 &nexthop_tab[i].nexthop_addr,
1019 ifp->name, pim->vrf->name,
1020 &src);
1021 i++;
1022 continue;
1023 }
1024 }
1025
1026 if (i == mod_val) {
1027 if (PIM_DEBUG_PIM_NHT)
1028 zlog_debug(
1029 "%s: found nhop %pPA for addr %pPA interface %s(%s) metric %d dist %d",
1030 __func__, &nexthop_tab[i].nexthop_addr,
1031 &src, ifp->name, pim->vrf->name,
1032 nexthop_tab[i].route_metric,
1033 nexthop_tab[i].protocol_distance);
1034 /* update nexthop data */
1035 nexthop->interface = ifp;
1036 nexthop->mrib_nexthop_addr =
1037 nexthop_tab[i].nexthop_addr;
1038 nexthop->mrib_metric_preference =
1039 nexthop_tab[i].protocol_distance;
1040 nexthop->mrib_route_metric =
1041 nexthop_tab[i].route_metric;
1042 nexthop->last_lookup = src;
1043 nexthop->last_lookup_time = pim_time_monotonic_usec();
1044 nexthop->nbr = nbr;
1045 found = 1;
1046 }
1047 i++;
1048 }
1049
1050 if (found)
1051 return 1;
1052 else
1053 return 0;
1054 }
1055
1056 int pim_ecmp_fib_lookup_if_vif_index(struct pim_instance *pim, pim_addr src,
1057 struct prefix *grp)
1058 {
1059 struct pim_nexthop nhop;
1060 int vif_index;
1061 ifindex_t ifindex;
1062
1063 memset(&nhop, 0, sizeof(nhop));
1064 if (!pim_ecmp_nexthop_lookup(pim, &nhop, src, grp, 1)) {
1065 if (PIM_DEBUG_PIM_NHT)
1066 zlog_debug(
1067 "%s: could not find nexthop ifindex for address %pPA(%s)",
1068 __func__, &src, pim->vrf->name);
1069 return -1;
1070 }
1071
1072 ifindex = nhop.interface->ifindex;
1073 if (PIM_DEBUG_PIM_NHT)
1074 zlog_debug(
1075 "%s: found nexthop ifindex=%d (interface %s(%s)) for address %pPA",
1076 __func__, ifindex,
1077 ifindex2ifname(ifindex, pim->vrf->vrf_id),
1078 pim->vrf->name, &src);
1079
1080 vif_index = pim_if_find_vifindex_by_ifindex(pim, ifindex);
1081
1082 if (vif_index < 0) {
1083 if (PIM_DEBUG_PIM_NHT) {
1084 zlog_debug(
1085 "%s: low vif_index=%d(%s) < 1 nexthop for address %pPA",
1086 __func__, vif_index, pim->vrf->name, &src);
1087 }
1088 return -2;
1089 }
1090
1091 return vif_index;
1092 }