3 * Copyright (C) 2017 Cumulus Networks, Inc.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
29 #include "lib/printfrr.h"
32 #include "pimd/pim_nht.h"
36 #include "pim_ifchannel.h"
37 #include "pim_mroute.h"
38 #include "pim_zebra.h"
39 #include "pim_upstream.h"
41 #include "pim_jp_agg.h"
42 #include "pim_zebra.h"
43 #include "pim_zlookup.h"
47 * pim_sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
// pim_sendmsg_zebra_rnh: send a nexthop-tracking command to zebra for the
// RPF address held in pnc.
//
// The prefix handed to zclient_send_rnh() is pnc->rpf.rpf_addr and `command`
// is forwarded unchanged. Callers in this file pass ZEBRA_NEXTHOP_REGISTER or
// ZEBRA_NEXTHOP_UNREGISTER.
// The function returns void: a ZCLIENT_SEND_FAILURE result is only logged.
// NOTE(review): the local declarations, the tail of the zclient_send_rnh()
// argument list and the head of the debug call are not visible in this view;
// confirm against the complete file.
50 void pim_sendmsg_zebra_rnh(struct pim_instance
*pim
, struct zclient
*zclient
,
51 struct pim_nexthop_cache
*pnc
, int command
)
56 p
= &(pnc
->rpf
.rpf_addr
);
57 ret
= zclient_send_rnh(zclient
, command
, p
, false, false,
// A failed send is reported with zlog_warn() and not otherwise handled here.
59 if (ret
== ZCLIENT_SEND_FAILURE
)
60 zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
// Debug trace only: the %s in front of "registered" becomes " " for
// ZEBRA_NEXTHOP_REGISTER and "de" for every other command value.
62 if (PIM_DEBUG_PIM_NHT
)
64 "%s: NHT %sregistered addr %pFX(%s) with Zebra ret:%d ",
66 (command
== ZEBRA_NEXTHOP_REGISTER
) ? " " : "de", p
,
// pim_nexthop_cache_find: look up the nexthop cache entry for rpf->rpf_addr
// in pim->rpf_hash.
//
// A stack key (`lookup`) is filled with only three fields copied from rpf:
// family, prefixlen and the IPv4 address word (u.prefix4.s_addr). Every other
// byte of `lookup` is left uninitialized.
// NOTE(review): this presumes the key and compare callbacks of rpf_hash read
// nothing beyond those fields; confirm where rpf_hash is created.
// NOTE(review): the return statement is not visible in this view; presumably
// the hash_lookup() result (NULL when absent) is returned.
72 struct pim_nexthop_cache
*pim_nexthop_cache_find(struct pim_instance
*pim
,
75 struct pim_nexthop_cache
*pnc
= NULL
;
76 struct pim_nexthop_cache lookup
;
// Build the lookup key from the caller-supplied RPF address.
78 lookup
.rpf
.rpf_addr
.family
= rpf
->rpf_addr
.family
;
79 lookup
.rpf
.rpf_addr
.prefixlen
= rpf
->rpf_addr
.prefixlen
;
80 lookup
.rpf
.rpf_addr
.u
.prefix4
.s_addr
= rpf
->rpf_addr
.u
.prefix4
.s_addr
;
82 pnc
= hash_lookup(pim
->rpf_hash
, &lookup
);
// pim_nexthop_cache_add: allocate a nexthop cache entry for the address in
// rpf_addr and insert it into pim->rpf_hash.
//
// Steps visible here:
//   1. XCALLOC a zeroed struct pim_nexthop_cache (MTYPE_PIM_NEXTHOP_CACHE).
//   2. Copy family, prefixlen and the IPv4 address word from rpf_addr.
//   3. Insert with hash_get(..., hash_alloc_intern) and keep the returned
//      pointer in pnc.
//   4. Create the per-entry RP list, sorted by pim_rp_list_cmp.
//   5. Create the per-entry upstream hash (initial size 8192), named
//      "PNC <addr>(<vrf>) Upstream Hash".
// NOTE(review): the hash_name buffer declaration and the return statement are
// not visible in this view; presumably the new entry is returned.
87 static struct pim_nexthop_cache
*pim_nexthop_cache_add(struct pim_instance
*pim
,
88 struct pim_rpf
*rpf_addr
)
90 struct pim_nexthop_cache
*pnc
;
93 pnc
= XCALLOC(MTYPE_PIM_NEXTHOP_CACHE
,
94 sizeof(struct pim_nexthop_cache
));
95 pnc
->rpf
.rpf_addr
.family
= rpf_addr
->rpf_addr
.family
;
96 pnc
->rpf
.rpf_addr
.prefixlen
= rpf_addr
->rpf_addr
.prefixlen
;
97 pnc
->rpf
.rpf_addr
.u
.prefix4
.s_addr
=
98 rpf_addr
->rpf_addr
.u
.prefix4
.s_addr
;
// From here on pnc is whatever hash_get() handed back for this key.
100 pnc
= hash_get(pim
->rpf_hash
, pnc
, hash_alloc_intern
);
102 pnc
->rp_list
= list_new();
103 pnc
->rp_list
->cmp
= pim_rp_list_cmp
;
// The upstream hash gets a descriptive name built from the address and VRF.
105 snprintfrr(hash_name
, sizeof(hash_name
), "PNC %pFX(%s) Upstream Hash",
106 &pnc
->rpf
.rpf_addr
, pim
->vrf
->name
);
107 pnc
->upstream_hash
= hash_create_size(8192, pim_upstream_hash_key
,
108 pim_upstream_equal
, hash_name
);
// pim_nht_get: find the nexthop cache entry for addr, creating it and
// registering it with zebra when needed.
//
// A zeroed struct pim_rpf is filled from addr (family, prefixlen, IPv4
// address) and used for pim_nexthop_cache_find(). The calls to
// pim_nexthop_cache_add() and pim_sendmsg_zebra_rnh(ZEBRA_NEXTHOP_REGISTER)
// follow.
// NOTE(review): the condition guarding the add/register path is not visible
// in this view; presumably it runs only when the find returned NULL.
// NOTE(review): the return statement is not visible; presumably pnc.
113 static struct pim_nexthop_cache
*pim_nht_get(struct pim_instance
*pim
,
116 struct pim_nexthop_cache
*pnc
= NULL
;
118 struct zclient
*zclient
= NULL
;
120 zclient
= pim_zebra_zclient_get();
// Build the lookup key; only the IPv4 member of the address union is copied.
121 memset(&rpf
, 0, sizeof(struct pim_rpf
));
122 rpf
.rpf_addr
.family
= addr
->family
;
123 rpf
.rpf_addr
.prefixlen
= addr
->prefixlen
;
124 rpf
.rpf_addr
.u
.prefix4
= addr
->u
.prefix4
;
126 pnc
= pim_nexthop_cache_find(pim
, &rpf
);
// Create the entry and ask zebra to start tracking the address.
128 pnc
= pim_nexthop_cache_add(pim
, &rpf
);
129 pim_sendmsg_zebra_rnh(pim
, zclient
, pnc
,
130 ZEBRA_NEXTHOP_REGISTER
);
131 if (PIM_DEBUG_PIM_NHT
)
133 "%s: NHT cache and zebra notification added for %pFX(%s)",
134 __func__
, addr
, pim
->vrf
->name
);
140 /* TBD: this does several distinct things and should probably be split up.
141 * (checking state vs. returning pnc vs. adding upstream vs. adding rp)
// pim_find_or_track_nexthop: obtain the tracked-nexthop entry for addr via
// pim_nht_get() and attach the given RP and/or upstream to it.
//
// pim     - PIM instance owning the cache.
// addr    - address whose nexthop is tracked.
// up, rp  - at least one must be non-NULL (enforced by assertf).
// out_pnc - destination for a byte copy (memcpy) of the cache entry; the
//           copy is made inside the PIM_NEXTHOP_VALID branch.
//
// rp is looked up in pnc->rp_list and added with listnode_add_sort(); up is
// interned into pnc->upstream_hash.
// NOTE(review): the NULL checks around the rp and up handling, the check on
// out_pnc and the return values are not visible in this view; confirm them
// against the complete file.
143 int pim_find_or_track_nexthop(struct pim_instance
*pim
, struct prefix
*addr
,
144 struct pim_upstream
*up
, struct rp_info
*rp
,
145 struct pim_nexthop_cache
*out_pnc
)
147 struct pim_nexthop_cache
*pnc
;
148 struct listnode
*ch_node
= NULL
;
150 pnc
= pim_nht_get(pim
, addr
);
152 assertf(up
|| rp
, "addr=%pFX", addr
);
// Track the RP on this entry.
155 ch_node
= listnode_lookup(pnc
->rp_list
, rp
);
157 listnode_add_sort(pnc
->rp_list
, rp
);
// Track the upstream on this entry.
161 hash_get(pnc
->upstream_hash
, up
, hash_alloc_intern
);
// The caller receives a snapshot of the entry when zebra has marked it valid.
163 if (CHECK_FLAG(pnc
->flags
, PIM_NEXTHOP_VALID
)) {
165 memcpy(out_pnc
, pnc
, sizeof(struct pim_nexthop_cache
));
// pim_nht_bsr_add: start (or join) nexthop tracking for a BSR address.
//
// Builds an AF_INET host prefix (prefixlen IPV4_MAX_BITLEN) from addr and
// resolves it through pim_nht_get().
// NOTE(review): what is done with pnc afterwards is not visible in this view.
// pim_nht_drop_maybe() and pim_nht_bsr_del() read pnc->bsr_count, so
// presumably the counter is incremented here; confirm.
172 void pim_nht_bsr_add(struct pim_instance
*pim
, struct in_addr addr
)
174 struct pim_nexthop_cache
*pnc
;
177 pfx
.family
= AF_INET
;
178 pfx
.prefixlen
= IPV4_MAX_BITLEN
;
179 pfx
.u
.prefix4
= addr
;
181 pnc
= pim_nht_get(pim
, &pfx
);
// pim_nht_drop_maybe: release a nexthop cache entry once nothing uses it.
//
// The entry is torn down only when all three users are gone: rp_list is
// empty, upstream_hash is empty and bsr_count is zero. Teardown order as
// visible here: unregister with zebra, delete the RP list, free the upstream
// hash, remove the entry from pim->rpf_hash, free its nexthop chain, then
// free the entry itself.
// After this call pnc may be freed; callers must not touch it again.
186 static void pim_nht_drop_maybe(struct pim_instance
*pim
,
187 struct pim_nexthop_cache
*pnc
)
189 if (PIM_DEBUG_PIM_NHT
)
191 "%s: NHT %pFX(%s) rp_list count:%d upstream count:%ld BSR count:%u",
192 __func__
, &pnc
->rpf
.rpf_addr
, pim
->vrf
->name
,
193 pnc
->rp_list
->count
, pnc
->upstream_hash
->count
,
// Drop only when no RP, no upstream and no BSR references remain.
196 if (pnc
->rp_list
->count
== 0 && pnc
->upstream_hash
->count
== 0
197 && pnc
->bsr_count
== 0) {
198 struct zclient
*zclient
= pim_zebra_zclient_get();
200 pim_sendmsg_zebra_rnh(pim
, zclient
, pnc
,
201 ZEBRA_NEXTHOP_UNREGISTER
);
203 list_delete(&pnc
->rp_list
);
204 hash_free(pnc
->upstream_hash
);
// Unlink from the cache before the memory is released.
206 hash_release(pim
->rpf_hash
, pnc
);
208 nexthops_free(pnc
->nexthop
);
209 XFREE(MTYPE_PIM_NEXTHOP_CACHE
, pnc
);
// pim_delete_tracked_nexthop: detach an RP and/or an upstream from the
// tracked-nexthop entry for addr, then let pim_nht_drop_maybe() free the
// entry if it became unused.
//
// The entry is located with hash_lookup() using a stack key whose
// rpf.rpf_addr is a full copy of *addr. A missing entry produces the
// "attempting to delete nonexistent NHT entry" warning.
// For the RP case the code walks every upstream in pim->upstream_head, tests
// pim_addr_is_any() on its source, maps its group to an RP with
// pim_rp_find_match_group() and releases upstreams from pnc->upstream_hash;
// rp is then removed from pnc->rp_list.
// For the upstream case, up is released from pnc->upstream_hash.
// NOTE(review): the conditions selecting the RP and upstream paths, the early
// return after the warning, and the comparison applied to trp_info are not
// visible in this view; confirm against the complete file.
213 void pim_delete_tracked_nexthop(struct pim_instance
*pim
, struct prefix
*addr
,
214 struct pim_upstream
*up
, struct rp_info
*rp
)
216 struct pim_nexthop_cache
*pnc
= NULL
;
217 struct pim_nexthop_cache lookup
;
218 struct pim_upstream
*upstream
= NULL
;
220 /* Remove from RPF hash if it is the last entry */
221 lookup
.rpf
.rpf_addr
= *addr
;
222 pnc
= hash_lookup(pim
->rpf_hash
, &lookup
);
224 zlog_warn("attempting to delete nonexistent NHT entry %pFX",
230 /* Release the (*, G)upstream from pnc->upstream_hash,
231 * whose Group belongs to the RP getting deleted
233 frr_each (rb_pim_upstream
, &pim
->upstream_head
, upstream
) {
235 struct rp_info
*trp_info
;
237 if (!pim_addr_is_any(upstream
->sg
.src
))
240 pim_addr_to_prefix(&grp
, upstream
->sg
.grp
);
241 trp_info
= pim_rp_find_match_group(pim
, &grp
);
243 hash_release(pnc
->upstream_hash
, upstream
);
245 listnode_delete(pnc
->rp_list
, rp
);
249 hash_release(pnc
->upstream_hash
, up
);
// May free pnc; it is not used after this call.
251 pim_nht_drop_maybe(pim
, pnc
);
// pim_nht_bsr_del: drop one BSR reference on the tracked-nexthop entry for
// addr and free the entry through pim_nht_drop_maybe() if it became unused.
//
// addr equal to INADDR_ANY is special-cased up front; per the existing
// comment there is nothing to do because no BSR was registered.
// The entry is looked up with an AF_INET host-prefix key. A missing entry
// produces the "nonexistent NHT BSR entry" warning. bsr_count must be
// positive at this point (assertf).
// NOTE(review): the early returns and the bsr_count decrement are not visible
// in this view; confirm against the complete file.
254 void pim_nht_bsr_del(struct pim_instance
*pim
, struct in_addr addr
)
256 struct pim_nexthop_cache
*pnc
= NULL
;
257 struct pim_nexthop_cache lookup
;
260 * Nothing to do here if the address to unregister
261 * is 0.0.0.0 as that the BSR has not been registered
264 if (addr
.s_addr
== INADDR_ANY
)
267 lookup
.rpf
.rpf_addr
.family
= AF_INET
;
268 lookup
.rpf
.rpf_addr
.prefixlen
= IPV4_MAX_BITLEN
;
269 lookup
.rpf
.rpf_addr
.u
.prefix4
= addr
;
271 pnc
= hash_lookup(pim
->rpf_hash
, &lookup
);
274 zlog_warn("attempting to delete nonexistent NHT BSR entry %pI4",
279 assertf(pnc
->bsr_count
> 0, "addr=%pI4", &addr
);
// May free pnc; it is not used after this call.
282 pim_nht_drop_maybe(pim
, pnc
);
// pim_nht_bsr_rpf_check: RPF check for a bootstrap message. Decides whether
// a BSM for bsr_addr that arrived on src_ifp from src_ip came in via the
// nexthop towards the BSR.
//
// Two paths are visible:
//   * No cache entry, or no answer from zebra yet
//     (PIM_NEXTHOP_ANSWER_RECEIVED clear): perform a synchronous
//     zclient_lookup_nexthop() and test the returned nexthops.
//   * Otherwise: require PIM_NEXTHOP_VALID and walk pnc->nexthop.
// In both paths a candidate is accepted when its ifindex equals
// src_ifp->ifindex and its nexthop address equals src_ip. Interfaces that
// cannot be resolved or have no ->info are tested for and, on the visible
// evidence, skipped.
// NOTE(review): the statements executed under most of the if conditions
// (continue / return true / return false) are not visible in this view.
// NOTE(review): the inner block declares its own `ifp`, shadowing the
// function-level one.
285 bool pim_nht_bsr_rpf_check(struct pim_instance
*pim
, struct in_addr bsr_addr
,
286 struct interface
*src_ifp
, struct in_addr src_ip
)
288 struct pim_nexthop_cache
*pnc
= NULL
;
289 struct pim_nexthop_cache lookup
;
290 struct pim_neighbor
*nbr
= NULL
;
292 struct interface
*ifp
;
294 lookup
.rpf
.rpf_addr
.family
= AF_INET
;
295 lookup
.rpf
.rpf_addr
.prefixlen
= IPV4_MAX_BITLEN
;
296 lookup
.rpf
.rpf_addr
.u
.prefix4
= bsr_addr
;
298 pnc
= hash_lookup(pim
->rpf_hash
, &lookup
);
299 if (!pnc
|| !CHECK_FLAG(pnc
->flags
, PIM_NEXTHOP_ANSWER_RECEIVED
)) {
300 /* BSM from a new freshly registered BSR - do a synchronous
301 * zebra query since otherwise we'd drop the first packet,
302 * leading to additional delay in picking up BSM data
305 /* FIXME: this should really be moved into a generic NHT
306 * function that does "add and get immediate result" or maybe
307 * "check cache or get immediate result." But until that can
308 * be worked in, here's a copy of the code below :(
310 struct pim_zlookup_nexthop nexthop_tab
[MULTIPATH_NUM
];
312 struct interface
*ifp
= NULL
;
315 memset(nexthop_tab
, 0, sizeof(nexthop_tab
));
316 num_ifindex
= zclient_lookup_nexthop(pim
, nexthop_tab
,
317 MULTIPATH_NUM
, bsr_addr
,
318 PIM_NEXTHOP_LOOKUP_MAX
);
320 if (num_ifindex
<= 0)
323 for (i
= 0; i
< num_ifindex
; i
++) {
324 struct pim_zlookup_nexthop
*znh
= &nexthop_tab
[i
];
326 /* pim_zlookup_nexthop has no ->type */
328 /* 1:1 match code below with znh instead of nh */
329 ifp
= if_lookup_by_index(znh
->ifindex
,
332 if (!ifp
|| !ifp
->info
)
335 if (if_is_loopback(ifp
) && if_is_loopback(src_ifp
))
338 nbr
= pim_neighbor_find_prefix(ifp
, &znh
->nexthop_addr
);
342 return znh
->ifindex
== src_ifp
->ifindex
343 && znh
->nexthop_addr
.u
.prefix4
.s_addr
349 if (!CHECK_FLAG(pnc
->flags
, PIM_NEXTHOP_VALID
))
352 /* if we accept BSMs from more than one ECMP nexthop, this will cause
353 * BSM message "multiplication" for each ECMP hop. i.e. if you have
354 * 4-way ECMP and 4 hops you end up with 256 copies of each BSM
357 * so... only accept the first (IPv4) valid nexthop as source.
360 for (nh
= pnc
->nexthop
; nh
; nh
= nh
->next
) {
364 #if PIM_IPV == 4 || !defined(PIM_V6_TEMP_BREAK)
365 case NEXTHOP_TYPE_IPV4
:
366 if (nh
->ifindex
== IFINDEX_INTERNAL
)
370 case NEXTHOP_TYPE_IPV4_IFINDEX
:
371 nhaddr
= nh
->gate
.ipv4
;
374 case NEXTHOP_TYPE_IPV6
:
375 if (nh
->ifindex
== IFINDEX_INTERNAL
)
379 case NEXTHOP_TYPE_IPV6_IFINDEX
:
380 nhaddr
= nh
->gate
.ipv6
;
383 case NEXTHOP_TYPE_IFINDEX
:
391 ifp
= if_lookup_by_index(nh
->ifindex
, pim
->vrf
->vrf_id
);
392 if (!ifp
|| !ifp
->info
)
395 if (if_is_loopback(ifp
) && if_is_loopback(src_ifp
))
398 /* MRIB (IGP) may be pointing at a router where PIM is down */
399 nbr
= pim_neighbor_find(ifp
, nhaddr
);
403 return nh
->ifindex
== src_ifp
->ifindex
404 && nhaddr
.s_addr
== src_ip
.s_addr
;
// pim_rp_nexthop_del: reset the cached source nexthop of an RP to the
// no-route state.
//
// Clears the nexthop interface pointer, overwrites the IPv4 nexthop address
// and sets metric preference and route metric to the values kept in
// router->infinite_assert_metric.
// NOTE(review): the value assigned to the nexthop address is on a line not
// visible in this view.
409 void pim_rp_nexthop_del(struct rp_info
*rp_info
)
411 rp_info
->rp
.source_nexthop
.interface
= NULL
;
412 rp_info
->rp
.source_nexthop
.mrib_nexthop_addr
.u
.prefix4
.s_addr
=
414 rp_info
->rp
.source_nexthop
.mrib_metric_preference
=
415 router
->infinite_assert_metric
.metric_preference
;
416 rp_info
->rp
.source_nexthop
.mrib_route_metric
=
417 router
->infinite_assert_metric
.route_metric
;
420 /* Update RP nexthop info based on Nexthop update received from Zebra.*/
// pim_update_rp_nh: recompute the source nexthop of every RP attached to
// pnc->rp_list.
//
// Each RP has its rpf_addr compared against INADDR_NONE first. For the others
// pim_ecmp_nexthop_lookup() is called with the RP source nexthop and RPF
// address; when it returns 0 the nexthop is reset via pim_rp_nexthop_del().
// NOTE(review): the statement under the INADDR_NONE test (presumably
// `continue`) and the remaining lookup arguments are not visible in this
// view.
421 static void pim_update_rp_nh(struct pim_instance
*pim
,
422 struct pim_nexthop_cache
*pnc
)
424 struct listnode
*node
= NULL
;
425 struct rp_info
*rp_info
= NULL
;
427 /*Traverse RP list and update each RP Nexthop info */
428 for (ALL_LIST_ELEMENTS_RO(pnc
->rp_list
, node
, rp_info
)) {
429 if (rp_info
->rp
.rpf_addr
.u
.prefix4
.s_addr
== INADDR_NONE
)
432 // Compute PIM RPF using cached nexthop
433 if (!pim_ecmp_nexthop_lookup(pim
, &rp_info
->rp
.source_nexthop
,
434 &rp_info
->rp
.rpf_addr
,
436 pim_rp_nexthop_del(rp_info
);
440 /* Update Upstream nexthop info based on Nexthop update received from Zebra.*/
// pim_update_upstream_nh_helper: hash_walk() callback that re-evaluates the
// RPF of one upstream after a nexthop update.
//
// bucket->data is the struct pim_upstream; arg is the struct pim_instance.
// The previous RPF interface is saved in `old` before pim_rpf_update() runs.
// Then:
//   * unless the result is PIM_RPF_FAILURE, the input interface of the
//     kernel multicast route is refreshed;
//   * on PIM_RPF_CHANGED, or on PIM_RPF_FAILURE when an interface was
//     previously set, pim_zebra_upstream_rpf_changed() is invoked.
// Returns HASHWALK_CONTINUE.
// NOTE(review): the declaration of `old` and the tail of the debug call are
// not visible in this view.
441 static int pim_update_upstream_nh_helper(struct hash_bucket
*bucket
, void *arg
)
443 struct pim_instance
*pim
= (struct pim_instance
*)arg
;
444 struct pim_upstream
*up
= (struct pim_upstream
*)bucket
->data
;
446 enum pim_rpf_result rpf_result
;
// Remember the interface in use before the RPF is recomputed.
449 old
.source_nexthop
.interface
= up
->rpf
.source_nexthop
.interface
;
450 rpf_result
= pim_rpf_update(pim
, up
, &old
, __func__
);
452 /* update kernel multicast forwarding cache (MFC); if the
453 * RPF nbr is now unreachable the MFC has already been updated
456 if (rpf_result
!= PIM_RPF_FAILURE
)
457 pim_upstream_mroute_iif_update(up
->channel_oil
, __func__
);
459 if (rpf_result
== PIM_RPF_CHANGED
||
460 (rpf_result
== PIM_RPF_FAILURE
&& old
.source_nexthop
.interface
))
461 pim_zebra_upstream_rpf_changed(pim
, up
, &old
);
464 if (PIM_DEBUG_PIM_NHT
) {
466 "%s: NHT upstream %s(%s) old ifp %s new ifp %s",
467 __func__
, up
->sg_str
, pim
->vrf
->name
,
468 old
.source_nexthop
.interface
? old
.source_nexthop
471 up
->rpf
.source_nexthop
.interface
? up
->rpf
.source_nexthop
476 return HASHWALK_CONTINUE
;
// pim_update_upstream_nh: apply a nexthop update to every upstream tracked
// on pnc.
//
// Walks pnc->upstream_hash with pim_update_upstream_nh_helper (pim is passed
// as the walk argument), then calls pim_zebra_update_all_interfaces().
// NOTE(review): the int return value is on a line not visible in this view.
479 static int pim_update_upstream_nh(struct pim_instance
*pim
,
480 struct pim_nexthop_cache
*pnc
)
482 hash_walk(pnc
->upstream_hash
, pim_update_upstream_nh_helper
, pim
);
484 pim_zebra_update_all_interfaces(pim
);
// pim_compute_ecmp_hash: hash a (source, group) pair for ECMP path
// selection.
//
// s and g start at 0 and are loaded from the IPv4 address words of src and
// grp inside a switch on src->family. The result is
// jhash_2words(g, s, 101): group first, then source, with 101 as the initial
// value.
// NOTE(review): the case labels, any NULL checks on src/grp, any adjustment
// of s, and the return statement are not visible in this view.
489 uint32_t pim_compute_ecmp_hash(struct prefix
*src
, struct prefix
*grp
)
492 uint32_t s
= 0, g
= 0;
497 switch (src
->family
) {
499 s
= src
->u
.prefix4
.s_addr
;
502 g
= grp
->u
.prefix4
.s_addr
;
508 hash_val
= jhash_2words(g
, s
, 101);
// pim_ecmp_nexthop_search: choose the RPF nexthop for (src, grp) from the
// nexthops cached in pnc and store the choice in *nexthop.
//
// Phases visible in this block:
//   1. Guard: a NULL pnc, pnc->nexthop_num == 0 or a NULL nexthop ends the
//      search early.
//   2. Stickiness: when *nexthop already names an interface with PIM info and
//      a non-zero nexthop address, and pim->ecmp_rebalance_enable is 0, the
//      code checks whether that interface still appears in pnc->nexthop and
//      still has a neighbor. If so only the metrics are refreshed and the
//      current path is kept ("skipping new path selection").
//   3. Collect an interface and a neighbor per cached nexthop into ifps[] and
//      nbrs[], counting usable ones in num_nbrs.
//   4. With pim->ecmp_enable, mod_val = pim_compute_ecmp_hash(src, grp)
//      modulo `consider` picks the list position to use.
//   5. Walk the list until a nexthop is taken. When the entry at position
//      mod_val is unusable (no interface, no PIM info, no neighbor), mod_val
//      is incremented so the next entry becomes the candidate.
//   6. On selection fill in the interface, an AF_INET host nexthop prefix,
//      distance and metric from pnc, and the last-lookup address and time.
//
// NOTE(review): ifps[] and nbrs[] hold MULTIPATH_NUM slots and are indexed by
// position in pnc->nexthop; no bound check on i is visible here. Confirm the
// list can never be longer than MULTIPATH_NUM.
// NOTE(review): nh_iter is uint8_t while mod_val is uint32_t.
// NOTE(review): the last parameter (used below as neighbor_needed), the
// return values and many branch bodies are not visible in this view.
512 static int pim_ecmp_nexthop_search(struct pim_instance
*pim
,
513 struct pim_nexthop_cache
*pnc
,
514 struct pim_nexthop
*nexthop
,
515 struct prefix
*src
, struct prefix
*grp
,
518 struct pim_neighbor
*nbrs
[MULTIPATH_NUM
], *nbr
= NULL
;
519 struct interface
*ifps
[MULTIPATH_NUM
];
520 struct nexthop
*nh_node
= NULL
;
521 ifindex_t first_ifindex
;
522 struct interface
*ifp
= NULL
;
523 uint32_t hash_val
= 0, mod_val
= 0;
524 uint8_t nh_iter
= 0, found
= 0;
525 uint32_t i
, num_nbrs
= 0;
527 if (!pnc
|| !pnc
->nexthop_num
|| !nexthop
)
530 memset(&nbrs
, 0, sizeof(nbrs
));
531 memset(&ifps
, 0, sizeof(ifps
));
533 // Current Nexthop is VALID, check to stay on the current path.
534 if (nexthop
->interface
&& nexthop
->interface
->info
535 && nexthop
->mrib_nexthop_addr
.u
.prefix4
.s_addr
536 != PIM_NET_INADDR_ANY
) {
537 /* User configured knob to explicitly switch
538 to new path is disabled or current path
539 metric is less than nexthop update.
542 if (pim
->ecmp_rebalance_enable
== 0) {
543 uint8_t curr_route_valid
= 0;
544 // Check if current nexthop is present in new updated
546 // If the current nexthop is not valid, candidate to
547 // choose new Nexthop.
548 for (nh_node
= pnc
->nexthop
; nh_node
;
549 nh_node
= nh_node
->next
) {
550 curr_route_valid
= (nexthop
->interface
->ifindex
551 == nh_node
->ifindex
);
552 if (curr_route_valid
)
557 && !pim_if_connected_to_source(nexthop
->interface
,
559 nbr
= pim_neighbor_find_prefix(
561 &nexthop
->mrib_nexthop_addr
);
563 && !if_is_loopback(nexthop
->interface
)) {
564 if (PIM_DEBUG_PIM_NHT
)
566 "%s: current nexthop does not have nbr ",
569 /* update metric even if the upstream
570 * neighbor stays unchanged
572 nexthop
->mrib_metric_preference
=
574 nexthop
->mrib_route_metric
=
576 if (PIM_DEBUG_PIM_NHT
) {
577 char src_str
[INET_ADDRSTRLEN
];
578 pim_inet4_dump("<addr?>",
582 char grp_str
[INET_ADDRSTRLEN
];
583 pim_inet4_dump("<addr?>",
588 "%s: (%s,%s)(%s) current nexthop %s is valid, skipping new path selection",
590 grp_str
, pim
->vrf
->name
,
591 nexthop
->interface
->name
);
600 * Look up all interfaces and neighbors,
601 * store for later usage
603 for (nh_node
= pnc
->nexthop
, i
= 0; nh_node
;
604 nh_node
= nh_node
->next
, i
++) {
606 if_lookup_by_index(nh_node
->ifindex
, pim
->vrf
->vrf_id
);
608 #if PIM_IPV == 4 || !defined(PIM_V6_TEMP_BREAK)
609 pim_addr nhaddr
= nh_node
->gate
.ipv4
;
611 pim_addr nhaddr
= nh_node
->gate
.ipv6
;
613 nbrs
[i
] = pim_neighbor_find(ifps
[i
], nhaddr
);
614 if (nbrs
[i
] || pim_if_connected_to_source(ifps
[i
],
620 if (pim
->ecmp_enable
) {
621 uint32_t consider
= pnc
->nexthop_num
;
623 if (neighbor_needed
&& num_nbrs
< consider
)
629 // PIM ECMP flag is enable then choose ECMP path.
630 hash_val
= pim_compute_ecmp_hash(src
, grp
);
631 mod_val
= hash_val
% consider
;
634 for (nh_node
= pnc
->nexthop
; nh_node
&& (found
== 0);
635 nh_node
= nh_node
->next
) {
636 first_ifindex
= nh_node
->ifindex
;
639 if (PIM_DEBUG_PIM_NHT
) {
640 char addr_str
[INET_ADDRSTRLEN
];
641 pim_inet4_dump("<addr?>", src
->u
.prefix4
,
642 addr_str
, sizeof(addr_str
));
644 "%s %s: could not find interface for ifindex %d (address %s(%s))",
645 __FILE__
, __func__
, first_ifindex
,
646 addr_str
, pim
->vrf
->name
);
648 if (nh_iter
== mod_val
)
649 mod_val
++; // Select nexthpath
654 if (PIM_DEBUG_PIM_NHT
) {
655 char addr_str
[INET_ADDRSTRLEN
];
656 pim_inet4_dump("<addr?>", src
->u
.prefix4
,
657 addr_str
, sizeof(addr_str
));
659 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, RPF for source %s)",
660 __func__
, ifp
->name
, pim
->vrf
->name
,
661 first_ifindex
, addr_str
);
663 if (nh_iter
== mod_val
)
664 mod_val
++; // Select nexthpath
670 && !pim_if_connected_to_source(ifp
, src
->u
.prefix4
)) {
672 if (!nbr
&& !if_is_loopback(ifp
)) {
673 if (PIM_DEBUG_PIM_NHT
)
675 "%s: pim nbr not found on input interface %s(%s)",
678 if (nh_iter
== mod_val
)
679 mod_val
++; // Select nexthpath
685 if (nh_iter
== mod_val
) {
686 nexthop
->interface
= ifp
;
687 nexthop
->mrib_nexthop_addr
.family
= AF_INET
;
688 nexthop
->mrib_nexthop_addr
.prefixlen
= IPV4_MAX_BITLEN
;
689 nexthop
->mrib_nexthop_addr
.u
.prefix4
=
691 nexthop
->mrib_metric_preference
= pnc
->distance
;
692 nexthop
->mrib_route_metric
= pnc
->metric
;
693 nexthop
->last_lookup
= src
->u
.prefix4
;
694 nexthop
->last_lookup_time
= pim_time_monotonic_usec();
697 if (PIM_DEBUG_PIM_NHT
) {
698 char buf
[INET_ADDRSTRLEN
];
699 char buf2
[INET_ADDRSTRLEN
];
700 char buf3
[INET_ADDRSTRLEN
];
701 pim_inet4_dump("<src?>", src
->u
.prefix4
, buf2
,
703 pim_inet4_dump("<grp?>", grp
->u
.prefix4
, buf3
,
707 nexthop
->mrib_nexthop_addr
.u
.prefix4
,
710 "%s: (%s,%s)(%s) selected nhop interface %s addr %s mod_val %u iter %d ecmp %d",
711 __func__
, buf2
, buf3
, pim
->vrf
->name
,
712 ifp
->name
, buf
, mod_val
, nh_iter
,
725 /* This API is used to parse Registered address nexthop update coming from Zebra
// pim_parse_nexthop_update: zclient callback (ZAPI_CALLBACK_ARGS) that
// consumes a nexthop update from zebra for a tracked address and refreshes
// the matching nexthop cache entry.
//
// Flow visible in this block:
//   1. Decode the message with zapi_nexthop_update_decode(); a decode failure
//      is logged with zlog_err().
//   2. For ZEBRA_NEXTHOP_UPDATE, copy nhr.prefix into a lookup key and find
//      the entry; unknown addresses are skipped with a debug message.
//   3. Stamp pnc->last_update.
//   4. If the update carries nexthops: reset pnc->nexthop_num and convert
//      each zapi nexthop with nexthop_from_zapi_nexthop(). IFINDEX-type
//      nexthops are rewritten to NEXTHOP_TYPE_IPV4_IFINDEX using the tracked
//      address. For IPV6_IFINDEX the gateway is overwritten with the address
//      of a PIM neighbor found on that interface. Nexthops whose interface
//      cannot be resolved are freed. The rest are appended to a new list
//      (nhlist_head / nhlist_tail), which then replaces pnc->nexthop.
//      PIM_NEXTHOP_VALID, distance and metric are set when nexthop_num is
//      non-zero.
//   5. Otherwise: clear PIM_NEXTHOP_VALID, store nhr.nexthop_num and free the
//      old list.
//   6. Set PIM_NEXTHOP_ANSWER_RECEIVED, refresh the RPF timer and propagate
//      the change to attached RPs and upstreams.
//
// NOTE(review): the VRF NULL check, the assignment of `pim`, the return
// values, the place where pnc->nexthop_num is incremented and any reset of
// pnc->nexthop after the nexthops_free() in step 5 are not visible in this
// view; confirm against the complete file.
727 int pim_parse_nexthop_update(ZAPI_CALLBACK_ARGS
)
729 struct nexthop
*nexthop
;
730 struct nexthop
*nhlist_head
= NULL
;
731 struct nexthop
*nhlist_tail
= NULL
;
734 struct pim_nexthop_cache
*pnc
= NULL
;
735 struct pim_neighbor
*nbr
= NULL
;
736 struct interface
*ifp
= NULL
;
737 struct interface
*ifp1
= NULL
;
738 struct vrf
*vrf
= vrf_lookup_by_id(vrf_id
);
739 struct pim_instance
*pim
;
740 struct zapi_route nhr
;
746 if (!zapi_nexthop_update_decode(zclient
->ibuf
, &nhr
)) {
747 zlog_err("%s: Decode of nexthop update from zebra failed",
752 if (cmd
== ZEBRA_NEXTHOP_UPDATE
) {
753 prefix_copy(&rpf
.rpf_addr
, &nhr
.prefix
);
754 pnc
= pim_nexthop_cache_find(pim
, &rpf
);
756 if (PIM_DEBUG_PIM_NHT
)
758 "%s: Skipping NHT update, addr %pFX is not in local cached DB.",
759 __func__
, &rpf
.rpf_addr
);
764 * We do not currently handle ZEBRA_IMPORT_CHECK_UPDATE
769 pnc
->last_update
= pim_time_monotonic_usec();
771 if (nhr
.nexthop_num
) {
772 pnc
->nexthop_num
= 0; // Only increment for pim enabled rpf.
774 for (i
= 0; i
< nhr
.nexthop_num
; i
++) {
775 nexthop
= nexthop_from_zapi_nexthop(&nhr
.nexthops
[i
]);
776 switch (nexthop
->type
) {
777 case NEXTHOP_TYPE_IPV4
:
778 case NEXTHOP_TYPE_IPV4_IFINDEX
:
779 case NEXTHOP_TYPE_IPV6
:
780 case NEXTHOP_TYPE_BLACKHOLE
:
782 case NEXTHOP_TYPE_IFINDEX
:
784 * Connected route (i.e. no nexthop), use
785 * RPF address from nexthop cache (i.e.
786 * destination) as PIM nexthop.
788 nexthop
->type
= NEXTHOP_TYPE_IPV4_IFINDEX
;
790 pnc
->rpf
.rpf_addr
.u
.prefix4
;
792 case NEXTHOP_TYPE_IPV6_IFINDEX
:
793 ifp1
= if_lookup_by_index(nexthop
->ifindex
,
799 nbr
= pim_neighbor_find_if(ifp1
);
800 /* Overwrite with Nbr address as NH addr */
802 #if PIM_IPV == 4 || !defined(PIM_V6_TEMP_BREAK)
803 nexthop
->gate
.ipv4
= nbr
->source_addr
;
805 nexthop
->gate
.ipv6
= nbr
->source_addr
;
808 // Mark nexthop address to 0 until PIM
810 nexthop
->gate
.ipv4
.s_addr
=
817 ifp
= if_lookup_by_index(nexthop
->ifindex
,
820 if (PIM_DEBUG_PIM_NHT
) {
821 char buf
[NEXTHOP_STRLEN
];
823 "%s: could not find interface for ifindex %d(%s) (addr %s)",
824 __func__
, nexthop
->ifindex
,
826 nexthop2str(nexthop
, buf
,
829 nexthop_free(nexthop
);
833 if (PIM_DEBUG_PIM_NHT
)
835 "%s: NHT addr %pFX(%s) %d-nhop via %pI4(%s) type %d distance:%u metric:%u ",
836 __func__
, &nhr
.prefix
, pim
->vrf
->name
,
837 i
+ 1, &nexthop
->gate
.ipv4
,
838 ifp
->name
, nexthop
->type
, nhr
.distance
,
843 * Though Multicast is not enabled on this
844 * Interface store it in database otheriwse we
845 * may miss this update and this will not cause
846 * any issue, because while choosing the path we
847 * are ommitting the Interfaces which are not
850 if (PIM_DEBUG_PIM_NHT
) {
851 char buf
[NEXTHOP_STRLEN
];
854 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)",
858 nexthop2str(nexthop
, buf
,
864 nhlist_tail
->next
= nexthop
;
865 nhlist_tail
= nexthop
;
867 nhlist_tail
= nexthop
;
868 nhlist_head
= nexthop
;
870 // Only keep track of nexthops which are PIM enabled.
873 /* Reset existing pnc->nexthop before assigning new list */
874 nexthops_free(pnc
->nexthop
);
875 pnc
->nexthop
= nhlist_head
;
876 if (pnc
->nexthop_num
) {
877 pnc
->flags
|= PIM_NEXTHOP_VALID
;
878 pnc
->distance
= nhr
.distance
;
879 pnc
->metric
= nhr
.metric
;
882 pnc
->flags
&= ~PIM_NEXTHOP_VALID
;
883 pnc
->nexthop_num
= nhr
.nexthop_num
;
884 nexthops_free(pnc
->nexthop
);
887 SET_FLAG(pnc
->flags
, PIM_NEXTHOP_ANSWER_RECEIVED
);
889 if (PIM_DEBUG_PIM_NHT
)
891 "%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d",
892 __func__
, &nhr
.prefix
, pim
->vrf
->name
, nhr
.nexthop_num
,
893 pnc
->nexthop_num
, vrf_id
, pnc
->upstream_hash
->count
,
894 listcount(pnc
->rp_list
));
896 pim_rpf_set_refresh_time(pim
);
898 if (listcount(pnc
->rp_list
))
899 pim_update_rp_nh(pim
, pnc
);
900 if (pnc
->upstream_hash
->count
)
901 pim_update_upstream_nh(pim
, pnc
);
// pim_ecmp_nexthop_lookup: resolve the RPF nexthop for src (optionally
// hashed together with grp for ECMP) and store it in *nexthop.
//
// pim             - PIM instance (cache and VRF).
// nexthop         - in/out: current choice on entry, new choice on success.
// src, grp        - source and group prefixes; only the IPv4 member is read.
// neighbor_needed - when non-zero, candidates are subject to the PIM
//                   neighbor / connected-source checks.
//
// Flow visible in this block:
//   1. Build an AF_INET host-prefix key from src and consult the nexthop
//      cache. If an entry exists and zebra has answered
//      (PIM_NEXTHOP_ANSWER_RECEIVED), delegate to pim_ecmp_nexthop_search().
//   2. Otherwise query zebra synchronously with zclient_lookup_nexthop() for
//      up to MULTIPATH_NUM nexthops; fewer than one result is a failure.
//   3. Resolve interface and neighbor for each result into ifps[] / nbrs[].
//   4. With pim->ecmp_enable, mod_val = pim_compute_ecmp_hash(src, grp)
//      modulo `consider` selects the starting candidate.
//   5. Loop over candidates until one is accepted, skipping those without an
//      interface, without PIM info or without a neighbor (loopback is
//      exempt from the neighbor requirement).
//   6. On acceptance copy interface, nexthop address, distance, metric and
//      the last-lookup address and time into *nexthop.
//
// NOTE(review): addr_str is filled only inside the PIM_DEBUG_PIM_NHT branch
// near the top, yet later debug messages print it under PIM_DEBUG_PIM_NHT as
// well; both sit behind the same flag in the visible code.
// NOTE(review): the return values, the NULL check on pnc before its flags are
// read, the assignment of num_ifindex and the loop-index handling are not
// visible in this view; confirm against the complete file.
906 int pim_ecmp_nexthop_lookup(struct pim_instance
*pim
,
907 struct pim_nexthop
*nexthop
, struct prefix
*src
,
908 struct prefix
*grp
, int neighbor_needed
)
910 struct pim_nexthop_cache
*pnc
;
911 struct pim_zlookup_nexthop nexthop_tab
[MULTIPATH_NUM
];
912 struct pim_neighbor
*nbrs
[MULTIPATH_NUM
], *nbr
= NULL
;
915 struct interface
*ifps
[MULTIPATH_NUM
], *ifp
;
919 uint32_t hash_val
= 0, mod_val
= 0;
920 uint32_t num_nbrs
= 0;
921 char addr_str
[PREFIX_STRLEN
];
923 if (PIM_DEBUG_PIM_NHT
) {
924 pim_inet4_dump("<addr?>", src
->u
.prefix4
, addr_str
,
926 zlog_debug("%s: Looking up: %s(%s), last lookup time: %lld",
927 __func__
, addr_str
, pim
->vrf
->name
,
928 nexthop
->last_lookup_time
);
931 memset(&rpf
, 0, sizeof(struct pim_rpf
));
932 rpf
.rpf_addr
.family
= AF_INET
;
933 rpf
.rpf_addr
.prefixlen
= IPV4_MAX_BITLEN
;
934 rpf
.rpf_addr
.u
.prefix4
= src
->u
.prefix4
;
936 pnc
= pim_nexthop_cache_find(pim
, &rpf
);
938 if (CHECK_FLAG(pnc
->flags
, PIM_NEXTHOP_ANSWER_RECEIVED
))
939 return pim_ecmp_nexthop_search(pim
, pnc
, nexthop
, src
, grp
,
943 memset(nexthop_tab
, 0,
944 sizeof(struct pim_zlookup_nexthop
) * MULTIPATH_NUM
);
946 zclient_lookup_nexthop(pim
, nexthop_tab
, MULTIPATH_NUM
,
947 src
->u
.prefix4
, PIM_NEXTHOP_LOOKUP_MAX
);
948 if (num_ifindex
< 1) {
949 if (PIM_DEBUG_PIM_NHT
)
951 "%s: could not find nexthop ifindex for address %s(%s)",
952 __func__
, addr_str
, pim
->vrf
->name
);
956 memset(&nbrs
, 0, sizeof(nbrs
));
957 memset(&ifps
, 0, sizeof(ifps
));
960 * Look up all interfaces and neighbors,
961 * store for later usage
963 for (i
= 0; i
< num_ifindex
; i
++) {
964 ifps
[i
] = if_lookup_by_index(nexthop_tab
[i
].ifindex
,
967 nbrs
[i
] = pim_neighbor_find_prefix(
968 ifps
[i
], &nexthop_tab
[i
].nexthop_addr
);
970 || pim_if_connected_to_source(ifps
[i
],
976 // If PIM ECMP enable then choose ECMP path.
977 if (pim
->ecmp_enable
) {
978 uint32_t consider
= num_ifindex
;
980 if (neighbor_needed
&& num_nbrs
< consider
)
986 hash_val
= pim_compute_ecmp_hash(src
, grp
);
987 mod_val
= hash_val
% consider
;
988 if (PIM_DEBUG_PIM_NHT_DETAIL
)
989 zlog_debug("%s: hash_val %u mod_val %u", __func__
,
994 while (!found
&& (i
< num_ifindex
)) {
995 first_ifindex
= nexthop_tab
[i
].ifindex
;
999 if (PIM_DEBUG_PIM_NHT
)
1001 "%s %s: could not find interface for ifindex %d (address %s(%s))",
1002 __FILE__
, __func__
, first_ifindex
,
1003 addr_str
, pim
->vrf
->name
);
1011 if (PIM_DEBUG_PIM_NHT
)
1013 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, RPF for source %s)",
1014 __func__
, ifp
->name
, pim
->vrf
->name
,
1015 first_ifindex
, addr_str
);
1022 && !pim_if_connected_to_source(ifp
, src
->u
.prefix4
)) {
1024 if (PIM_DEBUG_PIM_NHT_DETAIL
)
1025 zlog_debug("ifp name: %s(%s), pim nbr: %p",
1026 ifp
->name
, pim
->vrf
->name
, nbr
);
1027 if (!nbr
&& !if_is_loopback(ifp
)) {
1031 if (PIM_DEBUG_PIM_NHT
)
1033 "%s: NBR not found on input interface %s(%s) (RPF for source %s)",
1034 __func__
, ifp
->name
,
1035 pim
->vrf
->name
, addr_str
);
1041 if (PIM_DEBUG_PIM_NHT
) {
1042 char nexthop_str
[PREFIX_STRLEN
];
1044 pim_addr_dump("<nexthop?>",
1045 &nexthop_tab
[i
].nexthop_addr
,
1046 nexthop_str
, sizeof(nexthop_str
));
1048 "%s: found nhop %s for addr %s interface %s(%s) metric %d dist %d",
1049 __func__
, nexthop_str
, addr_str
,
1050 ifp
->name
, pim
->vrf
->name
,
1051 nexthop_tab
[i
].route_metric
,
1052 nexthop_tab
[i
].protocol_distance
);
1054 /* update nexthop data */
1055 nexthop
->interface
= ifp
;
1056 nexthop
->mrib_nexthop_addr
=
1057 nexthop_tab
[i
].nexthop_addr
;
1058 nexthop
->mrib_metric_preference
=
1059 nexthop_tab
[i
].protocol_distance
;
1060 nexthop
->mrib_route_metric
=
1061 nexthop_tab
[i
].route_metric
;
1062 nexthop
->last_lookup
= src
->u
.prefix4
;
1063 nexthop
->last_lookup_time
= pim_time_monotonic_usec();
1076 int pim_ecmp_fib_lookup_if_vif_index(struct pim_instance
*pim
,
1077 struct prefix
*src
, struct prefix
*grp
)
1079 struct pim_nexthop nhop
;
1082 char addr_str
[PREFIX_STRLEN
];
1084 if (PIM_DEBUG_PIM_NHT
)
1085 pim_inet4_dump("<addr?>", src
->u
.prefix4
, addr_str
,
1088 memset(&nhop
, 0, sizeof(nhop
));
1089 if (!pim_ecmp_nexthop_lookup(pim
, &nhop
, src
, grp
, 1)) {
1090 if (PIM_DEBUG_PIM_NHT
)
1092 "%s: could not find nexthop ifindex for address %s(%s)",
1093 __func__
, addr_str
, pim
->vrf
->name
);
1097 ifindex
= nhop
.interface
->ifindex
;
1098 if (PIM_DEBUG_PIM_NHT
)
1100 "%s: found nexthop ifindex=%d (interface %s(%s)) for address %s",
1102 ifindex2ifname(ifindex
, pim
->vrf
->vrf_id
),
1103 pim
->vrf
->name
, addr_str
);
1105 vif_index
= pim_if_find_vifindex_by_ifindex(pim
, ifindex
);
1107 if (vif_index
< 0) {
1108 if (PIM_DEBUG_PIM_NHT
) {
1110 "%s: low vif_index=%d(%s) < 1 nexthop for address %s",
1111 __func__
, vif_index
, pim
->vrf
->name
, addr_str
);