]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_nht.c
Merge pull request #8458 from opensourcerouting/xref-5424
[mirror_frr.git] / pimd / pim_nht.c
1 /*
2 * PIM for Quagga
3 * Copyright (C) 2017 Cumulus Networks, Inc.
4 * Chirag Shah
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20 #include <zebra.h>
21 #include "network.h"
22 #include "zclient.h"
23 #include "stream.h"
24 #include "nexthop.h"
25 #include "if.h"
26 #include "hash.h"
27 #include "jhash.h"
28
29 #include "lib/printfrr.h"
30
31 #include "pimd.h"
32 #include "pimd/pim_nht.h"
33 #include "log.h"
34 #include "pim_time.h"
35 #include "pim_oil.h"
36 #include "pim_ifchannel.h"
37 #include "pim_mroute.h"
38 #include "pim_zebra.h"
39 #include "pim_upstream.h"
40 #include "pim_join.h"
41 #include "pim_jp_agg.h"
42 #include "pim_zebra.h"
43 #include "pim_zlookup.h"
44 #include "pim_rp.h"
45
46 /**
47 * pim_sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
48 * command to Zebra.
49 */
50 void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient,
51 struct pim_nexthop_cache *pnc, int command)
52 {
53 struct prefix *p;
54 int ret;
55
56 p = &(pnc->rpf.rpf_addr);
57 ret = zclient_send_rnh(zclient, command, p, false, false,
58 pim->vrf->vrf_id);
59 if (ret == ZCLIENT_SEND_FAILURE)
60 zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
61
62 if (PIM_DEBUG_PIM_NHT)
63 zlog_debug(
64 "%s: NHT %sregistered addr %pFX(%s) with Zebra ret:%d ",
65 __func__,
66 (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", p,
67 pim->vrf->name, ret);
68
69 return;
70 }
71
72 struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim,
73 struct pim_rpf *rpf)
74 {
75 struct pim_nexthop_cache *pnc = NULL;
76 struct pim_nexthop_cache lookup;
77
78 lookup.rpf.rpf_addr.family = rpf->rpf_addr.family;
79 lookup.rpf.rpf_addr.prefixlen = rpf->rpf_addr.prefixlen;
80 lookup.rpf.rpf_addr.u.prefix4.s_addr = rpf->rpf_addr.u.prefix4.s_addr;
81
82 pnc = hash_lookup(pim->rpf_hash, &lookup);
83
84 return pnc;
85 }
86
87 static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim,
88 struct pim_rpf *rpf_addr)
89 {
90 struct pim_nexthop_cache *pnc;
91 char hash_name[64];
92
93 pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE,
94 sizeof(struct pim_nexthop_cache));
95 pnc->rpf.rpf_addr.family = rpf_addr->rpf_addr.family;
96 pnc->rpf.rpf_addr.prefixlen = rpf_addr->rpf_addr.prefixlen;
97 pnc->rpf.rpf_addr.u.prefix4.s_addr =
98 rpf_addr->rpf_addr.u.prefix4.s_addr;
99
100 pnc = hash_get(pim->rpf_hash, pnc, hash_alloc_intern);
101
102 pnc->rp_list = list_new();
103 pnc->rp_list->cmp = pim_rp_list_cmp;
104
105 snprintfrr(hash_name, sizeof(hash_name), "PNC %pFX(%s) Upstream Hash",
106 &pnc->rpf.rpf_addr, pim->vrf->name);
107 pnc->upstream_hash = hash_create_size(8192, pim_upstream_hash_key,
108 pim_upstream_equal, hash_name);
109
110 return pnc;
111 }
112
113 static struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim,
114 struct prefix *addr)
115 {
116 struct pim_nexthop_cache *pnc = NULL;
117 struct pim_rpf rpf;
118 struct zclient *zclient = NULL;
119
120 zclient = pim_zebra_zclient_get();
121 memset(&rpf, 0, sizeof(struct pim_rpf));
122 rpf.rpf_addr.family = addr->family;
123 rpf.rpf_addr.prefixlen = addr->prefixlen;
124 rpf.rpf_addr.u.prefix4 = addr->u.prefix4;
125
126 pnc = pim_nexthop_cache_find(pim, &rpf);
127 if (!pnc) {
128 pnc = pim_nexthop_cache_add(pim, &rpf);
129 pim_sendmsg_zebra_rnh(pim, zclient, pnc,
130 ZEBRA_NEXTHOP_REGISTER);
131 if (PIM_DEBUG_PIM_NHT)
132 zlog_debug(
133 "%s: NHT cache and zebra notification added for %pFX(%s)",
134 __func__, addr, pim->vrf->name);
135 }
136
137 return pnc;
138 }
139
140 /* TBD: this does several distinct things and should probably be split up.
141 * (checking state vs. returning pnc vs. adding upstream vs. adding rp)
142 */
143 int pim_find_or_track_nexthop(struct pim_instance *pim, struct prefix *addr,
144 struct pim_upstream *up, struct rp_info *rp,
145 struct pim_nexthop_cache *out_pnc)
146 {
147 struct pim_nexthop_cache *pnc;
148 struct listnode *ch_node = NULL;
149
150 pnc = pim_nht_get(pim, addr);
151
152 assertf(up || rp, "addr=%pFX", addr);
153
154 if (rp != NULL) {
155 ch_node = listnode_lookup(pnc->rp_list, rp);
156 if (ch_node == NULL)
157 listnode_add_sort(pnc->rp_list, rp);
158 }
159
160 if (up != NULL)
161 hash_get(pnc->upstream_hash, up, hash_alloc_intern);
162
163 if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) {
164 if (out_pnc)
165 memcpy(out_pnc, pnc, sizeof(struct pim_nexthop_cache));
166 return 1;
167 }
168
169 return 0;
170 }
171
172 void pim_nht_bsr_add(struct pim_instance *pim, struct in_addr addr)
173 {
174 struct pim_nexthop_cache *pnc;
175 struct prefix pfx;
176
177 pfx.family = AF_INET;
178 pfx.prefixlen = IPV4_MAX_BITLEN;
179 pfx.u.prefix4 = addr;
180
181 pnc = pim_nht_get(pim, &pfx);
182
183 pnc->bsr_count++;
184 }
185
186 static void pim_nht_drop_maybe(struct pim_instance *pim,
187 struct pim_nexthop_cache *pnc)
188 {
189 if (PIM_DEBUG_PIM_NHT)
190 zlog_debug(
191 "%s: NHT %pFX(%s) rp_list count:%d upstream count:%ld BSR count:%u",
192 __func__, &pnc->rpf.rpf_addr, pim->vrf->name,
193 pnc->rp_list->count, pnc->upstream_hash->count,
194 pnc->bsr_count);
195
196 if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0
197 && pnc->bsr_count == 0) {
198 struct zclient *zclient = pim_zebra_zclient_get();
199
200 pim_sendmsg_zebra_rnh(pim, zclient, pnc,
201 ZEBRA_NEXTHOP_UNREGISTER);
202
203 list_delete(&pnc->rp_list);
204 hash_free(pnc->upstream_hash);
205
206 hash_release(pim->rpf_hash, pnc);
207 if (pnc->nexthop)
208 nexthops_free(pnc->nexthop);
209 XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc);
210 }
211 }
212
213 void pim_delete_tracked_nexthop(struct pim_instance *pim, struct prefix *addr,
214 struct pim_upstream *up, struct rp_info *rp)
215 {
216 struct pim_nexthop_cache *pnc = NULL;
217 struct pim_nexthop_cache lookup;
218 struct pim_upstream *upstream = NULL;
219
220 /* Remove from RPF hash if it is the last entry */
221 lookup.rpf.rpf_addr = *addr;
222 pnc = hash_lookup(pim->rpf_hash, &lookup);
223 if (!pnc) {
224 zlog_warn("attempting to delete nonexistent NHT entry %pFX",
225 addr);
226 return;
227 }
228
229 if (rp) {
230 /* Release the (*, G)upstream from pnc->upstream_hash,
231 * whose Group belongs to the RP getting deleted
232 */
233 frr_each (rb_pim_upstream, &pim->upstream_head, upstream) {
234 struct prefix grp;
235 struct rp_info *trp_info;
236
237 if (!pim_addr_is_any(upstream->sg.src))
238 continue;
239
240 grp.family = AF_INET;
241 grp.prefixlen = IPV4_MAX_BITLEN;
242 grp.u.prefix4 = upstream->sg.grp;
243
244 trp_info = pim_rp_find_match_group(pim, &grp);
245 if (trp_info == rp)
246 hash_release(pnc->upstream_hash, upstream);
247 }
248 listnode_delete(pnc->rp_list, rp);
249 }
250
251 if (up)
252 hash_release(pnc->upstream_hash, up);
253
254 pim_nht_drop_maybe(pim, pnc);
255 }
256
257 void pim_nht_bsr_del(struct pim_instance *pim, struct in_addr addr)
258 {
259 struct pim_nexthop_cache *pnc = NULL;
260 struct pim_nexthop_cache lookup;
261
262 /*
263 * Nothing to do here if the address to unregister
264 * is 0.0.0.0 as that the BSR has not been registered
265 * for tracking yet.
266 */
267 if (addr.s_addr == INADDR_ANY)
268 return;
269
270 lookup.rpf.rpf_addr.family = AF_INET;
271 lookup.rpf.rpf_addr.prefixlen = IPV4_MAX_BITLEN;
272 lookup.rpf.rpf_addr.u.prefix4 = addr;
273
274 pnc = hash_lookup(pim->rpf_hash, &lookup);
275
276 if (!pnc) {
277 zlog_warn("attempting to delete nonexistent NHT BSR entry %pI4",
278 &addr);
279 return;
280 }
281
282 assertf(pnc->bsr_count > 0, "addr=%pI4", &addr);
283 pnc->bsr_count--;
284
285 pim_nht_drop_maybe(pim, pnc);
286 }
287
288 bool pim_nht_bsr_rpf_check(struct pim_instance *pim, struct in_addr bsr_addr,
289 struct interface *src_ifp, struct in_addr src_ip)
290 {
291 struct pim_nexthop_cache *pnc = NULL;
292 struct pim_nexthop_cache lookup;
293 struct pim_neighbor *nbr = NULL;
294 struct nexthop *nh;
295 struct interface *ifp;
296
297 lookup.rpf.rpf_addr.family = AF_INET;
298 lookup.rpf.rpf_addr.prefixlen = IPV4_MAX_BITLEN;
299 lookup.rpf.rpf_addr.u.prefix4 = bsr_addr;
300
301 pnc = hash_lookup(pim->rpf_hash, &lookup);
302 if (!pnc || !CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED)) {
303 /* BSM from a new freshly registered BSR - do a synchronous
304 * zebra query since otherwise we'd drop the first packet,
305 * leading to additional delay in picking up BSM data
306 */
307
308 /* FIXME: this should really be moved into a generic NHT
309 * function that does "add and get immediate result" or maybe
310 * "check cache or get immediate result." But until that can
311 * be worked in, here's a copy of the code below :(
312 */
313 struct pim_zlookup_nexthop nexthop_tab[MULTIPATH_NUM];
314 ifindex_t i;
315 struct interface *ifp = NULL;
316 int num_ifindex;
317
318 memset(nexthop_tab, 0, sizeof(nexthop_tab));
319 num_ifindex = zclient_lookup_nexthop(pim, nexthop_tab,
320 MULTIPATH_NUM, bsr_addr,
321 PIM_NEXTHOP_LOOKUP_MAX);
322
323 if (num_ifindex <= 0)
324 return false;
325
326 for (i = 0; i < num_ifindex; i++) {
327 struct pim_zlookup_nexthop *znh = &nexthop_tab[i];
328
329 /* pim_zlookup_nexthop has no ->type */
330
331 /* 1:1 match code below with znh instead of nh */
332 ifp = if_lookup_by_index(znh->ifindex,
333 pim->vrf->vrf_id);
334
335 if (!ifp || !ifp->info)
336 continue;
337
338 if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
339 return true;
340
341 nbr = pim_neighbor_find(ifp,
342 znh->nexthop_addr.u.prefix4);
343 if (!nbr)
344 continue;
345
346 return znh->ifindex == src_ifp->ifindex
347 && znh->nexthop_addr.u.prefix4.s_addr
348 == src_ip.s_addr;
349 }
350 return false;
351 }
352
353 if (!CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID))
354 return false;
355
356 /* if we accept BSMs from more than one ECMP nexthop, this will cause
357 * BSM message "multiplication" for each ECMP hop. i.e. if you have
358 * 4-way ECMP and 4 hops you end up with 256 copies of each BSM
359 * message.
360 *
361 * so... only accept the first (IPv4) valid nexthop as source.
362 */
363
364 for (nh = pnc->nexthop; nh; nh = nh->next) {
365 struct in_addr nhaddr;
366
367 switch (nh->type) {
368 case NEXTHOP_TYPE_IPV4:
369 if (nh->ifindex == IFINDEX_INTERNAL)
370 continue;
371
372 /* fallthru */
373 case NEXTHOP_TYPE_IPV4_IFINDEX:
374 nhaddr = nh->gate.ipv4;
375 break;
376
377 case NEXTHOP_TYPE_IFINDEX:
378 nhaddr = bsr_addr;
379 break;
380
381 default:
382 continue;
383 }
384
385 ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id);
386 if (!ifp || !ifp->info)
387 continue;
388
389 if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
390 return true;
391
392 /* MRIB (IGP) may be pointing at a router where PIM is down */
393 nbr = pim_neighbor_find(ifp, nhaddr);
394 if (!nbr)
395 continue;
396
397 return nh->ifindex == src_ifp->ifindex
398 && nhaddr.s_addr == src_ip.s_addr;
399 }
400 return false;
401 }
402
403 void pim_rp_nexthop_del(struct rp_info *rp_info)
404 {
405 rp_info->rp.source_nexthop.interface = NULL;
406 rp_info->rp.source_nexthop.mrib_nexthop_addr.u.prefix4.s_addr =
407 PIM_NET_INADDR_ANY;
408 rp_info->rp.source_nexthop.mrib_metric_preference =
409 router->infinite_assert_metric.metric_preference;
410 rp_info->rp.source_nexthop.mrib_route_metric =
411 router->infinite_assert_metric.route_metric;
412 }
413
414 /* Update RP nexthop info based on Nexthop update received from Zebra.*/
415 static void pim_update_rp_nh(struct pim_instance *pim,
416 struct pim_nexthop_cache *pnc)
417 {
418 struct listnode *node = NULL;
419 struct rp_info *rp_info = NULL;
420
421 /*Traverse RP list and update each RP Nexthop info */
422 for (ALL_LIST_ELEMENTS_RO(pnc->rp_list, node, rp_info)) {
423 if (rp_info->rp.rpf_addr.u.prefix4.s_addr == INADDR_NONE)
424 continue;
425
426 // Compute PIM RPF using cached nexthop
427 if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop,
428 &rp_info->rp.rpf_addr,
429 &rp_info->group, 1))
430 pim_rp_nexthop_del(rp_info);
431 }
432 }
433
434 /* Update Upstream nexthop info based on Nexthop update received from Zebra.*/
435 static int pim_update_upstream_nh_helper(struct hash_bucket *bucket, void *arg)
436 {
437 struct pim_instance *pim = (struct pim_instance *)arg;
438 struct pim_upstream *up = (struct pim_upstream *)bucket->data;
439
440 enum pim_rpf_result rpf_result;
441 struct pim_rpf old;
442
443 old.source_nexthop.interface = up->rpf.source_nexthop.interface;
444 rpf_result = pim_rpf_update(pim, up, &old, __func__);
445
446 /* update kernel multicast forwarding cache (MFC); if the
447 * RPF nbr is now unreachable the MFC has already been updated
448 * by pim_rpf_clear
449 */
450 if (rpf_result != PIM_RPF_FAILURE)
451 pim_upstream_mroute_iif_update(up->channel_oil, __func__);
452
453 if (rpf_result == PIM_RPF_CHANGED ||
454 (rpf_result == PIM_RPF_FAILURE && old.source_nexthop.interface))
455 pim_zebra_upstream_rpf_changed(pim, up, &old);
456
457
458 if (PIM_DEBUG_PIM_NHT) {
459 zlog_debug(
460 "%s: NHT upstream %s(%s) old ifp %s new ifp %s",
461 __func__, up->sg_str, pim->vrf->name,
462 old.source_nexthop.interface ? old.source_nexthop
463 .interface->name
464 : "Unknown",
465 up->rpf.source_nexthop.interface ? up->rpf.source_nexthop
466 .interface->name
467 : "Unknown");
468 }
469
470 return HASHWALK_CONTINUE;
471 }
472
473 static int pim_update_upstream_nh(struct pim_instance *pim,
474 struct pim_nexthop_cache *pnc)
475 {
476 hash_walk(pnc->upstream_hash, pim_update_upstream_nh_helper, pim);
477
478 pim_zebra_update_all_interfaces(pim);
479
480 return 0;
481 }
482
483 uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp)
484 {
485 uint32_t hash_val;
486 uint32_t s = 0, g = 0;
487
488 if ((!src))
489 return 0;
490
491 switch (src->family) {
492 case AF_INET: {
493 s = src->u.prefix4.s_addr;
494 s = s == 0 ? 1 : s;
495 if (grp)
496 g = grp->u.prefix4.s_addr;
497 } break;
498 default:
499 break;
500 }
501
502 hash_val = jhash_2words(g, s, 101);
503 return hash_val;
504 }
505
506 static int pim_ecmp_nexthop_search(struct pim_instance *pim,
507 struct pim_nexthop_cache *pnc,
508 struct pim_nexthop *nexthop,
509 struct prefix *src, struct prefix *grp,
510 int neighbor_needed)
511 {
512 struct pim_neighbor *nbrs[MULTIPATH_NUM], *nbr = NULL;
513 struct interface *ifps[MULTIPATH_NUM];
514 struct nexthop *nh_node = NULL;
515 ifindex_t first_ifindex;
516 struct interface *ifp = NULL;
517 uint32_t hash_val = 0, mod_val = 0;
518 uint8_t nh_iter = 0, found = 0;
519 uint32_t i, num_nbrs = 0;
520
521 if (!pnc || !pnc->nexthop_num || !nexthop)
522 return 0;
523
524 memset(&nbrs, 0, sizeof(nbrs));
525 memset(&ifps, 0, sizeof(ifps));
526
527 // Current Nexthop is VALID, check to stay on the current path.
528 if (nexthop->interface && nexthop->interface->info
529 && nexthop->mrib_nexthop_addr.u.prefix4.s_addr
530 != PIM_NET_INADDR_ANY) {
531 /* User configured knob to explicitly switch
532 to new path is disabled or current path
533 metric is less than nexthop update.
534 */
535
536 if (pim->ecmp_rebalance_enable == 0) {
537 uint8_t curr_route_valid = 0;
538 // Check if current nexthop is present in new updated
539 // Nexthop list.
540 // If the current nexthop is not valid, candidate to
541 // choose new Nexthop.
542 for (nh_node = pnc->nexthop; nh_node;
543 nh_node = nh_node->next) {
544 curr_route_valid = (nexthop->interface->ifindex
545 == nh_node->ifindex);
546 if (curr_route_valid)
547 break;
548 }
549
550 if (curr_route_valid
551 && !pim_if_connected_to_source(nexthop->interface,
552 src->u.prefix4)) {
553 nbr = pim_neighbor_find(
554 nexthop->interface,
555 nexthop->mrib_nexthop_addr.u.prefix4);
556 if (!nbr
557 && !if_is_loopback(nexthop->interface)) {
558 if (PIM_DEBUG_PIM_NHT)
559 zlog_debug(
560 "%s: current nexthop does not have nbr ",
561 __func__);
562 } else {
563 /* update metric even if the upstream
564 * neighbor stays unchanged
565 */
566 nexthop->mrib_metric_preference =
567 pnc->distance;
568 nexthop->mrib_route_metric =
569 pnc->metric;
570 if (PIM_DEBUG_PIM_NHT) {
571 char src_str[INET_ADDRSTRLEN];
572 pim_inet4_dump("<addr?>",
573 src->u.prefix4,
574 src_str,
575 sizeof(src_str));
576 char grp_str[INET_ADDRSTRLEN];
577 pim_inet4_dump("<addr?>",
578 grp->u.prefix4,
579 grp_str,
580 sizeof(grp_str));
581 zlog_debug(
582 "%s: (%s,%s)(%s) current nexthop %s is valid, skipping new path selection",
583 __func__, src_str,
584 grp_str, pim->vrf->name,
585 nexthop->interface->name);
586 }
587 return 1;
588 }
589 }
590 }
591 }
592
593 /*
594 * Look up all interfaces and neighbors,
595 * store for later usage
596 */
597 for (nh_node = pnc->nexthop, i = 0; nh_node;
598 nh_node = nh_node->next, i++) {
599 ifps[i] =
600 if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);
601 if (ifps[i]) {
602 nbrs[i] = pim_neighbor_find(ifps[i],
603 nh_node->gate.ipv4);
604 if (nbrs[i] || pim_if_connected_to_source(ifps[i],
605
606 src->u.prefix4))
607 num_nbrs++;
608 }
609 }
610 if (pim->ecmp_enable) {
611 uint32_t consider = pnc->nexthop_num;
612
613 if (neighbor_needed && num_nbrs < consider)
614 consider = num_nbrs;
615
616 if (consider == 0)
617 return 0;
618
619 // PIM ECMP flag is enable then choose ECMP path.
620 hash_val = pim_compute_ecmp_hash(src, grp);
621 mod_val = hash_val % consider;
622 }
623
624 for (nh_node = pnc->nexthop; nh_node && (found == 0);
625 nh_node = nh_node->next) {
626 first_ifindex = nh_node->ifindex;
627 ifp = ifps[nh_iter];
628 if (!ifp) {
629 if (PIM_DEBUG_PIM_NHT) {
630 char addr_str[INET_ADDRSTRLEN];
631 pim_inet4_dump("<addr?>", src->u.prefix4,
632 addr_str, sizeof(addr_str));
633 zlog_debug(
634 "%s %s: could not find interface for ifindex %d (address %s(%s))",
635 __FILE__, __func__, first_ifindex,
636 addr_str, pim->vrf->name);
637 }
638 if (nh_iter == mod_val)
639 mod_val++; // Select nexthpath
640 nh_iter++;
641 continue;
642 }
643 if (!ifp->info) {
644 if (PIM_DEBUG_PIM_NHT) {
645 char addr_str[INET_ADDRSTRLEN];
646 pim_inet4_dump("<addr?>", src->u.prefix4,
647 addr_str, sizeof(addr_str));
648 zlog_debug(
649 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, RPF for source %s)",
650 __func__, ifp->name, pim->vrf->name,
651 first_ifindex, addr_str);
652 }
653 if (nh_iter == mod_val)
654 mod_val++; // Select nexthpath
655 nh_iter++;
656 continue;
657 }
658
659 if (neighbor_needed
660 && !pim_if_connected_to_source(ifp, src->u.prefix4)) {
661 nbr = nbrs[nh_iter];
662 if (!nbr && !if_is_loopback(ifp)) {
663 if (PIM_DEBUG_PIM_NHT)
664 zlog_debug(
665 "%s: pim nbr not found on input interface %s(%s)",
666 __func__, ifp->name,
667 pim->vrf->name);
668 if (nh_iter == mod_val)
669 mod_val++; // Select nexthpath
670 nh_iter++;
671 continue;
672 }
673 }
674
675 if (nh_iter == mod_val) {
676 nexthop->interface = ifp;
677 nexthop->mrib_nexthop_addr.family = AF_INET;
678 nexthop->mrib_nexthop_addr.prefixlen = IPV4_MAX_BITLEN;
679 nexthop->mrib_nexthop_addr.u.prefix4 =
680 nh_node->gate.ipv4;
681 nexthop->mrib_metric_preference = pnc->distance;
682 nexthop->mrib_route_metric = pnc->metric;
683 nexthop->last_lookup = src->u.prefix4;
684 nexthop->last_lookup_time = pim_time_monotonic_usec();
685 nexthop->nbr = nbr;
686 found = 1;
687 if (PIM_DEBUG_PIM_NHT) {
688 char buf[INET_ADDRSTRLEN];
689 char buf2[INET_ADDRSTRLEN];
690 char buf3[INET_ADDRSTRLEN];
691 pim_inet4_dump("<src?>", src->u.prefix4, buf2,
692 sizeof(buf2));
693 pim_inet4_dump("<grp?>", grp->u.prefix4, buf3,
694 sizeof(buf3));
695 pim_inet4_dump(
696 "<rpf?>",
697 nexthop->mrib_nexthop_addr.u.prefix4,
698 buf, sizeof(buf));
699 zlog_debug(
700 "%s: (%s,%s)(%s) selected nhop interface %s addr %s mod_val %u iter %d ecmp %d",
701 __func__, buf2, buf3, pim->vrf->name,
702 ifp->name, buf, mod_val, nh_iter,
703 pim->ecmp_enable);
704 }
705 }
706 nh_iter++;
707 }
708
709 if (found)
710 return 1;
711 else
712 return 0;
713 }
714
715 /* This API is used to parse Registered address nexthop update coming from Zebra
716 */
717 int pim_parse_nexthop_update(ZAPI_CALLBACK_ARGS)
718 {
719 struct nexthop *nexthop;
720 struct nexthop *nhlist_head = NULL;
721 struct nexthop *nhlist_tail = NULL;
722 int i;
723 struct pim_rpf rpf;
724 struct pim_nexthop_cache *pnc = NULL;
725 struct pim_neighbor *nbr = NULL;
726 struct interface *ifp = NULL;
727 struct interface *ifp1 = NULL;
728 struct vrf *vrf = vrf_lookup_by_id(vrf_id);
729 struct pim_instance *pim;
730 struct zapi_route nhr;
731
732 if (!vrf)
733 return 0;
734 pim = vrf->info;
735
736 if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) {
737 zlog_err("%s: Decode of nexthop update from zebra failed",
738 __func__);
739 return 0;
740 }
741
742 if (cmd == ZEBRA_NEXTHOP_UPDATE) {
743 prefix_copy(&rpf.rpf_addr, &nhr.prefix);
744 pnc = pim_nexthop_cache_find(pim, &rpf);
745 if (!pnc) {
746 if (PIM_DEBUG_PIM_NHT)
747 zlog_debug(
748 "%s: Skipping NHT update, addr %pFX is not in local cached DB.",
749 __func__, &rpf.rpf_addr);
750 return 0;
751 }
752 } else {
753 /*
754 * We do not currently handle ZEBRA_IMPORT_CHECK_UPDATE
755 */
756 return 0;
757 }
758
759 pnc->last_update = pim_time_monotonic_usec();
760
761 if (nhr.nexthop_num) {
762 pnc->nexthop_num = 0; // Only increment for pim enabled rpf.
763
764 for (i = 0; i < nhr.nexthop_num; i++) {
765 nexthop = nexthop_from_zapi_nexthop(&nhr.nexthops[i]);
766 switch (nexthop->type) {
767 case NEXTHOP_TYPE_IPV4:
768 case NEXTHOP_TYPE_IPV4_IFINDEX:
769 case NEXTHOP_TYPE_IPV6:
770 case NEXTHOP_TYPE_BLACKHOLE:
771 break;
772 case NEXTHOP_TYPE_IFINDEX:
773 /*
774 * Connected route (i.e. no nexthop), use
775 * RPF address from nexthop cache (i.e.
776 * destination) as PIM nexthop.
777 */
778 nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
779 nexthop->gate.ipv4 =
780 pnc->rpf.rpf_addr.u.prefix4;
781 break;
782 case NEXTHOP_TYPE_IPV6_IFINDEX:
783 ifp1 = if_lookup_by_index(nexthop->ifindex,
784 pim->vrf->vrf_id);
785
786 if (!ifp1)
787 nbr = NULL;
788 else
789 nbr = pim_neighbor_find_if(ifp1);
790 /* Overwrite with Nbr address as NH addr */
791 if (nbr)
792 nexthop->gate.ipv4 = nbr->source_addr;
793 else {
794 // Mark nexthop address to 0 until PIM
795 // Nbr is resolved.
796 nexthop->gate.ipv4.s_addr =
797 PIM_NET_INADDR_ANY;
798 }
799
800 break;
801 }
802
803 ifp = if_lookup_by_index(nexthop->ifindex,
804 pim->vrf->vrf_id);
805 if (!ifp) {
806 if (PIM_DEBUG_PIM_NHT) {
807 char buf[NEXTHOP_STRLEN];
808 zlog_debug(
809 "%s: could not find interface for ifindex %d(%s) (addr %s)",
810 __func__, nexthop->ifindex,
811 pim->vrf->name,
812 nexthop2str(nexthop, buf,
813 sizeof(buf)));
814 }
815 nexthop_free(nexthop);
816 continue;
817 }
818
819 if (PIM_DEBUG_PIM_NHT)
820 zlog_debug(
821 "%s: NHT addr %pFX(%s) %d-nhop via %pI4(%s) type %d distance:%u metric:%u ",
822 __func__, &nhr.prefix, pim->vrf->name,
823 i + 1, &nexthop->gate.ipv4,
824 ifp->name, nexthop->type, nhr.distance,
825 nhr.metric);
826
827 if (!ifp->info) {
828 /*
829 * Though Multicast is not enabled on this
830 * Interface store it in database otheriwse we
831 * may miss this update and this will not cause
832 * any issue, because while choosing the path we
833 * are ommitting the Interfaces which are not
834 * multicast enabled
835 */
836 if (PIM_DEBUG_PIM_NHT) {
837 char buf[NEXTHOP_STRLEN];
838
839 zlog_debug(
840 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)",
841 __func__, ifp->name,
842 pim->vrf->name,
843 nexthop->ifindex,
844 nexthop2str(nexthop, buf,
845 sizeof(buf)));
846 }
847 }
848
849 if (nhlist_tail) {
850 nhlist_tail->next = nexthop;
851 nhlist_tail = nexthop;
852 } else {
853 nhlist_tail = nexthop;
854 nhlist_head = nexthop;
855 }
856 // Only keep track of nexthops which are PIM enabled.
857 pnc->nexthop_num++;
858 }
859 /* Reset existing pnc->nexthop before assigning new list */
860 nexthops_free(pnc->nexthop);
861 pnc->nexthop = nhlist_head;
862 if (pnc->nexthop_num) {
863 pnc->flags |= PIM_NEXTHOP_VALID;
864 pnc->distance = nhr.distance;
865 pnc->metric = nhr.metric;
866 }
867 } else {
868 pnc->flags &= ~PIM_NEXTHOP_VALID;
869 pnc->nexthop_num = nhr.nexthop_num;
870 nexthops_free(pnc->nexthop);
871 pnc->nexthop = NULL;
872 }
873 SET_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED);
874
875 if (PIM_DEBUG_PIM_NHT)
876 zlog_debug(
877 "%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d",
878 __func__, &nhr.prefix, pim->vrf->name, nhr.nexthop_num,
879 pnc->nexthop_num, vrf_id, pnc->upstream_hash->count,
880 listcount(pnc->rp_list));
881
882 pim_rpf_set_refresh_time(pim);
883
884 if (listcount(pnc->rp_list))
885 pim_update_rp_nh(pim, pnc);
886 if (pnc->upstream_hash->count)
887 pim_update_upstream_nh(pim, pnc);
888
889 return 0;
890 }
891
892 int pim_ecmp_nexthop_lookup(struct pim_instance *pim,
893 struct pim_nexthop *nexthop, struct prefix *src,
894 struct prefix *grp, int neighbor_needed)
895 {
896 struct pim_nexthop_cache *pnc;
897 struct pim_zlookup_nexthop nexthop_tab[MULTIPATH_NUM];
898 struct pim_neighbor *nbrs[MULTIPATH_NUM], *nbr = NULL;
899 struct pim_rpf rpf;
900 int num_ifindex;
901 struct interface *ifps[MULTIPATH_NUM], *ifp;
902 int first_ifindex;
903 int found = 0;
904 uint8_t i = 0;
905 uint32_t hash_val = 0, mod_val = 0;
906 uint32_t num_nbrs = 0;
907 char addr_str[PREFIX_STRLEN];
908
909 if (PIM_DEBUG_PIM_NHT) {
910 pim_inet4_dump("<addr?>", src->u.prefix4, addr_str,
911 sizeof(addr_str));
912 zlog_debug("%s: Looking up: %s(%s), last lookup time: %lld",
913 __func__, addr_str, pim->vrf->name,
914 nexthop->last_lookup_time);
915 }
916
917 memset(&rpf, 0, sizeof(struct pim_rpf));
918 rpf.rpf_addr.family = AF_INET;
919 rpf.rpf_addr.prefixlen = IPV4_MAX_BITLEN;
920 rpf.rpf_addr.u.prefix4 = src->u.prefix4;
921
922 pnc = pim_nexthop_cache_find(pim, &rpf);
923 if (pnc) {
924 if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED))
925 return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp,
926 neighbor_needed);
927 }
928
929 memset(nexthop_tab, 0,
930 sizeof(struct pim_zlookup_nexthop) * MULTIPATH_NUM);
931 num_ifindex =
932 zclient_lookup_nexthop(pim, nexthop_tab, MULTIPATH_NUM,
933 src->u.prefix4, PIM_NEXTHOP_LOOKUP_MAX);
934 if (num_ifindex < 1) {
935 if (PIM_DEBUG_PIM_NHT)
936 zlog_warn(
937 "%s: could not find nexthop ifindex for address %s(%s)",
938 __func__, addr_str, pim->vrf->name);
939 return 0;
940 }
941
942 memset(&nbrs, 0, sizeof(nbrs));
943 memset(&ifps, 0, sizeof(ifps));
944
945 /*
946 * Look up all interfaces and neighbors,
947 * store for later usage
948 */
949 for (i = 0; i < num_ifindex; i++) {
950 ifps[i] = if_lookup_by_index(nexthop_tab[i].ifindex,
951 pim->vrf->vrf_id);
952 if (ifps[i]) {
953 nbrs[i] = pim_neighbor_find(
954 ifps[i], nexthop_tab[i].nexthop_addr.u.prefix4);
955 if (nbrs[i]
956 || pim_if_connected_to_source(ifps[i],
957 src->u.prefix4))
958 num_nbrs++;
959 }
960 }
961
962 // If PIM ECMP enable then choose ECMP path.
963 if (pim->ecmp_enable) {
964 uint32_t consider = num_ifindex;
965
966 if (neighbor_needed && num_nbrs < consider)
967 consider = num_nbrs;
968
969 if (consider == 0)
970 return 0;
971
972 hash_val = pim_compute_ecmp_hash(src, grp);
973 mod_val = hash_val % consider;
974 if (PIM_DEBUG_PIM_NHT_DETAIL)
975 zlog_debug("%s: hash_val %u mod_val %u", __func__,
976 hash_val, mod_val);
977 }
978
979 i = 0;
980 while (!found && (i < num_ifindex)) {
981 first_ifindex = nexthop_tab[i].ifindex;
982
983 ifp = ifps[i];
984 if (!ifp) {
985 if (PIM_DEBUG_PIM_NHT)
986 zlog_debug(
987 "%s %s: could not find interface for ifindex %d (address %s(%s))",
988 __FILE__, __func__, first_ifindex,
989 addr_str, pim->vrf->name);
990 if (i == mod_val)
991 mod_val++;
992 i++;
993 continue;
994 }
995
996 if (!ifp->info) {
997 if (PIM_DEBUG_PIM_NHT)
998 zlog_debug(
999 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, RPF for source %s)",
1000 __func__, ifp->name, pim->vrf->name,
1001 first_ifindex, addr_str);
1002 if (i == mod_val)
1003 mod_val++;
1004 i++;
1005 continue;
1006 }
1007 if (neighbor_needed
1008 && !pim_if_connected_to_source(ifp, src->u.prefix4)) {
1009 nbr = nbrs[i];
1010 if (PIM_DEBUG_PIM_NHT_DETAIL)
1011 zlog_debug("ifp name: %s(%s), pim nbr: %p",
1012 ifp->name, pim->vrf->name, nbr);
1013 if (!nbr && !if_is_loopback(ifp)) {
1014 if (i == mod_val)
1015 mod_val++;
1016 i++;
1017 if (PIM_DEBUG_PIM_NHT)
1018 zlog_debug(
1019 "%s: NBR not found on input interface %s(%s) (RPF for source %s)",
1020 __func__, ifp->name,
1021 pim->vrf->name, addr_str);
1022 continue;
1023 }
1024 }
1025
1026 if (i == mod_val) {
1027 if (PIM_DEBUG_PIM_NHT) {
1028 char nexthop_str[PREFIX_STRLEN];
1029
1030 pim_addr_dump("<nexthop?>",
1031 &nexthop_tab[i].nexthop_addr,
1032 nexthop_str, sizeof(nexthop_str));
1033 zlog_debug(
1034 "%s: found nhop %s for addr %s interface %s(%s) metric %d dist %d",
1035 __func__, nexthop_str, addr_str,
1036 ifp->name, pim->vrf->name,
1037 nexthop_tab[i].route_metric,
1038 nexthop_tab[i].protocol_distance);
1039 }
1040 /* update nexthop data */
1041 nexthop->interface = ifp;
1042 nexthop->mrib_nexthop_addr =
1043 nexthop_tab[i].nexthop_addr;
1044 nexthop->mrib_metric_preference =
1045 nexthop_tab[i].protocol_distance;
1046 nexthop->mrib_route_metric =
1047 nexthop_tab[i].route_metric;
1048 nexthop->last_lookup = src->u.prefix4;
1049 nexthop->last_lookup_time = pim_time_monotonic_usec();
1050 nexthop->nbr = nbr;
1051 found = 1;
1052 }
1053 i++;
1054 }
1055
1056 if (found)
1057 return 1;
1058 else
1059 return 0;
1060 }
1061
1062 int pim_ecmp_fib_lookup_if_vif_index(struct pim_instance *pim,
1063 struct prefix *src, struct prefix *grp)
1064 {
1065 struct pim_nexthop nhop;
1066 int vif_index;
1067 ifindex_t ifindex;
1068 char addr_str[PREFIX_STRLEN];
1069
1070 if (PIM_DEBUG_PIM_NHT)
1071 pim_inet4_dump("<addr?>", src->u.prefix4, addr_str,
1072 sizeof(addr_str));
1073
1074 memset(&nhop, 0, sizeof(nhop));
1075 if (!pim_ecmp_nexthop_lookup(pim, &nhop, src, grp, 1)) {
1076 if (PIM_DEBUG_PIM_NHT)
1077 zlog_debug(
1078 "%s: could not find nexthop ifindex for address %s(%s)",
1079 __func__, addr_str, pim->vrf->name);
1080 return -1;
1081 }
1082
1083 ifindex = nhop.interface->ifindex;
1084 if (PIM_DEBUG_PIM_NHT)
1085 zlog_debug(
1086 "%s: found nexthop ifindex=%d (interface %s(%s)) for address %s",
1087 __func__, ifindex,
1088 ifindex2ifname(ifindex, pim->vrf->vrf_id),
1089 pim->vrf->name, addr_str);
1090
1091 vif_index = pim_if_find_vifindex_by_ifindex(pim, ifindex);
1092
1093 if (vif_index < 0) {
1094 if (PIM_DEBUG_PIM_NHT) {
1095 zlog_debug(
1096 "%s: low vif_index=%d(%s) < 1 nexthop for address %s",
1097 __func__, vif_index, pim->vrf->name, addr_str);
1098 }
1099 return -2;
1100 }
1101
1102 return vif_index;
1103 }