]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_nht.c
Merge pull request #10590 from donaldsharp/bgp_error_codes
[mirror_frr.git] / pimd / pim_nht.c
1 /*
2 * PIM for Quagga
3 * Copyright (C) 2017 Cumulus Networks, Inc.
4 * Chirag Shah
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20 #include <zebra.h>
21 #include "network.h"
22 #include "zclient.h"
23 #include "stream.h"
24 #include "nexthop.h"
25 #include "if.h"
26 #include "hash.h"
27 #include "jhash.h"
28
29 #include "lib/printfrr.h"
30
31 #include "pimd.h"
32 #include "pimd/pim_nht.h"
33 #include "log.h"
34 #include "pim_time.h"
35 #include "pim_oil.h"
36 #include "pim_ifchannel.h"
37 #include "pim_mroute.h"
38 #include "pim_zebra.h"
39 #include "pim_upstream.h"
40 #include "pim_join.h"
41 #include "pim_jp_agg.h"
42 #include "pim_zebra.h"
43 #include "pim_zlookup.h"
44 #include "pim_rp.h"
45
46 /**
47 * pim_sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
48 * command to Zebra.
49 */
50 void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient,
51 struct pim_nexthop_cache *pnc, int command)
52 {
53 struct prefix *p;
54 int ret;
55
56 p = &(pnc->rpf.rpf_addr);
57 ret = zclient_send_rnh(zclient, command, p, false, false,
58 pim->vrf->vrf_id);
59 if (ret == ZCLIENT_SEND_FAILURE)
60 zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
61
62 if (PIM_DEBUG_PIM_NHT)
63 zlog_debug(
64 "%s: NHT %sregistered addr %pFX(%s) with Zebra ret:%d ",
65 __func__,
66 (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", p,
67 pim->vrf->name, ret);
68
69 return;
70 }
71
72 struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim,
73 struct pim_rpf *rpf)
74 {
75 struct pim_nexthop_cache *pnc = NULL;
76 struct pim_nexthop_cache lookup;
77
78 lookup.rpf.rpf_addr.family = rpf->rpf_addr.family;
79 lookup.rpf.rpf_addr.prefixlen = rpf->rpf_addr.prefixlen;
80 lookup.rpf.rpf_addr.u.prefix4.s_addr = rpf->rpf_addr.u.prefix4.s_addr;
81
82 pnc = hash_lookup(pim->rpf_hash, &lookup);
83
84 return pnc;
85 }
86
87 static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim,
88 struct pim_rpf *rpf_addr)
89 {
90 struct pim_nexthop_cache *pnc;
91 char hash_name[64];
92
93 pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE,
94 sizeof(struct pim_nexthop_cache));
95 pnc->rpf.rpf_addr.family = rpf_addr->rpf_addr.family;
96 pnc->rpf.rpf_addr.prefixlen = rpf_addr->rpf_addr.prefixlen;
97 pnc->rpf.rpf_addr.u.prefix4.s_addr =
98 rpf_addr->rpf_addr.u.prefix4.s_addr;
99
100 pnc = hash_get(pim->rpf_hash, pnc, hash_alloc_intern);
101
102 pnc->rp_list = list_new();
103 pnc->rp_list->cmp = pim_rp_list_cmp;
104
105 snprintfrr(hash_name, sizeof(hash_name), "PNC %pFX(%s) Upstream Hash",
106 &pnc->rpf.rpf_addr, pim->vrf->name);
107 pnc->upstream_hash = hash_create_size(8192, pim_upstream_hash_key,
108 pim_upstream_equal, hash_name);
109
110 return pnc;
111 }
112
113 static struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim,
114 struct prefix *addr)
115 {
116 struct pim_nexthop_cache *pnc = NULL;
117 struct pim_rpf rpf;
118 struct zclient *zclient = NULL;
119
120 zclient = pim_zebra_zclient_get();
121 memset(&rpf, 0, sizeof(struct pim_rpf));
122 rpf.rpf_addr.family = addr->family;
123 rpf.rpf_addr.prefixlen = addr->prefixlen;
124 rpf.rpf_addr.u.prefix4 = addr->u.prefix4;
125
126 pnc = pim_nexthop_cache_find(pim, &rpf);
127 if (!pnc) {
128 pnc = pim_nexthop_cache_add(pim, &rpf);
129 pim_sendmsg_zebra_rnh(pim, zclient, pnc,
130 ZEBRA_NEXTHOP_REGISTER);
131 if (PIM_DEBUG_PIM_NHT)
132 zlog_debug(
133 "%s: NHT cache and zebra notification added for %pFX(%s)",
134 __func__, addr, pim->vrf->name);
135 }
136
137 return pnc;
138 }
139
140 /* TBD: this does several distinct things and should probably be split up.
141 * (checking state vs. returning pnc vs. adding upstream vs. adding rp)
142 */
143 int pim_find_or_track_nexthop(struct pim_instance *pim, struct prefix *addr,
144 struct pim_upstream *up, struct rp_info *rp,
145 struct pim_nexthop_cache *out_pnc)
146 {
147 struct pim_nexthop_cache *pnc;
148 struct listnode *ch_node = NULL;
149
150 pnc = pim_nht_get(pim, addr);
151
152 assertf(up || rp, "addr=%pFX", addr);
153
154 if (rp != NULL) {
155 ch_node = listnode_lookup(pnc->rp_list, rp);
156 if (ch_node == NULL)
157 listnode_add_sort(pnc->rp_list, rp);
158 }
159
160 if (up != NULL)
161 hash_get(pnc->upstream_hash, up, hash_alloc_intern);
162
163 if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) {
164 if (out_pnc)
165 memcpy(out_pnc, pnc, sizeof(struct pim_nexthop_cache));
166 return 1;
167 }
168
169 return 0;
170 }
171
172 void pim_nht_bsr_add(struct pim_instance *pim, struct in_addr addr)
173 {
174 struct pim_nexthop_cache *pnc;
175 struct prefix pfx;
176
177 pfx.family = AF_INET;
178 pfx.prefixlen = IPV4_MAX_BITLEN;
179 pfx.u.prefix4 = addr;
180
181 pnc = pim_nht_get(pim, &pfx);
182
183 pnc->bsr_count++;
184 }
185
186 static void pim_nht_drop_maybe(struct pim_instance *pim,
187 struct pim_nexthop_cache *pnc)
188 {
189 if (PIM_DEBUG_PIM_NHT)
190 zlog_debug(
191 "%s: NHT %pFX(%s) rp_list count:%d upstream count:%ld BSR count:%u",
192 __func__, &pnc->rpf.rpf_addr, pim->vrf->name,
193 pnc->rp_list->count, pnc->upstream_hash->count,
194 pnc->bsr_count);
195
196 if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0
197 && pnc->bsr_count == 0) {
198 struct zclient *zclient = pim_zebra_zclient_get();
199
200 pim_sendmsg_zebra_rnh(pim, zclient, pnc,
201 ZEBRA_NEXTHOP_UNREGISTER);
202
203 list_delete(&pnc->rp_list);
204 hash_free(pnc->upstream_hash);
205
206 hash_release(pim->rpf_hash, pnc);
207 if (pnc->nexthop)
208 nexthops_free(pnc->nexthop);
209 XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc);
210 }
211 }
212
213 void pim_delete_tracked_nexthop(struct pim_instance *pim, struct prefix *addr,
214 struct pim_upstream *up, struct rp_info *rp)
215 {
216 struct pim_nexthop_cache *pnc = NULL;
217 struct pim_nexthop_cache lookup;
218 struct pim_upstream *upstream = NULL;
219
220 /* Remove from RPF hash if it is the last entry */
221 lookup.rpf.rpf_addr = *addr;
222 pnc = hash_lookup(pim->rpf_hash, &lookup);
223 if (!pnc) {
224 zlog_warn("attempting to delete nonexistent NHT entry %pFX",
225 addr);
226 return;
227 }
228
229 if (rp) {
230 /* Release the (*, G)upstream from pnc->upstream_hash,
231 * whose Group belongs to the RP getting deleted
232 */
233 frr_each (rb_pim_upstream, &pim->upstream_head, upstream) {
234 struct prefix grp;
235 struct rp_info *trp_info;
236
237 if (!pim_addr_is_any(upstream->sg.src))
238 continue;
239
240 pim_addr_to_prefix(&grp, upstream->sg.grp);
241 trp_info = pim_rp_find_match_group(pim, &grp);
242 if (trp_info == rp)
243 hash_release(pnc->upstream_hash, upstream);
244 }
245 listnode_delete(pnc->rp_list, rp);
246 }
247
248 if (up)
249 hash_release(pnc->upstream_hash, up);
250
251 pim_nht_drop_maybe(pim, pnc);
252 }
253
254 void pim_nht_bsr_del(struct pim_instance *pim, struct in_addr addr)
255 {
256 struct pim_nexthop_cache *pnc = NULL;
257 struct pim_nexthop_cache lookup;
258
259 /*
260 * Nothing to do here if the address to unregister
261 * is 0.0.0.0 as that the BSR has not been registered
262 * for tracking yet.
263 */
264 if (addr.s_addr == INADDR_ANY)
265 return;
266
267 lookup.rpf.rpf_addr.family = AF_INET;
268 lookup.rpf.rpf_addr.prefixlen = IPV4_MAX_BITLEN;
269 lookup.rpf.rpf_addr.u.prefix4 = addr;
270
271 pnc = hash_lookup(pim->rpf_hash, &lookup);
272
273 if (!pnc) {
274 zlog_warn("attempting to delete nonexistent NHT BSR entry %pI4",
275 &addr);
276 return;
277 }
278
279 assertf(pnc->bsr_count > 0, "addr=%pI4", &addr);
280 pnc->bsr_count--;
281
282 pim_nht_drop_maybe(pim, pnc);
283 }
284
285 bool pim_nht_bsr_rpf_check(struct pim_instance *pim, struct in_addr bsr_addr,
286 struct interface *src_ifp, struct in_addr src_ip)
287 {
288 struct pim_nexthop_cache *pnc = NULL;
289 struct pim_nexthop_cache lookup;
290 struct pim_neighbor *nbr = NULL;
291 struct nexthop *nh;
292 struct interface *ifp;
293
294 lookup.rpf.rpf_addr.family = AF_INET;
295 lookup.rpf.rpf_addr.prefixlen = IPV4_MAX_BITLEN;
296 lookup.rpf.rpf_addr.u.prefix4 = bsr_addr;
297
298 pnc = hash_lookup(pim->rpf_hash, &lookup);
299 if (!pnc || !CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED)) {
300 /* BSM from a new freshly registered BSR - do a synchronous
301 * zebra query since otherwise we'd drop the first packet,
302 * leading to additional delay in picking up BSM data
303 */
304
305 /* FIXME: this should really be moved into a generic NHT
306 * function that does "add and get immediate result" or maybe
307 * "check cache or get immediate result." But until that can
308 * be worked in, here's a copy of the code below :(
309 */
310 struct pim_zlookup_nexthop nexthop_tab[MULTIPATH_NUM];
311 ifindex_t i;
312 struct interface *ifp = NULL;
313 int num_ifindex;
314
315 memset(nexthop_tab, 0, sizeof(nexthop_tab));
316 num_ifindex = zclient_lookup_nexthop(pim, nexthop_tab,
317 MULTIPATH_NUM, bsr_addr,
318 PIM_NEXTHOP_LOOKUP_MAX);
319
320 if (num_ifindex <= 0)
321 return false;
322
323 for (i = 0; i < num_ifindex; i++) {
324 struct pim_zlookup_nexthop *znh = &nexthop_tab[i];
325
326 /* pim_zlookup_nexthop has no ->type */
327
328 /* 1:1 match code below with znh instead of nh */
329 ifp = if_lookup_by_index(znh->ifindex,
330 pim->vrf->vrf_id);
331
332 if (!ifp || !ifp->info)
333 continue;
334
335 if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
336 return true;
337
338 nbr = pim_neighbor_find_prefix(ifp, &znh->nexthop_addr);
339 if (!nbr)
340 continue;
341
342 return znh->ifindex == src_ifp->ifindex
343 && znh->nexthop_addr.u.prefix4.s_addr
344 == src_ip.s_addr;
345 }
346 return false;
347 }
348
349 if (!CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID))
350 return false;
351
352 /* if we accept BSMs from more than one ECMP nexthop, this will cause
353 * BSM message "multiplication" for each ECMP hop. i.e. if you have
354 * 4-way ECMP and 4 hops you end up with 256 copies of each BSM
355 * message.
356 *
357 * so... only accept the first (IPv4) valid nexthop as source.
358 */
359
360 for (nh = pnc->nexthop; nh; nh = nh->next) {
361 pim_addr nhaddr;
362
363 switch (nh->type) {
364 #if PIM_IPV == 4 || !defined(PIM_V6_TEMP_BREAK)
365 case NEXTHOP_TYPE_IPV4:
366 if (nh->ifindex == IFINDEX_INTERNAL)
367 continue;
368
369 /* fallthru */
370 case NEXTHOP_TYPE_IPV4_IFINDEX:
371 nhaddr = nh->gate.ipv4;
372 break;
373 #else
374 case NEXTHOP_TYPE_IPV6:
375 if (nh->ifindex == IFINDEX_INTERNAL)
376 continue;
377
378 /* fallthru */
379 case NEXTHOP_TYPE_IPV6_IFINDEX:
380 nhaddr = nh->gate.ipv6;
381 break;
382 #endif
383 case NEXTHOP_TYPE_IFINDEX:
384 nhaddr = bsr_addr;
385 break;
386
387 default:
388 continue;
389 }
390
391 ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id);
392 if (!ifp || !ifp->info)
393 continue;
394
395 if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
396 return true;
397
398 /* MRIB (IGP) may be pointing at a router where PIM is down */
399 nbr = pim_neighbor_find(ifp, nhaddr);
400 if (!nbr)
401 continue;
402
403 return nh->ifindex == src_ifp->ifindex
404 && nhaddr.s_addr == src_ip.s_addr;
405 }
406 return false;
407 }
408
409 void pim_rp_nexthop_del(struct rp_info *rp_info)
410 {
411 rp_info->rp.source_nexthop.interface = NULL;
412 rp_info->rp.source_nexthop.mrib_nexthop_addr.u.prefix4.s_addr =
413 PIM_NET_INADDR_ANY;
414 rp_info->rp.source_nexthop.mrib_metric_preference =
415 router->infinite_assert_metric.metric_preference;
416 rp_info->rp.source_nexthop.mrib_route_metric =
417 router->infinite_assert_metric.route_metric;
418 }
419
420 /* Update RP nexthop info based on Nexthop update received from Zebra.*/
421 static void pim_update_rp_nh(struct pim_instance *pim,
422 struct pim_nexthop_cache *pnc)
423 {
424 struct listnode *node = NULL;
425 struct rp_info *rp_info = NULL;
426
427 /*Traverse RP list and update each RP Nexthop info */
428 for (ALL_LIST_ELEMENTS_RO(pnc->rp_list, node, rp_info)) {
429 if (rp_info->rp.rpf_addr.u.prefix4.s_addr == INADDR_NONE)
430 continue;
431
432 // Compute PIM RPF using cached nexthop
433 if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop,
434 &rp_info->rp.rpf_addr,
435 &rp_info->group, 1))
436 pim_rp_nexthop_del(rp_info);
437 }
438 }
439
440 /* Update Upstream nexthop info based on Nexthop update received from Zebra.*/
441 static int pim_update_upstream_nh_helper(struct hash_bucket *bucket, void *arg)
442 {
443 struct pim_instance *pim = (struct pim_instance *)arg;
444 struct pim_upstream *up = (struct pim_upstream *)bucket->data;
445
446 enum pim_rpf_result rpf_result;
447 struct pim_rpf old;
448
449 old.source_nexthop.interface = up->rpf.source_nexthop.interface;
450 rpf_result = pim_rpf_update(pim, up, &old, __func__);
451
452 /* update kernel multicast forwarding cache (MFC); if the
453 * RPF nbr is now unreachable the MFC has already been updated
454 * by pim_rpf_clear
455 */
456 if (rpf_result != PIM_RPF_FAILURE)
457 pim_upstream_mroute_iif_update(up->channel_oil, __func__);
458
459 if (rpf_result == PIM_RPF_CHANGED ||
460 (rpf_result == PIM_RPF_FAILURE && old.source_nexthop.interface))
461 pim_zebra_upstream_rpf_changed(pim, up, &old);
462
463
464 if (PIM_DEBUG_PIM_NHT) {
465 zlog_debug(
466 "%s: NHT upstream %s(%s) old ifp %s new ifp %s",
467 __func__, up->sg_str, pim->vrf->name,
468 old.source_nexthop.interface ? old.source_nexthop
469 .interface->name
470 : "Unknown",
471 up->rpf.source_nexthop.interface ? up->rpf.source_nexthop
472 .interface->name
473 : "Unknown");
474 }
475
476 return HASHWALK_CONTINUE;
477 }
478
479 static int pim_update_upstream_nh(struct pim_instance *pim,
480 struct pim_nexthop_cache *pnc)
481 {
482 hash_walk(pnc->upstream_hash, pim_update_upstream_nh_helper, pim);
483
484 pim_zebra_update_all_interfaces(pim);
485
486 return 0;
487 }
488
489 uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp)
490 {
491 uint32_t hash_val;
492 uint32_t s = 0, g = 0;
493
494 if ((!src))
495 return 0;
496
497 switch (src->family) {
498 case AF_INET: {
499 s = src->u.prefix4.s_addr;
500 s = s == 0 ? 1 : s;
501 if (grp)
502 g = grp->u.prefix4.s_addr;
503 } break;
504 default:
505 break;
506 }
507
508 hash_val = jhash_2words(g, s, 101);
509 return hash_val;
510 }
511
512 static int pim_ecmp_nexthop_search(struct pim_instance *pim,
513 struct pim_nexthop_cache *pnc,
514 struct pim_nexthop *nexthop,
515 struct prefix *src, struct prefix *grp,
516 int neighbor_needed)
517 {
518 struct pim_neighbor *nbrs[MULTIPATH_NUM], *nbr = NULL;
519 struct interface *ifps[MULTIPATH_NUM];
520 struct nexthop *nh_node = NULL;
521 ifindex_t first_ifindex;
522 struct interface *ifp = NULL;
523 uint32_t hash_val = 0, mod_val = 0;
524 uint8_t nh_iter = 0, found = 0;
525 uint32_t i, num_nbrs = 0;
526
527 if (!pnc || !pnc->nexthop_num || !nexthop)
528 return 0;
529
530 memset(&nbrs, 0, sizeof(nbrs));
531 memset(&ifps, 0, sizeof(ifps));
532
533 // Current Nexthop is VALID, check to stay on the current path.
534 if (nexthop->interface && nexthop->interface->info
535 && nexthop->mrib_nexthop_addr.u.prefix4.s_addr
536 != PIM_NET_INADDR_ANY) {
537 /* User configured knob to explicitly switch
538 to new path is disabled or current path
539 metric is less than nexthop update.
540 */
541
542 if (pim->ecmp_rebalance_enable == 0) {
543 uint8_t curr_route_valid = 0;
544 // Check if current nexthop is present in new updated
545 // Nexthop list.
546 // If the current nexthop is not valid, candidate to
547 // choose new Nexthop.
548 for (nh_node = pnc->nexthop; nh_node;
549 nh_node = nh_node->next) {
550 curr_route_valid = (nexthop->interface->ifindex
551 == nh_node->ifindex);
552 if (curr_route_valid)
553 break;
554 }
555
556 if (curr_route_valid
557 && !pim_if_connected_to_source(nexthop->interface,
558 src->u.prefix4)) {
559 nbr = pim_neighbor_find_prefix(
560 nexthop->interface,
561 &nexthop->mrib_nexthop_addr);
562 if (!nbr
563 && !if_is_loopback(nexthop->interface)) {
564 if (PIM_DEBUG_PIM_NHT)
565 zlog_debug(
566 "%s: current nexthop does not have nbr ",
567 __func__);
568 } else {
569 /* update metric even if the upstream
570 * neighbor stays unchanged
571 */
572 nexthop->mrib_metric_preference =
573 pnc->distance;
574 nexthop->mrib_route_metric =
575 pnc->metric;
576 if (PIM_DEBUG_PIM_NHT) {
577 char src_str[INET_ADDRSTRLEN];
578 pim_inet4_dump("<addr?>",
579 src->u.prefix4,
580 src_str,
581 sizeof(src_str));
582 char grp_str[INET_ADDRSTRLEN];
583 pim_inet4_dump("<addr?>",
584 grp->u.prefix4,
585 grp_str,
586 sizeof(grp_str));
587 zlog_debug(
588 "%s: (%s,%s)(%s) current nexthop %s is valid, skipping new path selection",
589 __func__, src_str,
590 grp_str, pim->vrf->name,
591 nexthop->interface->name);
592 }
593 return 1;
594 }
595 }
596 }
597 }
598
599 /*
600 * Look up all interfaces and neighbors,
601 * store for later usage
602 */
603 for (nh_node = pnc->nexthop, i = 0; nh_node;
604 nh_node = nh_node->next, i++) {
605 ifps[i] =
606 if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);
607 if (ifps[i]) {
608 #if PIM_IPV == 4 || !defined(PIM_V6_TEMP_BREAK)
609 pim_addr nhaddr = nh_node->gate.ipv4;
610 #else
611 pim_addr nhaddr = nh_node->gate.ipv6;
612 #endif
613 nbrs[i] = pim_neighbor_find(ifps[i], nhaddr);
614 if (nbrs[i] || pim_if_connected_to_source(ifps[i],
615
616 src->u.prefix4))
617 num_nbrs++;
618 }
619 }
620 if (pim->ecmp_enable) {
621 uint32_t consider = pnc->nexthop_num;
622
623 if (neighbor_needed && num_nbrs < consider)
624 consider = num_nbrs;
625
626 if (consider == 0)
627 return 0;
628
629 // PIM ECMP flag is enable then choose ECMP path.
630 hash_val = pim_compute_ecmp_hash(src, grp);
631 mod_val = hash_val % consider;
632 }
633
634 for (nh_node = pnc->nexthop; nh_node && (found == 0);
635 nh_node = nh_node->next) {
636 first_ifindex = nh_node->ifindex;
637 ifp = ifps[nh_iter];
638 if (!ifp) {
639 if (PIM_DEBUG_PIM_NHT) {
640 char addr_str[INET_ADDRSTRLEN];
641 pim_inet4_dump("<addr?>", src->u.prefix4,
642 addr_str, sizeof(addr_str));
643 zlog_debug(
644 "%s %s: could not find interface for ifindex %d (address %s(%s))",
645 __FILE__, __func__, first_ifindex,
646 addr_str, pim->vrf->name);
647 }
648 if (nh_iter == mod_val)
649 mod_val++; // Select nexthpath
650 nh_iter++;
651 continue;
652 }
653 if (!ifp->info) {
654 if (PIM_DEBUG_PIM_NHT) {
655 char addr_str[INET_ADDRSTRLEN];
656 pim_inet4_dump("<addr?>", src->u.prefix4,
657 addr_str, sizeof(addr_str));
658 zlog_debug(
659 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, RPF for source %s)",
660 __func__, ifp->name, pim->vrf->name,
661 first_ifindex, addr_str);
662 }
663 if (nh_iter == mod_val)
664 mod_val++; // Select nexthpath
665 nh_iter++;
666 continue;
667 }
668
669 if (neighbor_needed
670 && !pim_if_connected_to_source(ifp, src->u.prefix4)) {
671 nbr = nbrs[nh_iter];
672 if (!nbr && !if_is_loopback(ifp)) {
673 if (PIM_DEBUG_PIM_NHT)
674 zlog_debug(
675 "%s: pim nbr not found on input interface %s(%s)",
676 __func__, ifp->name,
677 pim->vrf->name);
678 if (nh_iter == mod_val)
679 mod_val++; // Select nexthpath
680 nh_iter++;
681 continue;
682 }
683 }
684
685 if (nh_iter == mod_val) {
686 nexthop->interface = ifp;
687 nexthop->mrib_nexthop_addr.family = AF_INET;
688 nexthop->mrib_nexthop_addr.prefixlen = IPV4_MAX_BITLEN;
689 nexthop->mrib_nexthop_addr.u.prefix4 =
690 nh_node->gate.ipv4;
691 nexthop->mrib_metric_preference = pnc->distance;
692 nexthop->mrib_route_metric = pnc->metric;
693 nexthop->last_lookup = src->u.prefix4;
694 nexthop->last_lookup_time = pim_time_monotonic_usec();
695 nexthop->nbr = nbr;
696 found = 1;
697 if (PIM_DEBUG_PIM_NHT) {
698 char buf[INET_ADDRSTRLEN];
699 char buf2[INET_ADDRSTRLEN];
700 char buf3[INET_ADDRSTRLEN];
701 pim_inet4_dump("<src?>", src->u.prefix4, buf2,
702 sizeof(buf2));
703 pim_inet4_dump("<grp?>", grp->u.prefix4, buf3,
704 sizeof(buf3));
705 pim_inet4_dump(
706 "<rpf?>",
707 nexthop->mrib_nexthop_addr.u.prefix4,
708 buf, sizeof(buf));
709 zlog_debug(
710 "%s: (%s,%s)(%s) selected nhop interface %s addr %s mod_val %u iter %d ecmp %d",
711 __func__, buf2, buf3, pim->vrf->name,
712 ifp->name, buf, mod_val, nh_iter,
713 pim->ecmp_enable);
714 }
715 }
716 nh_iter++;
717 }
718
719 if (found)
720 return 1;
721 else
722 return 0;
723 }
724
725 /* This API is used to parse Registered address nexthop update coming from Zebra
726 */
727 int pim_parse_nexthop_update(ZAPI_CALLBACK_ARGS)
728 {
729 struct nexthop *nexthop;
730 struct nexthop *nhlist_head = NULL;
731 struct nexthop *nhlist_tail = NULL;
732 int i;
733 struct pim_rpf rpf;
734 struct pim_nexthop_cache *pnc = NULL;
735 struct pim_neighbor *nbr = NULL;
736 struct interface *ifp = NULL;
737 struct interface *ifp1 = NULL;
738 struct vrf *vrf = vrf_lookup_by_id(vrf_id);
739 struct pim_instance *pim;
740 struct zapi_route nhr;
741
742 if (!vrf)
743 return 0;
744 pim = vrf->info;
745
746 if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) {
747 zlog_err("%s: Decode of nexthop update from zebra failed",
748 __func__);
749 return 0;
750 }
751
752 if (cmd == ZEBRA_NEXTHOP_UPDATE) {
753 prefix_copy(&rpf.rpf_addr, &nhr.prefix);
754 pnc = pim_nexthop_cache_find(pim, &rpf);
755 if (!pnc) {
756 if (PIM_DEBUG_PIM_NHT)
757 zlog_debug(
758 "%s: Skipping NHT update, addr %pFX is not in local cached DB.",
759 __func__, &rpf.rpf_addr);
760 return 0;
761 }
762 } else {
763 /*
764 * We do not currently handle ZEBRA_IMPORT_CHECK_UPDATE
765 */
766 return 0;
767 }
768
769 pnc->last_update = pim_time_monotonic_usec();
770
771 if (nhr.nexthop_num) {
772 pnc->nexthop_num = 0; // Only increment for pim enabled rpf.
773
774 for (i = 0; i < nhr.nexthop_num; i++) {
775 nexthop = nexthop_from_zapi_nexthop(&nhr.nexthops[i]);
776 switch (nexthop->type) {
777 case NEXTHOP_TYPE_IPV4:
778 case NEXTHOP_TYPE_IPV4_IFINDEX:
779 case NEXTHOP_TYPE_IPV6:
780 case NEXTHOP_TYPE_BLACKHOLE:
781 break;
782 case NEXTHOP_TYPE_IFINDEX:
783 /*
784 * Connected route (i.e. no nexthop), use
785 * RPF address from nexthop cache (i.e.
786 * destination) as PIM nexthop.
787 */
788 nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
789 nexthop->gate.ipv4 =
790 pnc->rpf.rpf_addr.u.prefix4;
791 break;
792 case NEXTHOP_TYPE_IPV6_IFINDEX:
793 ifp1 = if_lookup_by_index(nexthop->ifindex,
794 pim->vrf->vrf_id);
795
796 if (!ifp1)
797 nbr = NULL;
798 else
799 nbr = pim_neighbor_find_if(ifp1);
800 /* Overwrite with Nbr address as NH addr */
801 if (nbr)
802 #if PIM_IPV == 4 || !defined(PIM_V6_TEMP_BREAK)
803 nexthop->gate.ipv4 = nbr->source_addr;
804 #else
805 nexthop->gate.ipv6 = nbr->source_addr;
806 #endif
807 else {
808 // Mark nexthop address to 0 until PIM
809 // Nbr is resolved.
810 nexthop->gate.ipv4.s_addr =
811 PIM_NET_INADDR_ANY;
812 }
813
814 break;
815 }
816
817 ifp = if_lookup_by_index(nexthop->ifindex,
818 pim->vrf->vrf_id);
819 if (!ifp) {
820 if (PIM_DEBUG_PIM_NHT) {
821 char buf[NEXTHOP_STRLEN];
822 zlog_debug(
823 "%s: could not find interface for ifindex %d(%s) (addr %s)",
824 __func__, nexthop->ifindex,
825 pim->vrf->name,
826 nexthop2str(nexthop, buf,
827 sizeof(buf)));
828 }
829 nexthop_free(nexthop);
830 continue;
831 }
832
833 if (PIM_DEBUG_PIM_NHT)
834 zlog_debug(
835 "%s: NHT addr %pFX(%s) %d-nhop via %pI4(%s) type %d distance:%u metric:%u ",
836 __func__, &nhr.prefix, pim->vrf->name,
837 i + 1, &nexthop->gate.ipv4,
838 ifp->name, nexthop->type, nhr.distance,
839 nhr.metric);
840
841 if (!ifp->info) {
842 /*
843 * Though Multicast is not enabled on this
844 * Interface store it in database otheriwse we
845 * may miss this update and this will not cause
846 * any issue, because while choosing the path we
847 * are ommitting the Interfaces which are not
848 * multicast enabled
849 */
850 if (PIM_DEBUG_PIM_NHT) {
851 char buf[NEXTHOP_STRLEN];
852
853 zlog_debug(
854 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)",
855 __func__, ifp->name,
856 pim->vrf->name,
857 nexthop->ifindex,
858 nexthop2str(nexthop, buf,
859 sizeof(buf)));
860 }
861 }
862
863 if (nhlist_tail) {
864 nhlist_tail->next = nexthop;
865 nhlist_tail = nexthop;
866 } else {
867 nhlist_tail = nexthop;
868 nhlist_head = nexthop;
869 }
870 // Only keep track of nexthops which are PIM enabled.
871 pnc->nexthop_num++;
872 }
873 /* Reset existing pnc->nexthop before assigning new list */
874 nexthops_free(pnc->nexthop);
875 pnc->nexthop = nhlist_head;
876 if (pnc->nexthop_num) {
877 pnc->flags |= PIM_NEXTHOP_VALID;
878 pnc->distance = nhr.distance;
879 pnc->metric = nhr.metric;
880 }
881 } else {
882 pnc->flags &= ~PIM_NEXTHOP_VALID;
883 pnc->nexthop_num = nhr.nexthop_num;
884 nexthops_free(pnc->nexthop);
885 pnc->nexthop = NULL;
886 }
887 SET_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED);
888
889 if (PIM_DEBUG_PIM_NHT)
890 zlog_debug(
891 "%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d",
892 __func__, &nhr.prefix, pim->vrf->name, nhr.nexthop_num,
893 pnc->nexthop_num, vrf_id, pnc->upstream_hash->count,
894 listcount(pnc->rp_list));
895
896 pim_rpf_set_refresh_time(pim);
897
898 if (listcount(pnc->rp_list))
899 pim_update_rp_nh(pim, pnc);
900 if (pnc->upstream_hash->count)
901 pim_update_upstream_nh(pim, pnc);
902
903 return 0;
904 }
905
906 int pim_ecmp_nexthop_lookup(struct pim_instance *pim,
907 struct pim_nexthop *nexthop, struct prefix *src,
908 struct prefix *grp, int neighbor_needed)
909 {
910 struct pim_nexthop_cache *pnc;
911 struct pim_zlookup_nexthop nexthop_tab[MULTIPATH_NUM];
912 struct pim_neighbor *nbrs[MULTIPATH_NUM], *nbr = NULL;
913 struct pim_rpf rpf;
914 int num_ifindex;
915 struct interface *ifps[MULTIPATH_NUM], *ifp;
916 int first_ifindex;
917 int found = 0;
918 uint8_t i = 0;
919 uint32_t hash_val = 0, mod_val = 0;
920 uint32_t num_nbrs = 0;
921 char addr_str[PREFIX_STRLEN];
922
923 if (PIM_DEBUG_PIM_NHT) {
924 pim_inet4_dump("<addr?>", src->u.prefix4, addr_str,
925 sizeof(addr_str));
926 zlog_debug("%s: Looking up: %s(%s), last lookup time: %lld",
927 __func__, addr_str, pim->vrf->name,
928 nexthop->last_lookup_time);
929 }
930
931 memset(&rpf, 0, sizeof(struct pim_rpf));
932 rpf.rpf_addr.family = AF_INET;
933 rpf.rpf_addr.prefixlen = IPV4_MAX_BITLEN;
934 rpf.rpf_addr.u.prefix4 = src->u.prefix4;
935
936 pnc = pim_nexthop_cache_find(pim, &rpf);
937 if (pnc) {
938 if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED))
939 return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp,
940 neighbor_needed);
941 }
942
943 memset(nexthop_tab, 0,
944 sizeof(struct pim_zlookup_nexthop) * MULTIPATH_NUM);
945 num_ifindex =
946 zclient_lookup_nexthop(pim, nexthop_tab, MULTIPATH_NUM,
947 src->u.prefix4, PIM_NEXTHOP_LOOKUP_MAX);
948 if (num_ifindex < 1) {
949 if (PIM_DEBUG_PIM_NHT)
950 zlog_warn(
951 "%s: could not find nexthop ifindex for address %s(%s)",
952 __func__, addr_str, pim->vrf->name);
953 return 0;
954 }
955
956 memset(&nbrs, 0, sizeof(nbrs));
957 memset(&ifps, 0, sizeof(ifps));
958
959 /*
960 * Look up all interfaces and neighbors,
961 * store for later usage
962 */
963 for (i = 0; i < num_ifindex; i++) {
964 ifps[i] = if_lookup_by_index(nexthop_tab[i].ifindex,
965 pim->vrf->vrf_id);
966 if (ifps[i]) {
967 nbrs[i] = pim_neighbor_find_prefix(
968 ifps[i], &nexthop_tab[i].nexthop_addr);
969 if (nbrs[i]
970 || pim_if_connected_to_source(ifps[i],
971 src->u.prefix4))
972 num_nbrs++;
973 }
974 }
975
976 // If PIM ECMP enable then choose ECMP path.
977 if (pim->ecmp_enable) {
978 uint32_t consider = num_ifindex;
979
980 if (neighbor_needed && num_nbrs < consider)
981 consider = num_nbrs;
982
983 if (consider == 0)
984 return 0;
985
986 hash_val = pim_compute_ecmp_hash(src, grp);
987 mod_val = hash_val % consider;
988 if (PIM_DEBUG_PIM_NHT_DETAIL)
989 zlog_debug("%s: hash_val %u mod_val %u", __func__,
990 hash_val, mod_val);
991 }
992
993 i = 0;
994 while (!found && (i < num_ifindex)) {
995 first_ifindex = nexthop_tab[i].ifindex;
996
997 ifp = ifps[i];
998 if (!ifp) {
999 if (PIM_DEBUG_PIM_NHT)
1000 zlog_debug(
1001 "%s %s: could not find interface for ifindex %d (address %s(%s))",
1002 __FILE__, __func__, first_ifindex,
1003 addr_str, pim->vrf->name);
1004 if (i == mod_val)
1005 mod_val++;
1006 i++;
1007 continue;
1008 }
1009
1010 if (!ifp->info) {
1011 if (PIM_DEBUG_PIM_NHT)
1012 zlog_debug(
1013 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, RPF for source %s)",
1014 __func__, ifp->name, pim->vrf->name,
1015 first_ifindex, addr_str);
1016 if (i == mod_val)
1017 mod_val++;
1018 i++;
1019 continue;
1020 }
1021 if (neighbor_needed
1022 && !pim_if_connected_to_source(ifp, src->u.prefix4)) {
1023 nbr = nbrs[i];
1024 if (PIM_DEBUG_PIM_NHT_DETAIL)
1025 zlog_debug("ifp name: %s(%s), pim nbr: %p",
1026 ifp->name, pim->vrf->name, nbr);
1027 if (!nbr && !if_is_loopback(ifp)) {
1028 if (i == mod_val)
1029 mod_val++;
1030 i++;
1031 if (PIM_DEBUG_PIM_NHT)
1032 zlog_debug(
1033 "%s: NBR not found on input interface %s(%s) (RPF for source %s)",
1034 __func__, ifp->name,
1035 pim->vrf->name, addr_str);
1036 continue;
1037 }
1038 }
1039
1040 if (i == mod_val) {
1041 if (PIM_DEBUG_PIM_NHT) {
1042 char nexthop_str[PREFIX_STRLEN];
1043
1044 pim_addr_dump("<nexthop?>",
1045 &nexthop_tab[i].nexthop_addr,
1046 nexthop_str, sizeof(nexthop_str));
1047 zlog_debug(
1048 "%s: found nhop %s for addr %s interface %s(%s) metric %d dist %d",
1049 __func__, nexthop_str, addr_str,
1050 ifp->name, pim->vrf->name,
1051 nexthop_tab[i].route_metric,
1052 nexthop_tab[i].protocol_distance);
1053 }
1054 /* update nexthop data */
1055 nexthop->interface = ifp;
1056 nexthop->mrib_nexthop_addr =
1057 nexthop_tab[i].nexthop_addr;
1058 nexthop->mrib_metric_preference =
1059 nexthop_tab[i].protocol_distance;
1060 nexthop->mrib_route_metric =
1061 nexthop_tab[i].route_metric;
1062 nexthop->last_lookup = src->u.prefix4;
1063 nexthop->last_lookup_time = pim_time_monotonic_usec();
1064 nexthop->nbr = nbr;
1065 found = 1;
1066 }
1067 i++;
1068 }
1069
1070 if (found)
1071 return 1;
1072 else
1073 return 0;
1074 }
1075
1076 int pim_ecmp_fib_lookup_if_vif_index(struct pim_instance *pim,
1077 struct prefix *src, struct prefix *grp)
1078 {
1079 struct pim_nexthop nhop;
1080 int vif_index;
1081 ifindex_t ifindex;
1082 char addr_str[PREFIX_STRLEN];
1083
1084 if (PIM_DEBUG_PIM_NHT)
1085 pim_inet4_dump("<addr?>", src->u.prefix4, addr_str,
1086 sizeof(addr_str));
1087
1088 memset(&nhop, 0, sizeof(nhop));
1089 if (!pim_ecmp_nexthop_lookup(pim, &nhop, src, grp, 1)) {
1090 if (PIM_DEBUG_PIM_NHT)
1091 zlog_debug(
1092 "%s: could not find nexthop ifindex for address %s(%s)",
1093 __func__, addr_str, pim->vrf->name);
1094 return -1;
1095 }
1096
1097 ifindex = nhop.interface->ifindex;
1098 if (PIM_DEBUG_PIM_NHT)
1099 zlog_debug(
1100 "%s: found nexthop ifindex=%d (interface %s(%s)) for address %s",
1101 __func__, ifindex,
1102 ifindex2ifname(ifindex, pim->vrf->vrf_id),
1103 pim->vrf->name, addr_str);
1104
1105 vif_index = pim_if_find_vifindex_by_ifindex(pim, ifindex);
1106
1107 if (vif_index < 0) {
1108 if (PIM_DEBUG_PIM_NHT) {
1109 zlog_debug(
1110 "%s: low vif_index=%d(%s) < 1 nexthop for address %s",
1111 __func__, vif_index, pim->vrf->name, addr_str);
1112 }
1113 return -2;
1114 }
1115
1116 return vif_index;
1117 }