]> git.proxmox.com Git - mirror_frr.git/blob - pimd/pim_nht.c
Merge pull request #11606 from LabNConsulting/zebra-nbr-memleak
[mirror_frr.git] / pimd / pim_nht.c
1 /*
2 * PIM for Quagga
3 * Copyright (C) 2017 Cumulus Networks, Inc.
4 * Chirag Shah
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20 #include <zebra.h>
21 #include "network.h"
22 #include "zclient.h"
23 #include "stream.h"
24 #include "nexthop.h"
25 #include "if.h"
26 #include "hash.h"
27 #include "jhash.h"
28
29 #include "lib/printfrr.h"
30
31 #include "pimd.h"
32 #include "pimd/pim_nht.h"
33 #include "pim_instance.h"
34 #include "log.h"
35 #include "pim_time.h"
36 #include "pim_oil.h"
37 #include "pim_ifchannel.h"
38 #include "pim_mroute.h"
39 #include "pim_zebra.h"
40 #include "pim_upstream.h"
41 #include "pim_join.h"
42 #include "pim_jp_agg.h"
43 #include "pim_zebra.h"
44 #include "pim_zlookup.h"
45 #include "pim_rp.h"
46 #include "pim_addr.h"
47
48 /**
49 * pim_sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
50 * command to Zebra.
51 */
52 void pim_sendmsg_zebra_rnh(struct pim_instance *pim, struct zclient *zclient,
53 struct pim_nexthop_cache *pnc, int command)
54 {
55 struct prefix p;
56 int ret;
57
58 pim_addr_to_prefix(&p, pnc->rpf.rpf_addr);
59 ret = zclient_send_rnh(zclient, command, &p, SAFI_UNICAST, false, false,
60 pim->vrf->vrf_id);
61 if (ret == ZCLIENT_SEND_FAILURE)
62 zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
63
64 if (PIM_DEBUG_PIM_NHT)
65 zlog_debug(
66 "%s: NHT %sregistered addr %pFX(%s) with Zebra ret:%d ",
67 __func__,
68 (command == ZEBRA_NEXTHOP_REGISTER) ? " " : "de", &p,
69 pim->vrf->name, ret);
70
71 return;
72 }
73
74 struct pim_nexthop_cache *pim_nexthop_cache_find(struct pim_instance *pim,
75 struct pim_rpf *rpf)
76 {
77 struct pim_nexthop_cache *pnc = NULL;
78 struct pim_nexthop_cache lookup;
79
80 lookup.rpf.rpf_addr = rpf->rpf_addr;
81 pnc = hash_lookup(pim->rpf_hash, &lookup);
82
83 return pnc;
84 }
85
86 static struct pim_nexthop_cache *pim_nexthop_cache_add(struct pim_instance *pim,
87 struct pim_rpf *rpf_addr)
88 {
89 struct pim_nexthop_cache *pnc;
90 char hash_name[64];
91
92 pnc = XCALLOC(MTYPE_PIM_NEXTHOP_CACHE,
93 sizeof(struct pim_nexthop_cache));
94 pnc->rpf.rpf_addr = rpf_addr->rpf_addr;
95
96 pnc = hash_get(pim->rpf_hash, pnc, hash_alloc_intern);
97
98 pnc->rp_list = list_new();
99 pnc->rp_list->cmp = pim_rp_list_cmp;
100
101 snprintfrr(hash_name, sizeof(hash_name), "PNC %pPA(%s) Upstream Hash",
102 &pnc->rpf.rpf_addr, pim->vrf->name);
103 pnc->upstream_hash = hash_create_size(8192, pim_upstream_hash_key,
104 pim_upstream_equal, hash_name);
105
106 return pnc;
107 }
108
109 static struct pim_nexthop_cache *pim_nht_get(struct pim_instance *pim,
110 pim_addr addr)
111 {
112 struct pim_nexthop_cache *pnc = NULL;
113 struct pim_rpf rpf;
114 struct zclient *zclient = NULL;
115
116 zclient = pim_zebra_zclient_get();
117 memset(&rpf, 0, sizeof(rpf));
118 rpf.rpf_addr = addr;
119
120 pnc = pim_nexthop_cache_find(pim, &rpf);
121 if (!pnc) {
122 pnc = pim_nexthop_cache_add(pim, &rpf);
123 pim_sendmsg_zebra_rnh(pim, zclient, pnc,
124 ZEBRA_NEXTHOP_REGISTER);
125 if (PIM_DEBUG_PIM_NHT_DETAIL)
126 zlog_debug(
127 "%s: NHT cache and zebra notification added for %pPA(%s)",
128 __func__, &addr, pim->vrf->name);
129 }
130
131 return pnc;
132 }
133
134 /* TBD: this does several distinct things and should probably be split up.
135 * (checking state vs. returning pnc vs. adding upstream vs. adding rp)
136 */
137 int pim_find_or_track_nexthop(struct pim_instance *pim, pim_addr addr,
138 struct pim_upstream *up, struct rp_info *rp,
139 struct pim_nexthop_cache *out_pnc)
140 {
141 struct pim_nexthop_cache *pnc;
142 struct listnode *ch_node = NULL;
143
144 pnc = pim_nht_get(pim, addr);
145
146 assertf(up || rp, "addr=%pPA", &addr);
147
148 if (rp != NULL) {
149 ch_node = listnode_lookup(pnc->rp_list, rp);
150 if (ch_node == NULL)
151 listnode_add_sort(pnc->rp_list, rp);
152 }
153
154 if (up != NULL)
155 (void)hash_get(pnc->upstream_hash, up, hash_alloc_intern);
156
157 if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID)) {
158 if (out_pnc)
159 memcpy(out_pnc, pnc, sizeof(struct pim_nexthop_cache));
160 return 1;
161 }
162
163 return 0;
164 }
165
166 void pim_nht_bsr_add(struct pim_instance *pim, pim_addr addr)
167 {
168 struct pim_nexthop_cache *pnc;
169
170 pnc = pim_nht_get(pim, addr);
171
172 pnc->bsr_count++;
173 }
174
175 static void pim_nht_drop_maybe(struct pim_instance *pim,
176 struct pim_nexthop_cache *pnc)
177 {
178 if (PIM_DEBUG_PIM_NHT)
179 zlog_debug(
180 "%s: NHT %pPA(%s) rp_list count:%d upstream count:%ld BSR count:%u",
181 __func__, &pnc->rpf.rpf_addr, pim->vrf->name,
182 pnc->rp_list->count, pnc->upstream_hash->count,
183 pnc->bsr_count);
184
185 if (pnc->rp_list->count == 0 && pnc->upstream_hash->count == 0
186 && pnc->bsr_count == 0) {
187 struct zclient *zclient = pim_zebra_zclient_get();
188
189 pim_sendmsg_zebra_rnh(pim, zclient, pnc,
190 ZEBRA_NEXTHOP_UNREGISTER);
191
192 list_delete(&pnc->rp_list);
193 hash_free(pnc->upstream_hash);
194
195 hash_release(pim->rpf_hash, pnc);
196 if (pnc->nexthop)
197 nexthops_free(pnc->nexthop);
198 XFREE(MTYPE_PIM_NEXTHOP_CACHE, pnc);
199 }
200 }
201
202 void pim_delete_tracked_nexthop(struct pim_instance *pim, pim_addr addr,
203 struct pim_upstream *up, struct rp_info *rp)
204 {
205 struct pim_nexthop_cache *pnc = NULL;
206 struct pim_nexthop_cache lookup;
207 struct pim_upstream *upstream = NULL;
208
209 /* Remove from RPF hash if it is the last entry */
210 lookup.rpf.rpf_addr = addr;
211 pnc = hash_lookup(pim->rpf_hash, &lookup);
212 if (!pnc) {
213 zlog_warn("attempting to delete nonexistent NHT entry %pPA",
214 &addr);
215 return;
216 }
217
218 if (rp) {
219 /* Release the (*, G)upstream from pnc->upstream_hash,
220 * whose Group belongs to the RP getting deleted
221 */
222 frr_each (rb_pim_upstream, &pim->upstream_head, upstream) {
223 struct prefix grp;
224 struct rp_info *trp_info;
225
226 if (!pim_addr_is_any(upstream->sg.src))
227 continue;
228
229 pim_addr_to_prefix(&grp, upstream->sg.grp);
230 trp_info = pim_rp_find_match_group(pim, &grp);
231 if (trp_info == rp)
232 hash_release(pnc->upstream_hash, upstream);
233 }
234 listnode_delete(pnc->rp_list, rp);
235 }
236
237 if (up)
238 hash_release(pnc->upstream_hash, up);
239
240 pim_nht_drop_maybe(pim, pnc);
241 }
242
243 void pim_nht_bsr_del(struct pim_instance *pim, pim_addr addr)
244 {
245 struct pim_nexthop_cache *pnc = NULL;
246 struct pim_nexthop_cache lookup;
247
248 /*
249 * Nothing to do here if the address to unregister
250 * is 0.0.0.0 as that the BSR has not been registered
251 * for tracking yet.
252 */
253 if (pim_addr_is_any(addr))
254 return;
255
256 lookup.rpf.rpf_addr = addr;
257
258 pnc = hash_lookup(pim->rpf_hash, &lookup);
259
260 if (!pnc) {
261 zlog_warn("attempting to delete nonexistent NHT BSR entry %pPA",
262 &addr);
263 return;
264 }
265
266 assertf(pnc->bsr_count > 0, "addr=%pPA", &addr);
267 pnc->bsr_count--;
268
269 pim_nht_drop_maybe(pim, pnc);
270 }
271
272 bool pim_nht_bsr_rpf_check(struct pim_instance *pim, pim_addr bsr_addr,
273 struct interface *src_ifp, pim_addr src_ip)
274 {
275 struct pim_nexthop_cache *pnc = NULL;
276 struct pim_nexthop_cache lookup;
277 struct pim_neighbor *nbr = NULL;
278 struct nexthop *nh;
279 struct interface *ifp;
280
281 lookup.rpf.rpf_addr = bsr_addr;
282
283 pnc = hash_lookup(pim->rpf_hash, &lookup);
284 if (!pnc || !CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED)) {
285 /* BSM from a new freshly registered BSR - do a synchronous
286 * zebra query since otherwise we'd drop the first packet,
287 * leading to additional delay in picking up BSM data
288 */
289
290 /* FIXME: this should really be moved into a generic NHT
291 * function that does "add and get immediate result" or maybe
292 * "check cache or get immediate result." But until that can
293 * be worked in, here's a copy of the code below :(
294 */
295 struct pim_zlookup_nexthop nexthop_tab[router->multipath];
296 ifindex_t i;
297 struct interface *ifp = NULL;
298 int num_ifindex;
299
300 memset(nexthop_tab, 0, sizeof(nexthop_tab));
301 num_ifindex = zclient_lookup_nexthop(
302 pim, nexthop_tab, router->multipath, bsr_addr,
303 PIM_NEXTHOP_LOOKUP_MAX);
304
305 if (num_ifindex <= 0)
306 return false;
307
308 for (i = 0; i < num_ifindex; i++) {
309 struct pim_zlookup_nexthop *znh = &nexthop_tab[i];
310
311 /* pim_zlookup_nexthop has no ->type */
312
313 /* 1:1 match code below with znh instead of nh */
314 ifp = if_lookup_by_index(znh->ifindex,
315 pim->vrf->vrf_id);
316
317 if (!ifp || !ifp->info)
318 continue;
319
320 if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
321 return true;
322
323 nbr = pim_neighbor_find(ifp, znh->nexthop_addr);
324 if (!nbr)
325 continue;
326
327 return znh->ifindex == src_ifp->ifindex &&
328 (!pim_addr_cmp(znh->nexthop_addr, src_ip));
329 }
330 return false;
331 }
332
333 if (!CHECK_FLAG(pnc->flags, PIM_NEXTHOP_VALID))
334 return false;
335
336 /* if we accept BSMs from more than one ECMP nexthop, this will cause
337 * BSM message "multiplication" for each ECMP hop. i.e. if you have
338 * 4-way ECMP and 4 hops you end up with 256 copies of each BSM
339 * message.
340 *
341 * so... only accept the first (IPv4) valid nexthop as source.
342 */
343
344 for (nh = pnc->nexthop; nh; nh = nh->next) {
345 pim_addr nhaddr;
346
347 switch (nh->type) {
348 #if PIM_IPV == 4
349 case NEXTHOP_TYPE_IPV4:
350 if (nh->ifindex == IFINDEX_INTERNAL)
351 continue;
352
353 /* fallthru */
354 case NEXTHOP_TYPE_IPV4_IFINDEX:
355 nhaddr = nh->gate.ipv4;
356 break;
357 #else
358 case NEXTHOP_TYPE_IPV6:
359 if (nh->ifindex == IFINDEX_INTERNAL)
360 continue;
361
362 /* fallthru */
363 case NEXTHOP_TYPE_IPV6_IFINDEX:
364 nhaddr = nh->gate.ipv6;
365 break;
366 #endif
367 case NEXTHOP_TYPE_IFINDEX:
368 nhaddr = bsr_addr;
369 break;
370
371 default:
372 continue;
373 }
374
375 ifp = if_lookup_by_index(nh->ifindex, pim->vrf->vrf_id);
376 if (!ifp || !ifp->info)
377 continue;
378
379 if (if_is_loopback(ifp) && if_is_loopback(src_ifp))
380 return true;
381
382 /* MRIB (IGP) may be pointing at a router where PIM is down */
383 nbr = pim_neighbor_find(ifp, nhaddr);
384 if (!nbr)
385 continue;
386
387 return nh->ifindex == src_ifp->ifindex &&
388 (!pim_addr_cmp(nhaddr, src_ip));
389 }
390 return false;
391 }
392
393 void pim_rp_nexthop_del(struct rp_info *rp_info)
394 {
395 rp_info->rp.source_nexthop.interface = NULL;
396 rp_info->rp.source_nexthop.mrib_nexthop_addr = PIMADDR_ANY;
397 rp_info->rp.source_nexthop.mrib_metric_preference =
398 router->infinite_assert_metric.metric_preference;
399 rp_info->rp.source_nexthop.mrib_route_metric =
400 router->infinite_assert_metric.route_metric;
401 }
402
403 /* Update RP nexthop info based on Nexthop update received from Zebra.*/
404 static void pim_update_rp_nh(struct pim_instance *pim,
405 struct pim_nexthop_cache *pnc)
406 {
407 struct listnode *node = NULL;
408 struct rp_info *rp_info = NULL;
409
410 /*Traverse RP list and update each RP Nexthop info */
411 for (ALL_LIST_ELEMENTS_RO(pnc->rp_list, node, rp_info)) {
412 if (pim_rpf_addr_is_inaddr_any(&rp_info->rp))
413 continue;
414
415 // Compute PIM RPF using cached nexthop
416 if (!pim_ecmp_nexthop_lookup(pim, &rp_info->rp.source_nexthop,
417 rp_info->rp.rpf_addr,
418 &rp_info->group, 1))
419 pim_rp_nexthop_del(rp_info);
420 }
421 }
422
423 /* Update Upstream nexthop info based on Nexthop update received from Zebra.*/
424 static int pim_update_upstream_nh_helper(struct hash_bucket *bucket, void *arg)
425 {
426 struct pim_instance *pim = (struct pim_instance *)arg;
427 struct pim_upstream *up = (struct pim_upstream *)bucket->data;
428
429 enum pim_rpf_result rpf_result;
430 struct pim_rpf old;
431
432 old.source_nexthop.interface = up->rpf.source_nexthop.interface;
433 rpf_result = pim_rpf_update(pim, up, &old, __func__);
434
435 /* update kernel multicast forwarding cache (MFC); if the
436 * RPF nbr is now unreachable the MFC has already been updated
437 * by pim_rpf_clear
438 */
439 if (rpf_result != PIM_RPF_CHANGED)
440 pim_upstream_mroute_iif_update(up->channel_oil, __func__);
441
442 if (rpf_result == PIM_RPF_CHANGED ||
443 (rpf_result == PIM_RPF_FAILURE && old.source_nexthop.interface))
444 pim_zebra_upstream_rpf_changed(pim, up, &old);
445
446
447 if (PIM_DEBUG_PIM_NHT) {
448 zlog_debug(
449 "%s: NHT upstream %s(%s) old ifp %s new ifp %s",
450 __func__, up->sg_str, pim->vrf->name,
451 old.source_nexthop.interface ? old.source_nexthop
452 .interface->name
453 : "Unknown",
454 up->rpf.source_nexthop.interface ? up->rpf.source_nexthop
455 .interface->name
456 : "Unknown");
457 }
458
459 return HASHWALK_CONTINUE;
460 }
461
462 static int pim_update_upstream_nh(struct pim_instance *pim,
463 struct pim_nexthop_cache *pnc)
464 {
465 hash_walk(pnc->upstream_hash, pim_update_upstream_nh_helper, pim);
466
467 pim_zebra_update_all_interfaces(pim);
468
469 return 0;
470 }
471
/*
 * Hash value used to spread (src, grp) pairs over ECMP nexthops.
 *
 * Returns 0 when src is NULL, the hash key of src alone when grp is NULL, and
 * the XOR of both prefix hash keys otherwise.
 */
uint32_t pim_compute_ecmp_hash(struct prefix *src, struct prefix *grp)
{
	uint32_t key;

	if (!src)
		return 0;

	key = prefix_hash_key(src);
	if (!grp)
		return key;

	return key ^ prefix_hash_key(grp);
}
484
485 static int pim_ecmp_nexthop_search(struct pim_instance *pim,
486 struct pim_nexthop_cache *pnc,
487 struct pim_nexthop *nexthop, pim_addr src,
488 struct prefix *grp, int neighbor_needed)
489 {
490 struct pim_neighbor *nbrs[router->multipath], *nbr = NULL;
491 struct interface *ifps[router->multipath];
492 struct nexthop *nh_node = NULL;
493 ifindex_t first_ifindex;
494 struct interface *ifp = NULL;
495 uint32_t hash_val = 0, mod_val = 0;
496 uint8_t nh_iter = 0, found = 0;
497 uint32_t i, num_nbrs = 0;
498 pim_addr nh_addr = nexthop->mrib_nexthop_addr;
499 pim_addr grp_addr = pim_addr_from_prefix(grp);
500
501 if (!pnc || !pnc->nexthop_num || !nexthop)
502 return 0;
503
504 memset(&nbrs, 0, sizeof(nbrs));
505 memset(&ifps, 0, sizeof(ifps));
506
507
508 // Current Nexthop is VALID, check to stay on the current path.
509 if (nexthop->interface && nexthop->interface->info &&
510 (!pim_addr_is_any(nh_addr))) {
511 /* User configured knob to explicitly switch
512 to new path is disabled or current path
513 metric is less than nexthop update.
514 */
515
516 if (pim->ecmp_rebalance_enable == 0) {
517 uint8_t curr_route_valid = 0;
518 // Check if current nexthop is present in new updated
519 // Nexthop list.
520 // If the current nexthop is not valid, candidate to
521 // choose new Nexthop.
522 for (nh_node = pnc->nexthop; nh_node;
523 nh_node = nh_node->next) {
524 curr_route_valid = (nexthop->interface->ifindex
525 == nh_node->ifindex);
526 if (curr_route_valid)
527 break;
528 }
529
530 if (curr_route_valid &&
531 !pim_if_connected_to_source(nexthop->interface,
532 src)) {
533 nbr = pim_neighbor_find(
534 nexthop->interface,
535 nexthop->mrib_nexthop_addr);
536 if (!nbr
537 && !if_is_loopback(nexthop->interface)) {
538 if (PIM_DEBUG_PIM_NHT)
539 zlog_debug(
540 "%s: current nexthop does not have nbr ",
541 __func__);
542 } else {
543 /* update metric even if the upstream
544 * neighbor stays unchanged
545 */
546 nexthop->mrib_metric_preference =
547 pnc->distance;
548 nexthop->mrib_route_metric =
549 pnc->metric;
550 if (PIM_DEBUG_PIM_NHT)
551 zlog_debug(
552 "%s: (%pPA,%pPA)(%s) current nexthop %s is valid, skipping new path selection",
553 __func__, &src,
554 &grp_addr,
555 pim->vrf->name,
556 nexthop->interface->name);
557 return 1;
558 }
559 }
560 }
561 }
562
563 /*
564 * Look up all interfaces and neighbors,
565 * store for later usage
566 */
567 for (nh_node = pnc->nexthop, i = 0; nh_node;
568 nh_node = nh_node->next, i++) {
569 ifps[i] =
570 if_lookup_by_index(nh_node->ifindex, pim->vrf->vrf_id);
571 if (ifps[i]) {
572 #if PIM_IPV == 4
573 pim_addr nhaddr = nh_node->gate.ipv4;
574 #else
575 pim_addr nhaddr = nh_node->gate.ipv6;
576 #endif
577 nbrs[i] = pim_neighbor_find(ifps[i], nhaddr);
578 if (nbrs[i] || pim_if_connected_to_source(ifps[i], src))
579 num_nbrs++;
580 }
581 }
582 if (pim->ecmp_enable) {
583 struct prefix src_pfx;
584 uint32_t consider = pnc->nexthop_num;
585
586 if (neighbor_needed && num_nbrs < consider)
587 consider = num_nbrs;
588
589 if (consider == 0)
590 return 0;
591
592 // PIM ECMP flag is enable then choose ECMP path.
593 pim_addr_to_prefix(&src_pfx, src);
594 hash_val = pim_compute_ecmp_hash(&src_pfx, grp);
595 mod_val = hash_val % consider;
596 }
597
598 for (nh_node = pnc->nexthop; nh_node && (found == 0);
599 nh_node = nh_node->next) {
600 first_ifindex = nh_node->ifindex;
601 ifp = ifps[nh_iter];
602 if (!ifp) {
603 if (PIM_DEBUG_PIM_NHT)
604 zlog_debug(
605 "%s %s: could not find interface for ifindex %d (address %pPA(%s))",
606 __FILE__, __func__, first_ifindex, &src,
607 pim->vrf->name);
608 if (nh_iter == mod_val)
609 mod_val++; // Select nexthpath
610 nh_iter++;
611 continue;
612 }
613 if (!ifp->info) {
614 if (PIM_DEBUG_PIM_NHT)
615 zlog_debug(
616 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)",
617 __func__, ifp->name, pim->vrf->name,
618 first_ifindex, &src);
619 if (nh_iter == mod_val)
620 mod_val++; // Select nexthpath
621 nh_iter++;
622 continue;
623 }
624
625 if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) {
626 nbr = nbrs[nh_iter];
627 if (!nbr && !if_is_loopback(ifp)) {
628 if (PIM_DEBUG_PIM_NHT)
629 zlog_debug(
630 "%s: pim nbr not found on input interface %s(%s)",
631 __func__, ifp->name,
632 pim->vrf->name);
633 if (nh_iter == mod_val)
634 mod_val++; // Select nexthpath
635 nh_iter++;
636 continue;
637 }
638 }
639
640 if (nh_iter == mod_val) {
641 nexthop->interface = ifp;
642 #if PIM_IPV == 4
643 nexthop->mrib_nexthop_addr = nh_node->gate.ipv4;
644 #else
645 nexthop->mrib_nexthop_addr = nh_node->gate.ipv6;
646 #endif
647 nexthop->mrib_metric_preference = pnc->distance;
648 nexthop->mrib_route_metric = pnc->metric;
649 nexthop->last_lookup = src;
650 nexthop->last_lookup_time = pim_time_monotonic_usec();
651 nexthop->nbr = nbr;
652 found = 1;
653 if (PIM_DEBUG_PIM_NHT)
654 zlog_debug(
655 "%s: (%pPA,%pPA)(%s) selected nhop interface %s addr %pPAs mod_val %u iter %d ecmp %d",
656 __func__, &src, &grp_addr,
657 pim->vrf->name, ifp->name, &nh_addr,
658 mod_val, nh_iter, pim->ecmp_enable);
659 }
660 nh_iter++;
661 }
662
663 if (found)
664 return 1;
665 else
666 return 0;
667 }
668
669 /* This API is used to parse Registered address nexthop update coming from Zebra
670 */
671 int pim_parse_nexthop_update(ZAPI_CALLBACK_ARGS)
672 {
673 struct nexthop *nexthop;
674 struct nexthop *nhlist_head = NULL;
675 struct nexthop *nhlist_tail = NULL;
676 int i;
677 struct pim_rpf rpf;
678 struct pim_nexthop_cache *pnc = NULL;
679 struct interface *ifp = NULL;
680 struct vrf *vrf = vrf_lookup_by_id(vrf_id);
681 struct pim_instance *pim;
682 struct zapi_route nhr;
683 struct prefix match;
684
685 if (!vrf)
686 return 0;
687 pim = vrf->info;
688
689 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
690 zlog_err("%s: Decode of nexthop update from zebra failed",
691 __func__);
692 return 0;
693 }
694
695 if (cmd == ZEBRA_NEXTHOP_UPDATE) {
696 rpf.rpf_addr = pim_addr_from_prefix(&match);
697 pnc = pim_nexthop_cache_find(pim, &rpf);
698 if (!pnc) {
699 if (PIM_DEBUG_PIM_NHT)
700 zlog_debug(
701 "%s: Skipping NHT update, addr %pPA is not in local cached DB.",
702 __func__, &rpf.rpf_addr);
703 return 0;
704 }
705 } else {
706 /*
707 * We do not currently handle ZEBRA_IMPORT_CHECK_UPDATE
708 */
709 return 0;
710 }
711
712 pnc->last_update = pim_time_monotonic_usec();
713
714 if (nhr.nexthop_num) {
715 pnc->nexthop_num = 0; // Only increment for pim enabled rpf.
716
717 for (i = 0; i < nhr.nexthop_num; i++) {
718 nexthop = nexthop_from_zapi_nexthop(&nhr.nexthops[i]);
719 switch (nexthop->type) {
720 case NEXTHOP_TYPE_IFINDEX:
721 /*
722 * Connected route (i.e. no nexthop), use
723 * RPF address from nexthop cache (i.e.
724 * destination) as PIM nexthop.
725 */
726 #if PIM_IPV == 4
727 nexthop->type = NEXTHOP_TYPE_IPV4_IFINDEX;
728 nexthop->gate.ipv4 = pnc->rpf.rpf_addr;
729 #else
730 nexthop->type = NEXTHOP_TYPE_IPV6_IFINDEX;
731 nexthop->gate.ipv6 = pnc->rpf.rpf_addr;
732 #endif
733 break;
734 #if PIM_IPV == 4
735 /* RFC5549 IPv4-over-IPv6 nexthop handling:
736 * if we get an IPv6 nexthop in IPv4 PIM, hunt down a
737 * PIM neighbor and use that instead.
738 */
739 case NEXTHOP_TYPE_IPV6_IFINDEX: {
740 struct interface *ifp1 = NULL;
741 struct pim_neighbor *nbr = NULL;
742
743 ifp1 = if_lookup_by_index(nexthop->ifindex,
744 pim->vrf->vrf_id);
745
746 if (!ifp1)
747 nbr = NULL;
748 else
749 /* FIXME: should really use nbr's
750 * secondary address list here
751 */
752 nbr = pim_neighbor_find_if(ifp1);
753
754 /* Overwrite with Nbr address as NH addr */
755 if (nbr)
756 nexthop->gate.ipv4 = nbr->source_addr;
757 else
758 // Mark nexthop address to 0 until PIM
759 // Nbr is resolved.
760 nexthop->gate.ipv4 = PIMADDR_ANY;
761
762 break;
763 }
764 #else
765 case NEXTHOP_TYPE_IPV6_IFINDEX:
766 #endif
767 case NEXTHOP_TYPE_IPV6:
768 case NEXTHOP_TYPE_IPV4:
769 case NEXTHOP_TYPE_IPV4_IFINDEX:
770 case NEXTHOP_TYPE_BLACKHOLE:
771 /* nothing to do for the other nexthop types */
772 break;
773 }
774
775 ifp = if_lookup_by_index(nexthop->ifindex,
776 pim->vrf->vrf_id);
777 if (!ifp) {
778 if (PIM_DEBUG_PIM_NHT) {
779 char buf[NEXTHOP_STRLEN];
780 zlog_debug(
781 "%s: could not find interface for ifindex %d(%s) (addr %s)",
782 __func__, nexthop->ifindex,
783 pim->vrf->name,
784 nexthop2str(nexthop, buf,
785 sizeof(buf)));
786 }
787 nexthop_free(nexthop);
788 continue;
789 }
790
791 if (PIM_DEBUG_PIM_NHT) {
792 #if PIM_IPV == 4
793 pim_addr nhaddr = nexthop->gate.ipv4;
794 #else
795 pim_addr nhaddr = nexthop->gate.ipv6;
796 #endif
797 zlog_debug(
798 "%s: NHT addr %pFX(%s) %d-nhop via %pPA(%s) type %d distance:%u metric:%u ",
799 __func__, &match, pim->vrf->name, i + 1,
800 &nhaddr, ifp->name, nexthop->type,
801 nhr.distance, nhr.metric);
802 }
803
804 if (!ifp->info) {
805 /*
806 * Though Multicast is not enabled on this
807 * Interface store it in database otheriwse we
808 * may miss this update and this will not cause
809 * any issue, because while choosing the path we
810 * are ommitting the Interfaces which are not
811 * multicast enabled
812 */
813 if (PIM_DEBUG_PIM_NHT) {
814 char buf[NEXTHOP_STRLEN];
815
816 zlog_debug(
817 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, addr %s)",
818 __func__, ifp->name,
819 pim->vrf->name,
820 nexthop->ifindex,
821 nexthop2str(nexthop, buf,
822 sizeof(buf)));
823 }
824 }
825
826 if (nhlist_tail) {
827 nhlist_tail->next = nexthop;
828 nhlist_tail = nexthop;
829 } else {
830 nhlist_tail = nexthop;
831 nhlist_head = nexthop;
832 }
833 // Only keep track of nexthops which are PIM enabled.
834 pnc->nexthop_num++;
835 }
836 /* Reset existing pnc->nexthop before assigning new list */
837 nexthops_free(pnc->nexthop);
838 pnc->nexthop = nhlist_head;
839 if (pnc->nexthop_num) {
840 pnc->flags |= PIM_NEXTHOP_VALID;
841 pnc->distance = nhr.distance;
842 pnc->metric = nhr.metric;
843 }
844 } else {
845 pnc->flags &= ~PIM_NEXTHOP_VALID;
846 pnc->nexthop_num = nhr.nexthop_num;
847 nexthops_free(pnc->nexthop);
848 pnc->nexthop = NULL;
849 }
850 SET_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED);
851
852 if (PIM_DEBUG_PIM_NHT)
853 zlog_debug(
854 "%s: NHT Update for %pFX(%s) num_nh %d num_pim_nh %d vrf:%u up %ld rp %d",
855 __func__, &match, pim->vrf->name, nhr.nexthop_num,
856 pnc->nexthop_num, vrf_id, pnc->upstream_hash->count,
857 listcount(pnc->rp_list));
858
859 pim_rpf_set_refresh_time(pim);
860
861 if (listcount(pnc->rp_list))
862 pim_update_rp_nh(pim, pnc);
863 if (pnc->upstream_hash->count)
864 pim_update_upstream_nh(pim, pnc);
865
866 return 0;
867 }
868
869 int pim_ecmp_nexthop_lookup(struct pim_instance *pim,
870 struct pim_nexthop *nexthop, pim_addr src,
871 struct prefix *grp, int neighbor_needed)
872 {
873 struct pim_nexthop_cache *pnc;
874 struct pim_zlookup_nexthop nexthop_tab[router->multipath];
875 struct pim_neighbor *nbrs[router->multipath], *nbr = NULL;
876 struct pim_rpf rpf;
877 int num_ifindex;
878 struct interface *ifps[router->multipath], *ifp;
879 int first_ifindex;
880 int found = 0;
881 uint8_t i = 0;
882 uint32_t hash_val = 0, mod_val = 0;
883 uint32_t num_nbrs = 0;
884
885 if (PIM_DEBUG_PIM_NHT_DETAIL)
886 zlog_debug("%s: Looking up: %pPA(%s), last lookup time: %lld",
887 __func__, &src, pim->vrf->name,
888 nexthop->last_lookup_time);
889
890 rpf.rpf_addr = src;
891
892 pnc = pim_nexthop_cache_find(pim, &rpf);
893 if (pnc) {
894 if (CHECK_FLAG(pnc->flags, PIM_NEXTHOP_ANSWER_RECEIVED))
895 return pim_ecmp_nexthop_search(pim, pnc, nexthop, src, grp,
896 neighbor_needed);
897 }
898
899 memset(nexthop_tab, 0,
900 sizeof(struct pim_zlookup_nexthop) * router->multipath);
901 num_ifindex =
902 zclient_lookup_nexthop(pim, nexthop_tab, router->multipath, src,
903 PIM_NEXTHOP_LOOKUP_MAX);
904 if (num_ifindex < 1) {
905 if (PIM_DEBUG_PIM_NHT)
906 zlog_warn(
907 "%s: could not find nexthop ifindex for address %pPA(%s)",
908 __func__, &src, pim->vrf->name);
909 return 0;
910 }
911
912 memset(&nbrs, 0, sizeof(nbrs));
913 memset(&ifps, 0, sizeof(ifps));
914
915 /*
916 * Look up all interfaces and neighbors,
917 * store for later usage
918 */
919 for (i = 0; i < num_ifindex; i++) {
920 ifps[i] = if_lookup_by_index(nexthop_tab[i].ifindex,
921 pim->vrf->vrf_id);
922 if (ifps[i]) {
923 nbrs[i] = pim_neighbor_find(
924 ifps[i], nexthop_tab[i].nexthop_addr);
925 if (nbrs[i] || pim_if_connected_to_source(ifps[i], src))
926 num_nbrs++;
927 }
928 }
929
930 // If PIM ECMP enable then choose ECMP path.
931 if (pim->ecmp_enable) {
932 struct prefix src_pfx;
933 uint32_t consider = num_ifindex;
934
935 if (neighbor_needed && num_nbrs < consider)
936 consider = num_nbrs;
937
938 if (consider == 0)
939 return 0;
940
941 pim_addr_to_prefix(&src_pfx, src);
942 hash_val = pim_compute_ecmp_hash(&src_pfx, grp);
943 mod_val = hash_val % consider;
944 if (PIM_DEBUG_PIM_NHT_DETAIL)
945 zlog_debug("%s: hash_val %u mod_val %u", __func__,
946 hash_val, mod_val);
947 }
948
949 i = 0;
950 while (!found && (i < num_ifindex)) {
951 first_ifindex = nexthop_tab[i].ifindex;
952
953 ifp = ifps[i];
954 if (!ifp) {
955 if (PIM_DEBUG_PIM_NHT)
956 zlog_debug(
957 "%s %s: could not find interface for ifindex %d (address %pPA(%s))",
958 __FILE__, __func__, first_ifindex, &src,
959 pim->vrf->name);
960 if (i == mod_val)
961 mod_val++;
962 i++;
963 continue;
964 }
965
966 if (!ifp->info) {
967 if (PIM_DEBUG_PIM_NHT)
968 zlog_debug(
969 "%s: multicast not enabled on input interface %s(%s) (ifindex=%d, RPF for source %pPA)",
970 __func__, ifp->name, pim->vrf->name,
971 first_ifindex, &src);
972 if (i == mod_val)
973 mod_val++;
974 i++;
975 continue;
976 }
977 if (neighbor_needed && !pim_if_connected_to_source(ifp, src)) {
978 nbr = nbrs[i];
979 if (PIM_DEBUG_PIM_NHT_DETAIL)
980 zlog_debug("ifp name: %s(%s), pim nbr: %p",
981 ifp->name, pim->vrf->name, nbr);
982 if (!nbr && !if_is_loopback(ifp)) {
983 if (i == mod_val)
984 mod_val++;
985 if (PIM_DEBUG_PIM_NHT)
986 zlog_debug(
987 "%s: NBR (%pPA) not found on input interface %s(%s) (RPF for source %pPA)",
988 __func__,
989 &nexthop_tab[i].nexthop_addr,
990 ifp->name, pim->vrf->name,
991 &src);
992 i++;
993 continue;
994 }
995 }
996
997 if (i == mod_val) {
998 if (PIM_DEBUG_PIM_NHT)
999 zlog_debug(
1000 "%s: found nhop %pPA for addr %pPA interface %s(%s) metric %d dist %d",
1001 __func__, &nexthop_tab[i].nexthop_addr,
1002 &src, ifp->name, pim->vrf->name,
1003 nexthop_tab[i].route_metric,
1004 nexthop_tab[i].protocol_distance);
1005 /* update nexthop data */
1006 nexthop->interface = ifp;
1007 nexthop->mrib_nexthop_addr =
1008 nexthop_tab[i].nexthop_addr;
1009 nexthop->mrib_metric_preference =
1010 nexthop_tab[i].protocol_distance;
1011 nexthop->mrib_route_metric =
1012 nexthop_tab[i].route_metric;
1013 nexthop->last_lookup = src;
1014 nexthop->last_lookup_time = pim_time_monotonic_usec();
1015 nexthop->nbr = nbr;
1016 found = 1;
1017 }
1018 i++;
1019 }
1020
1021 if (found)
1022 return 1;
1023 else
1024 return 0;
1025 }
1026
1027 int pim_ecmp_fib_lookup_if_vif_index(struct pim_instance *pim, pim_addr src,
1028 struct prefix *grp)
1029 {
1030 struct pim_nexthop nhop;
1031 int vif_index;
1032 ifindex_t ifindex;
1033
1034 memset(&nhop, 0, sizeof(nhop));
1035 if (!pim_ecmp_nexthop_lookup(pim, &nhop, src, grp, 1)) {
1036 if (PIM_DEBUG_PIM_NHT)
1037 zlog_debug(
1038 "%s: could not find nexthop ifindex for address %pPA(%s)",
1039 __func__, &src, pim->vrf->name);
1040 return -1;
1041 }
1042
1043 ifindex = nhop.interface->ifindex;
1044 if (PIM_DEBUG_PIM_NHT)
1045 zlog_debug(
1046 "%s: found nexthop ifindex=%d (interface %s(%s)) for address %pPA",
1047 __func__, ifindex,
1048 ifindex2ifname(ifindex, pim->vrf->vrf_id),
1049 pim->vrf->name, &src);
1050
1051 vif_index = pim_if_find_vifindex_by_ifindex(pim, ifindex);
1052
1053 if (vif_index < 0) {
1054 if (PIM_DEBUG_PIM_NHT) {
1055 zlog_debug(
1056 "%s: low vif_index=%d(%s) < 1 nexthop for address %pPA",
1057 __func__, vif_index, pim->vrf->name, &src);
1058 }
1059 return -2;
1060 }
1061
1062 return vif_index;
1063 }