]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_nht.c
Merge pull request #12485 from opensourcerouting/fix/crash_attrinfo
[mirror_frr.git] / bgpd / bgp_nht.c
1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #include "command.h"
24 #include "thread.h"
25 #include "prefix.h"
26 #include "zclient.h"
27 #include "stream.h"
28 #include "network.h"
29 #include "log.h"
30 #include "memory.h"
31 #include "nexthop.h"
32 #include "vrf.h"
33 #include "filter.h"
34 #include "nexthop_group.h"
35
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_errors.h"
43 #include "bgpd/bgp_nht.h"
44 #include "bgpd/bgp_fsm.h"
45 #include "bgpd/bgp_zebra.h"
46 #include "bgpd/bgp_flowspec_util.h"
47 #include "bgpd/bgp_evpn.h"
48 #include "bgpd/bgp_rd.h"
49
50 extern struct zclient *zclient;
51
52 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
53 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
54 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
55 static void bgp_nht_ifp_initial(struct thread *thread);
56
57 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
58 {
59 return (bgp_zebra_num_connects() == 0
60 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
61 && bnc->nexthop_num > 0));
62 }
63
64 static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
65 struct bgp_path_info *path)
66 {
67 struct interface *ifp = NULL;
68 struct nexthop *nexthop;
69 struct bgp_interface *iifp;
70 struct peer *peer;
71
72 if (!path->extra || !path->extra->peer_orig)
73 return false;
74
75 peer = path->extra->peer_orig;
76
77 /* only connected ebgp peers are valid */
78 if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
79 CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
80 CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
81 return false;
82
83 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
84 if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
85 nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
86 nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
87 ifp = if_lookup_by_index(
88 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
89 bnc->bgp->vrf_id);
90 }
91 if (!ifp)
92 continue;
93 iifp = ifp->info;
94 if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
95 return true;
96 }
97 return false;
98 }
99
100 static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
101 struct bgp_path_info *path)
102 {
103 struct interface *ifp = NULL;
104 struct nexthop *nexthop;
105
106 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
107 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
108 ifp = if_lookup_by_index(
109 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
110 bnc->bgp->vrf_id);
111 if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
112 ifp->ll_type == ZEBRA_LLT_IP6GRE))
113 break;
114 }
115 }
116 if (!ifp)
117 return false;
118
119 if (CHECK_FLAG(path->attr->rmap_change_flags,
120 BATTR_RMAP_L3VPN_ACCEPT_GRE))
121 return true;
122
123 return false;
124 }
125
126 static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
127 struct bgp_path_info *path)
128 {
129 /*
130 * - In the case of MPLS-VPN, the label is learned from LDP or other
131 * protocols, and nexthop tracking is enabled for the label.
132 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
133 * - In the case of SRv6-VPN, we need to track the reachability to the
134 * SID (in other words, IPv6 address). As in MPLS, we need to record
135 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
136 * currently not implemented, and this function assumes that all
137 * Transit routes for SRv6-VPN are valid.
138 * - Otherwise check for mpls-gre acceptance
139 */
140 return (bgp_zebra_num_connects() == 0 ||
141 (bnc && (bnc->nexthop_num > 0 &&
142 (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
143 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
144 bnc->bgp->srv6_enabled ||
145 bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
146 bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
147 }
148
149 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
150 {
151 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
152 if (BGP_DEBUG(nht, NHT))
153 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
154 &bnc->prefix, bnc->ifindex, bnc->srte_color,
155 bnc->bgp->name_pretty);
156 /* only unregister if this is the last nh for this prefix*/
157 if (!bnc_existing_for_prefix(bnc))
158 unregister_zebra_rnh(bnc);
159 bnc_free(bnc);
160 }
161 }
162
163 void bgp_unlink_nexthop(struct bgp_path_info *path)
164 {
165 struct bgp_nexthop_cache *bnc = path->nexthop;
166
167 if (!bnc)
168 return;
169
170 path_nh_map(path, NULL, false);
171
172 bgp_unlink_nexthop_check(bnc);
173 }
174
175 void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
176 {
177 struct prefix pp;
178 struct prefix pt;
179 struct bgp_nexthop_cache *bncp, *bnct;
180 afi_t afi;
181 ifindex_t ifindex = 0;
182
183 if (!sockunion2hostprefix(&from->su, &pp))
184 return;
185
186 /*
187 * Gather the ifindex for if up/down events to be
188 * tagged into this fun
189 */
190 if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
191 ifindex = from->su.sin6.sin6_scope_id;
192
193 afi = family2afi(pp.family);
194 bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
195
196 if (!sockunion2hostprefix(&to->su, &pt))
197 return;
198
199 /*
200 * Gather the ifindex for if up/down events to be
201 * tagged into this fun
202 */
203 ifindex = 0;
204 if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
205 ifindex = to->su.sin6.sin6_scope_id;
206 bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
207
208 if (bnct != bncp)
209 return;
210
211 if (bnct)
212 bnct->nht_info = to;
213 }
214
215 /*
216 * Returns the bnc whose bnc->nht_info matches the LL peer by
217 * looping through the IPv6 nexthop table
218 */
219 static struct bgp_nexthop_cache *
220 bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
221 {
222 struct bgp_nexthop_cache *bnc;
223
224 frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
225 bnc) {
226 if (bnc->nht_info == peer) {
227 if (BGP_DEBUG(nht, NHT)) {
228 zlog_debug(
229 "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
230 &bnc->prefix, bnc->ifindex,
231 bnc->srte_color, bnc, peer->host,
232 peer->bgp->name_pretty, peer);
233 }
234 return bnc;
235 }
236 }
237
238 if (BGP_DEBUG(nht, NHT))
239 zlog_debug(
240 "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
241 peer->host, peer->bgp->name_pretty, peer);
242
243 return NULL;
244 }
245
246 void bgp_unlink_nexthop_by_peer(struct peer *peer)
247 {
248 struct prefix p;
249 struct bgp_nexthop_cache *bnc;
250 afi_t afi = family2afi(peer->su.sa.sa_family);
251 ifindex_t ifindex = 0;
252
253 if (!sockunion2hostprefix(&peer->su, &p)) {
254 /*
255 * In scenarios where unnumbered BGP session is brought
256 * down by shutting down the interface before unconfiguring
257 * the BGP neighbor, neighbor information in peer->su.sa
258 * will be cleared when the interface is shutdown. So
259 * during the deletion of unnumbered bgp peer, above check
260 * will return true. Therefore, in this case,BGP needs to
261 * find the bnc whose bnc->nht_info matches the
262 * peer being deleted and free it.
263 */
264 bnc = bgp_find_ipv6_nexthop_matching_peer(peer);
265 } else {
266 /*
267 * Gather the ifindex for if up/down events to be
268 * tagged into this fun
269 */
270 if (afi == AFI_IP6 &&
271 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
272 ifindex = peer->su.sin6.sin6_scope_id;
273 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0,
274 ifindex);
275 }
276
277 if (!bnc)
278 return;
279
280 /* cleanup the peer reference */
281 bnc->nht_info = NULL;
282
283 bgp_unlink_nexthop_check(bnc);
284 }
285
286 /*
287 * A route and its nexthop might belong to different VRFs. Therefore,
288 * we need both the bgp_route and bgp_nexthop pointers.
289 */
290 int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
291 afi_t afi, safi_t safi, struct bgp_path_info *pi,
292 struct peer *peer, int connected,
293 const struct prefix *orig_prefix)
294 {
295 struct bgp_nexthop_cache_head *tree = NULL;
296 struct bgp_nexthop_cache *bnc;
297 struct prefix p;
298 uint32_t srte_color = 0;
299 int is_bgp_static_route = 0;
300 ifindex_t ifindex = 0;
301
302 if (pi) {
303 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
304 && (pi->sub_type == BGP_ROUTE_STATIC))
305 ? 1
306 : 0;
307
308 /* Since Extended Next-hop Encoding (RFC5549) support, we want
309 to derive
310 address-family from the next-hop. */
311 if (!is_bgp_static_route)
312 afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
313 : AFI_IP;
314
315 /* Validation for the ipv4 mapped ipv6 nexthop. */
316 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
317 afi = AFI_IP;
318 }
319
320 /* This will return true if the global IPv6 NH is a link local
321 * addr */
322 if (make_prefix(afi, pi, &p) < 0)
323 return 1;
324
325 if (!is_bgp_static_route && orig_prefix
326 && prefix_same(&p, orig_prefix)) {
327 if (BGP_DEBUG(nht, NHT)) {
328 zlog_debug(
329 "%s(%pFX): prefix loops through itself",
330 __func__, &p);
331 }
332 return 0;
333 }
334
335 srte_color = pi->attr->srte_color;
336 } else if (peer) {
337 /*
338 * Gather the ifindex for if up/down events to be
339 * tagged into this fun
340 */
341 if (afi == AFI_IP6 &&
342 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
343 ifindex = peer->su.sin6.sin6_scope_id;
344 if (ifindex == 0) {
345 if (BGP_DEBUG(nht, NHT)) {
346 zlog_debug(
347 "%s: Unable to locate ifindex, waiting till we have one",
348 peer->conf_if);
349 }
350 return 0;
351 }
352 }
353
354 if (!sockunion2hostprefix(&peer->su, &p)) {
355 if (BGP_DEBUG(nht, NHT)) {
356 zlog_debug(
357 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
358 __func__, afi, AFI_IP, AFI_IP6);
359 }
360 return 0;
361 }
362 } else
363 return 0;
364
365 if (is_bgp_static_route)
366 tree = &bgp_nexthop->import_check_table[afi];
367 else
368 tree = &bgp_nexthop->nexthop_cache_table[afi];
369
370 bnc = bnc_find(tree, &p, srte_color, ifindex);
371 if (!bnc) {
372 bnc = bnc_new(tree, &p, srte_color, ifindex);
373 bnc->bgp = bgp_nexthop;
374 if (BGP_DEBUG(nht, NHT))
375 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
376 &bnc->prefix, bnc->ifindex, bnc->srte_color,
377 bnc->bgp->name_pretty, peer);
378 } else {
379 if (BGP_DEBUG(nht, NHT))
380 zlog_debug(
381 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
382 &bnc->prefix, bnc->ifindex,
383 bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
384 bnc->path_count, bnc->nht_info);
385 }
386
387 if (pi && is_route_parent_evpn(pi))
388 bnc->is_evpn_gwip_nexthop = true;
389
390 if (is_bgp_static_route) {
391 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
392
393 /* If we're toggling the type, re-register */
394 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
395 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
396 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
397 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
398 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
399 } else if ((!CHECK_FLAG(bgp_route->flags,
400 BGP_FLAG_IMPORT_CHECK))
401 && CHECK_FLAG(bnc->flags,
402 BGP_STATIC_ROUTE_EXACT_MATCH)) {
403 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
404 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
405 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
406 }
407 }
408 /* When nexthop is already known, but now requires 'connected'
409 * resolution,
410 * re-register it. The reverse scenario where the nexthop currently
411 * requires
412 * 'connected' resolution does not need a re-register (i.e., we treat
413 * 'connected-required' as an override) except in the scenario where
414 * this
415 * is actually a case of tracking a peer for connectivity (e.g., after
416 * disable connected-check).
417 * NOTE: We don't track the number of paths separately for 'connected-
418 * required' vs 'connected-not-required' as this change is not a common
419 * scenario.
420 */
421 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
422 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
423 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
424 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
425 } else if (peer && !connected
426 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
427 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
428 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
429 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
430 }
431 if (peer && (bnc->ifindex != ifindex)) {
432 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
433 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
434 bnc->ifindex = ifindex;
435 }
436 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
437 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
438 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
439 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
440 && !is_default_host_route(&bnc->prefix))
441 register_zebra_rnh(bnc);
442
443 if (pi && pi->nexthop != bnc) {
444 /* Unlink from existing nexthop cache, if any. This will also
445 * free
446 * the nexthop cache entry, if appropriate.
447 */
448 bgp_unlink_nexthop(pi);
449
450 /* updates NHT pi list reference */
451 path_nh_map(pi, bnc, true);
452
453 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
454 (bgp_path_info_extra_get(pi))->igpmetric = bnc->metric;
455 else if (pi->extra)
456 pi->extra->igpmetric = 0;
457 } else if (peer) {
458 /*
459 * Let's not accidentally save the peer data for a peer
460 * we are going to throw away in a second or so.
461 * When we come back around we'll fix up this
462 * data properly in replace_nexthop_by_peer
463 */
464 if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
465 bnc->nht_info = (void *)peer; /* NHT peer reference */
466 }
467
468 /*
469 * We are cheating here. Views have no associated underlying
470 * ability to detect nexthops. So when we have a view
471 * just tell everyone the nexthop is valid
472 */
473 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
474 return 1;
475 else if (safi == SAFI_UNICAST && pi &&
476 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
477 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
478 return bgp_isvalid_nexthop_for_mpls(bnc, pi);
479 else
480 return (bgp_isvalid_nexthop(bnc));
481 }
482
483 void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
484 {
485 struct bgp_nexthop_cache *bnc;
486 struct prefix p;
487 ifindex_t ifindex = 0;
488
489 if (!peer)
490 return;
491
492 /*
493 * In case the below check evaluates true and if
494 * the bnc has not been freed at this point, then
495 * we might have to do something similar to what's
496 * done in bgp_unlink_nexthop_by_peer(). Since
497 * bgp_unlink_nexthop_by_peer() loops through the
498 * nodes of V6 nexthop cache to find the bnc, it is
499 * currently not being called here.
500 */
501 if (!sockunion2hostprefix(&peer->su, &p))
502 return;
503 /*
504 * Gather the ifindex for if up/down events to be
505 * tagged into this fun
506 */
507 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
508 ifindex = peer->su.sin6.sin6_scope_id;
509 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
510 &p, 0, ifindex);
511 if (!bnc) {
512 if (BGP_DEBUG(nht, NHT))
513 zlog_debug(
514 "Cannot find connected NHT node for peer %s(%s)",
515 peer->host, peer->bgp->name_pretty);
516 return;
517 }
518
519 if (bnc->nht_info != peer) {
520 if (BGP_DEBUG(nht, NHT))
521 zlog_debug(
522 "Connected NHT %p node for peer %s(%s) points to %p",
523 bnc, peer->host, bnc->bgp->name_pretty,
524 bnc->nht_info);
525 return;
526 }
527
528 bnc->nht_info = NULL;
529
530 if (LIST_EMPTY(&(bnc->paths))) {
531 if (BGP_DEBUG(nht, NHT))
532 zlog_debug(
533 "Freeing connected NHT node %p for peer %s(%s)",
534 bnc, peer->host, bnc->bgp->name_pretty);
535 unregister_zebra_rnh(bnc);
536 bnc_free(bnc);
537 }
538 }
539
540 static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
541 struct zapi_route *nhr,
542 bool import_check)
543 {
544 struct nexthop *nexthop;
545 struct nexthop *oldnh;
546 struct nexthop *nhlist_head = NULL;
547 struct nexthop *nhlist_tail = NULL;
548 int i;
549 bool evpn_resolved = false;
550
551 bnc->last_update = monotime(NULL);
552 bnc->change_flags = 0;
553
554 /* debug print the input */
555 if (BGP_DEBUG(nht, NHT)) {
556 char bnc_buf[BNC_FLAG_DUMP_SIZE];
557
558 zlog_debug(
559 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
560 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
561 bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
562 nhr->nexthop_num, bnc->nexthop_num,
563 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
564 sizeof(bnc_buf)));
565 }
566
567 if (nhr->metric != bnc->metric)
568 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
569
570 if (nhr->nexthop_num != bnc->nexthop_num)
571 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
572
573 if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
574 !prefix_same(&bnc->prefix, &nhr->prefix))) {
575 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
576 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
577 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
578 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
579
580 bnc_nexthop_free(bnc);
581 bnc->nexthop = NULL;
582
583 if (BGP_DEBUG(nht, NHT))
584 zlog_debug(
585 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
586 __func__, &bnc->prefix, &nhr->prefix);
587 } else if (nhr->nexthop_num) {
588 struct peer *peer = bnc->nht_info;
589
590 /* notify bgp fsm if nbr ip goes from invalid->valid */
591 if (!bnc->nexthop_num)
592 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
593
594 if (!bnc->is_evpn_gwip_nexthop)
595 bnc->flags |= BGP_NEXTHOP_VALID;
596 bnc->metric = nhr->metric;
597 bnc->nexthop_num = nhr->nexthop_num;
598
599 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
600
601 for (i = 0; i < nhr->nexthop_num; i++) {
602 int num_labels = 0;
603
604 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
605
606 /*
607 * Turn on RA for the v6 nexthops
608 * we receive from bgp. This is to allow us
609 * to work with v4 routing over v6 nexthops
610 */
611 if (peer && !peer->ifp
612 && CHECK_FLAG(peer->flags,
613 PEER_FLAG_CAPABILITY_ENHE)
614 && nhr->prefix.family == AF_INET6
615 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
616 struct interface *ifp;
617
618 ifp = if_lookup_by_index(nexthop->ifindex,
619 nexthop->vrf_id);
620 if (ifp)
621 zclient_send_interface_radv_req(
622 zclient, nexthop->vrf_id, ifp,
623 true,
624 BGP_UNNUM_DEFAULT_RA_INTERVAL);
625 }
626 /* There is at least one label-switched path */
627 if (nexthop->nh_label &&
628 nexthop->nh_label->num_labels) {
629
630 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
631 num_labels = nexthop->nh_label->num_labels;
632 }
633
634 if (BGP_DEBUG(nht, NHT)) {
635 char buf[NEXTHOP_STRLEN];
636 zlog_debug(
637 " nhop via %s (%d labels)",
638 nexthop2str(nexthop, buf, sizeof(buf)),
639 num_labels);
640 }
641
642 if (nhlist_tail) {
643 nhlist_tail->next = nexthop;
644 nhlist_tail = nexthop;
645 } else {
646 nhlist_tail = nexthop;
647 nhlist_head = nexthop;
648 }
649
650 /* No need to evaluate the nexthop if we have already
651 * determined
652 * that there has been a change.
653 */
654 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
655 continue;
656
657 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
658 if (nexthop_same(oldnh, nexthop))
659 break;
660
661 if (!oldnh)
662 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
663 }
664 bnc_nexthop_free(bnc);
665 bnc->nexthop = nhlist_head;
666
667 /*
668 * Gateway IP nexthop is L3 reachable. Mark it as
669 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
670 * remote EVPN RT-2.
671 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
672 * When its mapping with EVPN RT-2 is established, unset
673 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
674 */
675 if (bnc->is_evpn_gwip_nexthop) {
676 evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
677
678 if (BGP_DEBUG(nht, NHT))
679 zlog_debug(
680 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
681 &bnc->prefix,
682 (evpn_resolved ? "successful"
683 : "failed"));
684
685 if (evpn_resolved) {
686 bnc->flags |= BGP_NEXTHOP_VALID;
687 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
688 bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
689 } else {
690 bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
691 bnc->flags &= ~BGP_NEXTHOP_VALID;
692 }
693 }
694 } else {
695 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
696 bnc->flags &= ~BGP_NEXTHOP_VALID;
697 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
698 bnc->nexthop_num = nhr->nexthop_num;
699
700 /* notify bgp fsm if nbr ip goes from valid->invalid */
701 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
702
703 bnc_nexthop_free(bnc);
704 bnc->nexthop = NULL;
705 }
706
707 evaluate_paths(bnc);
708 }
709
710 static void bgp_nht_ifp_table_handle(struct bgp *bgp,
711 struct bgp_nexthop_cache_head *table,
712 struct interface *ifp, bool up)
713 {
714 struct bgp_nexthop_cache *bnc;
715
716 frr_each (bgp_nexthop_cache, table, bnc) {
717 if (bnc->ifindex != ifp->ifindex)
718 continue;
719
720 bnc->last_update = monotime(NULL);
721 bnc->change_flags = 0;
722
723 /*
724 * For interface based routes ( ala the v6 LL routes
725 * that this was written for ) the metric received
726 * for the connected route is 0 not 1.
727 */
728 bnc->metric = 0;
729 if (up) {
730 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
731 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
732 bnc->nexthop_num = 1;
733 } else {
734 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
735 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
736 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
737 bnc->nexthop_num = 0;
738 }
739
740 evaluate_paths(bnc);
741 }
742 }
743 static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
744 {
745 struct bgp *bgp;
746
747 bgp = ifp->vrf->info;
748 if (!bgp)
749 return;
750
751 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
752 up);
753 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
754 up);
755 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
756 up);
757 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
758 up);
759 }
760
/* Tell nexthop tracking that an interface came up. */
void bgp_nht_ifp_up(struct interface *ifp)
{
	const bool up = true;

	bgp_nht_ifp_handle(ifp, up);
}
765
/* Tell nexthop tracking that an interface went down. */
void bgp_nht_ifp_down(struct interface *ifp)
{
	const bool up = false;

	bgp_nht_ifp_handle(ifp, up);
}
770
771 static void bgp_nht_ifp_initial(struct thread *thread)
772 {
773 ifindex_t ifindex = THREAD_VAL(thread);
774 struct bgp *bgp = THREAD_ARG(thread);
775 struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
776
777 if (!ifp)
778 return;
779
780 if (BGP_DEBUG(nht, NHT))
781 zlog_debug(
782 "Handle NHT initial update for Intf %s(%d) status %s",
783 ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
784
785 if (if_is_up(ifp))
786 bgp_nht_ifp_up(ifp);
787 else
788 bgp_nht_ifp_down(ifp);
789 }
790
791 /*
792 * So the bnc code has the ability to handle interface up/down
793 * events to properly handle v6 LL peering.
794 * What is happening here:
795 * The event system for peering expects the nht code to
796 * report on the tracking events after we move to active
797 * So let's give the system a chance to report on that event
798 * in a manner that is expected.
799 */
800 void bgp_nht_interface_events(struct peer *peer)
801 {
802 struct bgp *bgp = peer->bgp;
803 struct bgp_nexthop_cache_head *table;
804 struct bgp_nexthop_cache *bnc;
805 struct prefix p;
806 ifindex_t ifindex = 0;
807
808 if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
809 return;
810
811 if (!sockunion2hostprefix(&peer->su, &p))
812 return;
813 /*
814 * Gather the ifindex for if up/down events to be
815 * tagged into this fun
816 */
817 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
818 ifindex = peer->su.sin6.sin6_scope_id;
819
820 table = &bgp->nexthop_cache_table[AFI_IP6];
821 bnc = bnc_find(table, &p, 0, ifindex);
822 if (!bnc)
823 return;
824
825 if (bnc->ifindex)
826 thread_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
827 bnc->ifindex, NULL);
828 }
829
830 void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
831 {
832 struct bgp_nexthop_cache_head *tree = NULL;
833 struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
834 struct bgp *bgp;
835 struct prefix match;
836 struct zapi_route nhr;
837 afi_t afi;
838
839 bgp = bgp_lookup_by_vrf_id(vrf_id);
840 if (!bgp) {
841 flog_err(
842 EC_BGP_NH_UPD,
843 "parse nexthop update: instance not found for vrf_id %u",
844 vrf_id);
845 return;
846 }
847
848 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
849 zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
850 bgp->name_pretty);
851 return;
852 }
853
854 afi = family2afi(match.family);
855 tree = &bgp->nexthop_cache_table[afi];
856
857 bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
858 if (!bnc_nhc) {
859 if (BGP_DEBUG(nht, NHT))
860 zlog_debug(
861 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
862 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
863 } else
864 bgp_process_nexthop_update(bnc_nhc, &nhr, false);
865
866 tree = &bgp->import_check_table[afi];
867
868 bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
869 if (!bnc_import) {
870 if (BGP_DEBUG(nht, NHT))
871 zlog_debug(
872 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
873 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
874 } else
875 bgp_process_nexthop_update(bnc_import, &nhr, true);
876
877 /*
878 * HACK: if any BGP route is dependant on an SR-policy that doesn't
879 * exist, zebra will never send NH updates relative to that policy. In
880 * that case, whenever we receive an update about a colorless NH, update
881 * the corresponding colorful NHs that share the same endpoint but that
882 * are inactive. This ugly hack should work around the problem at the
883 * cost of a performance pernalty. Long term, what should be done is to
884 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
885 * which should provide a better infrastructure to solve this issue in
886 * a more efficient and elegant way.
887 */
888 if (nhr.srte_color == 0 && bnc_nhc) {
889 struct bgp_nexthop_cache *bnc_iter;
890
891 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
892 bnc_iter) {
893 if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
894 bnc_iter->srte_color == 0 ||
895 CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
896 continue;
897
898 bgp_process_nexthop_update(bnc_iter, &nhr, false);
899 }
900 }
901 }
902
903 /*
904 * Cleanup nexthop registration and status information for BGP nexthops
905 * pertaining to this VRF. This is invoked upon VRF deletion.
906 */
907 void bgp_cleanup_nexthops(struct bgp *bgp)
908 {
909 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
910 struct bgp_nexthop_cache *bnc;
911
912 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
913 bnc) {
914 /* Clear relevant flags. */
915 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
916 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
917 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
918 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
919 }
920 }
921 }
922
923 /**
924 * make_prefix - make a prefix structure from the path (essentially
925 * path's node.
926 */
927 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
928 {
929
930 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
931 && (pi->sub_type == BGP_ROUTE_STATIC))
932 ? 1
933 : 0;
934 struct bgp_dest *net = pi->net;
935 const struct prefix *p_orig = bgp_dest_get_prefix(net);
936 struct in_addr ipv4;
937
938 if (p_orig->family == AF_FLOWSPEC) {
939 if (!pi->peer)
940 return -1;
941 return bgp_flowspec_get_first_nh(pi->peer->bgp,
942 pi, p, afi);
943 }
944 memset(p, 0, sizeof(struct prefix));
945 switch (afi) {
946 case AFI_IP:
947 p->family = AF_INET;
948 if (is_bgp_static) {
949 p->u.prefix4 = p_orig->u.prefix4;
950 p->prefixlen = p_orig->prefixlen;
951 } else {
952 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
953 ipv4_mapped_ipv6_to_ipv4(
954 &pi->attr->mp_nexthop_global, &ipv4);
955 p->u.prefix4 = ipv4;
956 p->prefixlen = IPV4_MAX_BITLEN;
957 } else {
958 if (p_orig->family == AF_EVPN)
959 p->u.prefix4 =
960 pi->attr->mp_nexthop_global_in;
961 else
962 p->u.prefix4 = pi->attr->nexthop;
963 p->prefixlen = IPV4_MAX_BITLEN;
964 }
965 }
966 break;
967 case AFI_IP6:
968 p->family = AF_INET6;
969
970 if (is_bgp_static) {
971 p->u.prefix6 = p_orig->u.prefix6;
972 p->prefixlen = p_orig->prefixlen;
973 } else {
974 /* If we receive MP_REACH nexthop with ::(LL)
975 * or LL(LL), use LL address as nexthop cache.
976 */
977 if (pi->attr->mp_nexthop_len
978 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
979 && (IN6_IS_ADDR_UNSPECIFIED(
980 &pi->attr->mp_nexthop_global)
981 || IN6_IS_ADDR_LINKLOCAL(
982 &pi->attr->mp_nexthop_global)))
983 p->u.prefix6 = pi->attr->mp_nexthop_local;
984 /* If we receive MR_REACH with (GA)::(LL)
985 * then check for route-map to choose GA or LL
986 */
987 else if (pi->attr->mp_nexthop_len
988 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
989 if (pi->attr->mp_nexthop_prefer_global)
990 p->u.prefix6 =
991 pi->attr->mp_nexthop_global;
992 else
993 p->u.prefix6 =
994 pi->attr->mp_nexthop_local;
995 } else
996 p->u.prefix6 = pi->attr->mp_nexthop_global;
997 p->prefixlen = IPV6_MAX_BITLEN;
998 }
999 break;
1000 default:
1001 if (BGP_DEBUG(nht, NHT)) {
1002 zlog_debug(
1003 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
1004 __func__, afi, AFI_IP, AFI_IP6);
1005 }
1006 break;
1007 }
1008 return 0;
1009 }
1010
1011 /**
1012 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
1013 * command to Zebra.
1014 * ARGUMENTS:
1015 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1016 * int command -- command to send to zebra
1017 * RETURNS:
1018 * void.
1019 */
1020 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
1021 {
1022 bool exact_match = false;
1023 bool resolve_via_default = false;
1024 int ret;
1025
1026 if (!zclient)
1027 return;
1028
1029 /* Don't try to register if Zebra doesn't know of this instance. */
1030 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
1031 if (BGP_DEBUG(zebra, ZEBRA))
1032 zlog_debug(
1033 "%s: No zebra instance to talk to, not installing NHT entry",
1034 __func__);
1035 return;
1036 }
1037
1038 if (!bgp_zebra_num_connects()) {
1039 if (BGP_DEBUG(zebra, ZEBRA))
1040 zlog_debug(
1041 "%s: We have not connected yet, cannot send nexthops",
1042 __func__);
1043 }
1044 if (command == ZEBRA_NEXTHOP_REGISTER) {
1045 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
1046 exact_match = true;
1047 if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
1048 resolve_via_default = true;
1049 }
1050
1051 if (BGP_DEBUG(zebra, ZEBRA))
1052 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
1053 zserv_command_string(command), &bnc->prefix,
1054 bnc->bgp->name_pretty);
1055
1056 ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1057 exact_match, resolve_via_default,
1058 bnc->bgp->vrf_id);
1059 if (ret == ZCLIENT_SEND_FAILURE) {
1060 flog_warn(EC_BGP_ZEBRA_SEND,
1061 "sendmsg_nexthop: zclient_send_message() failed");
1062 return;
1063 }
1064
1065 if (command == ZEBRA_NEXTHOP_REGISTER)
1066 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1067 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
1068 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1069 return;
1070 }
1071
1072 /**
1073 * register_zebra_rnh - register a NH/route with Zebra for notification
1074 * when the route or the route to the nexthop changes.
1075 * ARGUMENTS:
1076 * struct bgp_nexthop_cache *bnc
1077 * RETURNS:
1078 * void.
1079 */
1080 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
1081 {
1082 /* Check if we have already registered */
1083 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1084 return;
1085
1086 if (bnc->ifindex) {
1087 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1088 return;
1089 }
1090
1091 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
1092 }
1093
1094 /**
1095 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1096 * ARGUMENTS:
1097 * struct bgp_nexthop_cache *bnc
1098 * RETURNS:
1099 * void.
1100 */
1101 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
1102 {
1103 /* Check if we have already registered */
1104 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1105 return;
1106
1107 if (bnc->ifindex) {
1108 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1109 return;
1110 }
1111
1112 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
1113 }
1114
1115 /**
1116 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1117 * ARGUMENTS:
1118 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1119 * RETURNS:
1120 * void.
1121 */
1122 void evaluate_paths(struct bgp_nexthop_cache *bnc)
1123 {
1124 struct bgp_dest *dest;
1125 struct bgp_path_info *path;
1126 int afi;
1127 struct peer *peer = (struct peer *)bnc->nht_info;
1128 struct bgp_table *table;
1129 safi_t safi;
1130 struct bgp *bgp_path;
1131 const struct prefix *p;
1132
1133 if (BGP_DEBUG(nht, NHT)) {
1134 char bnc_buf[BNC_FLAG_DUMP_SIZE];
1135 char chg_buf[BNC_FLAG_DUMP_SIZE];
1136
1137 zlog_debug(
1138 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1139 &bnc->prefix, bnc->ifindex, bnc->srte_color,
1140 bnc->bgp->name_pretty,
1141 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1142 sizeof(bnc_buf)),
1143 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1144 sizeof(bnc_buf)));
1145 }
1146
1147 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
1148 if (!(path->type == ZEBRA_ROUTE_BGP
1149 && ((path->sub_type == BGP_ROUTE_NORMAL)
1150 || (path->sub_type == BGP_ROUTE_STATIC)
1151 || (path->sub_type == BGP_ROUTE_IMPORTED))))
1152 continue;
1153
1154 dest = path->net;
1155 assert(dest && bgp_dest_table(dest));
1156 p = bgp_dest_get_prefix(dest);
1157 afi = family2afi(p->family);
1158 table = bgp_dest_table(dest);
1159 safi = table->safi;
1160
1161 /*
1162 * handle routes from other VRFs (they can have a
1163 * nexthop in THIS VRF). bgp_path is the bgp instance
1164 * that owns the route referencing this nexthop.
1165 */
1166 bgp_path = table->bgp;
1167
1168 /*
1169 * Path becomes valid/invalid depending on whether the nexthop
1170 * reachable/unreachable.
1171 *
1172 * In case of unicast routes that were imported from vpn
1173 * and that have labels, they are valid only if there are
1174 * nexthops with labels
1175 *
1176 * If the nexthop is EVPN gateway-IP,
1177 * do not check for a valid label.
1178 */
1179
1180 bool bnc_is_valid_nexthop = false;
1181 bool path_valid = false;
1182
1183 if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1184 && path->extra && path->extra->num_labels
1185 && (path->attr->evpn_overlay.type
1186 != OVERLAY_INDEX_GATEWAY_IP)) {
1187 bnc_is_valid_nexthop =
1188 bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1189 : false;
1190 } else {
1191 if (bgp_update_martian_nexthop(
1192 bnc->bgp, afi, safi, path->type,
1193 path->sub_type, path->attr, dest)) {
1194 if (BGP_DEBUG(nht, NHT))
1195 zlog_debug(
1196 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1197 __func__, dest, bgp_path->name);
1198 } else
1199 bnc_is_valid_nexthop =
1200 bgp_isvalid_nexthop(bnc) ? true : false;
1201 }
1202
1203 if (BGP_DEBUG(nht, NHT)) {
1204 if (dest->pdest)
1205 zlog_debug(
1206 "... eval path %d/%d %pBD RD %pRD %s flags 0x%x",
1207 afi, safi, dest,
1208 (struct prefix_rd *)bgp_dest_get_prefix(
1209 dest->pdest),
1210 bgp_path->name_pretty, path->flags);
1211 else
1212 zlog_debug(
1213 "... eval path %d/%d %pBD %s flags 0x%x",
1214 afi, safi, dest, bgp_path->name_pretty,
1215 path->flags);
1216 }
1217
1218 /* Skip paths marked for removal or as history. */
1219 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1220 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1221 continue;
1222
1223 /* Copy the metric to the path. Will be used for bestpath
1224 * computation */
1225 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
1226 (bgp_path_info_extra_get(path))->igpmetric =
1227 bnc->metric;
1228 else if (path->extra)
1229 path->extra->igpmetric = 0;
1230
1231 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
1232 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1233 || path->attr->srte_color != 0)
1234 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
1235
1236 path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
1237 if (path_valid != bnc_is_valid_nexthop) {
1238 if (path_valid) {
1239 /* No longer valid, clear flag; also for EVPN
1240 * routes, unimport from VRFs if needed.
1241 */
1242 bgp_aggregate_decrement(bgp_path, p, path, afi,
1243 safi);
1244 bgp_path_info_unset_flag(dest, path,
1245 BGP_PATH_VALID);
1246 if (safi == SAFI_EVPN &&
1247 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1248 bgp_evpn_unimport_route(bgp_path,
1249 afi, safi, bgp_dest_get_prefix(dest), path);
1250 } else {
1251 /* Path becomes valid, set flag; also for EVPN
1252 * routes, import from VRFs if needed.
1253 */
1254 bgp_path_info_set_flag(dest, path,
1255 BGP_PATH_VALID);
1256 bgp_aggregate_increment(bgp_path, p, path, afi,
1257 safi);
1258 if (safi == SAFI_EVPN &&
1259 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1260 bgp_evpn_import_route(bgp_path,
1261 afi, safi, bgp_dest_get_prefix(dest), path);
1262 }
1263 }
1264
1265 bgp_process(bgp_path, dest, afi, safi);
1266 }
1267
1268 if (peer) {
1269 int valid_nexthops = bgp_isvalid_nexthop(bnc);
1270
1271 if (valid_nexthops) {
1272 /*
1273 * Peering cannot occur across a blackhole nexthop
1274 */
1275 if (bnc->nexthop_num == 1 && bnc->nexthop
1276 && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1277 peer->last_reset = PEER_DOWN_WAITING_NHT;
1278 valid_nexthops = 0;
1279 } else
1280 peer->last_reset = PEER_DOWN_WAITING_OPEN;
1281 } else
1282 peer->last_reset = PEER_DOWN_WAITING_NHT;
1283
1284 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1285 if (BGP_DEBUG(nht, NHT))
1286 zlog_debug(
1287 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1288 __func__, peer->host,
1289 peer->bgp->name_pretty,
1290 !!valid_nexthops);
1291 bgp_fsm_nht_update(peer, !!valid_nexthops);
1292 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1293 }
1294 }
1295
1296 RESET_FLAG(bnc->change_flags);
1297 }
1298
1299 /**
1300 * path_nh_map - make or break path-to-nexthop association.
1301 * ARGUMENTS:
1302 * path - pointer to the path structure
1303 * bnc - pointer to the nexthop structure
1304 * make - if set, make the association. if unset, just break the existing
1305 * association.
1306 */
1307 void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1308 bool make)
1309 {
1310 if (path->nexthop) {
1311 LIST_REMOVE(path, nh_thread);
1312 path->nexthop->path_count--;
1313 path->nexthop = NULL;
1314 }
1315 if (make) {
1316 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1317 path->nexthop = bnc;
1318 path->nexthop->path_count++;
1319 }
1320 }
1321
1322 /*
1323 * This function is called to register nexthops to zebra
1324 * as that we may have tried to install the nexthops
1325 * before we actually have a zebra connection
1326 */
1327 void bgp_nht_register_nexthops(struct bgp *bgp)
1328 {
1329 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1330 struct bgp_nexthop_cache *bnc;
1331
1332 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1333 bnc) {
1334 register_zebra_rnh(bnc);
1335 }
1336 }
1337 }
1338
1339 void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1340 {
1341 struct bgp *bgp;
1342 struct bgp_nexthop_cache *bnc;
1343 struct nexthop *nhop;
1344 struct interface *ifp;
1345 struct prefix p;
1346 ifindex_t ifindex = 0;
1347
1348 if (peer->ifp)
1349 return;
1350
1351 bgp = peer->bgp;
1352 if (!sockunion2hostprefix(&peer->su, &p)) {
1353 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1354 __func__, peer->host);
1355 return;
1356 }
1357
1358 if (p.family != AF_INET6)
1359 return;
1360 /*
1361 * Gather the ifindex for if up/down events to be
1362 * tagged into this fun
1363 */
1364 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1365 ifindex = peer->su.sin6.sin6_scope_id;
1366
1367 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1368 if (!bnc)
1369 return;
1370
1371 if (peer != bnc->nht_info)
1372 return;
1373
1374 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1375 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1376
1377 if (!ifp)
1378 continue;
1379
1380 zclient_send_interface_radv_req(zclient,
1381 nhop->vrf_id,
1382 ifp, true,
1383 BGP_UNNUM_DEFAULT_RA_INTERVAL);
1384 }
1385 }
1386
1387 void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1388 {
1389 struct bgp *bgp;
1390 struct bgp_nexthop_cache *bnc;
1391 struct nexthop *nhop;
1392 struct interface *ifp;
1393 struct prefix p;
1394 ifindex_t ifindex = 0;
1395
1396 if (peer->ifp)
1397 return;
1398
1399 bgp = peer->bgp;
1400
1401 if (!sockunion2hostprefix(&peer->su, &p)) {
1402 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1403 __func__, peer->host);
1404 return;
1405 }
1406
1407 if (p.family != AF_INET6)
1408 return;
1409 /*
1410 * Gather the ifindex for if up/down events to be
1411 * tagged into this fun
1412 */
1413 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1414 ifindex = peer->su.sin6.sin6_scope_id;
1415
1416 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1417 if (!bnc)
1418 return;
1419
1420 if (peer != bnc->nht_info)
1421 return;
1422
1423 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1424 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1425
1426 if (!ifp)
1427 continue;
1428
1429 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1430 0);
1431 }
1432 }
1433
/****************************************************************************
 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
 * left to the application using it.
 * PS: Currently EVPN host routes are the only app using L3 NHG for fast
 * failover of remote ES links.
 ***************************************************************************/
/*
 * Bitmap of allocated ids: set up in bgp_l3nhg_init(), released in
 * bgp_l3nhg_finish(). Index 0 is taken at init time.
 */
static bitfield_t bgp_nh_id_bitmap;
/*
 * Offset added to a bitmap index to form the nhg id handed to callers;
 * fetched via zclient_get_nhg_start() in bgp_l3nhg_zebra_init().
 */
static uint32_t bgp_l3nhg_start;
1443
/* XXX - currently we do nothing on the callbacks */
/*
 * First callback handed to nexthop_group_init() in bgp_l3nhg_zebra_init();
 * intentionally empty. Presumably invoked when a nexthop-group named `name`
 * is created - confirm against nexthop_group_init()'s contract.
 */
static void bgp_l3nhg_add_cb(const char *name)
{
}
1448
/*
 * Second callback handed to nexthop_group_init() in bgp_l3nhg_zebra_init();
 * intentionally empty - BGP currently takes no action here.
 */
static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
}
1452
/*
 * Third callback handed to nexthop_group_init() in bgp_l3nhg_zebra_init();
 * intentionally empty - BGP currently takes no action here.
 */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}
1457
/*
 * Fourth callback handed to nexthop_group_init() in bgp_l3nhg_zebra_init();
 * intentionally empty - BGP currently takes no action here.
 */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}
1462
/*
 * Fifth callback handed to nexthop_group_init() in bgp_l3nhg_zebra_init();
 * intentionally empty - BGP currently takes no action here.
 */
static void bgp_l3nhg_del_cb(const char *name)
{
}
1466
1467 static void bgp_l3nhg_zebra_init(void)
1468 {
1469 static bool bgp_l3nhg_zebra_inited;
1470 if (bgp_l3nhg_zebra_inited)
1471 return;
1472
1473 bgp_l3nhg_zebra_inited = true;
1474 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1475 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_modify_cb,
1476 bgp_l3nhg_add_nexthop_cb, bgp_l3nhg_del_nexthop_cb,
1477 bgp_l3nhg_del_cb);
1478 }
1479
1480
1481 void bgp_l3nhg_init(void)
1482 {
1483 uint32_t id_max;
1484
1485 id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1486 bf_init(bgp_nh_id_bitmap, id_max);
1487 bf_assign_zero_index(bgp_nh_id_bitmap);
1488
1489 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1490 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1491 bgp_l3nhg_start + id_max);
1492 }
1493
/* Release the L3 NHG id bitmap that bgp_l3nhg_init() set up. */
void bgp_l3nhg_finish(void)
{
	bf_free(bgp_nh_id_bitmap);
}
1498
1499 uint32_t bgp_l3nhg_id_alloc(void)
1500 {
1501 uint32_t nhg_id = 0;
1502
1503 bgp_l3nhg_zebra_init();
1504 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1505 if (nhg_id)
1506 nhg_id += bgp_l3nhg_start;
1507
1508 return nhg_id;
1509 }
1510
1511 void bgp_l3nhg_id_free(uint32_t nhg_id)
1512 {
1513 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1514 return;
1515
1516 nhg_id -= bgp_l3nhg_start;
1517
1518 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1519 }