]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_nht.c
zebra: Unlock the route node when sending route notifications
[mirror_frr.git] / bgpd / bgp_nht.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* BGP Nexthop tracking
3 * Copyright (C) 2013 Cumulus Networks, Inc.
4 */
5
6 #include <zebra.h>
7
8 #include "command.h"
9 #include "frrevent.h"
10 #include "prefix.h"
11 #include "zclient.h"
12 #include "stream.h"
13 #include "network.h"
14 #include "log.h"
15 #include "memory.h"
16 #include "nexthop.h"
17 #include "vrf.h"
18 #include "filter.h"
19 #include "nexthop_group.h"
20
21 #include "bgpd/bgpd.h"
22 #include "bgpd/bgp_table.h"
23 #include "bgpd/bgp_route.h"
24 #include "bgpd/bgp_attr.h"
25 #include "bgpd/bgp_nexthop.h"
26 #include "bgpd/bgp_debug.h"
27 #include "bgpd/bgp_errors.h"
28 #include "bgpd/bgp_nht.h"
29 #include "bgpd/bgp_fsm.h"
30 #include "bgpd/bgp_zebra.h"
31 #include "bgpd/bgp_flowspec_util.h"
32 #include "bgpd/bgp_evpn.h"
33 #include "bgpd/bgp_rd.h"
34 #include "bgpd/bgp_mplsvpn.h"
35
/* Zebra client handle used by this file; declared extern here, so its
 * definition lives in another translation unit.
 */
36 extern struct zclient *zclient;
37
/* Forward declarations of static helpers that are defined further down
 * in this file but are called before their definitions.
 */
38 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
39 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
40 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
41 static void bgp_nht_ifp_initial(struct event *thread);
42
43 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
44 {
45 return (bgp_zebra_num_connects() == 0
46 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
47 && bnc->nexthop_num > 0));
48 }
49
50 static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
51 struct bgp_path_info *path)
52 {
53 struct interface *ifp = NULL;
54 struct nexthop *nexthop;
55 struct bgp_interface *iifp;
56 struct peer *peer;
57
58 if (!path->extra || !path->extra->peer_orig)
59 return false;
60
61 peer = path->extra->peer_orig;
62
63 /* only connected ebgp peers are valid */
64 if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
65 CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
66 CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
67 return false;
68
69 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
70 if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
71 nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
72 nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
73 ifp = if_lookup_by_index(
74 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
75 bnc->bgp->vrf_id);
76 }
77 if (!ifp)
78 continue;
79 iifp = ifp->info;
80 if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
81 return true;
82 }
83 return false;
84 }
85
86 static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
87 struct bgp_path_info *path)
88 {
89 struct interface *ifp = NULL;
90 struct nexthop *nexthop;
91
92 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
93 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
94 ifp = if_lookup_by_index(
95 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
96 bnc->bgp->vrf_id);
97 if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
98 ifp->ll_type == ZEBRA_LLT_IP6GRE))
99 break;
100 }
101 }
102 if (!ifp)
103 return false;
104
105 if (CHECK_FLAG(path->attr->rmap_change_flags,
106 BATTR_RMAP_L3VPN_ACCEPT_GRE))
107 return true;
108
109 return false;
110 }
111
112 static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
113 struct bgp_path_info *path)
114 {
115 /*
116 * - In the case of MPLS-VPN, the label is learned from LDP or other
117 * protocols, and nexthop tracking is enabled for the label.
118 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
119 * - In the case of SRv6-VPN, we need to track the reachability to the
120 * SID (in other words, IPv6 address). As in MPLS, we need to record
121 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
122 * currently not implemented, and this function assumes that all
123 * Transit routes for SRv6-VPN are valid.
124 * - Otherwise check for mpls-gre acceptance
125 */
126 return (bgp_zebra_num_connects() == 0 ||
127 (bnc && (bnc->nexthop_num > 0 &&
128 (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
129 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
130 bnc->bgp->srv6_enabled ||
131 bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
132 bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
133 }
134
135 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
136 {
137 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
138 if (BGP_DEBUG(nht, NHT))
139 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
140 &bnc->prefix, bnc->ifindex, bnc->srte_color,
141 bnc->bgp->name_pretty);
142 /* only unregister if this is the last nh for this prefix*/
143 if (!bnc_existing_for_prefix(bnc))
144 unregister_zebra_rnh(bnc);
145 bnc_free(bnc);
146 }
147 }
148
149 void bgp_unlink_nexthop(struct bgp_path_info *path)
150 {
151 struct bgp_nexthop_cache *bnc = path->nexthop;
152
153 bgp_mplsvpn_path_nh_label_unlink(path);
154
155 if (!bnc)
156 return;
157
158 path_nh_map(path, NULL, false);
159
160 bgp_unlink_nexthop_check(bnc);
161 }
162
163 void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
164 {
165 struct prefix pp;
166 struct prefix pt;
167 struct bgp_nexthop_cache *bncp, *bnct;
168 afi_t afi;
169 ifindex_t ifindex = 0;
170
171 if (!sockunion2hostprefix(&from->su, &pp))
172 return;
173
174 /*
175 * Gather the ifindex for if up/down events to be
176 * tagged into this fun
177 */
178 if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
179 ifindex = from->su.sin6.sin6_scope_id;
180
181 afi = family2afi(pp.family);
182 bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
183
184 if (!sockunion2hostprefix(&to->su, &pt))
185 return;
186
187 /*
188 * Gather the ifindex for if up/down events to be
189 * tagged into this fun
190 */
191 ifindex = 0;
192 if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
193 ifindex = to->su.sin6.sin6_scope_id;
194 bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
195
196 if (bnct != bncp)
197 return;
198
199 if (bnct)
200 bnct->nht_info = to;
201 }
202
203 /*
204 * Returns the bnc whose bnc->nht_info matches the LL peer by
205 * looping through the IPv6 nexthop table
206 */
207 static struct bgp_nexthop_cache *
208 bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
209 {
210 struct bgp_nexthop_cache *bnc;
211
212 frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
213 bnc) {
214 if (bnc->nht_info == peer) {
215 if (BGP_DEBUG(nht, NHT)) {
216 zlog_debug(
217 "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
218 &bnc->prefix, bnc->ifindex,
219 bnc->srte_color, bnc, peer->host,
220 peer->bgp->name_pretty, peer);
221 }
222 return bnc;
223 }
224 }
225
226 if (BGP_DEBUG(nht, NHT))
227 zlog_debug(
228 "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
229 peer->host, peer->bgp->name_pretty, peer);
230
231 return NULL;
232 }
233
234 void bgp_unlink_nexthop_by_peer(struct peer *peer)
235 {
236 struct prefix p;
237 struct bgp_nexthop_cache *bnc;
238 afi_t afi = family2afi(peer->su.sa.sa_family);
239 ifindex_t ifindex = 0;
240
241 if (!sockunion2hostprefix(&peer->su, &p)) {
242 /*
243 * In scenarios where unnumbered BGP session is brought
244 * down by shutting down the interface before unconfiguring
245 * the BGP neighbor, neighbor information in peer->su.sa
246 * will be cleared when the interface is shutdown. So
247 * during the deletion of unnumbered bgp peer, above check
248 * will return true. Therefore, in this case,BGP needs to
249 * find the bnc whose bnc->nht_info matches the
250 * peer being deleted and free it.
251 */
252 bnc = bgp_find_ipv6_nexthop_matching_peer(peer);
253 } else {
254 /*
255 * Gather the ifindex for if up/down events to be
256 * tagged into this fun
257 */
258 if (afi == AFI_IP6 &&
259 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
260 ifindex = peer->su.sin6.sin6_scope_id;
261 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0,
262 ifindex);
263 }
264
265 if (!bnc)
266 return;
267
268 /* cleanup the peer reference */
269 bnc->nht_info = NULL;
270
271 bgp_unlink_nexthop_check(bnc);
272 }
273
274 /*
275 * A route and its nexthop might belong to different VRFs. Therefore,
276 * we need both the bgp_route and bgp_nexthop pointers.
277 */
/*
 * Find, or create, the nexthop cache entry that tracks either a path's
 * nexthop (pi != NULL) or a peer's address (pi == NULL, peer != NULL),
 * register it if needed, and report whether it is currently usable.
 *
 * bgp_route    - instance owning the route; its flags and inst_type are
 *                consulted (import-check, view).
 * bgp_nexthop  - instance whose tables hold the cache entry.
 * afi          - address family hint; overridden from the path's
 *                nexthop for non-static paths.
 * safi         - only compared against SAFI_UNICAST for the final
 *                validity decision.
 * pi           - path whose nexthop is tracked, or NULL.
 * peer         - peer to track when pi is NULL; also used for debug
 *                output and to store in nht_info.
 * connected    - non-zero when 'connected' resolution is required.
 * orig_prefix  - when set, a non-static path whose nexthop prefix
 *                equals it is rejected.
 *
 * Returns:
 *   1 when make_prefix() fails, or when bgp_route is a view instance;
 *   0 when the nexthop prefix equals orig_prefix, when a link-local
 *     conf_if peer has no ifindex yet, when the peer address cannot be
 *     converted to a prefix, or when both pi and peer are NULL;
 *   otherwise the result of bgp_isvalid_nexthop_for_mpls() or
 *   bgp_isvalid_nexthop() on the entry.
 */
278 int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
279 afi_t afi, safi_t safi, struct bgp_path_info *pi,
280 struct peer *peer, int connected,
281 const struct prefix *orig_prefix)
282 {
283 struct bgp_nexthop_cache_head *tree = NULL;
284 struct bgp_nexthop_cache *bnc;
285 struct bgp_path_info *bpi_ultimate;
286 struct prefix p;
287 uint32_t srte_color = 0;
288 int is_bgp_static_route = 0;
289 ifindex_t ifindex = 0;
290
291 if (pi) {
292 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
293 && (pi->sub_type == BGP_ROUTE_STATIC))
294 ? 1
295 : 0;
296
297 /* Since Extended Next-hop Encoding (RFC5549) support, we want
298 to derive
299 address-family from the next-hop. */
300 if (!is_bgp_static_route)
301 afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
302 : AFI_IP;
303
304 /* Validation for the ipv4 mapped ipv6 nexthop. */
305 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
306 afi = AFI_IP;
307 }
308
309 /* This will return true if the global IPv6 NH is a link local
310 * addr */
/* NOTE(review): as written in this file, make_prefix() returns a
 * negative value only on its flowspec path (no peer, or whatever
 * bgp_flowspec_get_first_nh() returns); the comment above looks
 * stale - confirm.
 */
311 if (make_prefix(afi, pi, &p) < 0)
312 return 1;
313
/* A non-static route must not resolve through its own prefix. */
314 if (!is_bgp_static_route && orig_prefix
315 && prefix_same(&p, orig_prefix)) {
316 if (BGP_DEBUG(nht, NHT)) {
317 zlog_debug(
318 "%s(%pFX): prefix loops through itself",
319 __func__, &p);
320 }
321 return 0;
322 }
323
324 srte_color = pi->attr->srte_color;
325 } else if (peer) {
326 /*
327 * Gather the ifindex for if up/down events to be
328 * tagged into this fun
329 */
330 if (afi == AFI_IP6 && peer->conf_if &&
331 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
332 ifindex = peer->su.sin6.sin6_scope_id;
333 if (ifindex == 0) {
334 if (BGP_DEBUG(nht, NHT)) {
/* Note: the "%s" prefix here is the interface name, not __func__. */
335 zlog_debug(
336 "%s: Unable to locate ifindex, waiting till we have one",
337 peer->conf_if);
338 }
339 return 0;
340 }
341 }
342
343 if (!sockunion2hostprefix(&peer->su, &p)) {
344 if (BGP_DEBUG(nht, NHT)) {
/* Logged when the peer address cannot be turned into a host prefix. */
345 zlog_debug(
346 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
347 __func__, afi, AFI_IP, AFI_IP6);
348 }
349 return 0;
350 }
351 } else
352 return 0;
353
/* Static routes are tracked in the import-check table, everything
 * else in the nexthop cache table, both owned by bgp_nexthop.
 */
354 if (is_bgp_static_route)
355 tree = &bgp_nexthop->import_check_table[afi];
356 else
357 tree = &bgp_nexthop->nexthop_cache_table[afi];
358
359 bnc = bnc_find(tree, &p, srte_color, ifindex);
360 if (!bnc) {
361 bnc = bnc_new(tree, &p, srte_color, ifindex);
362 bnc->bgp = bgp_nexthop;
363 if (BGP_DEBUG(nht, NHT))
364 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
365 &bnc->prefix, bnc->ifindex, bnc->srte_color,
366 bnc->bgp->name_pretty, peer);
367 } else {
368 if (BGP_DEBUG(nht, NHT))
369 zlog_debug(
370 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
371 &bnc->prefix, bnc->ifindex,
372 bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
373 bnc->path_count, bnc->nht_info);
374 }
375
376 if (pi && is_route_parent_evpn(pi))
377 bnc->is_evpn_gwip_nexthop = true;
378
/* Each branch below that changes how the entry must be resolved clears
 * REGISTERED and VALID so that the entry is registered again further
 * down.
 */
379 if (is_bgp_static_route) {
380 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
381
382 /* If we're toggling the type, re-register */
383 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
384 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
385 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
386 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
387 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
388 } else if ((!CHECK_FLAG(bgp_route->flags,
389 BGP_FLAG_IMPORT_CHECK))
390 && CHECK_FLAG(bnc->flags,
391 BGP_STATIC_ROUTE_EXACT_MATCH)) {
392 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
393 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
394 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
395 }
396 }
397 /* When nexthop is already known, but now requires 'connected'
398 * resolution,
399 * re-register it. The reverse scenario where the nexthop currently
400 * requires
401 * 'connected' resolution does not need a re-register (i.e., we treat
402 * 'connected-required' as an override) except in the scenario where
403 * this
404 * is actually a case of tracking a peer for connectivity (e.g., after
405 * disable connected-check).
406 * NOTE: We don't track the number of paths separately for 'connected-
407 * required' vs 'connected-not-required' as this change is not a common
408 * scenario.
409 */
410 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
411 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
412 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
413 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
414 } else if (peer && !connected
415 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
416 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
417 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
418 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
419 }
/* The tracked interface changed for a peer-driven call: store the new
 * ifindex and force re-registration.
 */
420 if (peer && (bnc->ifindex != ifindex)) {
421 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
422 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
423 bnc->ifindex = ifindex;
424 }
/* View instances are marked registered and valid without registering;
 * otherwise register unless already registered or the prefix is a
 * default host route.
 */
425 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
426 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
427 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
428 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
429 && !is_default_host_route(&bnc->prefix))
430 register_zebra_rnh(bnc);
431
432 if (pi && pi->nexthop != bnc) {
433 /* Unlink from existing nexthop cache, if any. This will also
434 * free
435 * the nexthop cache entry, if appropriate.
436 */
437 bgp_unlink_nexthop(pi);
438
439 /* updates NHT pi list reference */
440 path_nh_map(pi, bnc, true);
441
/* Copy the entry's metric to the ultimate (imported-from) path when the
 * entry is valid with a non-zero metric; otherwise reset an existing
 * igpmetric to 0.
 */
442 bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
443 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
444 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
445 bnc->metric;
446 else if (bpi_ultimate->extra)
447 bpi_ultimate->extra->igpmetric = 0;
448 } else if (peer) {
449 /*
450 * Let's not accidentally save the peer data for a peer
451 * we are going to throw away in a second or so.
452 * When we come back around we'll fix up this
453 * data properly in replace_nexthop_by_peer
454 */
455 if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
456 bnc->nht_info = (void *)peer; /* NHT peer reference */
457 }
458
459 /*
460 * We are cheating here. Views have no associated underlying
461 * ability to detect nexthops. So when we have a view
462 * just tell everyone the nexthop is valid
463 */
464 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
465 return 1;
/* Imported unicast paths that carry labels and are not EVPN gateway-IP
 * nexthops use the MPLS-specific validity check.
 */
466 else if (safi == SAFI_UNICAST && pi &&
467 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
468 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
469 return bgp_isvalid_nexthop_for_mpls(bnc, pi);
470 else
471 return (bgp_isvalid_nexthop(bnc));
472 }
473
474 void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
475 {
476 struct bgp_nexthop_cache *bnc;
477 struct prefix p;
478 ifindex_t ifindex = 0;
479
480 if (!peer)
481 return;
482
483 /*
484 * In case the below check evaluates true and if
485 * the bnc has not been freed at this point, then
486 * we might have to do something similar to what's
487 * done in bgp_unlink_nexthop_by_peer(). Since
488 * bgp_unlink_nexthop_by_peer() loops through the
489 * nodes of V6 nexthop cache to find the bnc, it is
490 * currently not being called here.
491 */
492 if (!sockunion2hostprefix(&peer->su, &p))
493 return;
494 /*
495 * Gather the ifindex for if up/down events to be
496 * tagged into this fun
497 */
498 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
499 ifindex = peer->su.sin6.sin6_scope_id;
500 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
501 &p, 0, ifindex);
502 if (!bnc) {
503 if (BGP_DEBUG(nht, NHT))
504 zlog_debug(
505 "Cannot find connected NHT node for peer %s(%s)",
506 peer->host, peer->bgp->name_pretty);
507 return;
508 }
509
510 if (bnc->nht_info != peer) {
511 if (BGP_DEBUG(nht, NHT))
512 zlog_debug(
513 "Connected NHT %p node for peer %s(%s) points to %p",
514 bnc, peer->host, bnc->bgp->name_pretty,
515 bnc->nht_info);
516 return;
517 }
518
519 bnc->nht_info = NULL;
520
521 if (LIST_EMPTY(&(bnc->paths))) {
522 if (BGP_DEBUG(nht, NHT))
523 zlog_debug(
524 "Freeing connected NHT node %p for peer %s(%s)",
525 bnc, peer->host, bnc->bgp->name_pretty);
526 unregister_zebra_rnh(bnc);
527 bnc_free(bnc);
528 }
529 }
530
531 static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
532 struct zapi_route *nhr,
533 bool import_check)
534 {
535 struct nexthop *nexthop;
536 struct nexthop *oldnh;
537 struct nexthop *nhlist_head = NULL;
538 struct nexthop *nhlist_tail = NULL;
539 int i;
540 bool evpn_resolved = false;
541
542 bnc->last_update = monotime(NULL);
543 bnc->change_flags = 0;
544
545 /* debug print the input */
546 if (BGP_DEBUG(nht, NHT)) {
547 char bnc_buf[BNC_FLAG_DUMP_SIZE];
548
549 zlog_debug(
550 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
551 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
552 bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
553 nhr->nexthop_num, bnc->nexthop_num,
554 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
555 sizeof(bnc_buf)));
556 }
557
558 if (nhr->metric != bnc->metric)
559 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
560
561 if (nhr->nexthop_num != bnc->nexthop_num)
562 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
563
564 if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
565 !prefix_same(&bnc->prefix, &nhr->prefix))) {
566 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
567 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
568 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
569 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
570
571 bnc_nexthop_free(bnc);
572 bnc->nexthop = NULL;
573
574 if (BGP_DEBUG(nht, NHT))
575 zlog_debug(
576 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
577 __func__, &bnc->prefix, &nhr->prefix);
578 } else if (nhr->nexthop_num) {
579 struct peer *peer = bnc->nht_info;
580
581 /* notify bgp fsm if nbr ip goes from invalid->valid */
582 if (!bnc->nexthop_num)
583 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
584
585 if (!bnc->is_evpn_gwip_nexthop)
586 bnc->flags |= BGP_NEXTHOP_VALID;
587 bnc->metric = nhr->metric;
588 bnc->nexthop_num = nhr->nexthop_num;
589
590 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
591
592 for (i = 0; i < nhr->nexthop_num; i++) {
593 int num_labels = 0;
594
595 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
596
597 /*
598 * Turn on RA for the v6 nexthops
599 * we receive from bgp. This is to allow us
600 * to work with v4 routing over v6 nexthops
601 */
602 if (peer && !peer->ifp
603 && CHECK_FLAG(peer->flags,
604 PEER_FLAG_CAPABILITY_ENHE)
605 && nhr->prefix.family == AF_INET6
606 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
607 struct interface *ifp;
608
609 ifp = if_lookup_by_index(nexthop->ifindex,
610 nexthop->vrf_id);
611 if (ifp)
612 zclient_send_interface_radv_req(
613 zclient, nexthop->vrf_id, ifp,
614 true,
615 BGP_UNNUM_DEFAULT_RA_INTERVAL);
616 }
617 /* There is at least one label-switched path */
618 if (nexthop->nh_label &&
619 nexthop->nh_label->num_labels) {
620
621 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
622 num_labels = nexthop->nh_label->num_labels;
623 }
624
625 if (BGP_DEBUG(nht, NHT)) {
626 char buf[NEXTHOP_STRLEN];
627 zlog_debug(
628 " nhop via %s (%d labels)",
629 nexthop2str(nexthop, buf, sizeof(buf)),
630 num_labels);
631 }
632
633 if (nhlist_tail) {
634 nhlist_tail->next = nexthop;
635 nhlist_tail = nexthop;
636 } else {
637 nhlist_tail = nexthop;
638 nhlist_head = nexthop;
639 }
640
641 /* No need to evaluate the nexthop if we have already
642 * determined
643 * that there has been a change.
644 */
645 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
646 continue;
647
648 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
649 if (nexthop_same(oldnh, nexthop))
650 break;
651
652 if (!oldnh)
653 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
654 }
655 bnc_nexthop_free(bnc);
656 bnc->nexthop = nhlist_head;
657
658 /*
659 * Gateway IP nexthop is L3 reachable. Mark it as
660 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
661 * remote EVPN RT-2.
662 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
663 * When its mapping with EVPN RT-2 is established, unset
664 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
665 */
666 if (bnc->is_evpn_gwip_nexthop) {
667 evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
668
669 if (BGP_DEBUG(nht, NHT))
670 zlog_debug(
671 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
672 &bnc->prefix,
673 (evpn_resolved ? "successful"
674 : "failed"));
675
676 if (evpn_resolved) {
677 bnc->flags |= BGP_NEXTHOP_VALID;
678 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
679 bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
680 } else {
681 bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
682 bnc->flags &= ~BGP_NEXTHOP_VALID;
683 }
684 }
685 } else {
686 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
687 bnc->flags &= ~BGP_NEXTHOP_VALID;
688 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
689 bnc->nexthop_num = nhr->nexthop_num;
690
691 /* notify bgp fsm if nbr ip goes from valid->invalid */
692 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
693
694 bnc_nexthop_free(bnc);
695 bnc->nexthop = NULL;
696 }
697
698 evaluate_paths(bnc);
699 }
700
701 static void bgp_nht_ifp_table_handle(struct bgp *bgp,
702 struct bgp_nexthop_cache_head *table,
703 struct interface *ifp, bool up)
704 {
705 struct bgp_nexthop_cache *bnc;
706
707 frr_each (bgp_nexthop_cache, table, bnc) {
708 if (bnc->ifindex != ifp->ifindex)
709 continue;
710
711 bnc->last_update = monotime(NULL);
712 bnc->change_flags = 0;
713
714 /*
715 * For interface based routes ( ala the v6 LL routes
716 * that this was written for ) the metric received
717 * for the connected route is 0 not 1.
718 */
719 bnc->metric = 0;
720 if (up) {
721 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
722 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
723 bnc->nexthop_num = 1;
724 } else {
725 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
726 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
727 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
728 bnc->nexthop_num = 0;
729 }
730
731 evaluate_paths(bnc);
732 }
733 }
734 static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
735 {
736 struct bgp *bgp;
737
738 bgp = ifp->vrf->info;
739 if (!bgp)
740 return;
741
742 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
743 up);
744 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
745 up);
746 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
747 up);
748 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
749 up);
750 }
751
/* Interface came up: refresh the nexthop cache entries keyed on it. */
void bgp_nht_ifp_up(struct interface *ifp)
{
	const bool up = true;

	bgp_nht_ifp_handle(ifp, up);
}
756
/* Interface went down: invalidate the nexthop cache entries keyed on it. */
void bgp_nht_ifp_down(struct interface *ifp)
{
	const bool up = false;

	bgp_nht_ifp_handle(ifp, up);
}
761
762 static void bgp_nht_ifp_initial(struct event *thread)
763 {
764 ifindex_t ifindex = EVENT_VAL(thread);
765 struct bgp *bgp = EVENT_ARG(thread);
766 struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
767
768 if (!ifp)
769 return;
770
771 if (BGP_DEBUG(nht, NHT))
772 zlog_debug(
773 "Handle NHT initial update for Intf %s(%d) status %s",
774 ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
775
776 if (if_is_up(ifp))
777 bgp_nht_ifp_up(ifp);
778 else
779 bgp_nht_ifp_down(ifp);
780 }
781
782 /*
783 * So the bnc code has the ability to handle interface up/down
784 * events to properly handle v6 LL peering.
785 * What is happening here:
786 * The event system for peering expects the nht code to
787 * report on the tracking events after we move to active
788 * So let's give the system a chance to report on that event
789 * in a manner that is expected.
790 */
791 void bgp_nht_interface_events(struct peer *peer)
792 {
793 struct bgp *bgp = peer->bgp;
794 struct bgp_nexthop_cache_head *table;
795 struct bgp_nexthop_cache *bnc;
796 struct prefix p;
797 ifindex_t ifindex = 0;
798
799 if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
800 return;
801
802 if (!sockunion2hostprefix(&peer->su, &p))
803 return;
804 /*
805 * Gather the ifindex for if up/down events to be
806 * tagged into this fun
807 */
808 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
809 ifindex = peer->su.sin6.sin6_scope_id;
810
811 table = &bgp->nexthop_cache_table[AFI_IP6];
812 bnc = bnc_find(table, &p, 0, ifindex);
813 if (!bnc)
814 return;
815
816 if (bnc->ifindex)
817 event_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
818 bnc->ifindex, NULL);
819 }
820
821 void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
822 {
823 struct bgp_nexthop_cache_head *tree = NULL;
824 struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
825 struct bgp *bgp;
826 struct prefix match;
827 struct zapi_route nhr;
828 afi_t afi;
829
830 bgp = bgp_lookup_by_vrf_id(vrf_id);
831 if (!bgp) {
832 flog_err(
833 EC_BGP_NH_UPD,
834 "parse nexthop update: instance not found for vrf_id %u",
835 vrf_id);
836 return;
837 }
838
839 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
840 zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
841 bgp->name_pretty);
842 return;
843 }
844
845 afi = family2afi(match.family);
846 tree = &bgp->nexthop_cache_table[afi];
847
848 bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
849 if (!bnc_nhc) {
850 if (BGP_DEBUG(nht, NHT))
851 zlog_debug(
852 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
853 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
854 } else
855 bgp_process_nexthop_update(bnc_nhc, &nhr, false);
856
857 tree = &bgp->import_check_table[afi];
858
859 bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
860 if (!bnc_import) {
861 if (BGP_DEBUG(nht, NHT))
862 zlog_debug(
863 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
864 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
865 } else
866 bgp_process_nexthop_update(bnc_import, &nhr, true);
867
868 /*
869 * HACK: if any BGP route is dependant on an SR-policy that doesn't
870 * exist, zebra will never send NH updates relative to that policy. In
871 * that case, whenever we receive an update about a colorless NH, update
872 * the corresponding colorful NHs that share the same endpoint but that
873 * are inactive. This ugly hack should work around the problem at the
874 * cost of a performance pernalty. Long term, what should be done is to
875 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
876 * which should provide a better infrastructure to solve this issue in
877 * a more efficient and elegant way.
878 */
879 if (nhr.srte_color == 0 && bnc_nhc) {
880 struct bgp_nexthop_cache *bnc_iter;
881
882 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
883 bnc_iter) {
884 if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
885 bnc_iter->srte_color == 0 ||
886 CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
887 continue;
888
889 bgp_process_nexthop_update(bnc_iter, &nhr, false);
890 }
891 }
892 }
893
894 /*
895 * Cleanup nexthop registration and status information for BGP nexthops
896 * pertaining to this VRF. This is invoked upon VRF deletion.
897 */
898 void bgp_cleanup_nexthops(struct bgp *bgp)
899 {
900 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
901 struct bgp_nexthop_cache *bnc;
902
903 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
904 bnc) {
905 /* Clear relevant flags. */
906 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
907 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
908 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
909 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
910 }
911 }
912 }
913
914 /**
915 * make_prefix - make a prefix structure from the path (essentially
916 * path's node.
917 */
918 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
919 {
920
921 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
922 && (pi->sub_type == BGP_ROUTE_STATIC))
923 ? 1
924 : 0;
925 struct bgp_dest *net = pi->net;
926 const struct prefix *p_orig = bgp_dest_get_prefix(net);
927 struct in_addr ipv4;
928
929 if (p_orig->family == AF_FLOWSPEC) {
930 if (!pi->peer)
931 return -1;
932 return bgp_flowspec_get_first_nh(pi->peer->bgp,
933 pi, p, afi);
934 }
935 memset(p, 0, sizeof(struct prefix));
936 switch (afi) {
937 case AFI_IP:
938 p->family = AF_INET;
939 if (is_bgp_static) {
940 p->u.prefix4 = p_orig->u.prefix4;
941 p->prefixlen = p_orig->prefixlen;
942 } else {
943 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
944 ipv4_mapped_ipv6_to_ipv4(
945 &pi->attr->mp_nexthop_global, &ipv4);
946 p->u.prefix4 = ipv4;
947 p->prefixlen = IPV4_MAX_BITLEN;
948 } else {
949 if (p_orig->family == AF_EVPN)
950 p->u.prefix4 =
951 pi->attr->mp_nexthop_global_in;
952 else
953 p->u.prefix4 = pi->attr->nexthop;
954 p->prefixlen = IPV4_MAX_BITLEN;
955 }
956 }
957 break;
958 case AFI_IP6:
959 p->family = AF_INET6;
960
961 if (is_bgp_static) {
962 p->u.prefix6 = p_orig->u.prefix6;
963 p->prefixlen = p_orig->prefixlen;
964 } else {
965 /* If we receive MP_REACH nexthop with ::(LL)
966 * or LL(LL), use LL address as nexthop cache.
967 */
968 if (pi->attr->mp_nexthop_len
969 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
970 && (IN6_IS_ADDR_UNSPECIFIED(
971 &pi->attr->mp_nexthop_global)
972 || IN6_IS_ADDR_LINKLOCAL(
973 &pi->attr->mp_nexthop_global)))
974 p->u.prefix6 = pi->attr->mp_nexthop_local;
975 /* If we receive MR_REACH with (GA)::(LL)
976 * then check for route-map to choose GA or LL
977 */
978 else if (pi->attr->mp_nexthop_len
979 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
980 if (pi->attr->mp_nexthop_prefer_global)
981 p->u.prefix6 =
982 pi->attr->mp_nexthop_global;
983 else
984 p->u.prefix6 =
985 pi->attr->mp_nexthop_local;
986 } else
987 p->u.prefix6 = pi->attr->mp_nexthop_global;
988 p->prefixlen = IPV6_MAX_BITLEN;
989 }
990 break;
991 default:
992 if (BGP_DEBUG(nht, NHT)) {
993 zlog_debug(
994 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
995 __func__, afi, AFI_IP, AFI_IP6);
996 }
997 break;
998 }
999 return 0;
1000 }
1001
1002 /**
1003 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
1004 * command to Zebra.
1005 * ARGUMENTS:
1006 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1007 * int command -- command to send to zebra
1008 * RETURNS:
1009 * void.
1010 */
1011 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
1012 {
1013 bool exact_match = false;
1014 bool resolve_via_default = false;
1015 int ret;
1016
1017 if (!zclient)
1018 return;
1019
1020 /* Don't try to register if Zebra doesn't know of this instance. */
1021 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
1022 if (BGP_DEBUG(zebra, ZEBRA))
1023 zlog_debug(
1024 "%s: No zebra instance to talk to, not installing NHT entry",
1025 __func__);
1026 return;
1027 }
1028
1029 if (!bgp_zebra_num_connects()) {
1030 if (BGP_DEBUG(zebra, ZEBRA))
1031 zlog_debug(
1032 "%s: We have not connected yet, cannot send nexthops",
1033 __func__);
1034 }
1035 if (command == ZEBRA_NEXTHOP_REGISTER) {
1036 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
1037 exact_match = true;
1038 if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
1039 resolve_via_default = true;
1040 }
1041
1042 if (BGP_DEBUG(zebra, ZEBRA))
1043 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
1044 zserv_command_string(command), &bnc->prefix,
1045 bnc->bgp->name_pretty);
1046
1047 ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1048 exact_match, resolve_via_default,
1049 bnc->bgp->vrf_id);
1050 if (ret == ZCLIENT_SEND_FAILURE) {
1051 flog_warn(EC_BGP_ZEBRA_SEND,
1052 "sendmsg_nexthop: zclient_send_message() failed");
1053 return;
1054 }
1055
1056 if (command == ZEBRA_NEXTHOP_REGISTER)
1057 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1058 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
1059 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1060 return;
1061 }
1062
1063 /**
1064 * register_zebra_rnh - register a NH/route with Zebra for notification
1065 * when the route or the route to the nexthop changes.
1066 * ARGUMENTS:
1067 * struct bgp_nexthop_cache *bnc
1068 * RETURNS:
1069 * void.
1070 */
1071 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
1072 {
1073 /* Check if we have already registered */
1074 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1075 return;
1076
1077 if (bnc->ifindex) {
1078 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1079 return;
1080 }
1081
1082 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
1083 }
1084
1085 /**
1086 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1087 * ARGUMENTS:
1088 * struct bgp_nexthop_cache *bnc
1089 * RETURNS:
1090 * void.
1091 */
1092 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
1093 {
1094 /* Check if we have already registered */
1095 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1096 return;
1097
1098 if (bnc->ifindex) {
1099 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1100 return;
1101 }
1102
1103 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
1104 }
1105
1106 /**
1107 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1108 * ARGUMENTS:
1109 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1110 * RETURNS:
1111 * void.
1112 */
1113 void evaluate_paths(struct bgp_nexthop_cache *bnc)
1114 {
1115 struct bgp_dest *dest;
1116 struct bgp_path_info *path;
1117 struct bgp_path_info *bpi_ultimate;
1118 int afi;
1119 struct peer *peer = (struct peer *)bnc->nht_info;
1120 struct bgp_table *table;
1121 safi_t safi;
1122 struct bgp *bgp_path;
1123 const struct prefix *p;
1124
1125 if (BGP_DEBUG(nht, NHT)) {
1126 char bnc_buf[BNC_FLAG_DUMP_SIZE];
1127 char chg_buf[BNC_FLAG_DUMP_SIZE];
1128
1129 zlog_debug(
1130 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1131 &bnc->prefix, bnc->ifindex, bnc->srte_color,
1132 bnc->bgp->name_pretty,
1133 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1134 sizeof(bnc_buf)),
1135 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1136 sizeof(bnc_buf)));
1137 }
1138
1139 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
1140 if (path->type == ZEBRA_ROUTE_BGP &&
1141 (path->sub_type == BGP_ROUTE_NORMAL ||
1142 path->sub_type == BGP_ROUTE_STATIC ||
1143 path->sub_type == BGP_ROUTE_IMPORTED))
1144 /* evaluate the path */
1145 ;
1146 else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE) {
1147 /* evaluate the path for redistributed routes
1148 * except those from VNC
1149 */
1150 if ((path->type == ZEBRA_ROUTE_VNC) ||
1151 (path->type == ZEBRA_ROUTE_VNC_DIRECT))
1152 continue;
1153 } else
1154 /* don't evaluate the path */
1155 continue;
1156
1157 dest = path->net;
1158 assert(dest && bgp_dest_table(dest));
1159 p = bgp_dest_get_prefix(dest);
1160 afi = family2afi(p->family);
1161 table = bgp_dest_table(dest);
1162 safi = table->safi;
1163
1164 /*
1165 * handle routes from other VRFs (they can have a
1166 * nexthop in THIS VRF). bgp_path is the bgp instance
1167 * that owns the route referencing this nexthop.
1168 */
1169 bgp_path = table->bgp;
1170
1171 /*
1172 * Path becomes valid/invalid depending on whether the nexthop
1173 * reachable/unreachable.
1174 *
1175 * In case of unicast routes that were imported from vpn
1176 * and that have labels, they are valid only if there are
1177 * nexthops with labels
1178 *
1179 * If the nexthop is EVPN gateway-IP,
1180 * do not check for a valid label.
1181 */
1182
1183 bool bnc_is_valid_nexthop = false;
1184 bool path_valid = false;
1185
1186 if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1187 && path->extra && path->extra->num_labels
1188 && (path->attr->evpn_overlay.type
1189 != OVERLAY_INDEX_GATEWAY_IP)) {
1190 bnc_is_valid_nexthop =
1191 bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1192 : false;
1193 } else {
1194 if (bgp_update_martian_nexthop(
1195 bnc->bgp, afi, safi, path->type,
1196 path->sub_type, path->attr, dest)) {
1197 if (BGP_DEBUG(nht, NHT))
1198 zlog_debug(
1199 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1200 __func__, dest, bgp_path->name);
1201 } else
1202 bnc_is_valid_nexthop =
1203 bgp_isvalid_nexthop(bnc) ? true : false;
1204 }
1205
1206 if (BGP_DEBUG(nht, NHT)) {
1207
1208 if (dest->pdest) {
1209 char rd_buf[RD_ADDRSTRLEN];
1210
1211 prefix_rd2str(
1212 (struct prefix_rd *)bgp_dest_get_prefix(
1213 dest->pdest),
1214 rd_buf, sizeof(rd_buf),
1215 bgp_get_asnotation(bnc->bgp));
1216 zlog_debug(
1217 "... eval path %d/%d %pBD RD %s %s flags 0x%x",
1218 afi, safi, dest, rd_buf,
1219 bgp_path->name_pretty, path->flags);
1220 } else
1221 zlog_debug(
1222 "... eval path %d/%d %pBD %s flags 0x%x",
1223 afi, safi, dest, bgp_path->name_pretty,
1224 path->flags);
1225 }
1226
1227 /* Skip paths marked for removal or as history. */
1228 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1229 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1230 continue;
1231
1232 /* Copy the metric to the path. Will be used for bestpath
1233 * computation */
1234 bpi_ultimate = bgp_get_imported_bpi_ultimate(path);
1235 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
1236 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
1237 bnc->metric;
1238 else if (bpi_ultimate->extra)
1239 bpi_ultimate->extra->igpmetric = 0;
1240
1241 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
1242 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1243 || path->attr->srte_color != 0)
1244 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
1245
1246 path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
1247 if (path->type == ZEBRA_ROUTE_BGP &&
1248 path->sub_type == BGP_ROUTE_STATIC &&
1249 !CHECK_FLAG(bgp_path->flags, BGP_FLAG_IMPORT_CHECK))
1250 /* static routes with 'no bgp network import-check' are
1251 * always valid. if nht is called with static routes,
1252 * the vpn exportation needs to be triggered
1253 */
1254 vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
1255 path);
1256 else if (path->sub_type == BGP_ROUTE_REDISTRIBUTE &&
1257 safi == SAFI_UNICAST &&
1258 (bgp_path->inst_type == BGP_INSTANCE_TYPE_VRF ||
1259 bgp_path->inst_type == BGP_INSTANCE_TYPE_DEFAULT))
1260 /* redistribute routes are always valid
1261 * if nht is called with redistribute routes, the vpn
1262 * exportation needs to be triggered
1263 */
1264 vpn_leak_from_vrf_update(bgp_get_default(), bgp_path,
1265 path);
1266 else if (path_valid != bnc_is_valid_nexthop) {
1267 if (path_valid) {
1268 /* No longer valid, clear flag; also for EVPN
1269 * routes, unimport from VRFs if needed.
1270 */
1271 bgp_aggregate_decrement(bgp_path, p, path, afi,
1272 safi);
1273 bgp_path_info_unset_flag(dest, path,
1274 BGP_PATH_VALID);
1275 if (safi == SAFI_EVPN &&
1276 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1277 bgp_evpn_unimport_route(bgp_path,
1278 afi, safi, bgp_dest_get_prefix(dest), path);
1279 if (safi == SAFI_UNICAST &&
1280 (bgp_path->inst_type !=
1281 BGP_INSTANCE_TYPE_VIEW))
1282 vpn_leak_from_vrf_withdraw(
1283 bgp_get_default(), bgp_path,
1284 path);
1285 } else {
1286 /* Path becomes valid, set flag; also for EVPN
1287 * routes, import from VRFs if needed.
1288 */
1289 bgp_path_info_set_flag(dest, path,
1290 BGP_PATH_VALID);
1291 bgp_aggregate_increment(bgp_path, p, path, afi,
1292 safi);
1293 if (safi == SAFI_EVPN &&
1294 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1295 bgp_evpn_import_route(bgp_path,
1296 afi, safi, bgp_dest_get_prefix(dest), path);
1297 if (safi == SAFI_UNICAST &&
1298 (bgp_path->inst_type !=
1299 BGP_INSTANCE_TYPE_VIEW))
1300 vpn_leak_from_vrf_update(
1301 bgp_get_default(), bgp_path,
1302 path);
1303 }
1304 }
1305
1306 bgp_process(bgp_path, dest, afi, safi);
1307 }
1308
1309 if (peer) {
1310 int valid_nexthops = bgp_isvalid_nexthop(bnc);
1311
1312 if (valid_nexthops) {
1313 /*
1314 * Peering cannot occur across a blackhole nexthop
1315 */
1316 if (bnc->nexthop_num == 1 && bnc->nexthop
1317 && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1318 peer->last_reset = PEER_DOWN_WAITING_NHT;
1319 valid_nexthops = 0;
1320 } else
1321 peer->last_reset = PEER_DOWN_WAITING_OPEN;
1322 } else
1323 peer->last_reset = PEER_DOWN_WAITING_NHT;
1324
1325 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1326 if (BGP_DEBUG(nht, NHT))
1327 zlog_debug(
1328 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1329 __func__, peer->host,
1330 peer->bgp->name_pretty,
1331 !!valid_nexthops);
1332 bgp_fsm_nht_update(peer, !!valid_nexthops);
1333 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1334 }
1335 }
1336
1337 RESET_FLAG(bnc->change_flags);
1338 }
1339
1340 /**
1341 * path_nh_map - make or break path-to-nexthop association.
1342 * ARGUMENTS:
1343 * path - pointer to the path structure
1344 * bnc - pointer to the nexthop structure
1345 * make - if set, make the association. if unset, just break the existing
1346 * association.
1347 */
1348 void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1349 bool make)
1350 {
1351 if (path->nexthop) {
1352 LIST_REMOVE(path, nh_thread);
1353 path->nexthop->path_count--;
1354 path->nexthop = NULL;
1355 }
1356 if (make) {
1357 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1358 path->nexthop = bnc;
1359 path->nexthop->path_count++;
1360 }
1361 }
1362
1363 /*
1364 * This function is called to register nexthops to zebra
1365 * as that we may have tried to install the nexthops
1366 * before we actually have a zebra connection
1367 */
1368 void bgp_nht_register_nexthops(struct bgp *bgp)
1369 {
1370 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1371 struct bgp_nexthop_cache *bnc;
1372
1373 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1374 bnc) {
1375 register_zebra_rnh(bnc);
1376 }
1377 }
1378 }
1379
1380 void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1381 {
1382 struct bgp *bgp;
1383 struct bgp_nexthop_cache *bnc;
1384 struct nexthop *nhop;
1385 struct interface *ifp;
1386 struct prefix p;
1387 ifindex_t ifindex = 0;
1388
1389 if (peer->ifp)
1390 return;
1391
1392 bgp = peer->bgp;
1393 if (!sockunion2hostprefix(&peer->su, &p)) {
1394 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1395 __func__, peer->host);
1396 return;
1397 }
1398
1399 if (p.family != AF_INET6)
1400 return;
1401 /*
1402 * Gather the ifindex for if up/down events to be
1403 * tagged into this fun
1404 */
1405 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1406 ifindex = peer->su.sin6.sin6_scope_id;
1407
1408 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1409 if (!bnc)
1410 return;
1411
1412 if (peer != bnc->nht_info)
1413 return;
1414
1415 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1416 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1417
1418 if (!ifp)
1419 continue;
1420
1421 zclient_send_interface_radv_req(zclient,
1422 nhop->vrf_id,
1423 ifp, true,
1424 BGP_UNNUM_DEFAULT_RA_INTERVAL);
1425 }
1426 }
1427
1428 void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1429 {
1430 struct bgp *bgp;
1431 struct bgp_nexthop_cache *bnc;
1432 struct nexthop *nhop;
1433 struct interface *ifp;
1434 struct prefix p;
1435 ifindex_t ifindex = 0;
1436
1437 if (peer->ifp)
1438 return;
1439
1440 bgp = peer->bgp;
1441
1442 if (!sockunion2hostprefix(&peer->su, &p)) {
1443 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1444 __func__, peer->host);
1445 return;
1446 }
1447
1448 if (p.family != AF_INET6)
1449 return;
1450 /*
1451 * Gather the ifindex for if up/down events to be
1452 * tagged into this fun
1453 */
1454 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1455 ifindex = peer->su.sin6.sin6_scope_id;
1456
1457 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1458 if (!bnc)
1459 return;
1460
1461 if (peer != bnc->nht_info)
1462 return;
1463
1464 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1465 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1466
1467 if (!ifp)
1468 continue;
1469
1470 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1471 0);
1472 }
1473 }
1474
/****************************************************************************
 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
 * left to the application using it.
 * PS: Currently EVPN host routes are the only app using L3 NHG for fast
 * failover of remote ES links.
 ***************************************************************************/
/* Bitmap the ids are allocated from; an index N maps to id
 * bgp_l3nhg_start + N.
 */
static bitfield_t bgp_nh_id_bitmap;
/* Base of the id range, obtained from zclient_get_nhg_start(ZEBRA_ROUTE_BGP)
 * on the first allocation.
 */
static uint32_t bgp_l3nhg_start;
1484
/*
 * Nexthop-group callbacks handed to nexthop_group_init() by
 * bgp_l3nhg_zebra_init().
 * XXX - currently we do nothing on the callbacks
 */

/* "add" callback: intentionally empty. */
static void bgp_l3nhg_add_cb(const char *name)
{
}

/* "modify" callback: intentionally empty. */
static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
}

/* "add nexthop" callback: intentionally empty. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}

/* "delete nexthop" callback: intentionally empty. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}

/* "delete" callback: intentionally empty. */
static void bgp_l3nhg_del_cb(const char *name)
{
}
1507
1508 static void bgp_l3nhg_zebra_init(void)
1509 {
1510 static bool bgp_l3nhg_zebra_inited;
1511 if (bgp_l3nhg_zebra_inited)
1512 return;
1513
1514 bgp_l3nhg_zebra_inited = true;
1515 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1516 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_modify_cb,
1517 bgp_l3nhg_add_nexthop_cb, bgp_l3nhg_del_nexthop_cb,
1518 bgp_l3nhg_del_cb);
1519 }
1520
1521
1522 void bgp_l3nhg_init(void)
1523 {
1524 uint32_t id_max;
1525
1526 id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1527 bf_init(bgp_nh_id_bitmap, id_max);
1528 bf_assign_zero_index(bgp_nh_id_bitmap);
1529
1530 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1531 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1532 bgp_l3nhg_start + id_max);
1533 }
1534
/* Release the L3 NHG id bitmap set up by bgp_l3nhg_init(). */
void bgp_l3nhg_finish(void)
{
	bf_free(bgp_nh_id_bitmap);
}
1539
1540 uint32_t bgp_l3nhg_id_alloc(void)
1541 {
1542 uint32_t nhg_id = 0;
1543
1544 bgp_l3nhg_zebra_init();
1545 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1546 if (nhg_id)
1547 nhg_id += bgp_l3nhg_start;
1548
1549 return nhg_id;
1550 }
1551
1552 void bgp_l3nhg_id_free(uint32_t nhg_id)
1553 {
1554 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1555 return;
1556
1557 nhg_id -= bgp_l3nhg_start;
1558
1559 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1560 }