]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_nht.c
Merge pull request #11127 from louis-6wind/bgp-leak
[mirror_frr.git] / bgpd / bgp_nht.c
1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #include "command.h"
24 #include "thread.h"
25 #include "prefix.h"
26 #include "zclient.h"
27 #include "stream.h"
28 #include "network.h"
29 #include "log.h"
30 #include "memory.h"
31 #include "nexthop.h"
32 #include "vrf.h"
33 #include "filter.h"
34 #include "nexthop_group.h"
35
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_errors.h"
43 #include "bgpd/bgp_nht.h"
44 #include "bgpd/bgp_fsm.h"
45 #include "bgpd/bgp_zebra.h"
46 #include "bgpd/bgp_flowspec_util.h"
47 #include "bgpd/bgp_evpn.h"
48 #include "bgpd/bgp_rd.h"
49 #include "bgpd/bgp_mplsvpn.h"
50
51 extern struct zclient *zclient;
52
53 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
54 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
55 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
56 static void bgp_nht_ifp_initial(struct thread *thread);
57
58 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
59 {
60 return (bgp_zebra_num_connects() == 0
61 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
62 && bnc->nexthop_num > 0));
63 }
64
65 static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
66 struct bgp_path_info *path)
67 {
68 struct interface *ifp = NULL;
69 struct nexthop *nexthop;
70 struct bgp_interface *iifp;
71 struct peer *peer;
72
73 if (!path->extra || !path->extra->peer_orig)
74 return false;
75
76 peer = path->extra->peer_orig;
77
78 /* only connected ebgp peers are valid */
79 if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
80 CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
81 CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
82 return false;
83
84 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
85 if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
86 nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
87 nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
88 ifp = if_lookup_by_index(
89 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
90 bnc->bgp->vrf_id);
91 }
92 if (!ifp)
93 continue;
94 iifp = ifp->info;
95 if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
96 return true;
97 }
98 return false;
99 }
100
101 static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
102 struct bgp_path_info *path)
103 {
104 struct interface *ifp = NULL;
105 struct nexthop *nexthop;
106
107 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
108 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
109 ifp = if_lookup_by_index(
110 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
111 bnc->bgp->vrf_id);
112 if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
113 ifp->ll_type == ZEBRA_LLT_IP6GRE))
114 break;
115 }
116 }
117 if (!ifp)
118 return false;
119
120 if (CHECK_FLAG(path->attr->rmap_change_flags,
121 BATTR_RMAP_L3VPN_ACCEPT_GRE))
122 return true;
123
124 return false;
125 }
126
127 static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
128 struct bgp_path_info *path)
129 {
130 /*
131 * - In the case of MPLS-VPN, the label is learned from LDP or other
132 * protocols, and nexthop tracking is enabled for the label.
133 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
134 * - In the case of SRv6-VPN, we need to track the reachability to the
135 * SID (in other words, IPv6 address). As in MPLS, we need to record
136 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
137 * currently not implemented, and this function assumes that all
138 * Transit routes for SRv6-VPN are valid.
139 * - Otherwise check for mpls-gre acceptance
140 */
141 return (bgp_zebra_num_connects() == 0 ||
142 (bnc && (bnc->nexthop_num > 0 &&
143 (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
144 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
145 bnc->bgp->srv6_enabled ||
146 bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
147 bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
148 }
149
150 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
151 {
152 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
153 if (BGP_DEBUG(nht, NHT))
154 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
155 &bnc->prefix, bnc->ifindex, bnc->srte_color,
156 bnc->bgp->name_pretty);
157 /* only unregister if this is the last nh for this prefix*/
158 if (!bnc_existing_for_prefix(bnc))
159 unregister_zebra_rnh(bnc);
160 bnc_free(bnc);
161 }
162 }
163
164 void bgp_unlink_nexthop(struct bgp_path_info *path)
165 {
166 struct bgp_nexthop_cache *bnc = path->nexthop;
167
168 if (!bnc)
169 return;
170
171 path_nh_map(path, NULL, false);
172
173 bgp_unlink_nexthop_check(bnc);
174 }
175
176 void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
177 {
178 struct prefix pp;
179 struct prefix pt;
180 struct bgp_nexthop_cache *bncp, *bnct;
181 afi_t afi;
182 ifindex_t ifindex = 0;
183
184 if (!sockunion2hostprefix(&from->su, &pp))
185 return;
186
187 /*
188 * Gather the ifindex for if up/down events to be
189 * tagged into this fun
190 */
191 if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
192 ifindex = from->su.sin6.sin6_scope_id;
193
194 afi = family2afi(pp.family);
195 bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
196
197 if (!sockunion2hostprefix(&to->su, &pt))
198 return;
199
200 /*
201 * Gather the ifindex for if up/down events to be
202 * tagged into this fun
203 */
204 ifindex = 0;
205 if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
206 ifindex = to->su.sin6.sin6_scope_id;
207 bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
208
209 if (bnct != bncp)
210 return;
211
212 if (bnct)
213 bnct->nht_info = to;
214 }
215
216 /*
217 * Returns the bnc whose bnc->nht_info matches the LL peer by
218 * looping through the IPv6 nexthop table
219 */
220 static struct bgp_nexthop_cache *
221 bgp_find_ipv6_nexthop_matching_peer(struct peer *peer)
222 {
223 struct bgp_nexthop_cache *bnc;
224
225 frr_each (bgp_nexthop_cache, &peer->bgp->nexthop_cache_table[AFI_IP6],
226 bnc) {
227 if (bnc->nht_info == peer) {
228 if (BGP_DEBUG(nht, NHT)) {
229 zlog_debug(
230 "Found bnc: %pFX(%u)(%u)(%p) for peer: %s(%s) %p",
231 &bnc->prefix, bnc->ifindex,
232 bnc->srte_color, bnc, peer->host,
233 peer->bgp->name_pretty, peer);
234 }
235 return bnc;
236 }
237 }
238
239 if (BGP_DEBUG(nht, NHT))
240 zlog_debug(
241 "Could not find bnc for peer %s(%s) %p in v6 nexthop table",
242 peer->host, peer->bgp->name_pretty, peer);
243
244 return NULL;
245 }
246
247 void bgp_unlink_nexthop_by_peer(struct peer *peer)
248 {
249 struct prefix p;
250 struct bgp_nexthop_cache *bnc;
251 afi_t afi = family2afi(peer->su.sa.sa_family);
252 ifindex_t ifindex = 0;
253
254 if (!sockunion2hostprefix(&peer->su, &p)) {
255 /*
256 * In scenarios where unnumbered BGP session is brought
257 * down by shutting down the interface before unconfiguring
258 * the BGP neighbor, neighbor information in peer->su.sa
259 * will be cleared when the interface is shutdown. So
260 * during the deletion of unnumbered bgp peer, above check
261 * will return true. Therefore, in this case,BGP needs to
262 * find the bnc whose bnc->nht_info matches the
263 * peer being deleted and free it.
264 */
265 bnc = bgp_find_ipv6_nexthop_matching_peer(peer);
266 } else {
267 /*
268 * Gather the ifindex for if up/down events to be
269 * tagged into this fun
270 */
271 if (afi == AFI_IP6 &&
272 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
273 ifindex = peer->su.sin6.sin6_scope_id;
274 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0,
275 ifindex);
276 }
277
278 if (!bnc)
279 return;
280
281 /* cleanup the peer reference */
282 bnc->nht_info = NULL;
283
284 bgp_unlink_nexthop_check(bnc);
285 }
286
287 /*
288 * A route and its nexthop might belong to different VRFs. Therefore,
289 * we need both the bgp_route and bgp_nexthop pointers.
290 */
291 int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
292 afi_t afi, safi_t safi, struct bgp_path_info *pi,
293 struct peer *peer, int connected,
294 const struct prefix *orig_prefix)
295 {
296 struct bgp_nexthop_cache_head *tree = NULL;
297 struct bgp_nexthop_cache *bnc;
298 struct bgp_path_info *bpi_ultimate;
299 struct prefix p;
300 uint32_t srte_color = 0;
301 int is_bgp_static_route = 0;
302 ifindex_t ifindex = 0;
303
304 if (pi) {
305 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
306 && (pi->sub_type == BGP_ROUTE_STATIC))
307 ? 1
308 : 0;
309
310 /* Since Extended Next-hop Encoding (RFC5549) support, we want
311 to derive
312 address-family from the next-hop. */
313 if (!is_bgp_static_route)
314 afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
315 : AFI_IP;
316
317 /* Validation for the ipv4 mapped ipv6 nexthop. */
318 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
319 afi = AFI_IP;
320 }
321
322 /* This will return true if the global IPv6 NH is a link local
323 * addr */
324 if (make_prefix(afi, pi, &p) < 0)
325 return 1;
326
327 if (!is_bgp_static_route && orig_prefix
328 && prefix_same(&p, orig_prefix)) {
329 if (BGP_DEBUG(nht, NHT)) {
330 zlog_debug(
331 "%s(%pFX): prefix loops through itself",
332 __func__, &p);
333 }
334 return 0;
335 }
336
337 srte_color = pi->attr->srte_color;
338 } else if (peer) {
339 /*
340 * Gather the ifindex for if up/down events to be
341 * tagged into this fun
342 */
343 if (afi == AFI_IP6 &&
344 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
345 ifindex = peer->su.sin6.sin6_scope_id;
346 if (ifindex == 0) {
347 if (BGP_DEBUG(nht, NHT)) {
348 zlog_debug(
349 "%s: Unable to locate ifindex, waiting till we have one",
350 peer->conf_if);
351 }
352 return 0;
353 }
354 }
355
356 if (!sockunion2hostprefix(&peer->su, &p)) {
357 if (BGP_DEBUG(nht, NHT)) {
358 zlog_debug(
359 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
360 __func__, afi, AFI_IP, AFI_IP6);
361 }
362 return 0;
363 }
364 } else
365 return 0;
366
367 if (is_bgp_static_route)
368 tree = &bgp_nexthop->import_check_table[afi];
369 else
370 tree = &bgp_nexthop->nexthop_cache_table[afi];
371
372 bnc = bnc_find(tree, &p, srte_color, ifindex);
373 if (!bnc) {
374 bnc = bnc_new(tree, &p, srte_color, ifindex);
375 bnc->bgp = bgp_nexthop;
376 if (BGP_DEBUG(nht, NHT))
377 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
378 &bnc->prefix, bnc->ifindex, bnc->srte_color,
379 bnc->bgp->name_pretty, peer);
380 } else {
381 if (BGP_DEBUG(nht, NHT))
382 zlog_debug(
383 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
384 &bnc->prefix, bnc->ifindex,
385 bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
386 bnc->path_count, bnc->nht_info);
387 }
388
389 if (pi && is_route_parent_evpn(pi))
390 bnc->is_evpn_gwip_nexthop = true;
391
392 if (is_bgp_static_route && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE)) {
393 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
394
395 /* If we're toggling the type, re-register */
396 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
397 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
398 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
399 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
400 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
401 } else if ((!CHECK_FLAG(bgp_route->flags,
402 BGP_FLAG_IMPORT_CHECK))
403 && CHECK_FLAG(bnc->flags,
404 BGP_STATIC_ROUTE_EXACT_MATCH)) {
405 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
406 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
407 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
408 }
409 }
410 /* When nexthop is already known, but now requires 'connected'
411 * resolution,
412 * re-register it. The reverse scenario where the nexthop currently
413 * requires
414 * 'connected' resolution does not need a re-register (i.e., we treat
415 * 'connected-required' as an override) except in the scenario where
416 * this
417 * is actually a case of tracking a peer for connectivity (e.g., after
418 * disable connected-check).
419 * NOTE: We don't track the number of paths separately for 'connected-
420 * required' vs 'connected-not-required' as this change is not a common
421 * scenario.
422 */
423 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
424 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
425 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
426 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
427 } else if (peer && !connected &&
428 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
429 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
430 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
431 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
432 }
433 if (peer && (bnc->ifindex != ifindex)) {
434 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
435 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
436 bnc->ifindex = ifindex;
437 }
438 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
439 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
440 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
441 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
442 && !is_default_host_route(&bnc->prefix))
443 register_zebra_rnh(bnc);
444
445 if (pi && pi->nexthop != bnc) {
446 /* Unlink from existing nexthop cache, if any. This will also
447 * free
448 * the nexthop cache entry, if appropriate.
449 */
450 bgp_unlink_nexthop(pi);
451
452 /* updates NHT pi list reference */
453 path_nh_map(pi, bnc, true);
454
455 bpi_ultimate = bgp_get_imported_bpi_ultimate(pi);
456 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
457 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
458 bnc->metric;
459 else if (bpi_ultimate->extra)
460 bpi_ultimate->extra->igpmetric = 0;
461 } else if (peer) {
462 /*
463 * Let's not accidentally save the peer data for a peer
464 * we are going to throw away in a second or so.
465 * When we come back around we'll fix up this
466 * data properly in replace_nexthop_by_peer
467 */
468 if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
469 bnc->nht_info = (void *)peer; /* NHT peer reference */
470 }
471
472 /*
473 * We are cheating here. Views have no associated underlying
474 * ability to detect nexthops. So when we have a view
475 * just tell everyone the nexthop is valid
476 */
477 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
478 return 1;
479 else if (safi == SAFI_UNICAST && pi &&
480 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
481 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
482 return bgp_isvalid_nexthop_for_mpls(bnc, pi);
483 else
484 return (bgp_isvalid_nexthop(bnc));
485 }
486
487 void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
488 {
489 struct bgp_nexthop_cache *bnc;
490 struct prefix p;
491 ifindex_t ifindex = 0;
492
493 if (!peer)
494 return;
495
496 /*
497 * In case the below check evaluates true and if
498 * the bnc has not been freed at this point, then
499 * we might have to do something similar to what's
500 * done in bgp_unlink_nexthop_by_peer(). Since
501 * bgp_unlink_nexthop_by_peer() loops through the
502 * nodes of V6 nexthop cache to find the bnc, it is
503 * currently not being called here.
504 */
505 if (!sockunion2hostprefix(&peer->su, &p))
506 return;
507 /*
508 * Gather the ifindex for if up/down events to be
509 * tagged into this fun
510 */
511 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
512 ifindex = peer->su.sin6.sin6_scope_id;
513 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
514 &p, 0, ifindex);
515 if (!bnc) {
516 if (BGP_DEBUG(nht, NHT))
517 zlog_debug(
518 "Cannot find connected NHT node for peer %s(%s)",
519 peer->host, peer->bgp->name_pretty);
520 return;
521 }
522
523 if (bnc->nht_info != peer) {
524 if (BGP_DEBUG(nht, NHT))
525 zlog_debug(
526 "Connected NHT %p node for peer %s(%s) points to %p",
527 bnc, peer->host, bnc->bgp->name_pretty,
528 bnc->nht_info);
529 return;
530 }
531
532 bnc->nht_info = NULL;
533
534 if (LIST_EMPTY(&(bnc->paths))) {
535 if (BGP_DEBUG(nht, NHT))
536 zlog_debug(
537 "Freeing connected NHT node %p for peer %s(%s)",
538 bnc, peer->host, bnc->bgp->name_pretty);
539 unregister_zebra_rnh(bnc);
540 bnc_free(bnc);
541 }
542 }
543
544 static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
545 struct zapi_route *nhr,
546 bool import_check)
547 {
548 struct nexthop *nexthop;
549 struct nexthop *oldnh;
550 struct nexthop *nhlist_head = NULL;
551 struct nexthop *nhlist_tail = NULL;
552 int i;
553 bool evpn_resolved = false;
554
555 bnc->last_update = monotime(NULL);
556 bnc->change_flags = 0;
557
558 /* debug print the input */
559 if (BGP_DEBUG(nht, NHT)) {
560 char bnc_buf[BNC_FLAG_DUMP_SIZE];
561
562 zlog_debug(
563 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
564 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
565 bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
566 nhr->nexthop_num, bnc->nexthop_num,
567 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
568 sizeof(bnc_buf)));
569 }
570
571 if (nhr->metric != bnc->metric)
572 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
573
574 if (nhr->nexthop_num != bnc->nexthop_num)
575 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
576
577 if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
578 !prefix_same(&bnc->prefix, &nhr->prefix))) {
579 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
580 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
581 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
582 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
583
584 bnc_nexthop_free(bnc);
585 bnc->nexthop = NULL;
586
587 if (BGP_DEBUG(nht, NHT))
588 zlog_debug(
589 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
590 __func__, &bnc->prefix, &nhr->prefix);
591 } else if (nhr->nexthop_num) {
592 struct peer *peer = bnc->nht_info;
593
594 /* notify bgp fsm if nbr ip goes from invalid->valid */
595 if (!bnc->nexthop_num)
596 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
597
598 if (!bnc->is_evpn_gwip_nexthop)
599 bnc->flags |= BGP_NEXTHOP_VALID;
600 bnc->metric = nhr->metric;
601 bnc->nexthop_num = nhr->nexthop_num;
602
603 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
604
605 for (i = 0; i < nhr->nexthop_num; i++) {
606 int num_labels = 0;
607
608 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
609
610 /*
611 * Turn on RA for the v6 nexthops
612 * we receive from bgp. This is to allow us
613 * to work with v4 routing over v6 nexthops
614 */
615 if (peer && !peer->ifp
616 && CHECK_FLAG(peer->flags,
617 PEER_FLAG_CAPABILITY_ENHE)
618 && nhr->prefix.family == AF_INET6
619 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
620 struct interface *ifp;
621
622 ifp = if_lookup_by_index(nexthop->ifindex,
623 nexthop->vrf_id);
624 if (ifp)
625 zclient_send_interface_radv_req(
626 zclient, nexthop->vrf_id, ifp,
627 true,
628 BGP_UNNUM_DEFAULT_RA_INTERVAL);
629 }
630 /* There is at least one label-switched path */
631 if (nexthop->nh_label &&
632 nexthop->nh_label->num_labels) {
633
634 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
635 num_labels = nexthop->nh_label->num_labels;
636 }
637
638 if (BGP_DEBUG(nht, NHT)) {
639 char buf[NEXTHOP_STRLEN];
640 zlog_debug(
641 " nhop via %s (%d labels)",
642 nexthop2str(nexthop, buf, sizeof(buf)),
643 num_labels);
644 }
645
646 if (nhlist_tail) {
647 nhlist_tail->next = nexthop;
648 nhlist_tail = nexthop;
649 } else {
650 nhlist_tail = nexthop;
651 nhlist_head = nexthop;
652 }
653
654 /* No need to evaluate the nexthop if we have already
655 * determined
656 * that there has been a change.
657 */
658 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
659 continue;
660
661 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
662 if (nexthop_same(oldnh, nexthop))
663 break;
664
665 if (!oldnh)
666 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
667 }
668 bnc_nexthop_free(bnc);
669 bnc->nexthop = nhlist_head;
670
671 /*
672 * Gateway IP nexthop is L3 reachable. Mark it as
673 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
674 * remote EVPN RT-2.
675 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
676 * When its mapping with EVPN RT-2 is established, unset
677 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
678 */
679 if (bnc->is_evpn_gwip_nexthop) {
680 evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
681
682 if (BGP_DEBUG(nht, NHT))
683 zlog_debug(
684 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
685 &bnc->prefix,
686 (evpn_resolved ? "successful"
687 : "failed"));
688
689 if (evpn_resolved) {
690 bnc->flags |= BGP_NEXTHOP_VALID;
691 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
692 bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
693 } else {
694 bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
695 bnc->flags &= ~BGP_NEXTHOP_VALID;
696 }
697 }
698 } else {
699 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
700 bnc->flags &= ~BGP_NEXTHOP_VALID;
701 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
702 bnc->nexthop_num = nhr->nexthop_num;
703
704 /* notify bgp fsm if nbr ip goes from valid->invalid */
705 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
706
707 bnc_nexthop_free(bnc);
708 bnc->nexthop = NULL;
709 }
710
711 evaluate_paths(bnc);
712 }
713
714 static void bgp_nht_ifp_table_handle(struct bgp *bgp,
715 struct bgp_nexthop_cache_head *table,
716 struct interface *ifp, bool up)
717 {
718 struct bgp_nexthop_cache *bnc;
719
720 frr_each (bgp_nexthop_cache, table, bnc) {
721 if (bnc->ifindex != ifp->ifindex)
722 continue;
723
724 bnc->last_update = monotime(NULL);
725 bnc->change_flags = 0;
726
727 /*
728 * For interface based routes ( ala the v6 LL routes
729 * that this was written for ) the metric received
730 * for the connected route is 0 not 1.
731 */
732 bnc->metric = 0;
733 if (up) {
734 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
735 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
736 bnc->nexthop_num = 1;
737 } else {
738 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
739 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
740 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
741 bnc->nexthop_num = 0;
742 }
743
744 evaluate_paths(bnc);
745 }
746 }
747 static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
748 {
749 struct bgp *bgp;
750
751 bgp = ifp->vrf->info;
752 if (!bgp)
753 return;
754
755 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
756 up);
757 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
758 up);
759 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
760 up);
761 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
762 up);
763 }
764
765 void bgp_nht_ifp_up(struct interface *ifp)
766 {
767 bgp_nht_ifp_handle(ifp, true);
768 }
769
770 void bgp_nht_ifp_down(struct interface *ifp)
771 {
772 bgp_nht_ifp_handle(ifp, false);
773 }
774
775 static void bgp_nht_ifp_initial(struct thread *thread)
776 {
777 ifindex_t ifindex = THREAD_VAL(thread);
778 struct bgp *bgp = THREAD_ARG(thread);
779 struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
780
781 if (!ifp)
782 return;
783
784 if (BGP_DEBUG(nht, NHT))
785 zlog_debug(
786 "Handle NHT initial update for Intf %s(%d) status %s",
787 ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
788
789 if (if_is_up(ifp))
790 bgp_nht_ifp_up(ifp);
791 else
792 bgp_nht_ifp_down(ifp);
793 }
794
795 /*
796 * So the bnc code has the ability to handle interface up/down
797 * events to properly handle v6 LL peering.
798 * What is happening here:
799 * The event system for peering expects the nht code to
800 * report on the tracking events after we move to active
801 * So let's give the system a chance to report on that event
802 * in a manner that is expected.
803 */
804 void bgp_nht_interface_events(struct peer *peer)
805 {
806 struct bgp *bgp = peer->bgp;
807 struct bgp_nexthop_cache_head *table;
808 struct bgp_nexthop_cache *bnc;
809 struct prefix p;
810 ifindex_t ifindex = 0;
811
812 if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
813 return;
814
815 if (!sockunion2hostprefix(&peer->su, &p))
816 return;
817 /*
818 * Gather the ifindex for if up/down events to be
819 * tagged into this fun
820 */
821 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
822 ifindex = peer->su.sin6.sin6_scope_id;
823
824 table = &bgp->nexthop_cache_table[AFI_IP6];
825 bnc = bnc_find(table, &p, 0, ifindex);
826 if (!bnc)
827 return;
828
829 if (bnc->ifindex)
830 thread_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
831 bnc->ifindex, NULL);
832 }
833
834 void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
835 {
836 struct bgp_nexthop_cache_head *tree = NULL;
837 struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
838 struct bgp_path_info *pi;
839 struct bgp_dest *dest;
840 struct bgp *bgp;
841 struct prefix match;
842 struct zapi_route nhr;
843 afi_t afi;
844 safi_t safi;
845
846 bgp = bgp_lookup_by_vrf_id(vrf_id);
847 if (!bgp) {
848 flog_err(
849 EC_BGP_NH_UPD,
850 "parse nexthop update: instance not found for vrf_id %u",
851 vrf_id);
852 return;
853 }
854
855 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
856 zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
857 bgp->name_pretty);
858 return;
859 }
860
861 afi = family2afi(match.family);
862 tree = &bgp->nexthop_cache_table[afi];
863
864 bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
865 if (bnc_nhc)
866 bgp_process_nexthop_update(bnc_nhc, &nhr, false);
867 else if (BGP_DEBUG(nht, NHT))
868 zlog_debug(
869 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
870 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
871
872 tree = &bgp->import_check_table[afi];
873
874 bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
875 if (bnc_import) {
876 bgp_process_nexthop_update(bnc_import, &nhr, true);
877
878 safi = nhr.safi;
879 if (bgp->rib[afi][safi]) {
880 dest = bgp_afi_node_get(bgp->rib[afi][safi], afi, safi,
881 &match, NULL);
882
883 for (pi = bgp_dest_get_bgp_path_info(dest); pi;
884 pi = pi->next)
885 if (pi->peer == bgp->peer_self &&
886 pi->type == ZEBRA_ROUTE_BGP &&
887 pi->sub_type == BGP_ROUTE_STATIC)
888 vpn_leak_from_vrf_update(
889 bgp_get_default(), bgp, pi);
890 }
891 } else if (BGP_DEBUG(nht, NHT))
892 zlog_debug(
893 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
894 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
895
896 /*
897 * HACK: if any BGP route is dependant on an SR-policy that doesn't
898 * exist, zebra will never send NH updates relative to that policy. In
899 * that case, whenever we receive an update about a colorless NH, update
900 * the corresponding colorful NHs that share the same endpoint but that
901 * are inactive. This ugly hack should work around the problem at the
902 * cost of a performance pernalty. Long term, what should be done is to
903 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
904 * which should provide a better infrastructure to solve this issue in
905 * a more efficient and elegant way.
906 */
907 if (nhr.srte_color == 0 && bnc_nhc) {
908 struct bgp_nexthop_cache *bnc_iter;
909
910 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
911 bnc_iter) {
912 if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
913 bnc_iter->srte_color == 0 ||
914 CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
915 continue;
916
917 bgp_process_nexthop_update(bnc_iter, &nhr, false);
918 }
919 }
920 }
921
922 /*
923 * Cleanup nexthop registration and status information for BGP nexthops
924 * pertaining to this VRF. This is invoked upon VRF deletion.
925 */
926 void bgp_cleanup_nexthops(struct bgp *bgp)
927 {
928 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
929 struct bgp_nexthop_cache *bnc;
930
931 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
932 bnc) {
933 /* Clear relevant flags. */
934 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
935 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
936 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
937 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
938 }
939 }
940 }
941
942 /**
943 * make_prefix - make a prefix structure from the path (essentially
944 * path's node.
945 */
946 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
947 {
948
949 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
950 && (pi->sub_type == BGP_ROUTE_STATIC))
951 ? 1
952 : 0;
953 struct bgp_dest *net = pi->net;
954 const struct prefix *p_orig = bgp_dest_get_prefix(net);
955 struct in_addr ipv4;
956
957 if (p_orig->family == AF_FLOWSPEC) {
958 if (!pi->peer)
959 return -1;
960 return bgp_flowspec_get_first_nh(pi->peer->bgp,
961 pi, p, afi);
962 }
963 memset(p, 0, sizeof(struct prefix));
964 switch (afi) {
965 case AFI_IP:
966 p->family = AF_INET;
967 if (is_bgp_static) {
968 p->u.prefix4 = p_orig->u.prefix4;
969 p->prefixlen = p_orig->prefixlen;
970 } else {
971 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
972 ipv4_mapped_ipv6_to_ipv4(
973 &pi->attr->mp_nexthop_global, &ipv4);
974 p->u.prefix4 = ipv4;
975 p->prefixlen = IPV4_MAX_BITLEN;
976 } else {
977 if (p_orig->family == AF_EVPN)
978 p->u.prefix4 =
979 pi->attr->mp_nexthop_global_in;
980 else
981 p->u.prefix4 = pi->attr->nexthop;
982 p->prefixlen = IPV4_MAX_BITLEN;
983 }
984 }
985 break;
986 case AFI_IP6:
987 p->family = AF_INET6;
988
989 if (is_bgp_static) {
990 p->u.prefix6 = p_orig->u.prefix6;
991 p->prefixlen = p_orig->prefixlen;
992 } else {
993 /* If we receive MP_REACH nexthop with ::(LL)
994 * or LL(LL), use LL address as nexthop cache.
995 */
996 if (pi->attr->mp_nexthop_len
997 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
998 && (IN6_IS_ADDR_UNSPECIFIED(
999 &pi->attr->mp_nexthop_global)
1000 || IN6_IS_ADDR_LINKLOCAL(
1001 &pi->attr->mp_nexthop_global)))
1002 p->u.prefix6 = pi->attr->mp_nexthop_local;
1003 /* If we receive MR_REACH with (GA)::(LL)
1004 * then check for route-map to choose GA or LL
1005 */
1006 else if (pi->attr->mp_nexthop_len
1007 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
1008 if (CHECK_FLAG(pi->attr->nh_flag,
1009 BGP_ATTR_NH_MP_PREFER_GLOBAL))
1010 p->u.prefix6 =
1011 pi->attr->mp_nexthop_global;
1012 else
1013 p->u.prefix6 =
1014 pi->attr->mp_nexthop_local;
1015 } else
1016 p->u.prefix6 = pi->attr->mp_nexthop_global;
1017 p->prefixlen = IPV6_MAX_BITLEN;
1018 }
1019 break;
1020 default:
1021 if (BGP_DEBUG(nht, NHT)) {
1022 zlog_debug(
1023 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
1024 __func__, afi, AFI_IP, AFI_IP6);
1025 }
1026 break;
1027 }
1028 return 0;
1029 }
1030
1031 /**
1032 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
1033 * command to Zebra.
1034 * ARGUMENTS:
1035 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1036 * int command -- command to send to zebra
1037 * RETURNS:
1038 * void.
1039 */
1040 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
1041 {
1042 bool exact_match = false;
1043 bool resolve_via_default = false;
1044 int ret;
1045
1046 if (!zclient)
1047 return;
1048
1049 /* Don't try to register if Zebra doesn't know of this instance. */
1050 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
1051 if (BGP_DEBUG(zebra, ZEBRA))
1052 zlog_debug(
1053 "%s: No zebra instance to talk to, not installing NHT entry",
1054 __func__);
1055 return;
1056 }
1057
1058 if (!bgp_zebra_num_connects()) {
1059 if (BGP_DEBUG(zebra, ZEBRA))
1060 zlog_debug(
1061 "%s: We have not connected yet, cannot send nexthops",
1062 __func__);
1063 }
1064 if (command == ZEBRA_NEXTHOP_REGISTER) {
1065 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
1066 exact_match = true;
1067 if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
1068 resolve_via_default = true;
1069 }
1070
1071 if (BGP_DEBUG(zebra, ZEBRA))
1072 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
1073 zserv_command_string(command), &bnc->prefix,
1074 bnc->bgp->name_pretty);
1075
1076 ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1077 exact_match, resolve_via_default,
1078 bnc->bgp->vrf_id);
1079 if (ret == ZCLIENT_SEND_FAILURE) {
1080 flog_warn(EC_BGP_ZEBRA_SEND,
1081 "sendmsg_nexthop: zclient_send_message() failed");
1082 return;
1083 }
1084
1085 if (command == ZEBRA_NEXTHOP_REGISTER)
1086 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1087 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
1088 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1089 return;
1090 }
1091
1092 /**
1093 * register_zebra_rnh - register a NH/route with Zebra for notification
1094 * when the route or the route to the nexthop changes.
1095 * ARGUMENTS:
1096 * struct bgp_nexthop_cache *bnc
1097 * RETURNS:
1098 * void.
1099 */
1100 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
1101 {
1102 /* Check if we have already registered */
1103 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1104 return;
1105
1106 if (bnc->ifindex) {
1107 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1108 return;
1109 }
1110
1111 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
1112 }
1113
1114 /**
1115 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1116 * ARGUMENTS:
1117 * struct bgp_nexthop_cache *bnc
1118 * RETURNS:
1119 * void.
1120 */
1121 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
1122 {
1123 /* Check if we have already registered */
1124 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1125 return;
1126
1127 if (bnc->ifindex) {
1128 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1129 return;
1130 }
1131
1132 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
1133 }
1134
1135 /**
1136 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1137 * ARGUMENTS:
1138 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1139 * RETURNS:
1140 * void.
1141 */
1142 void evaluate_paths(struct bgp_nexthop_cache *bnc)
1143 {
1144 struct bgp_dest *dest;
1145 struct bgp_path_info *path;
1146 struct bgp_path_info *bpi_ultimate;
1147 int afi;
1148 struct peer *peer = (struct peer *)bnc->nht_info;
1149 struct bgp_table *table;
1150 safi_t safi;
1151 struct bgp *bgp_path;
1152 const struct prefix *p;
1153
1154 if (BGP_DEBUG(nht, NHT)) {
1155 char bnc_buf[BNC_FLAG_DUMP_SIZE];
1156 char chg_buf[BNC_FLAG_DUMP_SIZE];
1157
1158 zlog_debug(
1159 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1160 &bnc->prefix, bnc->ifindex, bnc->srte_color,
1161 bnc->bgp->name_pretty,
1162 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1163 sizeof(bnc_buf)),
1164 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1165 sizeof(bnc_buf)));
1166 }
1167
1168 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
1169 if (!(path->type == ZEBRA_ROUTE_BGP
1170 && ((path->sub_type == BGP_ROUTE_NORMAL)
1171 || (path->sub_type == BGP_ROUTE_STATIC)
1172 || (path->sub_type == BGP_ROUTE_IMPORTED))))
1173 continue;
1174
1175 dest = path->net;
1176 assert(dest && bgp_dest_table(dest));
1177 p = bgp_dest_get_prefix(dest);
1178 afi = family2afi(p->family);
1179 table = bgp_dest_table(dest);
1180 safi = table->safi;
1181
1182 /*
1183 * handle routes from other VRFs (they can have a
1184 * nexthop in THIS VRF). bgp_path is the bgp instance
1185 * that owns the route referencing this nexthop.
1186 */
1187 bgp_path = table->bgp;
1188
1189 /*
1190 * Path becomes valid/invalid depending on whether the nexthop
1191 * reachable/unreachable.
1192 *
1193 * In case of unicast routes that were imported from vpn
1194 * and that have labels, they are valid only if there are
1195 * nexthops with labels
1196 *
1197 * If the nexthop is EVPN gateway-IP,
1198 * do not check for a valid label.
1199 */
1200
1201 bool bnc_is_valid_nexthop = false;
1202 bool path_valid = false;
1203
1204 if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1205 && path->extra && path->extra->num_labels
1206 && (path->attr->evpn_overlay.type
1207 != OVERLAY_INDEX_GATEWAY_IP)) {
1208 bnc_is_valid_nexthop =
1209 bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1210 : false;
1211 } else {
1212 if (bgp_update_martian_nexthop(
1213 bnc->bgp, afi, safi, path->type,
1214 path->sub_type, path->attr, dest)) {
1215 if (BGP_DEBUG(nht, NHT))
1216 zlog_debug(
1217 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1218 __func__, dest, bgp_path->name);
1219 } else
1220 bnc_is_valid_nexthop =
1221 bgp_isvalid_nexthop(bnc) ? true : false;
1222 }
1223
1224 if (BGP_DEBUG(nht, NHT)) {
1225 if (dest->pdest)
1226 zlog_debug(
1227 "... eval path %d/%d %pBD RD %pRD %s flags 0x%x",
1228 afi, safi, dest,
1229 (struct prefix_rd *)bgp_dest_get_prefix(
1230 dest->pdest),
1231 bgp_path->name_pretty, path->flags);
1232 else
1233 zlog_debug(
1234 "... eval path %d/%d %pBD %s flags 0x%x",
1235 afi, safi, dest, bgp_path->name_pretty,
1236 path->flags);
1237 }
1238
1239 /* Skip paths marked for removal or as history. */
1240 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1241 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1242 continue;
1243
1244 /* Copy the metric to the path. Will be used for bestpath
1245 * computation */
1246 bpi_ultimate = bgp_get_imported_bpi_ultimate(path);
1247 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
1248 (bgp_path_info_extra_get(bpi_ultimate))->igpmetric =
1249 bnc->metric;
1250 else if (bpi_ultimate->extra)
1251 bpi_ultimate->extra->igpmetric = 0;
1252
1253 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
1254 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1255 || path->attr->srte_color != 0)
1256 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
1257
1258 path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
1259 if (path_valid != bnc_is_valid_nexthop) {
1260 if (path_valid) {
1261 /* No longer valid, clear flag; also for EVPN
1262 * routes, unimport from VRFs if needed.
1263 */
1264 bgp_aggregate_decrement(bgp_path, p, path, afi,
1265 safi);
1266 bgp_path_info_unset_flag(dest, path,
1267 BGP_PATH_VALID);
1268 if (safi == SAFI_EVPN &&
1269 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1270 bgp_evpn_unimport_route(bgp_path,
1271 afi, safi, bgp_dest_get_prefix(dest), path);
1272 } else {
1273 /* Path becomes valid, set flag; also for EVPN
1274 * routes, import from VRFs if needed.
1275 */
1276 bgp_path_info_set_flag(dest, path,
1277 BGP_PATH_VALID);
1278 bgp_aggregate_increment(bgp_path, p, path, afi,
1279 safi);
1280 if (safi == SAFI_EVPN &&
1281 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1282 bgp_evpn_import_route(bgp_path,
1283 afi, safi, bgp_dest_get_prefix(dest), path);
1284 }
1285 }
1286
1287 bgp_process(bgp_path, dest, afi, safi);
1288 }
1289
1290 if (peer) {
1291 int valid_nexthops = bgp_isvalid_nexthop(bnc);
1292
1293 if (valid_nexthops) {
1294 /*
1295 * Peering cannot occur across a blackhole nexthop
1296 */
1297 if (bnc->nexthop_num == 1 && bnc->nexthop
1298 && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1299 peer->last_reset = PEER_DOWN_WAITING_NHT;
1300 valid_nexthops = 0;
1301 } else
1302 peer->last_reset = PEER_DOWN_WAITING_OPEN;
1303 } else
1304 peer->last_reset = PEER_DOWN_WAITING_NHT;
1305
1306 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1307 if (BGP_DEBUG(nht, NHT))
1308 zlog_debug(
1309 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1310 __func__, peer->host,
1311 peer->bgp->name_pretty,
1312 !!valid_nexthops);
1313 bgp_fsm_nht_update(peer, !!valid_nexthops);
1314 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1315 }
1316 }
1317
1318 RESET_FLAG(bnc->change_flags);
1319 }
1320
1321 /**
1322 * path_nh_map - make or break path-to-nexthop association.
1323 * ARGUMENTS:
1324 * path - pointer to the path structure
1325 * bnc - pointer to the nexthop structure
1326 * make - if set, make the association. if unset, just break the existing
1327 * association.
1328 */
1329 void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1330 bool make)
1331 {
1332 if (path->nexthop) {
1333 LIST_REMOVE(path, nh_thread);
1334 path->nexthop->path_count--;
1335 path->nexthop = NULL;
1336 }
1337 if (make) {
1338 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1339 path->nexthop = bnc;
1340 path->nexthop->path_count++;
1341 }
1342 }
1343
1344 /*
1345 * This function is called to register nexthops to zebra
1346 * as that we may have tried to install the nexthops
1347 * before we actually have a zebra connection
1348 */
1349 void bgp_nht_register_nexthops(struct bgp *bgp)
1350 {
1351 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1352 struct bgp_nexthop_cache *bnc;
1353
1354 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1355 bnc) {
1356 register_zebra_rnh(bnc);
1357 }
1358 }
1359 }
1360
1361 void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1362 {
1363 struct bgp *bgp;
1364 struct bgp_nexthop_cache *bnc;
1365 struct nexthop *nhop;
1366 struct interface *ifp;
1367 struct prefix p;
1368 ifindex_t ifindex = 0;
1369
1370 if (peer->ifp)
1371 return;
1372
1373 bgp = peer->bgp;
1374 if (!sockunion2hostprefix(&peer->su, &p)) {
1375 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1376 __func__, peer->host);
1377 return;
1378 }
1379
1380 if (p.family != AF_INET6)
1381 return;
1382 /*
1383 * Gather the ifindex for if up/down events to be
1384 * tagged into this fun
1385 */
1386 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1387 ifindex = peer->su.sin6.sin6_scope_id;
1388
1389 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1390 if (!bnc)
1391 return;
1392
1393 if (peer != bnc->nht_info)
1394 return;
1395
1396 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1397 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1398
1399 if (!ifp)
1400 continue;
1401
1402 zclient_send_interface_radv_req(zclient,
1403 nhop->vrf_id,
1404 ifp, true,
1405 BGP_UNNUM_DEFAULT_RA_INTERVAL);
1406 }
1407 }
1408
1409 void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1410 {
1411 struct bgp *bgp;
1412 struct bgp_nexthop_cache *bnc;
1413 struct nexthop *nhop;
1414 struct interface *ifp;
1415 struct prefix p;
1416 ifindex_t ifindex = 0;
1417
1418 if (peer->ifp)
1419 return;
1420
1421 bgp = peer->bgp;
1422
1423 if (!sockunion2hostprefix(&peer->su, &p)) {
1424 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1425 __func__, peer->host);
1426 return;
1427 }
1428
1429 if (p.family != AF_INET6)
1430 return;
1431 /*
1432 * Gather the ifindex for if up/down events to be
1433 * tagged into this fun
1434 */
1435 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1436 ifindex = peer->su.sin6.sin6_scope_id;
1437
1438 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1439 if (!bnc)
1440 return;
1441
1442 if (peer != bnc->nht_info)
1443 return;
1444
1445 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1446 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1447
1448 if (!ifp)
1449 continue;
1450
1451 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1452 0);
1453 }
1454 }
1455
1456 /****************************************************************************
1457 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1458 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1459 * left to the application using it.
1460 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
1461 * failover of remote ES links.
1462 ***************************************************************************/
1463 static bitfield_t bgp_nh_id_bitmap;
1464 static uint32_t bgp_l3nhg_start;
1465
/*
 * XXX - currently we do nothing on the callbacks.
 *
 * The five functions below are passed to nexthop_group_init() by
 * bgp_l3nhg_zebra_init(); every one of them is an intentional no-op.
 */

/* Nexthop-group "add" callback: no-op. */
static void bgp_l3nhg_add_cb(const char *name)
{
	/* intentionally empty */
}

/* Nexthop-group "modify" callback: no-op. */
static void bgp_l3nhg_modify_cb(const struct nexthop_group_cmd *nhgc)
{
	/* intentionally empty */
}

/* Nexthop-group "add nexthop" callback: no-op. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* intentionally empty */
}

/* Nexthop-group "delete nexthop" callback: no-op. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
	/* intentionally empty */
}

/* Nexthop-group "delete" callback: no-op. */
static void bgp_l3nhg_del_cb(const char *name)
{
	/* intentionally empty */
}
1488
1489 static void bgp_l3nhg_zebra_init(void)
1490 {
1491 static bool bgp_l3nhg_zebra_inited;
1492 if (bgp_l3nhg_zebra_inited)
1493 return;
1494
1495 bgp_l3nhg_zebra_inited = true;
1496 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1497 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_modify_cb,
1498 bgp_l3nhg_add_nexthop_cb, bgp_l3nhg_del_nexthop_cb,
1499 bgp_l3nhg_del_cb);
1500 }
1501
1502
1503 void bgp_l3nhg_init(void)
1504 {
1505 uint32_t id_max;
1506
1507 id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1508 bf_init(bgp_nh_id_bitmap, id_max);
1509 bf_assign_zero_index(bgp_nh_id_bitmap);
1510
1511 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1512 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1513 bgp_l3nhg_start + id_max);
1514 }
1515
1516 void bgp_l3nhg_finish(void)
1517 {
1518 bf_free(bgp_nh_id_bitmap);
1519 }
1520
1521 uint32_t bgp_l3nhg_id_alloc(void)
1522 {
1523 uint32_t nhg_id = 0;
1524
1525 bgp_l3nhg_zebra_init();
1526 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1527 if (nhg_id)
1528 nhg_id += bgp_l3nhg_start;
1529
1530 return nhg_id;
1531 }
1532
1533 void bgp_l3nhg_id_free(uint32_t nhg_id)
1534 {
1535 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1536 return;
1537
1538 nhg_id -= bgp_l3nhg_start;
1539
1540 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1541 }