]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_nht.c
Merge pull request #9998 from pguibert6WIND/bgp_tcp_keepalive
[mirror_frr.git] / bgpd / bgp_nht.c
1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #include "command.h"
24 #include "thread.h"
25 #include "prefix.h"
26 #include "zclient.h"
27 #include "stream.h"
28 #include "network.h"
29 #include "log.h"
30 #include "memory.h"
31 #include "nexthop.h"
32 #include "vrf.h"
33 #include "filter.h"
34 #include "nexthop_group.h"
35
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_errors.h"
43 #include "bgpd/bgp_nht.h"
44 #include "bgpd/bgp_fsm.h"
45 #include "bgpd/bgp_zebra.h"
46 #include "bgpd/bgp_flowspec_util.h"
47 #include "bgpd/bgp_evpn.h"
48 #include "bgpd/bgp_rd.h"
49
/* Zebra client handle used by this file; declared extern, not defined here. */
extern struct zclient *zclient;

/* Forward declarations of file-local helpers that are used before their
 * definitions further down in this file.
 */
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
static void bgp_nht_ifp_initial(struct thread *thread);
56
57 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
58 {
59 return (bgp_zebra_num_connects() == 0
60 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
61 && bnc->nexthop_num > 0));
62 }
63
64 static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
65 struct bgp_path_info *path)
66 {
67 struct interface *ifp = NULL;
68 struct nexthop *nexthop;
69 struct bgp_interface *iifp;
70 struct peer *peer;
71
72 if (!path->extra || !path->extra->peer_orig)
73 return false;
74
75 peer = path->extra->peer_orig;
76
77 /* only connected ebgp peers are valid */
78 if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
79 CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
80 CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
81 return false;
82
83 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
84 if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
85 nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
86 nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
87 ifp = if_lookup_by_index(
88 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
89 bnc->bgp->vrf_id);
90 }
91 if (!ifp)
92 continue;
93 iifp = ifp->info;
94 if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
95 return true;
96 }
97 return false;
98 }
99
100 static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
101 struct bgp_path_info *path)
102 {
103 struct interface *ifp = NULL;
104 struct nexthop *nexthop;
105
106 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
107 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
108 ifp = if_lookup_by_index(
109 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
110 bnc->bgp->vrf_id);
111 if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
112 ifp->ll_type == ZEBRA_LLT_IP6GRE))
113 break;
114 }
115 }
116 if (!ifp)
117 return false;
118
119 if (CHECK_FLAG(path->attr->rmap_change_flags,
120 BATTR_RMAP_L3VPN_ACCEPT_GRE))
121 return true;
122
123 return false;
124 }
125
126 static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
127 struct bgp_path_info *path)
128 {
129 /*
130 * - In the case of MPLS-VPN, the label is learned from LDP or other
131 * protocols, and nexthop tracking is enabled for the label.
132 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
133 * - In the case of SRv6-VPN, we need to track the reachability to the
134 * SID (in other words, IPv6 address). As in MPLS, we need to record
135 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
136 * currently not implemented, and this function assumes that all
137 * Transit routes for SRv6-VPN are valid.
138 * - Otherwise check for mpls-gre acceptance
139 */
140 return (bgp_zebra_num_connects() == 0 ||
141 (bnc && (bnc->nexthop_num > 0 &&
142 (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
143 bnc->bgp->srv6_enabled ||
144 bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
145 bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
146 }
147
148 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
149 {
150 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
151 if (BGP_DEBUG(nht, NHT))
152 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
153 &bnc->prefix, bnc->ifindex, bnc->srte_color,
154 bnc->bgp->name_pretty);
155 /* only unregister if this is the last nh for this prefix*/
156 if (!bnc_existing_for_prefix(bnc))
157 unregister_zebra_rnh(bnc);
158 bnc_free(bnc);
159 }
160 }
161
162 void bgp_unlink_nexthop(struct bgp_path_info *path)
163 {
164 struct bgp_nexthop_cache *bnc = path->nexthop;
165
166 if (!bnc)
167 return;
168
169 path_nh_map(path, NULL, false);
170
171 bgp_unlink_nexthop_check(bnc);
172 }
173
174 void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
175 {
176 struct prefix pp;
177 struct prefix pt;
178 struct bgp_nexthop_cache *bncp, *bnct;
179 afi_t afi;
180 ifindex_t ifindex = 0;
181
182 if (!sockunion2hostprefix(&from->su, &pp))
183 return;
184
185 /*
186 * Gather the ifindex for if up/down events to be
187 * tagged into this fun
188 */
189 if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
190 ifindex = from->su.sin6.sin6_scope_id;
191
192 afi = family2afi(pp.family);
193 bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
194
195 if (!sockunion2hostprefix(&to->su, &pt))
196 return;
197
198 /*
199 * Gather the ifindex for if up/down events to be
200 * tagged into this fun
201 */
202 ifindex = 0;
203 if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
204 ifindex = to->su.sin6.sin6_scope_id;
205 bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
206
207 if (bnct != bncp)
208 return;
209
210 if (bnct)
211 bnct->nht_info = to;
212 }
213
214 void bgp_unlink_nexthop_by_peer(struct peer *peer)
215 {
216 struct prefix p;
217 struct bgp_nexthop_cache *bnc;
218 afi_t afi = family2afi(peer->su.sa.sa_family);
219 ifindex_t ifindex = 0;
220
221 if (!sockunion2hostprefix(&peer->su, &p))
222 return;
223 /*
224 * Gather the ifindex for if up/down events to be
225 * tagged into this fun
226 */
227 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
228 ifindex = peer->su.sin6.sin6_scope_id;
229 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0, ifindex);
230 if (!bnc)
231 return;
232
233 /* cleanup the peer reference */
234 bnc->nht_info = NULL;
235
236 bgp_unlink_nexthop_check(bnc);
237 }
238
239 /*
240 * A route and its nexthop might belong to different VRFs. Therefore,
241 * we need both the bgp_route and bgp_nexthop pointers.
242 */
243 int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
244 afi_t afi, safi_t safi, struct bgp_path_info *pi,
245 struct peer *peer, int connected,
246 const struct prefix *orig_prefix)
247 {
248 struct bgp_nexthop_cache_head *tree = NULL;
249 struct bgp_nexthop_cache *bnc;
250 struct prefix p;
251 uint32_t srte_color = 0;
252 int is_bgp_static_route = 0;
253 ifindex_t ifindex = 0;
254
255 if (pi) {
256 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
257 && (pi->sub_type == BGP_ROUTE_STATIC))
258 ? 1
259 : 0;
260
261 /* Since Extended Next-hop Encoding (RFC5549) support, we want
262 to derive
263 address-family from the next-hop. */
264 if (!is_bgp_static_route)
265 afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
266 : AFI_IP;
267
268 /* Validation for the ipv4 mapped ipv6 nexthop. */
269 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
270 afi = AFI_IP;
271 }
272
273 /* This will return true if the global IPv6 NH is a link local
274 * addr */
275 if (make_prefix(afi, pi, &p) < 0)
276 return 1;
277
278 if (!is_bgp_static_route && orig_prefix
279 && prefix_same(&p, orig_prefix)) {
280 if (BGP_DEBUG(nht, NHT)) {
281 zlog_debug(
282 "%s(%pFX): prefix loops through itself",
283 __func__, &p);
284 }
285 return 0;
286 }
287
288 srte_color = pi->attr->srte_color;
289 } else if (peer) {
290 /*
291 * Gather the ifindex for if up/down events to be
292 * tagged into this fun
293 */
294 if (afi == AFI_IP6 &&
295 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
296 ifindex = peer->su.sin6.sin6_scope_id;
297 if (ifindex == 0) {
298 if (BGP_DEBUG(nht, NHT)) {
299 zlog_debug(
300 "%s: Unable to locate ifindex, waiting till we have one",
301 peer->conf_if);
302 }
303 return 0;
304 }
305 }
306
307 if (!sockunion2hostprefix(&peer->su, &p)) {
308 if (BGP_DEBUG(nht, NHT)) {
309 zlog_debug(
310 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
311 __func__, afi, AFI_IP, AFI_IP6);
312 }
313 return 0;
314 }
315 } else
316 return 0;
317
318 if (is_bgp_static_route)
319 tree = &bgp_nexthop->import_check_table[afi];
320 else
321 tree = &bgp_nexthop->nexthop_cache_table[afi];
322
323 bnc = bnc_find(tree, &p, srte_color, ifindex);
324 if (!bnc) {
325 bnc = bnc_new(tree, &p, srte_color, ifindex);
326 bnc->bgp = bgp_nexthop;
327 if (BGP_DEBUG(nht, NHT))
328 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
329 &bnc->prefix, bnc->ifindex, bnc->srte_color,
330 bnc->bgp->name_pretty, peer);
331 } else {
332 if (BGP_DEBUG(nht, NHT))
333 zlog_debug(
334 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
335 &bnc->prefix, bnc->ifindex,
336 bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
337 bnc->path_count, bnc->nht_info);
338 }
339
340 if (pi && is_route_parent_evpn(pi))
341 bnc->is_evpn_gwip_nexthop = true;
342
343 if (is_bgp_static_route) {
344 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
345
346 /* If we're toggling the type, re-register */
347 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
348 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
349 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
350 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
351 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
352 } else if ((!CHECK_FLAG(bgp_route->flags,
353 BGP_FLAG_IMPORT_CHECK))
354 && CHECK_FLAG(bnc->flags,
355 BGP_STATIC_ROUTE_EXACT_MATCH)) {
356 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
357 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
358 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
359 }
360 }
361 /* When nexthop is already known, but now requires 'connected'
362 * resolution,
363 * re-register it. The reverse scenario where the nexthop currently
364 * requires
365 * 'connected' resolution does not need a re-register (i.e., we treat
366 * 'connected-required' as an override) except in the scenario where
367 * this
368 * is actually a case of tracking a peer for connectivity (e.g., after
369 * disable connected-check).
370 * NOTE: We don't track the number of paths separately for 'connected-
371 * required' vs 'connected-not-required' as this change is not a common
372 * scenario.
373 */
374 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
375 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
376 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
377 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
378 } else if (peer && !connected
379 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
380 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
381 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
382 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
383 }
384 if (peer && (bnc->ifindex != ifindex)) {
385 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
386 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
387 bnc->ifindex = ifindex;
388 }
389 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
390 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
391 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
392 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
393 && !is_default_host_route(&bnc->prefix))
394 register_zebra_rnh(bnc);
395
396 if (pi && pi->nexthop != bnc) {
397 /* Unlink from existing nexthop cache, if any. This will also
398 * free
399 * the nexthop cache entry, if appropriate.
400 */
401 bgp_unlink_nexthop(pi);
402
403 /* updates NHT pi list reference */
404 path_nh_map(pi, bnc, true);
405
406 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
407 (bgp_path_info_extra_get(pi))->igpmetric = bnc->metric;
408 else if (pi->extra)
409 pi->extra->igpmetric = 0;
410 } else if (peer) {
411 /*
412 * Let's not accidentally save the peer data for a peer
413 * we are going to throw away in a second or so.
414 * When we come back around we'll fix up this
415 * data properly in replace_nexthop_by_peer
416 */
417 if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
418 bnc->nht_info = (void *)peer; /* NHT peer reference */
419 }
420
421 /*
422 * We are cheating here. Views have no associated underlying
423 * ability to detect nexthops. So when we have a view
424 * just tell everyone the nexthop is valid
425 */
426 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
427 return 1;
428 else if (safi == SAFI_UNICAST && pi &&
429 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
430 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
431 return bgp_isvalid_nexthop_for_mpls(bnc, pi);
432 else
433 return (bgp_isvalid_nexthop(bnc));
434 }
435
436 void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
437 {
438 struct bgp_nexthop_cache *bnc;
439 struct prefix p;
440 ifindex_t ifindex = 0;
441
442 if (!peer)
443 return;
444
445 if (!sockunion2hostprefix(&peer->su, &p))
446 return;
447 /*
448 * Gather the ifindex for if up/down events to be
449 * tagged into this fun
450 */
451 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
452 ifindex = peer->su.sin6.sin6_scope_id;
453 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
454 &p, 0, ifindex);
455 if (!bnc) {
456 if (BGP_DEBUG(nht, NHT))
457 zlog_debug(
458 "Cannot find connected NHT node for peer %s(%s)",
459 peer->host, peer->bgp->name_pretty);
460 return;
461 }
462
463 if (bnc->nht_info != peer) {
464 if (BGP_DEBUG(nht, NHT))
465 zlog_debug(
466 "Connected NHT %p node for peer %s(%s) points to %p",
467 bnc, peer->host, bnc->bgp->name_pretty,
468 bnc->nht_info);
469 return;
470 }
471
472 bnc->nht_info = NULL;
473
474 if (LIST_EMPTY(&(bnc->paths))) {
475 if (BGP_DEBUG(nht, NHT))
476 zlog_debug(
477 "Freeing connected NHT node %p for peer %s(%s)",
478 bnc, peer->host, bnc->bgp->name_pretty);
479 unregister_zebra_rnh(bnc);
480 bnc_free(bnc);
481 }
482 }
483
484 static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
485 struct zapi_route *nhr,
486 bool import_check)
487 {
488 struct nexthop *nexthop;
489 struct nexthop *oldnh;
490 struct nexthop *nhlist_head = NULL;
491 struct nexthop *nhlist_tail = NULL;
492 int i;
493 bool evpn_resolved = false;
494
495 bnc->last_update = monotime(NULL);
496 bnc->change_flags = 0;
497
498 /* debug print the input */
499 if (BGP_DEBUG(nht, NHT)) {
500 char bnc_buf[BNC_FLAG_DUMP_SIZE];
501
502 zlog_debug(
503 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
504 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
505 bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
506 nhr->nexthop_num, bnc->nexthop_num,
507 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
508 sizeof(bnc_buf)));
509 }
510
511 if (nhr->metric != bnc->metric)
512 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
513
514 if (nhr->nexthop_num != bnc->nexthop_num)
515 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
516
517 if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
518 !prefix_same(&bnc->prefix, &nhr->prefix))) {
519 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
520 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
521 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
522 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
523
524 bnc_nexthop_free(bnc);
525 bnc->nexthop = NULL;
526
527 if (BGP_DEBUG(nht, NHT))
528 zlog_debug(
529 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
530 __func__, &bnc->prefix, &nhr->prefix);
531 } else if (nhr->nexthop_num) {
532 struct peer *peer = bnc->nht_info;
533
534 /* notify bgp fsm if nbr ip goes from invalid->valid */
535 if (!bnc->nexthop_num)
536 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
537
538 if (!bnc->is_evpn_gwip_nexthop)
539 bnc->flags |= BGP_NEXTHOP_VALID;
540 bnc->metric = nhr->metric;
541 bnc->nexthop_num = nhr->nexthop_num;
542
543 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
544
545 for (i = 0; i < nhr->nexthop_num; i++) {
546 int num_labels = 0;
547
548 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
549
550 /*
551 * Turn on RA for the v6 nexthops
552 * we receive from bgp. This is to allow us
553 * to work with v4 routing over v6 nexthops
554 */
555 if (peer && !peer->ifp
556 && CHECK_FLAG(peer->flags,
557 PEER_FLAG_CAPABILITY_ENHE)
558 && nhr->prefix.family == AF_INET6
559 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
560 struct interface *ifp;
561
562 ifp = if_lookup_by_index(nexthop->ifindex,
563 nexthop->vrf_id);
564 if (ifp)
565 zclient_send_interface_radv_req(
566 zclient, nexthop->vrf_id, ifp,
567 true,
568 BGP_UNNUM_DEFAULT_RA_INTERVAL);
569 }
570 /* There is at least one label-switched path */
571 if (nexthop->nh_label &&
572 nexthop->nh_label->num_labels) {
573
574 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
575 num_labels = nexthop->nh_label->num_labels;
576 }
577
578 if (BGP_DEBUG(nht, NHT)) {
579 char buf[NEXTHOP_STRLEN];
580 zlog_debug(
581 " nhop via %s (%d labels)",
582 nexthop2str(nexthop, buf, sizeof(buf)),
583 num_labels);
584 }
585
586 if (nhlist_tail) {
587 nhlist_tail->next = nexthop;
588 nhlist_tail = nexthop;
589 } else {
590 nhlist_tail = nexthop;
591 nhlist_head = nexthop;
592 }
593
594 /* No need to evaluate the nexthop if we have already
595 * determined
596 * that there has been a change.
597 */
598 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
599 continue;
600
601 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
602 if (nexthop_same(oldnh, nexthop))
603 break;
604
605 if (!oldnh)
606 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
607 }
608 bnc_nexthop_free(bnc);
609 bnc->nexthop = nhlist_head;
610
611 /*
612 * Gateway IP nexthop is L3 reachable. Mark it as
613 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
614 * remote EVPN RT-2.
615 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
616 * When its mapping with EVPN RT-2 is established, unset
617 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
618 */
619 if (bnc->is_evpn_gwip_nexthop) {
620 evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
621
622 if (BGP_DEBUG(nht, NHT))
623 zlog_debug(
624 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
625 &bnc->prefix,
626 (evpn_resolved ? "successful"
627 : "failed"));
628
629 if (evpn_resolved) {
630 bnc->flags |= BGP_NEXTHOP_VALID;
631 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
632 bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
633 } else {
634 bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
635 bnc->flags &= ~BGP_NEXTHOP_VALID;
636 }
637 }
638 } else {
639 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
640 bnc->flags &= ~BGP_NEXTHOP_VALID;
641 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
642 bnc->nexthop_num = nhr->nexthop_num;
643
644 /* notify bgp fsm if nbr ip goes from valid->invalid */
645 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
646
647 bnc_nexthop_free(bnc);
648 bnc->nexthop = NULL;
649 }
650
651 evaluate_paths(bnc);
652 }
653
654 static void bgp_nht_ifp_table_handle(struct bgp *bgp,
655 struct bgp_nexthop_cache_head *table,
656 struct interface *ifp, bool up)
657 {
658 struct bgp_nexthop_cache *bnc;
659
660 frr_each (bgp_nexthop_cache, table, bnc) {
661 if (bnc->ifindex != ifp->ifindex)
662 continue;
663
664 bnc->last_update = monotime(NULL);
665 bnc->change_flags = 0;
666
667 /*
668 * For interface based routes ( ala the v6 LL routes
669 * that this was written for ) the metric received
670 * for the connected route is 0 not 1.
671 */
672 bnc->metric = 0;
673 if (up) {
674 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
675 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
676 bnc->nexthop_num = 1;
677 } else {
678 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
679 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
680 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
681 bnc->nexthop_num = 0;
682 }
683
684 evaluate_paths(bnc);
685 }
686 }
687 static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
688 {
689 struct bgp *bgp;
690
691 bgp = ifp->vrf->info;
692 if (!bgp)
693 return;
694
695 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
696 up);
697 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
698 up);
699 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
700 up);
701 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
702 up);
703 }
704
/* Interface came up: forward the event to the NHT tables as "up". */
void bgp_nht_ifp_up(struct interface *ifp)
{
	const bool up = true;

	bgp_nht_ifp_handle(ifp, up);
}
709
/* Interface went down: forward the event to the NHT tables as "down". */
void bgp_nht_ifp_down(struct interface *ifp)
{
	const bool up = false;

	bgp_nht_ifp_handle(ifp, up);
}
714
715 static void bgp_nht_ifp_initial(struct thread *thread)
716 {
717 ifindex_t ifindex = THREAD_VAL(thread);
718 struct bgp *bgp = THREAD_ARG(thread);
719 struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
720
721 if (!ifp)
722 return;
723
724 if (BGP_DEBUG(nht, NHT))
725 zlog_debug(
726 "Handle NHT initial update for Intf %s(%d) status %s",
727 ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
728
729 if (if_is_up(ifp))
730 bgp_nht_ifp_up(ifp);
731 else
732 bgp_nht_ifp_down(ifp);
733 }
734
735 /*
736 * So the bnc code has the ability to handle interface up/down
737 * events to properly handle v6 LL peering.
738 * What is happening here:
739 * The event system for peering expects the nht code to
740 * report on the tracking events after we move to active
741 * So let's give the system a chance to report on that event
742 * in a manner that is expected.
743 */
744 void bgp_nht_interface_events(struct peer *peer)
745 {
746 struct bgp *bgp = peer->bgp;
747 struct bgp_nexthop_cache_head *table;
748 struct bgp_nexthop_cache *bnc;
749 struct prefix p;
750 ifindex_t ifindex = 0;
751
752 if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
753 return;
754
755 if (!sockunion2hostprefix(&peer->su, &p))
756 return;
757 /*
758 * Gather the ifindex for if up/down events to be
759 * tagged into this fun
760 */
761 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
762 ifindex = peer->su.sin6.sin6_scope_id;
763
764 table = &bgp->nexthop_cache_table[AFI_IP6];
765 bnc = bnc_find(table, &p, 0, ifindex);
766 if (!bnc)
767 return;
768
769 if (bnc->ifindex)
770 thread_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
771 bnc->ifindex, NULL);
772 }
773
774 void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
775 {
776 struct bgp_nexthop_cache_head *tree = NULL;
777 struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
778 struct bgp *bgp;
779 struct prefix match;
780 struct zapi_route nhr;
781 afi_t afi;
782
783 bgp = bgp_lookup_by_vrf_id(vrf_id);
784 if (!bgp) {
785 flog_err(
786 EC_BGP_NH_UPD,
787 "parse nexthop update: instance not found for vrf_id %u",
788 vrf_id);
789 return;
790 }
791
792 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
793 zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
794 bgp->name_pretty);
795 return;
796 }
797
798 afi = family2afi(match.family);
799 tree = &bgp->nexthop_cache_table[afi];
800
801 bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
802 if (!bnc_nhc) {
803 if (BGP_DEBUG(nht, NHT))
804 zlog_debug(
805 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
806 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
807 } else
808 bgp_process_nexthop_update(bnc_nhc, &nhr, false);
809
810 tree = &bgp->import_check_table[afi];
811
812 bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
813 if (!bnc_import) {
814 if (BGP_DEBUG(nht, NHT))
815 zlog_debug(
816 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
817 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
818 } else
819 bgp_process_nexthop_update(bnc_import, &nhr, true);
820
821 /*
822 * HACK: if any BGP route is dependant on an SR-policy that doesn't
823 * exist, zebra will never send NH updates relative to that policy. In
824 * that case, whenever we receive an update about a colorless NH, update
825 * the corresponding colorful NHs that share the same endpoint but that
826 * are inactive. This ugly hack should work around the problem at the
827 * cost of a performance pernalty. Long term, what should be done is to
828 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
829 * which should provide a better infrastructure to solve this issue in
830 * a more efficient and elegant way.
831 */
832 if (nhr.srte_color == 0 && bnc_nhc) {
833 struct bgp_nexthop_cache *bnc_iter;
834
835 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
836 bnc_iter) {
837 if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
838 bnc_iter->srte_color == 0 ||
839 CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
840 continue;
841
842 bgp_process_nexthop_update(bnc_iter, &nhr, false);
843 }
844 }
845 }
846
847 /*
848 * Cleanup nexthop registration and status information for BGP nexthops
849 * pertaining to this VRF. This is invoked upon VRF deletion.
850 */
851 void bgp_cleanup_nexthops(struct bgp *bgp)
852 {
853 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
854 struct bgp_nexthop_cache *bnc;
855
856 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
857 bnc) {
858 /* Clear relevant flags. */
859 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
860 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
861 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
862 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
863 }
864 }
865 }
866
867 /**
868 * make_prefix - make a prefix structure from the path (essentially
869 * path's node.
870 */
871 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
872 {
873
874 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
875 && (pi->sub_type == BGP_ROUTE_STATIC))
876 ? 1
877 : 0;
878 struct bgp_dest *net = pi->net;
879 const struct prefix *p_orig = bgp_dest_get_prefix(net);
880 struct in_addr ipv4;
881
882 if (p_orig->family == AF_FLOWSPEC) {
883 if (!pi->peer)
884 return -1;
885 return bgp_flowspec_get_first_nh(pi->peer->bgp,
886 pi, p, afi);
887 }
888 memset(p, 0, sizeof(struct prefix));
889 switch (afi) {
890 case AFI_IP:
891 p->family = AF_INET;
892 if (is_bgp_static) {
893 p->u.prefix4 = p_orig->u.prefix4;
894 p->prefixlen = p_orig->prefixlen;
895 } else {
896 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
897 ipv4_mapped_ipv6_to_ipv4(
898 &pi->attr->mp_nexthop_global, &ipv4);
899 p->u.prefix4 = ipv4;
900 p->prefixlen = IPV4_MAX_BITLEN;
901 } else {
902 if (p_orig->family == AF_EVPN)
903 p->u.prefix4 =
904 pi->attr->mp_nexthop_global_in;
905 else
906 p->u.prefix4 = pi->attr->nexthop;
907 p->prefixlen = IPV4_MAX_BITLEN;
908 }
909 }
910 break;
911 case AFI_IP6:
912 p->family = AF_INET6;
913
914 if (is_bgp_static) {
915 p->u.prefix6 = p_orig->u.prefix6;
916 p->prefixlen = p_orig->prefixlen;
917 } else {
918 /* If we receive MP_REACH nexthop with ::(LL)
919 * or LL(LL), use LL address as nexthop cache.
920 */
921 if (pi->attr->mp_nexthop_len
922 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
923 && (IN6_IS_ADDR_UNSPECIFIED(
924 &pi->attr->mp_nexthop_global)
925 || IN6_IS_ADDR_LINKLOCAL(
926 &pi->attr->mp_nexthop_global)))
927 p->u.prefix6 = pi->attr->mp_nexthop_local;
928 /* If we receive MR_REACH with (GA)::(LL)
929 * then check for route-map to choose GA or LL
930 */
931 else if (pi->attr->mp_nexthop_len
932 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
933 if (pi->attr->mp_nexthop_prefer_global)
934 p->u.prefix6 =
935 pi->attr->mp_nexthop_global;
936 else
937 p->u.prefix6 =
938 pi->attr->mp_nexthop_local;
939 } else
940 p->u.prefix6 = pi->attr->mp_nexthop_global;
941 p->prefixlen = IPV6_MAX_BITLEN;
942 }
943 break;
944 default:
945 if (BGP_DEBUG(nht, NHT)) {
946 zlog_debug(
947 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
948 __func__, afi, AFI_IP, AFI_IP6);
949 }
950 break;
951 }
952 return 0;
953 }
954
955 /**
956 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
957 * command to Zebra.
958 * ARGUMENTS:
959 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
960 * int command -- command to send to zebra
961 * RETURNS:
962 * void.
963 */
964 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
965 {
966 bool exact_match = false;
967 bool resolve_via_default = false;
968 int ret;
969
970 if (!zclient)
971 return;
972
973 /* Don't try to register if Zebra doesn't know of this instance. */
974 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
975 if (BGP_DEBUG(zebra, ZEBRA))
976 zlog_debug(
977 "%s: No zebra instance to talk to, not installing NHT entry",
978 __func__);
979 return;
980 }
981
982 if (!bgp_zebra_num_connects()) {
983 if (BGP_DEBUG(zebra, ZEBRA))
984 zlog_debug(
985 "%s: We have not connected yet, cannot send nexthops",
986 __func__);
987 }
988 if (command == ZEBRA_NEXTHOP_REGISTER) {
989 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
990 exact_match = true;
991 if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
992 resolve_via_default = true;
993 }
994
995 if (BGP_DEBUG(zebra, ZEBRA))
996 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
997 zserv_command_string(command), &bnc->prefix,
998 bnc->bgp->name_pretty);
999
1000 ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1001 exact_match, resolve_via_default,
1002 bnc->bgp->vrf_id);
1003 if (ret == ZCLIENT_SEND_FAILURE) {
1004 flog_warn(EC_BGP_ZEBRA_SEND,
1005 "sendmsg_nexthop: zclient_send_message() failed");
1006 return;
1007 }
1008
1009 if (command == ZEBRA_NEXTHOP_REGISTER)
1010 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1011 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
1012 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1013 return;
1014 }
1015
1016 /**
1017 * register_zebra_rnh - register a NH/route with Zebra for notification
1018 * when the route or the route to the nexthop changes.
1019 * ARGUMENTS:
1020 * struct bgp_nexthop_cache *bnc
1021 * RETURNS:
1022 * void.
1023 */
1024 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
1025 {
1026 /* Check if we have already registered */
1027 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1028 return;
1029
1030 if (bnc->ifindex) {
1031 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1032 return;
1033 }
1034
1035 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
1036 }
1037
1038 /**
1039 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1040 * ARGUMENTS:
1041 * struct bgp_nexthop_cache *bnc
1042 * RETURNS:
1043 * void.
1044 */
1045 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
1046 {
1047 /* Check if we have already registered */
1048 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1049 return;
1050
1051 if (bnc->ifindex) {
1052 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1053 return;
1054 }
1055
1056 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
1057 }
1058
1059 /**
1060 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1061 * ARGUMENTS:
1062 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1063 * RETURNS:
1064 * void.
1065 */
1066 void evaluate_paths(struct bgp_nexthop_cache *bnc)
1067 {
1068 struct bgp_dest *dest;
1069 struct bgp_path_info *path;
1070 int afi;
1071 struct peer *peer = (struct peer *)bnc->nht_info;
1072 struct bgp_table *table;
1073 safi_t safi;
1074 struct bgp *bgp_path;
1075 const struct prefix *p;
1076
1077 if (BGP_DEBUG(nht, NHT)) {
1078 char bnc_buf[BNC_FLAG_DUMP_SIZE];
1079 char chg_buf[BNC_FLAG_DUMP_SIZE];
1080
1081 zlog_debug(
1082 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1083 &bnc->prefix, bnc->ifindex, bnc->srte_color,
1084 bnc->bgp->name_pretty,
1085 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1086 sizeof(bnc_buf)),
1087 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1088 sizeof(bnc_buf)));
1089 }
1090
1091 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
1092 if (!(path->type == ZEBRA_ROUTE_BGP
1093 && ((path->sub_type == BGP_ROUTE_NORMAL)
1094 || (path->sub_type == BGP_ROUTE_STATIC)
1095 || (path->sub_type == BGP_ROUTE_IMPORTED))))
1096 continue;
1097
1098 dest = path->net;
1099 assert(dest && bgp_dest_table(dest));
1100 p = bgp_dest_get_prefix(dest);
1101 afi = family2afi(p->family);
1102 table = bgp_dest_table(dest);
1103 safi = table->safi;
1104
1105 /*
1106 * handle routes from other VRFs (they can have a
1107 * nexthop in THIS VRF). bgp_path is the bgp instance
1108 * that owns the route referencing this nexthop.
1109 */
1110 bgp_path = table->bgp;
1111
1112 /*
1113 * Path becomes valid/invalid depending on whether the nexthop
1114 * reachable/unreachable.
1115 *
1116 * In case of unicast routes that were imported from vpn
1117 * and that have labels, they are valid only if there are
1118 * nexthops with labels
1119 *
1120 * If the nexthop is EVPN gateway-IP,
1121 * do not check for a valid label.
1122 */
1123
1124 bool bnc_is_valid_nexthop = false;
1125 bool path_valid = false;
1126
1127 if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1128 && path->extra && path->extra->num_labels
1129 && (path->attr->evpn_overlay.type
1130 != OVERLAY_INDEX_GATEWAY_IP)) {
1131 bnc_is_valid_nexthop =
1132 bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1133 : false;
1134 } else {
1135 if (bgp_update_martian_nexthop(
1136 bnc->bgp, afi, safi, path->type,
1137 path->sub_type, path->attr, dest)) {
1138 if (BGP_DEBUG(nht, NHT))
1139 zlog_debug(
1140 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1141 __func__, dest, bgp_path->name);
1142 } else
1143 bnc_is_valid_nexthop =
1144 bgp_isvalid_nexthop(bnc) ? true : false;
1145 }
1146
1147 if (BGP_DEBUG(nht, NHT)) {
1148 if (dest->pdest)
1149 zlog_debug(
1150 "... eval path %d/%d %pBD RD %pRD %s flags 0x%x",
1151 afi, safi, dest,
1152 (struct prefix_rd *)bgp_dest_get_prefix(
1153 dest->pdest),
1154 bgp_path->name_pretty, path->flags);
1155 else
1156 zlog_debug(
1157 "... eval path %d/%d %pBD %s flags 0x%x",
1158 afi, safi, dest, bgp_path->name_pretty,
1159 path->flags);
1160 }
1161
1162 /* Skip paths marked for removal or as history. */
1163 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1164 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1165 continue;
1166
1167 /* Copy the metric to the path. Will be used for bestpath
1168 * computation */
1169 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
1170 (bgp_path_info_extra_get(path))->igpmetric =
1171 bnc->metric;
1172 else if (path->extra)
1173 path->extra->igpmetric = 0;
1174
1175 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
1176 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1177 || path->attr->srte_color != 0)
1178 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
1179
1180 path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
1181 if (path_valid != bnc_is_valid_nexthop) {
1182 if (path_valid) {
1183 /* No longer valid, clear flag; also for EVPN
1184 * routes, unimport from VRFs if needed.
1185 */
1186 bgp_aggregate_decrement(bgp_path, p, path, afi,
1187 safi);
1188 bgp_path_info_unset_flag(dest, path,
1189 BGP_PATH_VALID);
1190 if (safi == SAFI_EVPN &&
1191 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1192 bgp_evpn_unimport_route(bgp_path,
1193 afi, safi, bgp_dest_get_prefix(dest), path);
1194 } else {
1195 /* Path becomes valid, set flag; also for EVPN
1196 * routes, import from VRFs if needed.
1197 */
1198 bgp_path_info_set_flag(dest, path,
1199 BGP_PATH_VALID);
1200 bgp_aggregate_increment(bgp_path, p, path, afi,
1201 safi);
1202 if (safi == SAFI_EVPN &&
1203 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1204 bgp_evpn_import_route(bgp_path,
1205 afi, safi, bgp_dest_get_prefix(dest), path);
1206 }
1207 }
1208
1209 bgp_process(bgp_path, dest, afi, safi);
1210 }
1211
1212 if (peer) {
1213 int valid_nexthops = bgp_isvalid_nexthop(bnc);
1214
1215 if (valid_nexthops) {
1216 /*
1217 * Peering cannot occur across a blackhole nexthop
1218 */
1219 if (bnc->nexthop_num == 1 && bnc->nexthop
1220 && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1221 peer->last_reset = PEER_DOWN_WAITING_NHT;
1222 valid_nexthops = 0;
1223 } else
1224 peer->last_reset = PEER_DOWN_WAITING_OPEN;
1225 } else
1226 peer->last_reset = PEER_DOWN_WAITING_NHT;
1227
1228 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1229 if (BGP_DEBUG(nht, NHT))
1230 zlog_debug(
1231 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1232 __func__, peer->host,
1233 peer->bgp->name_pretty,
1234 !!valid_nexthops);
1235 bgp_fsm_nht_update(peer, !!valid_nexthops);
1236 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1237 }
1238 }
1239
1240 RESET_FLAG(bnc->change_flags);
1241 }
1242
1243 /**
1244 * path_nh_map - make or break path-to-nexthop association.
1245 * ARGUMENTS:
1246 * path - pointer to the path structure
1247 * bnc - pointer to the nexthop structure
1248 * make - if set, make the association. if unset, just break the existing
1249 * association.
1250 */
1251 void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1252 bool make)
1253 {
1254 if (path->nexthop) {
1255 LIST_REMOVE(path, nh_thread);
1256 path->nexthop->path_count--;
1257 path->nexthop = NULL;
1258 }
1259 if (make) {
1260 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1261 path->nexthop = bnc;
1262 path->nexthop->path_count++;
1263 }
1264 }
1265
1266 /*
1267 * This function is called to register nexthops to zebra
1268 * as that we may have tried to install the nexthops
1269 * before we actually have a zebra connection
1270 */
1271 void bgp_nht_register_nexthops(struct bgp *bgp)
1272 {
1273 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1274 struct bgp_nexthop_cache *bnc;
1275
1276 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1277 bnc) {
1278 register_zebra_rnh(bnc);
1279 }
1280 }
1281 }
1282
1283 void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1284 {
1285 struct bgp *bgp;
1286 struct bgp_nexthop_cache *bnc;
1287 struct nexthop *nhop;
1288 struct interface *ifp;
1289 struct prefix p;
1290 ifindex_t ifindex = 0;
1291
1292 if (peer->ifp)
1293 return;
1294
1295 bgp = peer->bgp;
1296 if (!sockunion2hostprefix(&peer->su, &p)) {
1297 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1298 __func__, peer->host);
1299 return;
1300 }
1301
1302 if (p.family != AF_INET6)
1303 return;
1304 /*
1305 * Gather the ifindex for if up/down events to be
1306 * tagged into this fun
1307 */
1308 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1309 ifindex = peer->su.sin6.sin6_scope_id;
1310
1311 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1312 if (!bnc)
1313 return;
1314
1315 if (peer != bnc->nht_info)
1316 return;
1317
1318 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1319 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1320
1321 if (!ifp)
1322 continue;
1323
1324 zclient_send_interface_radv_req(zclient,
1325 nhop->vrf_id,
1326 ifp, true,
1327 BGP_UNNUM_DEFAULT_RA_INTERVAL);
1328 }
1329 }
1330
1331 void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1332 {
1333 struct bgp *bgp;
1334 struct bgp_nexthop_cache *bnc;
1335 struct nexthop *nhop;
1336 struct interface *ifp;
1337 struct prefix p;
1338 ifindex_t ifindex = 0;
1339
1340 if (peer->ifp)
1341 return;
1342
1343 bgp = peer->bgp;
1344
1345 if (!sockunion2hostprefix(&peer->su, &p)) {
1346 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1347 __func__, peer->host);
1348 return;
1349 }
1350
1351 if (p.family != AF_INET6)
1352 return;
1353 /*
1354 * Gather the ifindex for if up/down events to be
1355 * tagged into this fun
1356 */
1357 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1358 ifindex = peer->su.sin6.sin6_scope_id;
1359
1360 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1361 if (!bnc)
1362 return;
1363
1364 if (peer != bnc->nht_info)
1365 return;
1366
1367 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1368 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1369
1370 if (!ifp)
1371 continue;
1372
1373 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1374 0);
1375 }
1376 }
1377
1378 /****************************************************************************
1379 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
1380 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
1381 * left to the application using it.
1382 * PS: Currently EVPN host routes are the only user of L3 NHGs, for fast
1383 * failover of remote ES links.
1384 ***************************************************************************/
/*
 * Bitmap of L3 NHG id slots. It is set up in bgp_l3nhg_init(), indices are
 * taken from it in bgp_l3nhg_id_alloc() and given back in
 * bgp_l3nhg_id_free(); bgp_l3nhg_finish() frees it.
 */
1385 static bitfield_t bgp_nh_id_bitmap;
/*
 * Offset added to a bitmap index to form the NHG id returned to callers.
 * Assigned from zclient_get_nhg_start(ZEBRA_ROUTE_BGP) in
 * bgp_l3nhg_zebra_init().
 */
1386 static uint32_t bgp_l3nhg_start;
1387
1388 /* XXX - currently we do nothing on the callbacks */
/*
 * Passed as the first callback to nexthop_group_init() in
 * bgp_l3nhg_zebra_init(). Empty: bgpd takes no action here.
 */
1389 static void bgp_l3nhg_add_cb(const char *name)
1390 {
1391 }
/*
 * Passed as the second callback to nexthop_group_init() in
 * bgp_l3nhg_zebra_init(). Empty: bgpd takes no action here.
 */
1392 static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
1393 const struct nexthop *nhop)
1394 {
1395 }
/*
 * Passed as the third callback to nexthop_group_init() in
 * bgp_l3nhg_zebra_init(). Empty: bgpd takes no action here.
 */
1396 static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
1397 const struct nexthop *nhop)
1398 {
1399 }
/*
 * Passed as the fourth callback to nexthop_group_init() in
 * bgp_l3nhg_zebra_init(). Empty: bgpd takes no action here.
 */
1400 static void bgp_l3nhg_del_cb(const char *name)
1401 {
1402 }
1403
1404 static void bgp_l3nhg_zebra_init(void)
1405 {
1406 static bool bgp_l3nhg_zebra_inited;
1407 if (bgp_l3nhg_zebra_inited)
1408 return;
1409
1410 bgp_l3nhg_zebra_inited = true;
1411 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1412 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_add_nexthop_cb,
1413 bgp_l3nhg_del_nexthop_cb, bgp_l3nhg_del_cb);
1414 }
1415
1416
1417 void bgp_l3nhg_init(void)
1418 {
1419 uint32_t id_max;
1420
1421 id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1422 bf_init(bgp_nh_id_bitmap, id_max);
1423 bf_assign_zero_index(bgp_nh_id_bitmap);
1424
1425 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1426 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1427 bgp_l3nhg_start + id_max);
1428 }
1429
/* Free the L3 NHG id bitmap that bgp_l3nhg_init() set up. */
1430 void bgp_l3nhg_finish(void)
1431 {
1432 bf_free(bgp_nh_id_bitmap);
1433 }
1434
1435 uint32_t bgp_l3nhg_id_alloc(void)
1436 {
1437 uint32_t nhg_id = 0;
1438
1439 bgp_l3nhg_zebra_init();
1440 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1441 if (nhg_id)
1442 nhg_id += bgp_l3nhg_start;
1443
1444 return nhg_id;
1445 }
1446
1447 void bgp_l3nhg_id_free(uint32_t nhg_id)
1448 {
1449 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1450 return;
1451
1452 nhg_id -= bgp_l3nhg_start;
1453
1454 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1455 }