]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_nht.c
bgpd: Implement ACCEPT_OWN extended community
[mirror_frr.git] / bgpd / bgp_nht.c
1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #include "command.h"
24 #include "thread.h"
25 #include "prefix.h"
26 #include "zclient.h"
27 #include "stream.h"
28 #include "network.h"
29 #include "log.h"
30 #include "memory.h"
31 #include "nexthop.h"
32 #include "vrf.h"
33 #include "filter.h"
34 #include "nexthop_group.h"
35
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_errors.h"
43 #include "bgpd/bgp_nht.h"
44 #include "bgpd/bgp_fsm.h"
45 #include "bgpd/bgp_zebra.h"
46 #include "bgpd/bgp_flowspec_util.h"
47 #include "bgpd/bgp_evpn.h"
48 #include "bgpd/bgp_rd.h"
49
50 extern struct zclient *zclient;
51
52 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc);
53 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc);
54 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
55 static void bgp_nht_ifp_initial(struct thread *thread);
56
57 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
58 {
59 return (bgp_zebra_num_connects() == 0
60 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)
61 && bnc->nexthop_num > 0));
62 }
63
64 static int bgp_isvalid_nexthop_for_ebgp(struct bgp_nexthop_cache *bnc,
65 struct bgp_path_info *path)
66 {
67 struct interface *ifp = NULL;
68 struct nexthop *nexthop;
69 struct bgp_interface *iifp;
70 struct peer *peer;
71
72 if (!path->extra || !path->extra->peer_orig)
73 return false;
74
75 peer = path->extra->peer_orig;
76
77 /* only connected ebgp peers are valid */
78 if (peer->sort != BGP_PEER_EBGP || peer->ttl != BGP_DEFAULT_TTL ||
79 CHECK_FLAG(peer->flags, PEER_FLAG_DISABLE_CONNECTED_CHECK) ||
80 CHECK_FLAG(peer->bgp->flags, BGP_FLAG_DISABLE_NH_CONNECTED_CHK))
81 return false;
82
83 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
84 if (nexthop->type == NEXTHOP_TYPE_IFINDEX ||
85 nexthop->type == NEXTHOP_TYPE_IPV4_IFINDEX ||
86 nexthop->type == NEXTHOP_TYPE_IPV6_IFINDEX) {
87 ifp = if_lookup_by_index(
88 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
89 bnc->bgp->vrf_id);
90 }
91 if (!ifp)
92 continue;
93 iifp = ifp->info;
94 if (CHECK_FLAG(iifp->flags, BGP_INTERFACE_MPLS_BGP_FORWARDING))
95 return true;
96 }
97 return false;
98 }
99
100 static int bgp_isvalid_nexthop_for_mplsovergre(struct bgp_nexthop_cache *bnc,
101 struct bgp_path_info *path)
102 {
103 struct interface *ifp = NULL;
104 struct nexthop *nexthop;
105
106 for (nexthop = bnc->nexthop; nexthop; nexthop = nexthop->next) {
107 if (nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
108 ifp = if_lookup_by_index(
109 bnc->ifindex ? bnc->ifindex : nexthop->ifindex,
110 bnc->bgp->vrf_id);
111 if (ifp && (ifp->ll_type == ZEBRA_LLT_IPGRE ||
112 ifp->ll_type == ZEBRA_LLT_IP6GRE))
113 break;
114 }
115 }
116 if (!ifp)
117 return false;
118
119 if (CHECK_FLAG(path->attr->rmap_change_flags,
120 BATTR_RMAP_L3VPN_ACCEPT_GRE))
121 return true;
122
123 return false;
124 }
125
126 static int bgp_isvalid_nexthop_for_mpls(struct bgp_nexthop_cache *bnc,
127 struct bgp_path_info *path)
128 {
129 /*
130 * - In the case of MPLS-VPN, the label is learned from LDP or other
131 * protocols, and nexthop tracking is enabled for the label.
132 * The value is recorded as BGP_NEXTHOP_LABELED_VALID.
133 * - In the case of SRv6-VPN, we need to track the reachability to the
134 * SID (in other words, IPv6 address). As in MPLS, we need to record
135 * the value as BGP_NEXTHOP_SID_VALID. However, this function is
136 * currently not implemented, and this function assumes that all
137 * Transit routes for SRv6-VPN are valid.
138 * - Otherwise check for mpls-gre acceptance
139 */
140 return (bgp_zebra_num_connects() == 0 ||
141 (bnc && (bnc->nexthop_num > 0 &&
142 (CHECK_FLAG(path->flags, BGP_PATH_ACCEPT_OWN) ||
143 CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID) ||
144 bnc->bgp->srv6_enabled ||
145 bgp_isvalid_nexthop_for_ebgp(bnc, path) ||
146 bgp_isvalid_nexthop_for_mplsovergre(bnc, path)))));
147 }
148
149 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
150 {
151 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
152 if (BGP_DEBUG(nht, NHT))
153 zlog_debug("%s: freeing bnc %pFX(%d)(%u)(%s)", __func__,
154 &bnc->prefix, bnc->ifindex, bnc->srte_color,
155 bnc->bgp->name_pretty);
156 /* only unregister if this is the last nh for this prefix*/
157 if (!bnc_existing_for_prefix(bnc))
158 unregister_zebra_rnh(bnc);
159 bnc_free(bnc);
160 }
161 }
162
163 void bgp_unlink_nexthop(struct bgp_path_info *path)
164 {
165 struct bgp_nexthop_cache *bnc = path->nexthop;
166
167 if (!bnc)
168 return;
169
170 path_nh_map(path, NULL, false);
171
172 bgp_unlink_nexthop_check(bnc);
173 }
174
175 void bgp_replace_nexthop_by_peer(struct peer *from, struct peer *to)
176 {
177 struct prefix pp;
178 struct prefix pt;
179 struct bgp_nexthop_cache *bncp, *bnct;
180 afi_t afi;
181 ifindex_t ifindex = 0;
182
183 if (!sockunion2hostprefix(&from->su, &pp))
184 return;
185
186 /*
187 * Gather the ifindex for if up/down events to be
188 * tagged into this fun
189 */
190 if (from->conf_if && IN6_IS_ADDR_LINKLOCAL(&from->su.sin6.sin6_addr))
191 ifindex = from->su.sin6.sin6_scope_id;
192
193 afi = family2afi(pp.family);
194 bncp = bnc_find(&from->bgp->nexthop_cache_table[afi], &pp, 0, ifindex);
195
196 if (!sockunion2hostprefix(&to->su, &pt))
197 return;
198
199 /*
200 * Gather the ifindex for if up/down events to be
201 * tagged into this fun
202 */
203 ifindex = 0;
204 if (to->conf_if && IN6_IS_ADDR_LINKLOCAL(&to->su.sin6.sin6_addr))
205 ifindex = to->su.sin6.sin6_scope_id;
206 bnct = bnc_find(&to->bgp->nexthop_cache_table[afi], &pt, 0, ifindex);
207
208 if (bnct != bncp)
209 return;
210
211 if (bnct)
212 bnct->nht_info = to;
213 }
214
215 void bgp_unlink_nexthop_by_peer(struct peer *peer)
216 {
217 struct prefix p;
218 struct bgp_nexthop_cache *bnc;
219 afi_t afi = family2afi(peer->su.sa.sa_family);
220 ifindex_t ifindex = 0;
221
222 if (!sockunion2hostprefix(&peer->su, &p))
223 return;
224 /*
225 * Gather the ifindex for if up/down events to be
226 * tagged into this fun
227 */
228 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
229 ifindex = peer->su.sin6.sin6_scope_id;
230 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0, ifindex);
231 if (!bnc)
232 return;
233
234 /* cleanup the peer reference */
235 bnc->nht_info = NULL;
236
237 bgp_unlink_nexthop_check(bnc);
238 }
239
240 /*
241 * A route and its nexthop might belong to different VRFs. Therefore,
242 * we need both the bgp_route and bgp_nexthop pointers.
243 */
244 int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
245 afi_t afi, safi_t safi, struct bgp_path_info *pi,
246 struct peer *peer, int connected,
247 const struct prefix *orig_prefix)
248 {
249 struct bgp_nexthop_cache_head *tree = NULL;
250 struct bgp_nexthop_cache *bnc;
251 struct prefix p;
252 uint32_t srte_color = 0;
253 int is_bgp_static_route = 0;
254 ifindex_t ifindex = 0;
255
256 if (pi) {
257 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
258 && (pi->sub_type == BGP_ROUTE_STATIC))
259 ? 1
260 : 0;
261
262 /* Since Extended Next-hop Encoding (RFC5549) support, we want
263 to derive
264 address-family from the next-hop. */
265 if (!is_bgp_static_route)
266 afi = BGP_ATTR_MP_NEXTHOP_LEN_IP6(pi->attr) ? AFI_IP6
267 : AFI_IP;
268
269 /* Validation for the ipv4 mapped ipv6 nexthop. */
270 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
271 afi = AFI_IP;
272 }
273
274 /* This will return true if the global IPv6 NH is a link local
275 * addr */
276 if (make_prefix(afi, pi, &p) < 0)
277 return 1;
278
279 if (!is_bgp_static_route && orig_prefix
280 && prefix_same(&p, orig_prefix)) {
281 if (BGP_DEBUG(nht, NHT)) {
282 zlog_debug(
283 "%s(%pFX): prefix loops through itself",
284 __func__, &p);
285 }
286 return 0;
287 }
288
289 srte_color = pi->attr->srte_color;
290 } else if (peer) {
291 /*
292 * Gather the ifindex for if up/down events to be
293 * tagged into this fun
294 */
295 if (afi == AFI_IP6 &&
296 IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr)) {
297 ifindex = peer->su.sin6.sin6_scope_id;
298 if (ifindex == 0) {
299 if (BGP_DEBUG(nht, NHT)) {
300 zlog_debug(
301 "%s: Unable to locate ifindex, waiting till we have one",
302 peer->conf_if);
303 }
304 return 0;
305 }
306 }
307
308 if (!sockunion2hostprefix(&peer->su, &p)) {
309 if (BGP_DEBUG(nht, NHT)) {
310 zlog_debug(
311 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
312 __func__, afi, AFI_IP, AFI_IP6);
313 }
314 return 0;
315 }
316 } else
317 return 0;
318
319 if (is_bgp_static_route)
320 tree = &bgp_nexthop->import_check_table[afi];
321 else
322 tree = &bgp_nexthop->nexthop_cache_table[afi];
323
324 bnc = bnc_find(tree, &p, srte_color, ifindex);
325 if (!bnc) {
326 bnc = bnc_new(tree, &p, srte_color, ifindex);
327 bnc->bgp = bgp_nexthop;
328 if (BGP_DEBUG(nht, NHT))
329 zlog_debug("Allocated bnc %pFX(%d)(%u)(%s) peer %p",
330 &bnc->prefix, bnc->ifindex, bnc->srte_color,
331 bnc->bgp->name_pretty, peer);
332 } else {
333 if (BGP_DEBUG(nht, NHT))
334 zlog_debug(
335 "Found existing bnc %pFX(%d)(%s) flags 0x%x ifindex %d #paths %d peer %p",
336 &bnc->prefix, bnc->ifindex,
337 bnc->bgp->name_pretty, bnc->flags, bnc->ifindex,
338 bnc->path_count, bnc->nht_info);
339 }
340
341 if (pi && is_route_parent_evpn(pi))
342 bnc->is_evpn_gwip_nexthop = true;
343
344 if (is_bgp_static_route) {
345 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
346
347 /* If we're toggling the type, re-register */
348 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
349 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
350 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
351 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
352 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
353 } else if ((!CHECK_FLAG(bgp_route->flags,
354 BGP_FLAG_IMPORT_CHECK))
355 && CHECK_FLAG(bnc->flags,
356 BGP_STATIC_ROUTE_EXACT_MATCH)) {
357 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
358 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
359 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
360 }
361 }
362 /* When nexthop is already known, but now requires 'connected'
363 * resolution,
364 * re-register it. The reverse scenario where the nexthop currently
365 * requires
366 * 'connected' resolution does not need a re-register (i.e., we treat
367 * 'connected-required' as an override) except in the scenario where
368 * this
369 * is actually a case of tracking a peer for connectivity (e.g., after
370 * disable connected-check).
371 * NOTE: We don't track the number of paths separately for 'connected-
372 * required' vs 'connected-not-required' as this change is not a common
373 * scenario.
374 */
375 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
376 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
377 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
378 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
379 } else if (peer && !connected
380 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
381 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
382 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
383 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
384 }
385 if (peer && (bnc->ifindex != ifindex)) {
386 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
387 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
388 bnc->ifindex = ifindex;
389 }
390 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
391 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
392 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
393 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
394 && !is_default_host_route(&bnc->prefix))
395 register_zebra_rnh(bnc);
396
397 if (pi && pi->nexthop != bnc) {
398 /* Unlink from existing nexthop cache, if any. This will also
399 * free
400 * the nexthop cache entry, if appropriate.
401 */
402 bgp_unlink_nexthop(pi);
403
404 /* updates NHT pi list reference */
405 path_nh_map(pi, bnc, true);
406
407 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
408 (bgp_path_info_extra_get(pi))->igpmetric = bnc->metric;
409 else if (pi->extra)
410 pi->extra->igpmetric = 0;
411 } else if (peer) {
412 /*
413 * Let's not accidentally save the peer data for a peer
414 * we are going to throw away in a second or so.
415 * When we come back around we'll fix up this
416 * data properly in replace_nexthop_by_peer
417 */
418 if (CHECK_FLAG(peer->flags, PEER_FLAG_CONFIG_NODE))
419 bnc->nht_info = (void *)peer; /* NHT peer reference */
420 }
421
422 /*
423 * We are cheating here. Views have no associated underlying
424 * ability to detect nexthops. So when we have a view
425 * just tell everyone the nexthop is valid
426 */
427 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
428 return 1;
429 else if (safi == SAFI_UNICAST && pi &&
430 pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra &&
431 pi->extra->num_labels && !bnc->is_evpn_gwip_nexthop)
432 return bgp_isvalid_nexthop_for_mpls(bnc, pi);
433 else
434 return (bgp_isvalid_nexthop(bnc));
435 }
436
437 void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
438 {
439 struct bgp_nexthop_cache *bnc;
440 struct prefix p;
441 ifindex_t ifindex = 0;
442
443 if (!peer)
444 return;
445
446 if (!sockunion2hostprefix(&peer->su, &p))
447 return;
448 /*
449 * Gather the ifindex for if up/down events to be
450 * tagged into this fun
451 */
452 if (afi == AFI_IP6 && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
453 ifindex = peer->su.sin6.sin6_scope_id;
454 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
455 &p, 0, ifindex);
456 if (!bnc) {
457 if (BGP_DEBUG(nht, NHT))
458 zlog_debug(
459 "Cannot find connected NHT node for peer %s(%s)",
460 peer->host, peer->bgp->name_pretty);
461 return;
462 }
463
464 if (bnc->nht_info != peer) {
465 if (BGP_DEBUG(nht, NHT))
466 zlog_debug(
467 "Connected NHT %p node for peer %s(%s) points to %p",
468 bnc, peer->host, bnc->bgp->name_pretty,
469 bnc->nht_info);
470 return;
471 }
472
473 bnc->nht_info = NULL;
474
475 if (LIST_EMPTY(&(bnc->paths))) {
476 if (BGP_DEBUG(nht, NHT))
477 zlog_debug(
478 "Freeing connected NHT node %p for peer %s(%s)",
479 bnc, peer->host, bnc->bgp->name_pretty);
480 unregister_zebra_rnh(bnc);
481 bnc_free(bnc);
482 }
483 }
484
485 static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
486 struct zapi_route *nhr,
487 bool import_check)
488 {
489 struct nexthop *nexthop;
490 struct nexthop *oldnh;
491 struct nexthop *nhlist_head = NULL;
492 struct nexthop *nhlist_tail = NULL;
493 int i;
494 bool evpn_resolved = false;
495
496 bnc->last_update = monotime(NULL);
497 bnc->change_flags = 0;
498
499 /* debug print the input */
500 if (BGP_DEBUG(nht, NHT)) {
501 char bnc_buf[BNC_FLAG_DUMP_SIZE];
502
503 zlog_debug(
504 "%s(%u): Rcvd NH update %pFX(%u)%u) - metric %d/%d #nhops %d/%d flags %s",
505 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
506 bnc->ifindex, bnc->srte_color, nhr->metric, bnc->metric,
507 nhr->nexthop_num, bnc->nexthop_num,
508 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
509 sizeof(bnc_buf)));
510 }
511
512 if (nhr->metric != bnc->metric)
513 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
514
515 if (nhr->nexthop_num != bnc->nexthop_num)
516 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
517
518 if (import_check && (nhr->type == ZEBRA_ROUTE_BGP ||
519 !prefix_same(&bnc->prefix, &nhr->prefix))) {
520 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
521 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
522 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID);
523 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
524
525 bnc_nexthop_free(bnc);
526 bnc->nexthop = NULL;
527
528 if (BGP_DEBUG(nht, NHT))
529 zlog_debug(
530 "%s: Import Check does not resolve to the same prefix for %pFX received %pFX or matching route is BGP",
531 __func__, &bnc->prefix, &nhr->prefix);
532 } else if (nhr->nexthop_num) {
533 struct peer *peer = bnc->nht_info;
534
535 /* notify bgp fsm if nbr ip goes from invalid->valid */
536 if (!bnc->nexthop_num)
537 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
538
539 if (!bnc->is_evpn_gwip_nexthop)
540 bnc->flags |= BGP_NEXTHOP_VALID;
541 bnc->metric = nhr->metric;
542 bnc->nexthop_num = nhr->nexthop_num;
543
544 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
545
546 for (i = 0; i < nhr->nexthop_num; i++) {
547 int num_labels = 0;
548
549 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
550
551 /*
552 * Turn on RA for the v6 nexthops
553 * we receive from bgp. This is to allow us
554 * to work with v4 routing over v6 nexthops
555 */
556 if (peer && !peer->ifp
557 && CHECK_FLAG(peer->flags,
558 PEER_FLAG_CAPABILITY_ENHE)
559 && nhr->prefix.family == AF_INET6
560 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
561 struct interface *ifp;
562
563 ifp = if_lookup_by_index(nexthop->ifindex,
564 nexthop->vrf_id);
565 if (ifp)
566 zclient_send_interface_radv_req(
567 zclient, nexthop->vrf_id, ifp,
568 true,
569 BGP_UNNUM_DEFAULT_RA_INTERVAL);
570 }
571 /* There is at least one label-switched path */
572 if (nexthop->nh_label &&
573 nexthop->nh_label->num_labels) {
574
575 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
576 num_labels = nexthop->nh_label->num_labels;
577 }
578
579 if (BGP_DEBUG(nht, NHT)) {
580 char buf[NEXTHOP_STRLEN];
581 zlog_debug(
582 " nhop via %s (%d labels)",
583 nexthop2str(nexthop, buf, sizeof(buf)),
584 num_labels);
585 }
586
587 if (nhlist_tail) {
588 nhlist_tail->next = nexthop;
589 nhlist_tail = nexthop;
590 } else {
591 nhlist_tail = nexthop;
592 nhlist_head = nexthop;
593 }
594
595 /* No need to evaluate the nexthop if we have already
596 * determined
597 * that there has been a change.
598 */
599 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
600 continue;
601
602 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
603 if (nexthop_same(oldnh, nexthop))
604 break;
605
606 if (!oldnh)
607 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
608 }
609 bnc_nexthop_free(bnc);
610 bnc->nexthop = nhlist_head;
611
612 /*
613 * Gateway IP nexthop is L3 reachable. Mark it as
614 * BGP_NEXTHOP_VALID only if it is recursively resolved with a
615 * remote EVPN RT-2.
616 * Else, mark it as BGP_NEXTHOP_EVPN_INCOMPLETE.
617 * When its mapping with EVPN RT-2 is established, unset
618 * BGP_NEXTHOP_EVPN_INCOMPLETE and set BGP_NEXTHOP_VALID.
619 */
620 if (bnc->is_evpn_gwip_nexthop) {
621 evpn_resolved = bgp_evpn_is_gateway_ip_resolved(bnc);
622
623 if (BGP_DEBUG(nht, NHT))
624 zlog_debug(
625 "EVPN gateway IP %pFX recursive MAC/IP lookup %s",
626 &bnc->prefix,
627 (evpn_resolved ? "successful"
628 : "failed"));
629
630 if (evpn_resolved) {
631 bnc->flags |= BGP_NEXTHOP_VALID;
632 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
633 bnc->change_flags |= BGP_NEXTHOP_MACIP_CHANGED;
634 } else {
635 bnc->flags |= BGP_NEXTHOP_EVPN_INCOMPLETE;
636 bnc->flags &= ~BGP_NEXTHOP_VALID;
637 }
638 }
639 } else {
640 bnc->flags &= ~BGP_NEXTHOP_EVPN_INCOMPLETE;
641 bnc->flags &= ~BGP_NEXTHOP_VALID;
642 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID;
643 bnc->nexthop_num = nhr->nexthop_num;
644
645 /* notify bgp fsm if nbr ip goes from valid->invalid */
646 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
647
648 bnc_nexthop_free(bnc);
649 bnc->nexthop = NULL;
650 }
651
652 evaluate_paths(bnc);
653 }
654
655 static void bgp_nht_ifp_table_handle(struct bgp *bgp,
656 struct bgp_nexthop_cache_head *table,
657 struct interface *ifp, bool up)
658 {
659 struct bgp_nexthop_cache *bnc;
660
661 frr_each (bgp_nexthop_cache, table, bnc) {
662 if (bnc->ifindex != ifp->ifindex)
663 continue;
664
665 bnc->last_update = monotime(NULL);
666 bnc->change_flags = 0;
667
668 /*
669 * For interface based routes ( ala the v6 LL routes
670 * that this was written for ) the metric received
671 * for the connected route is 0 not 1.
672 */
673 bnc->metric = 0;
674 if (up) {
675 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
676 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
677 bnc->nexthop_num = 1;
678 } else {
679 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
680 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
681 SET_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED);
682 bnc->nexthop_num = 0;
683 }
684
685 evaluate_paths(bnc);
686 }
687 }
688 static void bgp_nht_ifp_handle(struct interface *ifp, bool up)
689 {
690 struct bgp *bgp;
691
692 bgp = ifp->vrf->info;
693 if (!bgp)
694 return;
695
696 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP], ifp,
697 up);
698 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP], ifp,
699 up);
700 bgp_nht_ifp_table_handle(bgp, &bgp->nexthop_cache_table[AFI_IP6], ifp,
701 up);
702 bgp_nht_ifp_table_handle(bgp, &bgp->import_check_table[AFI_IP6], ifp,
703 up);
704 }
705
706 void bgp_nht_ifp_up(struct interface *ifp)
707 {
708 bgp_nht_ifp_handle(ifp, true);
709 }
710
711 void bgp_nht_ifp_down(struct interface *ifp)
712 {
713 bgp_nht_ifp_handle(ifp, false);
714 }
715
716 static void bgp_nht_ifp_initial(struct thread *thread)
717 {
718 ifindex_t ifindex = THREAD_VAL(thread);
719 struct bgp *bgp = THREAD_ARG(thread);
720 struct interface *ifp = if_lookup_by_index(ifindex, bgp->vrf_id);
721
722 if (!ifp)
723 return;
724
725 if (BGP_DEBUG(nht, NHT))
726 zlog_debug(
727 "Handle NHT initial update for Intf %s(%d) status %s",
728 ifp->name, ifp->ifindex, if_is_up(ifp) ? "up" : "down");
729
730 if (if_is_up(ifp))
731 bgp_nht_ifp_up(ifp);
732 else
733 bgp_nht_ifp_down(ifp);
734 }
735
736 /*
737 * So the bnc code has the ability to handle interface up/down
738 * events to properly handle v6 LL peering.
739 * What is happening here:
740 * The event system for peering expects the nht code to
741 * report on the tracking events after we move to active
742 * So let's give the system a chance to report on that event
743 * in a manner that is expected.
744 */
745 void bgp_nht_interface_events(struct peer *peer)
746 {
747 struct bgp *bgp = peer->bgp;
748 struct bgp_nexthop_cache_head *table;
749 struct bgp_nexthop_cache *bnc;
750 struct prefix p;
751 ifindex_t ifindex = 0;
752
753 if (!IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
754 return;
755
756 if (!sockunion2hostprefix(&peer->su, &p))
757 return;
758 /*
759 * Gather the ifindex for if up/down events to be
760 * tagged into this fun
761 */
762 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
763 ifindex = peer->su.sin6.sin6_scope_id;
764
765 table = &bgp->nexthop_cache_table[AFI_IP6];
766 bnc = bnc_find(table, &p, 0, ifindex);
767 if (!bnc)
768 return;
769
770 if (bnc->ifindex)
771 thread_add_event(bm->master, bgp_nht_ifp_initial, bnc->bgp,
772 bnc->ifindex, NULL);
773 }
774
775 void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
776 {
777 struct bgp_nexthop_cache_head *tree = NULL;
778 struct bgp_nexthop_cache *bnc_nhc, *bnc_import;
779 struct bgp *bgp;
780 struct prefix match;
781 struct zapi_route nhr;
782 afi_t afi;
783
784 bgp = bgp_lookup_by_vrf_id(vrf_id);
785 if (!bgp) {
786 flog_err(
787 EC_BGP_NH_UPD,
788 "parse nexthop update: instance not found for vrf_id %u",
789 vrf_id);
790 return;
791 }
792
793 if (!zapi_nexthop_update_decode(zclient->ibuf, &match, &nhr)) {
794 zlog_err("%s[%s]: Failure to decode nexthop update", __func__,
795 bgp->name_pretty);
796 return;
797 }
798
799 afi = family2afi(match.family);
800 tree = &bgp->nexthop_cache_table[afi];
801
802 bnc_nhc = bnc_find(tree, &match, nhr.srte_color, 0);
803 if (!bnc_nhc) {
804 if (BGP_DEBUG(nht, NHT))
805 zlog_debug(
806 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for nexthop cache",
807 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
808 } else
809 bgp_process_nexthop_update(bnc_nhc, &nhr, false);
810
811 tree = &bgp->import_check_table[afi];
812
813 bnc_import = bnc_find(tree, &match, nhr.srte_color, 0);
814 if (!bnc_import) {
815 if (BGP_DEBUG(nht, NHT))
816 zlog_debug(
817 "parse nexthop update(%pFX(%u)(%s)): bnc info not found for import check",
818 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
819 } else
820 bgp_process_nexthop_update(bnc_import, &nhr, true);
821
822 /*
823 * HACK: if any BGP route is dependant on an SR-policy that doesn't
824 * exist, zebra will never send NH updates relative to that policy. In
825 * that case, whenever we receive an update about a colorless NH, update
826 * the corresponding colorful NHs that share the same endpoint but that
827 * are inactive. This ugly hack should work around the problem at the
828 * cost of a performance pernalty. Long term, what should be done is to
829 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
830 * which should provide a better infrastructure to solve this issue in
831 * a more efficient and elegant way.
832 */
833 if (nhr.srte_color == 0 && bnc_nhc) {
834 struct bgp_nexthop_cache *bnc_iter;
835
836 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
837 bnc_iter) {
838 if (!prefix_same(&bnc_nhc->prefix, &bnc_iter->prefix) ||
839 bnc_iter->srte_color == 0 ||
840 CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
841 continue;
842
843 bgp_process_nexthop_update(bnc_iter, &nhr, false);
844 }
845 }
846 }
847
848 /*
849 * Cleanup nexthop registration and status information for BGP nexthops
850 * pertaining to this VRF. This is invoked upon VRF deletion.
851 */
852 void bgp_cleanup_nexthops(struct bgp *bgp)
853 {
854 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
855 struct bgp_nexthop_cache *bnc;
856
857 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
858 bnc) {
859 /* Clear relevant flags. */
860 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
861 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
862 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
863 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_EVPN_INCOMPLETE);
864 }
865 }
866 }
867
868 /**
869 * make_prefix - make a prefix structure from the path (essentially
870 * path's node.
871 */
872 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
873 {
874
875 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
876 && (pi->sub_type == BGP_ROUTE_STATIC))
877 ? 1
878 : 0;
879 struct bgp_dest *net = pi->net;
880 const struct prefix *p_orig = bgp_dest_get_prefix(net);
881 struct in_addr ipv4;
882
883 if (p_orig->family == AF_FLOWSPEC) {
884 if (!pi->peer)
885 return -1;
886 return bgp_flowspec_get_first_nh(pi->peer->bgp,
887 pi, p, afi);
888 }
889 memset(p, 0, sizeof(struct prefix));
890 switch (afi) {
891 case AFI_IP:
892 p->family = AF_INET;
893 if (is_bgp_static) {
894 p->u.prefix4 = p_orig->u.prefix4;
895 p->prefixlen = p_orig->prefixlen;
896 } else {
897 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
898 ipv4_mapped_ipv6_to_ipv4(
899 &pi->attr->mp_nexthop_global, &ipv4);
900 p->u.prefix4 = ipv4;
901 p->prefixlen = IPV4_MAX_BITLEN;
902 } else {
903 if (p_orig->family == AF_EVPN)
904 p->u.prefix4 =
905 pi->attr->mp_nexthop_global_in;
906 else
907 p->u.prefix4 = pi->attr->nexthop;
908 p->prefixlen = IPV4_MAX_BITLEN;
909 }
910 }
911 break;
912 case AFI_IP6:
913 p->family = AF_INET6;
914
915 if (is_bgp_static) {
916 p->u.prefix6 = p_orig->u.prefix6;
917 p->prefixlen = p_orig->prefixlen;
918 } else {
919 /* If we receive MP_REACH nexthop with ::(LL)
920 * or LL(LL), use LL address as nexthop cache.
921 */
922 if (pi->attr->mp_nexthop_len
923 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
924 && (IN6_IS_ADDR_UNSPECIFIED(
925 &pi->attr->mp_nexthop_global)
926 || IN6_IS_ADDR_LINKLOCAL(
927 &pi->attr->mp_nexthop_global)))
928 p->u.prefix6 = pi->attr->mp_nexthop_local;
929 /* If we receive MR_REACH with (GA)::(LL)
930 * then check for route-map to choose GA or LL
931 */
932 else if (pi->attr->mp_nexthop_len
933 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL) {
934 if (pi->attr->mp_nexthop_prefer_global)
935 p->u.prefix6 =
936 pi->attr->mp_nexthop_global;
937 else
938 p->u.prefix6 =
939 pi->attr->mp_nexthop_local;
940 } else
941 p->u.prefix6 = pi->attr->mp_nexthop_global;
942 p->prefixlen = IPV6_MAX_BITLEN;
943 }
944 break;
945 default:
946 if (BGP_DEBUG(nht, NHT)) {
947 zlog_debug(
948 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
949 __func__, afi, AFI_IP, AFI_IP6);
950 }
951 break;
952 }
953 return 0;
954 }
955
956 /**
957 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
958 * command to Zebra.
959 * ARGUMENTS:
960 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
961 * int command -- command to send to zebra
962 * RETURNS:
963 * void.
964 */
965 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
966 {
967 bool exact_match = false;
968 bool resolve_via_default = false;
969 int ret;
970
971 if (!zclient)
972 return;
973
974 /* Don't try to register if Zebra doesn't know of this instance. */
975 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
976 if (BGP_DEBUG(zebra, ZEBRA))
977 zlog_debug(
978 "%s: No zebra instance to talk to, not installing NHT entry",
979 __func__);
980 return;
981 }
982
983 if (!bgp_zebra_num_connects()) {
984 if (BGP_DEBUG(zebra, ZEBRA))
985 zlog_debug(
986 "%s: We have not connected yet, cannot send nexthops",
987 __func__);
988 }
989 if (command == ZEBRA_NEXTHOP_REGISTER) {
990 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED))
991 exact_match = true;
992 if (CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH))
993 resolve_via_default = true;
994 }
995
996 if (BGP_DEBUG(zebra, ZEBRA))
997 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
998 zserv_command_string(command), &bnc->prefix,
999 bnc->bgp->name_pretty);
1000
1001 ret = zclient_send_rnh(zclient, command, &bnc->prefix, SAFI_UNICAST,
1002 exact_match, resolve_via_default,
1003 bnc->bgp->vrf_id);
1004 if (ret == ZCLIENT_SEND_FAILURE) {
1005 flog_warn(EC_BGP_ZEBRA_SEND,
1006 "sendmsg_nexthop: zclient_send_message() failed");
1007 return;
1008 }
1009
1010 if (command == ZEBRA_NEXTHOP_REGISTER)
1011 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1012 else if (command == ZEBRA_NEXTHOP_UNREGISTER)
1013 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1014 return;
1015 }
1016
1017 /**
1018 * register_zebra_rnh - register a NH/route with Zebra for notification
1019 * when the route or the route to the nexthop changes.
1020 * ARGUMENTS:
1021 * struct bgp_nexthop_cache *bnc
1022 * RETURNS:
1023 * void.
1024 */
1025 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc)
1026 {
1027 /* Check if we have already registered */
1028 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
1029 return;
1030
1031 if (bnc->ifindex) {
1032 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1033 return;
1034 }
1035
1036 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
1037 }
1038
1039 /**
1040 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
1041 * ARGUMENTS:
1042 * struct bgp_nexthop_cache *bnc
1043 * RETURNS:
1044 * void.
1045 */
1046 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc)
1047 {
1048 /* Check if we have already registered */
1049 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
1050 return;
1051
1052 if (bnc->ifindex) {
1053 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
1054 return;
1055 }
1056
1057 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
1058 }
1059
1060 /**
1061 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
1062 * ARGUMENTS:
1063 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
1064 * RETURNS:
1065 * void.
1066 */
1067 void evaluate_paths(struct bgp_nexthop_cache *bnc)
1068 {
1069 struct bgp_dest *dest;
1070 struct bgp_path_info *path;
1071 int afi;
1072 struct peer *peer = (struct peer *)bnc->nht_info;
1073 struct bgp_table *table;
1074 safi_t safi;
1075 struct bgp *bgp_path;
1076 const struct prefix *p;
1077
1078 if (BGP_DEBUG(nht, NHT)) {
1079 char bnc_buf[BNC_FLAG_DUMP_SIZE];
1080 char chg_buf[BNC_FLAG_DUMP_SIZE];
1081
1082 zlog_debug(
1083 "NH update for %pFX(%d)(%u)(%s) - flags %s chgflags %s- evaluate paths",
1084 &bnc->prefix, bnc->ifindex, bnc->srte_color,
1085 bnc->bgp->name_pretty,
1086 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
1087 sizeof(bnc_buf)),
1088 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
1089 sizeof(bnc_buf)));
1090 }
1091
1092 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
1093 if (!(path->type == ZEBRA_ROUTE_BGP
1094 && ((path->sub_type == BGP_ROUTE_NORMAL)
1095 || (path->sub_type == BGP_ROUTE_STATIC)
1096 || (path->sub_type == BGP_ROUTE_IMPORTED))))
1097 continue;
1098
1099 dest = path->net;
1100 assert(dest && bgp_dest_table(dest));
1101 p = bgp_dest_get_prefix(dest);
1102 afi = family2afi(p->family);
1103 table = bgp_dest_table(dest);
1104 safi = table->safi;
1105
1106 /*
1107 * handle routes from other VRFs (they can have a
1108 * nexthop in THIS VRF). bgp_path is the bgp instance
1109 * that owns the route referencing this nexthop.
1110 */
1111 bgp_path = table->bgp;
1112
1113 /*
1114 * Path becomes valid/invalid depending on whether the nexthop
1115 * reachable/unreachable.
1116 *
1117 * In case of unicast routes that were imported from vpn
1118 * and that have labels, they are valid only if there are
1119 * nexthops with labels
1120 *
1121 * If the nexthop is EVPN gateway-IP,
1122 * do not check for a valid label.
1123 */
1124
1125 bool bnc_is_valid_nexthop = false;
1126 bool path_valid = false;
1127
1128 if (safi == SAFI_UNICAST && path->sub_type == BGP_ROUTE_IMPORTED
1129 && path->extra && path->extra->num_labels
1130 && (path->attr->evpn_overlay.type
1131 != OVERLAY_INDEX_GATEWAY_IP)) {
1132 bnc_is_valid_nexthop =
1133 bgp_isvalid_nexthop_for_mpls(bnc, path) ? true
1134 : false;
1135 } else {
1136 if (bgp_update_martian_nexthop(
1137 bnc->bgp, afi, safi, path->type,
1138 path->sub_type, path->attr, dest)) {
1139 if (BGP_DEBUG(nht, NHT))
1140 zlog_debug(
1141 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
1142 __func__, dest, bgp_path->name);
1143 } else
1144 bnc_is_valid_nexthop =
1145 bgp_isvalid_nexthop(bnc) ? true : false;
1146 }
1147
1148 if (BGP_DEBUG(nht, NHT)) {
1149 if (dest->pdest)
1150 zlog_debug(
1151 "... eval path %d/%d %pBD RD %pRD %s flags 0x%x",
1152 afi, safi, dest,
1153 (struct prefix_rd *)bgp_dest_get_prefix(
1154 dest->pdest),
1155 bgp_path->name_pretty, path->flags);
1156 else
1157 zlog_debug(
1158 "... eval path %d/%d %pBD %s flags 0x%x",
1159 afi, safi, dest, bgp_path->name_pretty,
1160 path->flags);
1161 }
1162
1163 /* Skip paths marked for removal or as history. */
1164 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
1165 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
1166 continue;
1167
1168 /* Copy the metric to the path. Will be used for bestpath
1169 * computation */
1170 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
1171 (bgp_path_info_extra_get(path))->igpmetric =
1172 bnc->metric;
1173 else if (path->extra)
1174 path->extra->igpmetric = 0;
1175
1176 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
1177 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
1178 || path->attr->srte_color != 0)
1179 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
1180
1181 path_valid = CHECK_FLAG(path->flags, BGP_PATH_VALID);
1182 if (path_valid != bnc_is_valid_nexthop) {
1183 if (path_valid) {
1184 /* No longer valid, clear flag; also for EVPN
1185 * routes, unimport from VRFs if needed.
1186 */
1187 bgp_aggregate_decrement(bgp_path, p, path, afi,
1188 safi);
1189 bgp_path_info_unset_flag(dest, path,
1190 BGP_PATH_VALID);
1191 if (safi == SAFI_EVPN &&
1192 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1193 bgp_evpn_unimport_route(bgp_path,
1194 afi, safi, bgp_dest_get_prefix(dest), path);
1195 } else {
1196 /* Path becomes valid, set flag; also for EVPN
1197 * routes, import from VRFs if needed.
1198 */
1199 bgp_path_info_set_flag(dest, path,
1200 BGP_PATH_VALID);
1201 bgp_aggregate_increment(bgp_path, p, path, afi,
1202 safi);
1203 if (safi == SAFI_EVPN &&
1204 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
1205 bgp_evpn_import_route(bgp_path,
1206 afi, safi, bgp_dest_get_prefix(dest), path);
1207 }
1208 }
1209
1210 bgp_process(bgp_path, dest, afi, safi);
1211 }
1212
1213 if (peer) {
1214 int valid_nexthops = bgp_isvalid_nexthop(bnc);
1215
1216 if (valid_nexthops) {
1217 /*
1218 * Peering cannot occur across a blackhole nexthop
1219 */
1220 if (bnc->nexthop_num == 1 && bnc->nexthop
1221 && bnc->nexthop->type == NEXTHOP_TYPE_BLACKHOLE) {
1222 peer->last_reset = PEER_DOWN_WAITING_NHT;
1223 valid_nexthops = 0;
1224 } else
1225 peer->last_reset = PEER_DOWN_WAITING_OPEN;
1226 } else
1227 peer->last_reset = PEER_DOWN_WAITING_NHT;
1228
1229 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
1230 if (BGP_DEBUG(nht, NHT))
1231 zlog_debug(
1232 "%s: Updating peer (%s(%s)) status with NHT nexthops %d",
1233 __func__, peer->host,
1234 peer->bgp->name_pretty,
1235 !!valid_nexthops);
1236 bgp_fsm_nht_update(peer, !!valid_nexthops);
1237 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
1238 }
1239 }
1240
1241 RESET_FLAG(bnc->change_flags);
1242 }
1243
1244 /**
1245 * path_nh_map - make or break path-to-nexthop association.
1246 * ARGUMENTS:
1247 * path - pointer to the path structure
1248 * bnc - pointer to the nexthop structure
1249 * make - if set, make the association. if unset, just break the existing
1250 * association.
1251 */
1252 void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
1253 bool make)
1254 {
1255 if (path->nexthop) {
1256 LIST_REMOVE(path, nh_thread);
1257 path->nexthop->path_count--;
1258 path->nexthop = NULL;
1259 }
1260 if (make) {
1261 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
1262 path->nexthop = bnc;
1263 path->nexthop->path_count++;
1264 }
1265 }
1266
1267 /*
1268 * This function is called to register nexthops to zebra
1269 * as that we may have tried to install the nexthops
1270 * before we actually have a zebra connection
1271 */
1272 void bgp_nht_register_nexthops(struct bgp *bgp)
1273 {
1274 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
1275 struct bgp_nexthop_cache *bnc;
1276
1277 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
1278 bnc) {
1279 register_zebra_rnh(bnc);
1280 }
1281 }
1282 }
1283
1284 void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1285 {
1286 struct bgp *bgp;
1287 struct bgp_nexthop_cache *bnc;
1288 struct nexthop *nhop;
1289 struct interface *ifp;
1290 struct prefix p;
1291 ifindex_t ifindex = 0;
1292
1293 if (peer->ifp)
1294 return;
1295
1296 bgp = peer->bgp;
1297 if (!sockunion2hostprefix(&peer->su, &p)) {
1298 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1299 __func__, peer->host);
1300 return;
1301 }
1302
1303 if (p.family != AF_INET6)
1304 return;
1305 /*
1306 * Gather the ifindex for if up/down events to be
1307 * tagged into this fun
1308 */
1309 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1310 ifindex = peer->su.sin6.sin6_scope_id;
1311
1312 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1313 if (!bnc)
1314 return;
1315
1316 if (peer != bnc->nht_info)
1317 return;
1318
1319 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1320 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1321
1322 if (!ifp)
1323 continue;
1324
1325 zclient_send_interface_radv_req(zclient,
1326 nhop->vrf_id,
1327 ifp, true,
1328 BGP_UNNUM_DEFAULT_RA_INTERVAL);
1329 }
1330 }
1331
1332 void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
1333 {
1334 struct bgp *bgp;
1335 struct bgp_nexthop_cache *bnc;
1336 struct nexthop *nhop;
1337 struct interface *ifp;
1338 struct prefix p;
1339 ifindex_t ifindex = 0;
1340
1341 if (peer->ifp)
1342 return;
1343
1344 bgp = peer->bgp;
1345
1346 if (!sockunion2hostprefix(&peer->su, &p)) {
1347 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
1348 __func__, peer->host);
1349 return;
1350 }
1351
1352 if (p.family != AF_INET6)
1353 return;
1354 /*
1355 * Gather the ifindex for if up/down events to be
1356 * tagged into this fun
1357 */
1358 if (peer->conf_if && IN6_IS_ADDR_LINKLOCAL(&peer->su.sin6.sin6_addr))
1359 ifindex = peer->su.sin6.sin6_scope_id;
1360
1361 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0, ifindex);
1362 if (!bnc)
1363 return;
1364
1365 if (peer != bnc->nht_info)
1366 return;
1367
1368 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
1369 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
1370
1371 if (!ifp)
1372 continue;
1373
1374 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
1375 0);
1376 }
1377 }
1378
/****************************************************************************
 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
 * left to the application using it.
 * PS: Currently EVPN host routes are the only app using L3 NHGs for fast
 * failover of remote ES links.
 ***************************************************************************/
1386 static bitfield_t bgp_nh_id_bitmap;
1387 static uint32_t bgp_l3nhg_start;
1388
/*
 * Nexthop-group callbacks handed to nexthop_group_init().
 * XXX - all of them are currently intentional no-ops.
 */

/* A nexthop group was added: nothing to do. */
static void bgp_l3nhg_add_cb(const char *name)
{
}

/* A nexthop was added to a group: nothing to do. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}

/* A nexthop was removed from a group: nothing to do. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}

/* A nexthop group was deleted: nothing to do. */
static void bgp_l3nhg_del_cb(const char *name)
{
}
1404
1405 static void bgp_l3nhg_zebra_init(void)
1406 {
1407 static bool bgp_l3nhg_zebra_inited;
1408 if (bgp_l3nhg_zebra_inited)
1409 return;
1410
1411 bgp_l3nhg_zebra_inited = true;
1412 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1413 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_add_nexthop_cb,
1414 bgp_l3nhg_del_nexthop_cb, bgp_l3nhg_del_cb);
1415 }
1416
1417
1418 void bgp_l3nhg_init(void)
1419 {
1420 uint32_t id_max;
1421
1422 id_max = MIN(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1423 bf_init(bgp_nh_id_bitmap, id_max);
1424 bf_assign_zero_index(bgp_nh_id_bitmap);
1425
1426 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1427 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1428 bgp_l3nhg_start + id_max);
1429 }
1430
1431 void bgp_l3nhg_finish(void)
1432 {
1433 bf_free(bgp_nh_id_bitmap);
1434 }
1435
1436 uint32_t bgp_l3nhg_id_alloc(void)
1437 {
1438 uint32_t nhg_id = 0;
1439
1440 bgp_l3nhg_zebra_init();
1441 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1442 if (nhg_id)
1443 nhg_id += bgp_l3nhg_start;
1444
1445 return nhg_id;
1446 }
1447
1448 void bgp_l3nhg_id_free(uint32_t nhg_id)
1449 {
1450 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1451 return;
1452
1453 nhg_id -= bgp_l3nhg_start;
1454
1455 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1456 }