]> git.proxmox.com Git - mirror_frr.git/blob - bgpd/bgp_nht.c
zebra: add support for DF delay timer
[mirror_frr.git] / bgpd / bgp_nht.c
1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21 #include <zebra.h>
22
23 #include "command.h"
24 #include "thread.h"
25 #include "prefix.h"
26 #include "zclient.h"
27 #include "stream.h"
28 #include "network.h"
29 #include "log.h"
30 #include "memory.h"
31 #include "nexthop.h"
32 #include "vrf.h"
33 #include "filter.h"
34 #include "nexthop_group.h"
35
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_errors.h"
43 #include "bgpd/bgp_nht.h"
44 #include "bgpd/bgp_fsm.h"
45 #include "bgpd/bgp_zebra.h"
46 #include "bgpd/bgp_flowspec_util.h"
47 #include "bgpd/bgp_evpn.h"
48 #include "bgpd/bgp_rd.h"
49
/* Zebra client handle used by this file; it is defined outside this file. */
extern struct zclient *zclient;

/* Helpers defined further down in this file. */
static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
static void evaluate_paths(struct bgp_nexthop_cache *bnc);
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc,
			       int is_bgp_static_route);
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc,
				 int is_bgp_static_route);
58
59 static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
60 {
61 return (bgp_zebra_num_connects() == 0
62 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)));
63 }
64
65 static int bgp_isvalid_labeled_nexthop(struct bgp_nexthop_cache *bnc)
66 {
67 return (bgp_zebra_num_connects() == 0
68 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID)));
69 }
70
71 static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
72 {
73 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
74 if (BGP_DEBUG(nht, NHT)) {
75 char buf[PREFIX2STR_BUFFER];
76 zlog_debug("%s: freeing bnc %s(%u)(%s)", __func__,
77 bnc_str(bnc, buf, PREFIX2STR_BUFFER),
78 bnc->srte_color, bnc->bgp->name_pretty);
79 }
80 /* only unregister if this is the last nh for this prefix*/
81 if (!bnc_existing_for_prefix(bnc))
82 unregister_zebra_rnh(
83 bnc, CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE));
84 bnc_free(bnc);
85 }
86 }
87
88 void bgp_unlink_nexthop(struct bgp_path_info *path)
89 {
90 struct bgp_nexthop_cache *bnc = path->nexthop;
91
92 if (!bnc)
93 return;
94
95 path_nh_map(path, NULL, false);
96
97 bgp_unlink_nexthop_check(bnc);
98 }
99
100 void bgp_unlink_nexthop_by_peer(struct peer *peer)
101 {
102 struct prefix p;
103 struct bgp_nexthop_cache *bnc;
104 afi_t afi = family2afi(peer->su.sa.sa_family);
105
106 if (!sockunion2hostprefix(&peer->su, &p))
107 return;
108
109 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0);
110 if (!bnc)
111 return;
112
113 /* cleanup the peer reference */
114 bnc->nht_info = NULL;
115
116 bgp_unlink_nexthop_check(bnc);
117 }
118
119 /*
120 * A route and its nexthop might belong to different VRFs. Therefore,
121 * we need both the bgp_route and bgp_nexthop pointers.
122 */
123 int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
124 afi_t afi, struct bgp_path_info *pi,
125 struct peer *peer, int connected)
126 {
127 struct bgp_nexthop_cache_head *tree = NULL;
128 struct bgp_nexthop_cache *bnc;
129 struct prefix p;
130 uint32_t srte_color = 0;
131 int is_bgp_static_route = 0;
132
133 if (pi) {
134 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
135 && (pi->sub_type == BGP_ROUTE_STATIC))
136 ? 1
137 : 0;
138
139 /* Since Extended Next-hop Encoding (RFC5549) support, we want
140 to derive
141 address-family from the next-hop. */
142 if (!is_bgp_static_route)
143 afi = BGP_ATTR_NEXTHOP_AFI_IP6(pi->attr) ? AFI_IP6
144 : AFI_IP;
145
146 /* Validation for the ipv4 mapped ipv6 nexthop. */
147 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
148 afi = AFI_IP;
149 }
150
151 /* This will return true if the global IPv6 NH is a link local
152 * addr */
153 if (make_prefix(afi, pi, &p) < 0)
154 return 1;
155
156 srte_color = pi->attr->srte_color;
157 } else if (peer) {
158 if (!sockunion2hostprefix(&peer->su, &p)) {
159 if (BGP_DEBUG(nht, NHT)) {
160 zlog_debug(
161 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
162 __func__, afi, AFI_IP, AFI_IP6);
163 }
164 return 0;
165 }
166 } else
167 return 0;
168
169 if (is_bgp_static_route)
170 tree = &bgp_nexthop->import_check_table[afi];
171 else
172 tree = &bgp_nexthop->nexthop_cache_table[afi];
173
174 bnc = bnc_find(tree, &p, srte_color);
175 if (!bnc) {
176 bnc = bnc_new(tree, &p, srte_color);
177 bnc->bgp = bgp_nexthop;
178 if (BGP_DEBUG(nht, NHT)) {
179 char buf[PREFIX2STR_BUFFER];
180
181 zlog_debug("Allocated bnc %s(%u)(%s) peer %p",
182 bnc_str(bnc, buf, PREFIX2STR_BUFFER),
183 bnc->srte_color, bnc->bgp->name_pretty,
184 peer);
185 }
186 }
187
188 if (is_bgp_static_route) {
189 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
190
191 /* If we're toggling the type, re-register */
192 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
193 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
194 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
195 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
196 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
197 } else if ((!CHECK_FLAG(bgp_route->flags,
198 BGP_FLAG_IMPORT_CHECK))
199 && CHECK_FLAG(bnc->flags,
200 BGP_STATIC_ROUTE_EXACT_MATCH)) {
201 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
202 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
203 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
204 }
205 }
206 /* When nexthop is already known, but now requires 'connected'
207 * resolution,
208 * re-register it. The reverse scenario where the nexthop currently
209 * requires
210 * 'connected' resolution does not need a re-register (i.e., we treat
211 * 'connected-required' as an override) except in the scenario where
212 * this
213 * is actually a case of tracking a peer for connectivity (e.g., after
214 * disable connected-check).
215 * NOTE: We don't track the number of paths separately for 'connected-
216 * required' vs 'connected-not-required' as this change is not a common
217 * scenario.
218 */
219 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
220 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
221 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
222 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
223 } else if (peer && !connected
224 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
225 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
226 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
227 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
228 }
229 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
230 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
231 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
232 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
233 && !is_default_host_route(&bnc->prefix))
234 register_zebra_rnh(bnc, is_bgp_static_route);
235
236 if (pi && pi->nexthop != bnc) {
237 /* Unlink from existing nexthop cache, if any. This will also
238 * free
239 * the nexthop cache entry, if appropriate.
240 */
241 bgp_unlink_nexthop(pi);
242
243 /* updates NHT pi list reference */
244 path_nh_map(pi, bnc, true);
245
246 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
247 (bgp_path_info_extra_get(pi))->igpmetric = bnc->metric;
248 else if (pi->extra)
249 pi->extra->igpmetric = 0;
250 } else if (peer)
251 bnc->nht_info = (void *)peer; /* NHT peer reference */
252
253 /*
254 * We are cheating here. Views have no associated underlying
255 * ability to detect nexthops. So when we have a view
256 * just tell everyone the nexthop is valid
257 */
258 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
259 return 1;
260 else
261 return (bgp_isvalid_nexthop(bnc));
262 }
263
264 void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
265 {
266 struct bgp_nexthop_cache *bnc;
267 struct prefix p;
268
269 if (!peer)
270 return;
271
272 if (!sockunion2hostprefix(&peer->su, &p))
273 return;
274
275 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
276 &p, 0);
277 if (!bnc) {
278 if (BGP_DEBUG(nht, NHT))
279 zlog_debug(
280 "Cannot find connected NHT node for peer %s(%s)",
281 peer->host, peer->bgp->name_pretty);
282 return;
283 }
284
285 if (bnc->nht_info != peer) {
286 if (BGP_DEBUG(nht, NHT))
287 zlog_debug(
288 "Connected NHT %p node for peer %s(%s) points to %p",
289 bnc, peer->host, bnc->bgp->name_pretty,
290 bnc->nht_info);
291 return;
292 }
293
294 bnc->nht_info = NULL;
295
296 if (LIST_EMPTY(&(bnc->paths))) {
297 if (BGP_DEBUG(nht, NHT))
298 zlog_debug(
299 "Freeing connected NHT node %p for peer %s(%s)",
300 bnc, peer->host, bnc->bgp->name_pretty);
301 unregister_zebra_rnh(bnc, 0);
302 bnc_free(bnc);
303 }
304 }
305
306 static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
307 struct zapi_route *nhr)
308 {
309 struct nexthop *nexthop;
310 struct nexthop *oldnh;
311 struct nexthop *nhlist_head = NULL;
312 struct nexthop *nhlist_tail = NULL;
313 int i;
314
315 bnc->last_update = bgp_clock();
316 bnc->change_flags = 0;
317
318 /* debug print the input */
319 if (BGP_DEBUG(nht, NHT))
320 zlog_debug(
321 "%s(%u): Rcvd NH update %pFX(%u) - metric %d/%d #nhops %d/%d flags 0x%x",
322 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
323 bnc->srte_color, nhr->metric, bnc->metric,
324 nhr->nexthop_num, bnc->nexthop_num, bnc->flags);
325
326 if (nhr->metric != bnc->metric)
327 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
328
329 if (nhr->nexthop_num != bnc->nexthop_num)
330 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
331
332 if (nhr->nexthop_num) {
333 struct peer *peer = bnc->nht_info;
334
335 /* notify bgp fsm if nbr ip goes from invalid->valid */
336 if (!bnc->nexthop_num)
337 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
338
339 bnc->flags |= BGP_NEXTHOP_VALID;
340 bnc->metric = nhr->metric;
341 bnc->nexthop_num = nhr->nexthop_num;
342
343 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
344
345 for (i = 0; i < nhr->nexthop_num; i++) {
346 int num_labels = 0;
347
348 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
349
350 /*
351 * Turn on RA for the v6 nexthops
352 * we receive from bgp. This is to allow us
353 * to work with v4 routing over v6 nexthops
354 */
355 if (peer && !peer->ifp
356 && CHECK_FLAG(peer->flags,
357 PEER_FLAG_CAPABILITY_ENHE)
358 && nhr->prefix.family == AF_INET6
359 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
360 struct interface *ifp;
361
362 ifp = if_lookup_by_index(nexthop->ifindex,
363 nexthop->vrf_id);
364 if (ifp)
365 zclient_send_interface_radv_req(
366 zclient, nexthop->vrf_id, ifp,
367 true,
368 BGP_UNNUM_DEFAULT_RA_INTERVAL);
369 }
370 /* There is at least one label-switched path */
371 if (nexthop->nh_label &&
372 nexthop->nh_label->num_labels) {
373
374 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
375 num_labels = nexthop->nh_label->num_labels;
376 }
377
378 if (BGP_DEBUG(nht, NHT)) {
379 char buf[NEXTHOP_STRLEN];
380 zlog_debug(
381 " nhop via %s (%d labels)",
382 nexthop2str(nexthop, buf, sizeof(buf)),
383 num_labels);
384 }
385
386 if (nhlist_tail) {
387 nhlist_tail->next = nexthop;
388 nhlist_tail = nexthop;
389 } else {
390 nhlist_tail = nexthop;
391 nhlist_head = nexthop;
392 }
393
394 /* No need to evaluate the nexthop if we have already
395 * determined
396 * that there has been a change.
397 */
398 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
399 continue;
400
401 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
402 if (nexthop_same(oldnh, nexthop))
403 break;
404
405 if (!oldnh)
406 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
407 }
408 bnc_nexthop_free(bnc);
409 bnc->nexthop = nhlist_head;
410 } else {
411 bnc->flags &= ~BGP_NEXTHOP_VALID;
412 bnc->nexthop_num = nhr->nexthop_num;
413
414 /* notify bgp fsm if nbr ip goes from valid->invalid */
415 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
416
417 bnc_nexthop_free(bnc);
418 bnc->nexthop = NULL;
419 }
420
421 evaluate_paths(bnc);
422 }
423
424 void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
425 {
426 struct bgp_nexthop_cache_head *tree = NULL;
427 struct bgp_nexthop_cache *bnc;
428 struct bgp *bgp;
429 struct zapi_route nhr;
430 afi_t afi;
431
432 bgp = bgp_lookup_by_vrf_id(vrf_id);
433 if (!bgp) {
434 flog_err(
435 EC_BGP_NH_UPD,
436 "parse nexthop update: instance not found for vrf_id %u",
437 vrf_id);
438 return;
439 }
440
441 if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) {
442 zlog_err("%s[%s]: Failure to decode nexthop update",
443 __PRETTY_FUNCTION__, bgp->name_pretty);
444 return;
445 }
446
447 afi = family2afi(nhr.prefix.family);
448 if (command == ZEBRA_NEXTHOP_UPDATE)
449 tree = &bgp->nexthop_cache_table[afi];
450 else if (command == ZEBRA_IMPORT_CHECK_UPDATE)
451 tree = &bgp->import_check_table[afi];
452
453 bnc = bnc_find(tree, &nhr.prefix, nhr.srte_color);
454 if (!bnc) {
455 if (BGP_DEBUG(nht, NHT))
456 zlog_debug(
457 "parse nexthop update(%pFX(%u)(%s)): bnc info not found",
458 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
459 return;
460 }
461
462 bgp_process_nexthop_update(bnc, &nhr);
463
464 /*
465 * HACK: if any BGP route is dependant on an SR-policy that doesn't
466 * exist, zebra will never send NH updates relative to that policy. In
467 * that case, whenever we receive an update about a colorless NH, update
468 * the corresponding colorful NHs that share the same endpoint but that
469 * are inactive. This ugly hack should work around the problem at the
470 * cost of a performance pernalty. Long term, what should be done is to
471 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
472 * which should provide a better infrastructure to solve this issue in
473 * a more efficient and elegant way.
474 */
475 if (nhr.srte_color == 0) {
476 struct bgp_nexthop_cache *bnc_iter;
477
478 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
479 bnc_iter) {
480 if (!prefix_same(&bnc->prefix, &bnc_iter->prefix)
481 || bnc_iter->srte_color == 0
482 || CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
483 continue;
484
485 bgp_process_nexthop_update(bnc_iter, &nhr);
486 }
487 }
488 }
489
490 /*
491 * Cleanup nexthop registration and status information for BGP nexthops
492 * pertaining to this VRF. This is invoked upon VRF deletion.
493 */
494 void bgp_cleanup_nexthops(struct bgp *bgp)
495 {
496 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
497 struct bgp_nexthop_cache *bnc;
498
499 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
500 bnc) {
501 /* Clear relevant flags. */
502 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
503 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
504 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
505 }
506 }
507 }
508
509 /**
510 * make_prefix - make a prefix structure from the path (essentially
511 * path's node.
512 */
513 static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
514 {
515
516 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
517 && (pi->sub_type == BGP_ROUTE_STATIC))
518 ? 1
519 : 0;
520 struct bgp_dest *net = pi->net;
521 const struct prefix *p_orig = bgp_dest_get_prefix(net);
522 struct in_addr ipv4;
523
524 if (p_orig->family == AF_FLOWSPEC) {
525 if (!pi->peer)
526 return -1;
527 return bgp_flowspec_get_first_nh(pi->peer->bgp,
528 pi, p, afi);
529 }
530 memset(p, 0, sizeof(struct prefix));
531 switch (afi) {
532 case AFI_IP:
533 p->family = AF_INET;
534 if (is_bgp_static) {
535 p->u.prefix4 = p_orig->u.prefix4;
536 p->prefixlen = p_orig->prefixlen;
537 } else {
538 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
539 ipv4_mapped_ipv6_to_ipv4(
540 &pi->attr->mp_nexthop_global, &ipv4);
541 p->u.prefix4 = ipv4;
542 p->prefixlen = IPV4_MAX_BITLEN;
543 } else {
544 p->u.prefix4 = pi->attr->nexthop;
545 p->prefixlen = IPV4_MAX_BITLEN;
546 }
547 }
548 break;
549 case AFI_IP6:
550 p->family = AF_INET6;
551
552 if (is_bgp_static) {
553 p->u.prefix6 = p_orig->u.prefix6;
554 p->prefixlen = p_orig->prefixlen;
555 } else {
556 /* If we receive MP_REACH nexthop with ::(LL)
557 * or LL(LL), use LL address as nexthop cache.
558 */
559 if (pi->attr->mp_nexthop_len
560 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
561 && (IN6_IS_ADDR_UNSPECIFIED(
562 &pi->attr->mp_nexthop_global)
563 || IN6_IS_ADDR_LINKLOCAL(
564 &pi->attr->mp_nexthop_global)))
565 p->u.prefix6 = pi->attr->mp_nexthop_local;
566 else
567 p->u.prefix6 = pi->attr->mp_nexthop_global;
568 p->prefixlen = IPV6_MAX_BITLEN;
569 }
570 break;
571 default:
572 if (BGP_DEBUG(nht, NHT)) {
573 zlog_debug(
574 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
575 __func__, afi, AFI_IP, AFI_IP6);
576 }
577 break;
578 }
579 return 0;
580 }
581
582 /**
583 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
584 * command to Zebra.
585 * ARGUMENTS:
586 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
587 * int command -- command to send to zebra
588 * RETURNS:
589 * void.
590 */
591 static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
592 {
593 bool exact_match = false;
594 int ret;
595
596 if (!zclient)
597 return;
598
599 /* Don't try to register if Zebra doesn't know of this instance. */
600 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
601 if (BGP_DEBUG(zebra, ZEBRA))
602 zlog_debug(
603 "%s: No zebra instance to talk to, not installing NHT entry",
604 __func__);
605 return;
606 }
607
608 if (!bgp_zebra_num_connects()) {
609 if (BGP_DEBUG(zebra, ZEBRA))
610 zlog_debug(
611 "%s: We have not connected yet, cannot send nexthops",
612 __func__);
613 }
614 if ((command == ZEBRA_NEXTHOP_REGISTER
615 || command == ZEBRA_IMPORT_ROUTE_REGISTER)
616 && (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)
617 || CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)))
618 exact_match = true;
619
620 if (BGP_DEBUG(zebra, ZEBRA))
621 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
622 zserv_command_string(command), &bnc->prefix,
623 bnc->bgp->name_pretty);
624
625 ret = zclient_send_rnh(zclient, command, &bnc->prefix, exact_match,
626 bnc->bgp->vrf_id);
627 /* TBD: handle the failure */
628 if (ret == ZCLIENT_SEND_FAILURE)
629 flog_warn(EC_BGP_ZEBRA_SEND,
630 "sendmsg_nexthop: zclient_send_message() failed");
631
632 if ((command == ZEBRA_NEXTHOP_REGISTER)
633 || (command == ZEBRA_IMPORT_ROUTE_REGISTER))
634 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
635 else if ((command == ZEBRA_NEXTHOP_UNREGISTER)
636 || (command == ZEBRA_IMPORT_ROUTE_UNREGISTER))
637 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
638 return;
639 }
640
641 /**
642 * register_zebra_rnh - register a NH/route with Zebra for notification
643 * when the route or the route to the nexthop changes.
644 * ARGUMENTS:
645 * struct bgp_nexthop_cache *bnc
646 * RETURNS:
647 * void.
648 */
649 static void register_zebra_rnh(struct bgp_nexthop_cache *bnc,
650 int is_bgp_import_route)
651 {
652 /* Check if we have already registered */
653 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
654 return;
655 if (is_bgp_import_route)
656 sendmsg_zebra_rnh(bnc, ZEBRA_IMPORT_ROUTE_REGISTER);
657 else
658 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
659 }
660
661 /**
662 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
663 * ARGUMENTS:
664 * struct bgp_nexthop_cache *bnc
665 * RETURNS:
666 * void.
667 */
668 static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc,
669 int is_bgp_import_route)
670 {
671 /* Check if we have already registered */
672 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
673 return;
674
675 if (is_bgp_import_route)
676 sendmsg_zebra_rnh(bnc, ZEBRA_IMPORT_ROUTE_UNREGISTER);
677 else
678 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
679 }
680
681 /**
682 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
683 * ARGUMENTS:
684 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
685 * RETURNS:
686 * void.
687 */
688 static void evaluate_paths(struct bgp_nexthop_cache *bnc)
689 {
690 struct bgp_dest *dest;
691 struct bgp_path_info *path;
692 int afi;
693 struct peer *peer = (struct peer *)bnc->nht_info;
694 struct bgp_table *table;
695 safi_t safi;
696 struct bgp *bgp_path;
697 const struct prefix *p;
698
699 if (BGP_DEBUG(nht, NHT)) {
700 char buf[PREFIX2STR_BUFFER];
701 bnc_str(bnc, buf, PREFIX2STR_BUFFER);
702 zlog_debug(
703 "NH update for %s(%u)(%s) - flags 0x%x chgflags 0x%x - evaluate paths",
704 buf, bnc->srte_color, bnc->bgp->name_pretty, bnc->flags,
705 bnc->change_flags);
706 }
707
708 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
709 if (!(path->type == ZEBRA_ROUTE_BGP
710 && ((path->sub_type == BGP_ROUTE_NORMAL)
711 || (path->sub_type == BGP_ROUTE_STATIC)
712 || (path->sub_type == BGP_ROUTE_IMPORTED))))
713 continue;
714
715 dest = path->net;
716 assert(dest && bgp_dest_table(dest));
717 p = bgp_dest_get_prefix(dest);
718 afi = family2afi(p->family);
719 table = bgp_dest_table(dest);
720 safi = table->safi;
721
722 /*
723 * handle routes from other VRFs (they can have a
724 * nexthop in THIS VRF). bgp_path is the bgp instance
725 * that owns the route referencing this nexthop.
726 */
727 bgp_path = table->bgp;
728
729 /*
730 * Path becomes valid/invalid depending on whether the nexthop
731 * reachable/unreachable.
732 *
733 * In case of unicast routes that were imported from vpn
734 * and that have labels, they are valid only if there are
735 * nexthops with labels
736 */
737
738 bool bnc_is_valid_nexthop = false;
739 bool path_valid = false;
740
741 if (safi == SAFI_UNICAST &&
742 path->sub_type == BGP_ROUTE_IMPORTED &&
743 path->extra &&
744 path->extra->num_labels) {
745
746 bnc_is_valid_nexthop =
747 bgp_isvalid_labeled_nexthop(bnc) ? true : false;
748 } else {
749 if (bgp_update_martian_nexthop(
750 bnc->bgp, afi, safi, path->type,
751 path->sub_type, path->attr, dest)) {
752 if (BGP_DEBUG(nht, NHT))
753 zlog_debug(
754 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
755 __func__, dest, bgp_path->name);
756 } else
757 bnc_is_valid_nexthop =
758 bgp_isvalid_nexthop(bnc) ? true : false;
759 }
760
761 if (BGP_DEBUG(nht, NHT)) {
762 char buf1[RD_ADDRSTRLEN];
763
764 if (dest->pdest) {
765 prefix_rd2str((struct prefix_rd *)bgp_dest_get_prefix(dest->pdest),
766 buf1, sizeof(buf1));
767 zlog_debug(
768 "... eval path %d/%d %pBD RD %s %s flags 0x%x",
769 afi, safi, dest, buf1,
770 bgp_path->name_pretty, path->flags);
771 } else
772 zlog_debug(
773 "... eval path %d/%d %pBD %s flags 0x%x",
774 afi, safi, dest, bgp_path->name_pretty,
775 path->flags);
776 }
777
778 /* Skip paths marked for removal or as history. */
779 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
780 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
781 continue;
782
783 /* Copy the metric to the path. Will be used for bestpath
784 * computation */
785 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
786 (bgp_path_info_extra_get(path))->igpmetric =
787 bnc->metric;
788 else if (path->extra)
789 path->extra->igpmetric = 0;
790
791 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
792 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
793 || path->attr->srte_color != 0)
794 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
795
796 path_valid = !!CHECK_FLAG(path->flags, BGP_PATH_VALID);
797 if (path_valid != bnc_is_valid_nexthop) {
798 if (path_valid) {
799 /* No longer valid, clear flag; also for EVPN
800 * routes, unimport from VRFs if needed.
801 */
802 bgp_aggregate_decrement(bgp_path, p, path, afi,
803 safi);
804 bgp_path_info_unset_flag(dest, path,
805 BGP_PATH_VALID);
806 if (safi == SAFI_EVPN &&
807 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
808 bgp_evpn_unimport_route(bgp_path,
809 afi, safi, bgp_dest_get_prefix(dest), path);
810 } else {
811 /* Path becomes valid, set flag; also for EVPN
812 * routes, import from VRFs if needed.
813 */
814 bgp_path_info_set_flag(dest, path,
815 BGP_PATH_VALID);
816 bgp_aggregate_increment(bgp_path, p, path, afi,
817 safi);
818 if (safi == SAFI_EVPN &&
819 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
820 bgp_evpn_import_route(bgp_path,
821 afi, safi, bgp_dest_get_prefix(dest), path);
822 }
823 }
824
825 bgp_process(bgp_path, dest, afi, safi);
826 }
827
828 if (peer) {
829 int valid_nexthops = bgp_isvalid_nexthop(bnc);
830
831 if (valid_nexthops)
832 peer->last_reset = PEER_DOWN_WAITING_OPEN;
833 else
834 peer->last_reset = PEER_DOWN_WAITING_NHT;
835
836 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
837 if (BGP_DEBUG(nht, NHT))
838 zlog_debug(
839 "%s: Updating peer (%s(%s)) status with NHT",
840 __func__, peer->host,
841 peer->bgp->name_pretty);
842 bgp_fsm_nht_update(peer, !!valid_nexthops);
843 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
844 }
845 }
846
847 RESET_FLAG(bnc->change_flags);
848 }
849
850 /**
851 * path_nh_map - make or break path-to-nexthop association.
852 * ARGUMENTS:
853 * path - pointer to the path structure
854 * bnc - pointer to the nexthop structure
855 * make - if set, make the association. if unset, just break the existing
856 * association.
857 */
858 void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
859 bool make)
860 {
861 if (path->nexthop) {
862 LIST_REMOVE(path, nh_thread);
863 path->nexthop->path_count--;
864 path->nexthop = NULL;
865 }
866 if (make) {
867 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
868 path->nexthop = bnc;
869 path->nexthop->path_count++;
870 }
871 }
872
873 /*
874 * This function is called to register nexthops to zebra
875 * as that we may have tried to install the nexthops
876 * before we actually have a zebra connection
877 */
878 void bgp_nht_register_nexthops(struct bgp *bgp)
879 {
880 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
881 struct bgp_nexthop_cache *bnc;
882
883 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
884 bnc) {
885 register_zebra_rnh(bnc, 0);
886 }
887 }
888 }
889
890 void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
891 {
892 struct bgp *bgp;
893 struct bgp_nexthop_cache *bnc;
894 struct nexthop *nhop;
895 struct interface *ifp;
896 struct prefix p;
897
898 if (peer->ifp)
899 return;
900
901 bgp = peer->bgp;
902 if (!sockunion2hostprefix(&peer->su, &p)) {
903 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
904 __func__, peer->host);
905 return;
906 }
907
908 if (p.family != AF_INET6)
909 return;
910
911 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0);
912 if (!bnc)
913 return;
914
915 if (peer != bnc->nht_info)
916 return;
917
918 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
919 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
920
921 if (!ifp)
922 continue;
923
924 zclient_send_interface_radv_req(zclient,
925 nhop->vrf_id,
926 ifp, true,
927 BGP_UNNUM_DEFAULT_RA_INTERVAL);
928 }
929 }
930
931 void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
932 {
933 struct bgp *bgp;
934 struct bgp_nexthop_cache *bnc;
935 struct nexthop *nhop;
936 struct interface *ifp;
937 struct prefix p;
938
939 if (peer->ifp)
940 return;
941
942 bgp = peer->bgp;
943
944 if (!sockunion2hostprefix(&peer->su, &p)) {
945 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
946 __func__, peer->host);
947 return;
948 }
949
950 if (p.family != AF_INET6)
951 return;
952
953 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0);
954 if (!bnc)
955 return;
956
957 if (peer != bnc->nht_info)
958 return;
959
960 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
961 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
962
963 if (!ifp)
964 continue;
965
966 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
967 0);
968 }
969 }
970
971 /****************************************************************************
972 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
973 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
974 * left to the application using it.
975 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
976 * failover of remote ES links.
977 ***************************************************************************/
978 static bitfield_t bgp_nh_id_bitmap;
979 static uint32_t bgp_l3nhg_start;
980
981 /* XXX - currently we do nothing on the callbacks */
982 static void bgp_l3nhg_add_cb(const char *name)
983 {
984 }
985 static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
986 const struct nexthop *nhop)
987 {
988 }
989 static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
990 const struct nexthop *nhop)
991 {
992 }
993 static void bgp_l3nhg_del_cb(const char *name)
994 {
995 }
996
997 static void bgp_l3nhg_zebra_init(void)
998 {
999 static bool bgp_l3nhg_zebra_inited;
1000 if (bgp_l3nhg_zebra_inited)
1001 return;
1002
1003 bgp_l3nhg_zebra_inited = true;
1004 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1005 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_add_nexthop_cb,
1006 bgp_l3nhg_del_nexthop_cb, bgp_l3nhg_del_cb);
1007 }
1008
1009
1010 #define min(A, B) ((A) < (B) ? (A) : (B))
1011 void bgp_l3nhg_init(void)
1012 {
1013 uint32_t id_max;
1014
1015 id_max = min(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1016 bf_init(bgp_nh_id_bitmap, id_max);
1017 bf_assign_zero_index(bgp_nh_id_bitmap);
1018
1019 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1020 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1021 bgp_l3nhg_start + id_max);
1022 }
1023
1024 void bgp_l3nhg_finish(void)
1025 {
1026 bf_free(bgp_nh_id_bitmap);
1027 }
1028
1029 uint32_t bgp_l3nhg_id_alloc(void)
1030 {
1031 uint32_t nhg_id = 0;
1032
1033 bgp_l3nhg_zebra_init();
1034 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1035 if (nhg_id)
1036 nhg_id += bgp_l3nhg_start;
1037
1038 return nhg_id;
1039 }
1040
1041 void bgp_l3nhg_id_free(uint32_t nhg_id)
1042 {
1043 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1044 return;
1045
1046 nhg_id -= bgp_l3nhg_start;
1047
1048 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1049 }