]> git.proxmox.com Git - mirror_frr.git/blame - bgpd/bgp_nht.c
Merge pull request #8036 from qlyoung/disable-mallinfo
[mirror_frr.git] / bgpd / bgp_nht.c
CommitLineData
fb018d25
DS
1/* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
3 *
4 * This file is part of GNU Zebra.
5 *
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
9 * later version.
10 *
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 *
896014f4
DL
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
fb018d25
DS
19 */
20
21#include <zebra.h>
22
23#include "command.h"
24#include "thread.h"
25#include "prefix.h"
26#include "zclient.h"
27#include "stream.h"
28#include "network.h"
29#include "log.h"
30#include "memory.h"
31#include "nexthop.h"
7076bb2f 32#include "vrf.h"
039f3a34 33#include "filter.h"
8bcb09a1 34#include "nexthop_group.h"
fb018d25
DS
35
36#include "bgpd/bgpd.h"
37#include "bgpd/bgp_table.h"
38#include "bgpd/bgp_route.h"
39#include "bgpd/bgp_attr.h"
40#include "bgpd/bgp_nexthop.h"
41#include "bgpd/bgp_debug.h"
14454c9f 42#include "bgpd/bgp_errors.h"
fb018d25 43#include "bgpd/bgp_nht.h"
ffd0c037 44#include "bgpd/bgp_fsm.h"
afbb1c59 45#include "bgpd/bgp_zebra.h"
0378bcaa 46#include "bgpd/bgp_flowspec_util.h"
7c312383 47#include "bgpd/bgp_evpn.h"
9e15d76a 48#include "bgpd/bgp_rd.h"
fb018d25
DS
49
/* Zebra client handle; defined outside this file. */
extern struct zclient *zclient;

/* Prototypes for static helpers that are defined further down. */
static void register_zebra_rnh(struct bgp_nexthop_cache *bnc,
			       int is_bgp_static_route);
static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc,
				 int is_bgp_static_route);
static void evaluate_paths(struct bgp_nexthop_cache *bnc);
static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p);
d62a17ae 59static int bgp_isvalid_nexthop(struct bgp_nexthop_cache *bnc)
d4d9d757 60{
d62a17ae 61 return (bgp_zebra_num_connects() == 0
62 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID)));
d4d9d757
LB
63}
64
960035b2
PZ
65static int bgp_isvalid_labeled_nexthop(struct bgp_nexthop_cache *bnc)
66{
67 return (bgp_zebra_num_connects() == 0
68 || (bnc && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_LABELED_VALID)));
69}
70
d62a17ae 71static void bgp_unlink_nexthop_check(struct bgp_nexthop_cache *bnc)
fb018d25 72{
d5c4bac9 73 if (LIST_EMPTY(&(bnc->paths)) && !bnc->nht_info) {
d62a17ae 74 if (BGP_DEBUG(nht, NHT)) {
75 char buf[PREFIX2STR_BUFFER];
84c320dc 76 zlog_debug("%s: freeing bnc %s(%u)(%s)", __func__,
8c1a4c10 77 bnc_str(bnc, buf, PREFIX2STR_BUFFER),
545aeef1 78 bnc->srte_color, bnc->bgp->name_pretty);
d62a17ae 79 }
e37e1e27
PR
80 /* only unregister if this is the last nh for this prefix*/
81 if (!bnc_existing_for_prefix(bnc))
82 unregister_zebra_rnh(
83 bnc, CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE));
d62a17ae 84 bnc_free(bnc);
fb018d25 85 }
fb018d25
DS
86}
87
4b7e6066 88void bgp_unlink_nexthop(struct bgp_path_info *path)
f9164b1d 89{
d62a17ae 90 struct bgp_nexthop_cache *bnc = path->nexthop;
91
92 if (!bnc)
93 return;
f9164b1d 94
7f040da1 95 path_nh_map(path, NULL, false);
f9164b1d 96
d62a17ae 97 bgp_unlink_nexthop_check(bnc);
f9164b1d
PJ
98}
99
d62a17ae 100void bgp_unlink_nexthop_by_peer(struct peer *peer)
f9164b1d 101{
d62a17ae 102 struct prefix p;
d62a17ae 103 struct bgp_nexthop_cache *bnc;
104 afi_t afi = family2afi(peer->su.sa.sa_family);
105
106 if (!sockunion2hostprefix(&peer->su, &p))
107 return;
108
545aeef1 109 bnc = bnc_find(&peer->bgp->nexthop_cache_table[afi], &p, 0);
14315f2d 110 if (!bnc)
d62a17ae 111 return;
112
d62a17ae 113 /* cleanup the peer reference */
114 bnc->nht_info = NULL;
115
116 bgp_unlink_nexthop_check(bnc);
f9164b1d
PJ
117}
118
960035b2
PZ
119/*
120 * A route and its nexthop might belong to different VRFs. Therefore,
121 * we need both the bgp_route and bgp_nexthop pointers.
122 */
123int bgp_find_or_add_nexthop(struct bgp *bgp_route, struct bgp *bgp_nexthop,
4053e952 124 afi_t afi, safi_t safi, struct bgp_path_info *pi,
d62a17ae 125 struct peer *peer, int connected)
fb018d25 126{
f663c581 127 struct bgp_nexthop_cache_head *tree = NULL;
d62a17ae 128 struct bgp_nexthop_cache *bnc;
129 struct prefix p;
545aeef1 130 uint32_t srte_color = 0;
d62a17ae 131 int is_bgp_static_route = 0;
132
40381db7
DS
133 if (pi) {
134 is_bgp_static_route = ((pi->type == ZEBRA_ROUTE_BGP)
135 && (pi->sub_type == BGP_ROUTE_STATIC))
d62a17ae 136 ? 1
137 : 0;
138
139 /* Since Extended Next-hop Encoding (RFC5549) support, we want
140 to derive
141 address-family from the next-hop. */
142 if (!is_bgp_static_route)
40381db7 143 afi = BGP_ATTR_NEXTHOP_AFI_IP6(pi->attr) ? AFI_IP6
d62a17ae 144 : AFI_IP;
145
92d6f769
K
146 /* Validation for the ipv4 mapped ipv6 nexthop. */
147 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
148 afi = AFI_IP;
149 }
150
2951a7a4 151 /* This will return true if the global IPv6 NH is a link local
d62a17ae 152 * addr */
40381db7 153 if (make_prefix(afi, pi, &p) < 0)
d62a17ae 154 return 1;
545aeef1
RW
155
156 srte_color = pi->attr->srte_color;
d62a17ae 157 } else if (peer) {
d62a17ae 158 if (!sockunion2hostprefix(&peer->su, &p)) {
159 if (BGP_DEBUG(nht, NHT)) {
160 zlog_debug(
161 "%s: Attempting to register with unknown AFI %d (not %d or %d)",
15569c58 162 __func__, afi, AFI_IP, AFI_IP6);
d62a17ae 163 }
164 return 0;
165 }
166 } else
167 return 0;
168
169 if (is_bgp_static_route)
f663c581 170 tree = &bgp_nexthop->import_check_table[afi];
d62a17ae 171 else
f663c581 172 tree = &bgp_nexthop->nexthop_cache_table[afi];
d62a17ae 173
545aeef1 174 bnc = bnc_find(tree, &p, srte_color);
14315f2d 175 if (!bnc) {
545aeef1 176 bnc = bnc_new(tree, &p, srte_color);
960035b2 177 bnc->bgp = bgp_nexthop;
d62a17ae 178 if (BGP_DEBUG(nht, NHT)) {
179 char buf[PREFIX2STR_BUFFER];
180
545aeef1 181 zlog_debug("Allocated bnc %s(%u)(%s) peer %p",
8c1a4c10 182 bnc_str(bnc, buf, PREFIX2STR_BUFFER),
545aeef1
RW
183 bnc->srte_color, bnc->bgp->name_pretty,
184 peer);
d62a17ae 185 }
fc9a856f 186 }
d62a17ae 187
d62a17ae 188 if (is_bgp_static_route) {
189 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE);
190
191 /* If we're toggling the type, re-register */
892fedb6 192 if ((CHECK_FLAG(bgp_route->flags, BGP_FLAG_IMPORT_CHECK))
d62a17ae 193 && !CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)) {
194 SET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
195 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
196 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
892fedb6
DA
197 } else if ((!CHECK_FLAG(bgp_route->flags,
198 BGP_FLAG_IMPORT_CHECK))
d62a17ae 199 && CHECK_FLAG(bnc->flags,
200 BGP_STATIC_ROUTE_EXACT_MATCH)) {
201 UNSET_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH);
202 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
203 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
204 }
078430f6 205 }
d62a17ae 206 /* When nexthop is already known, but now requires 'connected'
207 * resolution,
208 * re-register it. The reverse scenario where the nexthop currently
209 * requires
210 * 'connected' resolution does not need a re-register (i.e., we treat
211 * 'connected-required' as an override) except in the scenario where
212 * this
213 * is actually a case of tracking a peer for connectivity (e.g., after
214 * disable connected-check).
215 * NOTE: We don't track the number of paths separately for 'connected-
216 * required' vs 'connected-not-required' as this change is not a common
217 * scenario.
218 */
219 else if (connected && !CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
220 SET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
221 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
222 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
223 } else if (peer && !connected
224 && CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)) {
225 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED);
226 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
227 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
078430f6 228 }
960035b2 229 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW) {
1ee0a2df
DS
230 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
231 SET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
b54892e0 232 } else if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED)
f663c581 233 && !is_default_host_route(&bnc->prefix))
d62a17ae 234 register_zebra_rnh(bnc, is_bgp_static_route);
1eb6c3ea 235
40381db7 236 if (pi && pi->nexthop != bnc) {
d62a17ae 237 /* Unlink from existing nexthop cache, if any. This will also
238 * free
239 * the nexthop cache entry, if appropriate.
240 */
40381db7 241 bgp_unlink_nexthop(pi);
d62a17ae 242
7f040da1
DS
243 /* updates NHT pi list reference */
244 path_nh_map(pi, bnc, true);
d62a17ae 245
246 if (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_VALID) && bnc->metric)
40381db7
DS
247 (bgp_path_info_extra_get(pi))->igpmetric = bnc->metric;
248 else if (pi->extra)
249 pi->extra->igpmetric = 0;
d62a17ae 250 } else if (peer)
251 bnc->nht_info = (void *)peer; /* NHT peer reference */
252
253 /*
254 * We are cheating here. Views have no associated underlying
255 * ability to detect nexthops. So when we have a view
256 * just tell everyone the nexthop is valid
257 */
960035b2 258 if (bgp_route->inst_type == BGP_INSTANCE_TYPE_VIEW)
d62a17ae 259 return 1;
4053e952
PR
260 else if (safi == SAFI_UNICAST && pi
261 && pi->sub_type == BGP_ROUTE_IMPORTED && pi->extra
262 && pi->extra->num_labels) {
263 return bgp_isvalid_labeled_nexthop(bnc);
264 } else
d62a17ae 265 return (bgp_isvalid_nexthop(bnc));
fb018d25
DS
266}
267
d62a17ae 268void bgp_delete_connected_nexthop(afi_t afi, struct peer *peer)
9a233a02 269{
d62a17ae 270 struct bgp_nexthop_cache *bnc;
271 struct prefix p;
272
273 if (!peer)
274 return;
275
d62a17ae 276 if (!sockunion2hostprefix(&peer->su, &p))
277 return;
278
f663c581 279 bnc = bnc_find(&peer->bgp->nexthop_cache_table[family2afi(p.family)],
545aeef1 280 &p, 0);
14315f2d
DS
281 if (!bnc) {
282 if (BGP_DEBUG(nht, NHT))
8c1a4c10 283 zlog_debug(
f663c581 284 "Cannot find connected NHT node for peer %s(%s)",
8c1a4c10 285 peer->host, peer->bgp->name_pretty);
14315f2d
DS
286 return;
287 }
d62a17ae 288
289 if (bnc->nht_info != peer) {
290 if (BGP_DEBUG(nht, NHT))
291 zlog_debug(
8c1a4c10
DS
292 "Connected NHT %p node for peer %s(%s) points to %p",
293 bnc, peer->host, bnc->bgp->name_pretty,
294 bnc->nht_info);
d62a17ae 295 return;
296 }
297
298 bnc->nht_info = NULL;
299
300 if (LIST_EMPTY(&(bnc->paths))) {
301 if (BGP_DEBUG(nht, NHT))
8c1a4c10
DS
302 zlog_debug(
303 "Freeing connected NHT node %p for peer %s(%s)",
304 bnc, peer->host, bnc->bgp->name_pretty);
d62a17ae 305 unregister_zebra_rnh(bnc, 0);
d62a17ae 306 bnc_free(bnc);
307 }
9a233a02
DS
308}
309
545aeef1
RW
310static void bgp_process_nexthop_update(struct bgp_nexthop_cache *bnc,
311 struct zapi_route *nhr)
fb018d25 312{
d62a17ae 313 struct nexthop *nexthop;
314 struct nexthop *oldnh;
315 struct nexthop *nhlist_head = NULL;
316 struct nexthop *nhlist_tail = NULL;
d62a17ae 317 int i;
14315f2d 318
d62a17ae 319 bnc->last_update = bgp_clock();
320 bnc->change_flags = 0;
d62a17ae 321
322 /* debug print the input */
987a720a
DS
323 if (BGP_DEBUG(nht, NHT)) {
324 char bnc_buf[BNC_FLAG_DUMP_SIZE];
325
d62a17ae 326 zlog_debug(
987a720a 327 "%s(%u): Rcvd NH update %pFX(%u) - metric %d/%d #nhops %d/%d flags %s",
2dbe669b 328 bnc->bgp->name_pretty, bnc->bgp->vrf_id, &nhr->prefix,
545aeef1 329 bnc->srte_color, nhr->metric, bnc->metric,
987a720a
DS
330 nhr->nexthop_num, bnc->nexthop_num,
331 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
332 sizeof(bnc_buf)));
333 }
d62a17ae 334
545aeef1 335 if (nhr->metric != bnc->metric)
d62a17ae 336 bnc->change_flags |= BGP_NEXTHOP_METRIC_CHANGED;
337
545aeef1 338 if (nhr->nexthop_num != bnc->nexthop_num)
d62a17ae 339 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
340
545aeef1 341 if (nhr->nexthop_num) {
6137a77d
DS
342 struct peer *peer = bnc->nht_info;
343
d62a17ae 344 /* notify bgp fsm if nbr ip goes from invalid->valid */
345 if (!bnc->nexthop_num)
346 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
347
348 bnc->flags |= BGP_NEXTHOP_VALID;
545aeef1
RW
349 bnc->metric = nhr->metric;
350 bnc->nexthop_num = nhr->nexthop_num;
4a749e2c 351
960035b2
PZ
352 bnc->flags &= ~BGP_NEXTHOP_LABELED_VALID; /* check below */
353
545aeef1 354 for (i = 0; i < nhr->nexthop_num; i++) {
960035b2
PZ
355 int num_labels = 0;
356
545aeef1 357 nexthop = nexthop_from_zapi_nexthop(&nhr->nexthops[i]);
d62a17ae 358
6137a77d
DS
359 /*
360 * Turn on RA for the v6 nexthops
361 * we receive from bgp. This is to allow us
362 * to work with v4 routing over v6 nexthops
363 */
687a2b5d
DS
364 if (peer && !peer->ifp
365 && CHECK_FLAG(peer->flags,
366 PEER_FLAG_CAPABILITY_ENHE)
545aeef1 367 && nhr->prefix.family == AF_INET6
65f803e8 368 && nexthop->type != NEXTHOP_TYPE_BLACKHOLE) {
6137a77d
DS
369 struct interface *ifp;
370
371 ifp = if_lookup_by_index(nexthop->ifindex,
372 nexthop->vrf_id);
8c9769e0
DS
373 if (ifp)
374 zclient_send_interface_radv_req(
375 zclient, nexthop->vrf_id, ifp,
376 true,
377 BGP_UNNUM_DEFAULT_RA_INTERVAL);
6137a77d 378 }
960035b2
PZ
379 /* There is at least one label-switched path */
380 if (nexthop->nh_label &&
381 nexthop->nh_label->num_labels) {
382
383 bnc->flags |= BGP_NEXTHOP_LABELED_VALID;
384 num_labels = nexthop->nh_label->num_labels;
385 }
386
d62a17ae 387 if (BGP_DEBUG(nht, NHT)) {
388 char buf[NEXTHOP_STRLEN];
389 zlog_debug(
960035b2
PZ
390 " nhop via %s (%d labels)",
391 nexthop2str(nexthop, buf, sizeof(buf)),
392 num_labels);
d62a17ae 393 }
394
395 if (nhlist_tail) {
396 nhlist_tail->next = nexthop;
397 nhlist_tail = nexthop;
398 } else {
399 nhlist_tail = nexthop;
400 nhlist_head = nexthop;
401 }
402
403 /* No need to evaluate the nexthop if we have already
404 * determined
405 * that there has been a change.
406 */
407 if (bnc->change_flags & BGP_NEXTHOP_CHANGED)
408 continue;
409
410 for (oldnh = bnc->nexthop; oldnh; oldnh = oldnh->next)
78fba41b 411 if (nexthop_same(oldnh, nexthop))
d62a17ae 412 break;
413
414 if (!oldnh)
415 bnc->change_flags |= BGP_NEXTHOP_CHANGED;
416 }
417 bnc_nexthop_free(bnc);
418 bnc->nexthop = nhlist_head;
419 } else {
420 bnc->flags &= ~BGP_NEXTHOP_VALID;
545aeef1 421 bnc->nexthop_num = nhr->nexthop_num;
d62a17ae 422
423 /* notify bgp fsm if nbr ip goes from valid->invalid */
424 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
425
426 bnc_nexthop_free(bnc);
427 bnc->nexthop = NULL;
428 }
429
430 evaluate_paths(bnc);
fb018d25
DS
431}
432
545aeef1
RW
433void bgp_parse_nexthop_update(int command, vrf_id_t vrf_id)
434{
435 struct bgp_nexthop_cache_head *tree = NULL;
436 struct bgp_nexthop_cache *bnc;
437 struct bgp *bgp;
438 struct zapi_route nhr;
439 afi_t afi;
440
441 bgp = bgp_lookup_by_vrf_id(vrf_id);
442 if (!bgp) {
443 flog_err(
444 EC_BGP_NH_UPD,
445 "parse nexthop update: instance not found for vrf_id %u",
446 vrf_id);
447 return;
448 }
449
450 if (!zapi_nexthop_update_decode(zclient->ibuf, &nhr)) {
6c83dded
QY
451 zlog_err("%s[%s]: Failure to decode nexthop update",
452 __PRETTY_FUNCTION__, bgp->name_pretty);
545aeef1
RW
453 return;
454 }
455
456 afi = family2afi(nhr.prefix.family);
457 if (command == ZEBRA_NEXTHOP_UPDATE)
458 tree = &bgp->nexthop_cache_table[afi];
459 else if (command == ZEBRA_IMPORT_CHECK_UPDATE)
460 tree = &bgp->import_check_table[afi];
461
462 bnc = bnc_find(tree, &nhr.prefix, nhr.srte_color);
463 if (!bnc) {
2dbe669b 464 if (BGP_DEBUG(nht, NHT))
545aeef1 465 zlog_debug(
2dbe669b
DA
466 "parse nexthop update(%pFX(%u)(%s)): bnc info not found",
467 &nhr.prefix, nhr.srte_color, bgp->name_pretty);
545aeef1
RW
468 return;
469 }
470
471 bgp_process_nexthop_update(bnc, &nhr);
472
473 /*
474 * HACK: if any BGP route is dependant on an SR-policy that doesn't
475 * exist, zebra will never send NH updates relative to that policy. In
476 * that case, whenever we receive an update about a colorless NH, update
477 * the corresponding colorful NHs that share the same endpoint but that
478 * are inactive. This ugly hack should work around the problem at the
479 * cost of a performance pernalty. Long term, what should be done is to
480 * make zebra's RNH subsystem aware of SR-TE colors (like bgpd is),
481 * which should provide a better infrastructure to solve this issue in
482 * a more efficient and elegant way.
483 */
484 if (nhr.srte_color == 0) {
485 struct bgp_nexthop_cache *bnc_iter;
486
487 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
488 bnc_iter) {
489 if (!prefix_same(&bnc->prefix, &bnc_iter->prefix)
490 || bnc_iter->srte_color == 0
491 || CHECK_FLAG(bnc_iter->flags, BGP_NEXTHOP_VALID))
492 continue;
493
494 bgp_process_nexthop_update(bnc_iter, &nhr);
495 }
496 }
497}
498
ee7ca6c0 499/*
500 * Cleanup nexthop registration and status information for BGP nexthops
501 * pertaining to this VRF. This is invoked upon VRF deletion.
502 */
503void bgp_cleanup_nexthops(struct bgp *bgp)
504{
f663c581
RW
505 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
506 struct bgp_nexthop_cache *bnc;
ee7ca6c0 507
f663c581
RW
508 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
509 bnc) {
ee7ca6c0 510 /* Clear relevant flags. */
511 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_VALID);
512 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
513 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
514 }
515 }
516}
517
fb018d25
DS
518/**
519 * make_prefix - make a prefix structure from the path (essentially
520 * path's node.
521 */
40381db7 522static int make_prefix(int afi, struct bgp_path_info *pi, struct prefix *p)
fb018d25 523{
078430f6 524
40381db7
DS
525 int is_bgp_static = ((pi->type == ZEBRA_ROUTE_BGP)
526 && (pi->sub_type == BGP_ROUTE_STATIC))
d62a17ae 527 ? 1
528 : 0;
9bcb3eef
DS
529 struct bgp_dest *net = pi->net;
530 const struct prefix *p_orig = bgp_dest_get_prefix(net);
92d6f769 531 struct in_addr ipv4;
0378bcaa
PG
532
533 if (p_orig->family == AF_FLOWSPEC) {
534 if (!pi->peer)
535 return -1;
536 return bgp_flowspec_get_first_nh(pi->peer->bgp,
1840384b 537 pi, p, afi);
0378bcaa 538 }
d62a17ae 539 memset(p, 0, sizeof(struct prefix));
540 switch (afi) {
541 case AFI_IP:
542 p->family = AF_INET;
543 if (is_bgp_static) {
b54892e0
DS
544 p->u.prefix4 = p_orig->u.prefix4;
545 p->prefixlen = p_orig->prefixlen;
d62a17ae 546 } else {
92d6f769
K
547 if (IS_MAPPED_IPV6(&pi->attr->mp_nexthop_global)) {
548 ipv4_mapped_ipv6_to_ipv4(
549 &pi->attr->mp_nexthop_global, &ipv4);
550 p->u.prefix4 = ipv4;
551 p->prefixlen = IPV4_MAX_BITLEN;
552 } else {
553 p->u.prefix4 = pi->attr->nexthop;
554 p->prefixlen = IPV4_MAX_BITLEN;
555 }
d62a17ae 556 }
557 break;
558 case AFI_IP6:
d62a17ae 559 p->family = AF_INET6;
560
561 if (is_bgp_static) {
b54892e0
DS
562 p->u.prefix6 = p_orig->u.prefix6;
563 p->prefixlen = p_orig->prefixlen;
d62a17ae 564 } else {
606fdbb1
DA
565 /* If we receive MP_REACH nexthop with ::(LL)
566 * or LL(LL), use LL address as nexthop cache.
567 */
568 if (pi->attr->mp_nexthop_len
569 == BGP_ATTR_NHLEN_IPV6_GLOBAL_AND_LL
570 && (IN6_IS_ADDR_UNSPECIFIED(
571 &pi->attr->mp_nexthop_global)
572 || IN6_IS_ADDR_LINKLOCAL(
573 &pi->attr->mp_nexthop_global)))
574 p->u.prefix6 = pi->attr->mp_nexthop_local;
575 else
576 p->u.prefix6 = pi->attr->mp_nexthop_global;
d62a17ae 577 p->prefixlen = IPV6_MAX_BITLEN;
578 }
579 break;
580 default:
581 if (BGP_DEBUG(nht, NHT)) {
582 zlog_debug(
583 "%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
15569c58 584 __func__, afi, AFI_IP, AFI_IP6);
d62a17ae 585 }
586 break;
65740e1b 587 }
d62a17ae 588 return 0;
fb018d25
DS
589}
590
591/**
078430f6 592 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
fb018d25
DS
593 * command to Zebra.
594 * ARGUMENTS:
595 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
078430f6 596 * int command -- command to send to zebra
fb018d25
DS
597 * RETURNS:
598 * void.
599 */
d62a17ae 600static void sendmsg_zebra_rnh(struct bgp_nexthop_cache *bnc, int command)
fb018d25 601{
3c192540 602 bool exact_match = false;
d62a17ae 603 int ret;
604
3c192540 605 if (!zclient)
d62a17ae 606 return;
607
608 /* Don't try to register if Zebra doesn't know of this instance. */
bb4ef1ae
DS
609 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc->bgp)) {
610 if (BGP_DEBUG(zebra, ZEBRA))
15569c58
DA
611 zlog_debug(
612 "%s: No zebra instance to talk to, not installing NHT entry",
613 __func__);
d62a17ae 614 return;
bb4ef1ae 615 }
d62a17ae 616
1ee0a2df
DS
617 if (!bgp_zebra_num_connects()) {
618 if (BGP_DEBUG(zebra, ZEBRA))
15569c58
DA
619 zlog_debug(
620 "%s: We have not connected yet, cannot send nexthops",
621 __func__);
1ee0a2df 622 }
996c9314
LB
623 if ((command == ZEBRA_NEXTHOP_REGISTER
624 || command == ZEBRA_IMPORT_ROUTE_REGISTER)
625 && (CHECK_FLAG(bnc->flags, BGP_NEXTHOP_CONNECTED)
626 || CHECK_FLAG(bnc->flags, BGP_STATIC_ROUTE_EXACT_MATCH)))
3c192540 627 exact_match = true;
d62a17ae 628
f663c581
RW
629 if (BGP_DEBUG(zebra, ZEBRA))
630 zlog_debug("%s: sending cmd %s for %pFX (vrf %s)", __func__,
631 zserv_command_string(command), &bnc->prefix,
632 bnc->bgp->name_pretty);
960035b2 633
f663c581 634 ret = zclient_send_rnh(zclient, command, &bnc->prefix, exact_match,
996c9314 635 bnc->bgp->vrf_id);
d62a17ae 636 /* TBD: handle the failure */
7cfdb485 637 if (ret == ZCLIENT_SEND_FAILURE)
e50f7cfd 638 flog_warn(EC_BGP_ZEBRA_SEND,
f162a5b9 639 "sendmsg_nexthop: zclient_send_message() failed");
d62a17ae 640
641 if ((command == ZEBRA_NEXTHOP_REGISTER)
642 || (command == ZEBRA_IMPORT_ROUTE_REGISTER))
643 SET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
644 else if ((command == ZEBRA_NEXTHOP_UNREGISTER)
645 || (command == ZEBRA_IMPORT_ROUTE_UNREGISTER))
646 UNSET_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED);
647 return;
fb018d25
DS
648}
649
650/**
078430f6
DS
651 * register_zebra_rnh - register a NH/route with Zebra for notification
652 * when the route or the route to the nexthop changes.
fb018d25 653 * ARGUMENTS:
078430f6 654 * struct bgp_nexthop_cache *bnc
fb018d25
DS
655 * RETURNS:
656 * void.
657 */
d62a17ae 658static void register_zebra_rnh(struct bgp_nexthop_cache *bnc,
659 int is_bgp_import_route)
fb018d25 660{
d62a17ae 661 /* Check if we have already registered */
662 if (bnc->flags & BGP_NEXTHOP_REGISTERED)
663 return;
664 if (is_bgp_import_route)
665 sendmsg_zebra_rnh(bnc, ZEBRA_IMPORT_ROUTE_REGISTER);
666 else
667 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_REGISTER);
fb018d25
DS
668}
669
670/**
078430f6 671 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
fb018d25 672 * ARGUMENTS:
078430f6 673 * struct bgp_nexthop_cache *bnc
fb018d25
DS
674 * RETURNS:
675 * void.
676 */
d62a17ae 677static void unregister_zebra_rnh(struct bgp_nexthop_cache *bnc,
678 int is_bgp_import_route)
fb018d25 679{
d62a17ae 680 /* Check if we have already registered */
681 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_REGISTERED))
682 return;
683
684 if (is_bgp_import_route)
685 sendmsg_zebra_rnh(bnc, ZEBRA_IMPORT_ROUTE_UNREGISTER);
686 else
687 sendmsg_zebra_rnh(bnc, ZEBRA_NEXTHOP_UNREGISTER);
fb018d25
DS
688}
689
690/**
691 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
692 * ARGUMENTS:
693 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
694 * RETURNS:
695 * void.
696 */
d62a17ae 697static void evaluate_paths(struct bgp_nexthop_cache *bnc)
fb018d25 698{
9bcb3eef 699 struct bgp_dest *dest;
4b7e6066 700 struct bgp_path_info *path;
d62a17ae 701 int afi;
702 struct peer *peer = (struct peer *)bnc->nht_info;
703 struct bgp_table *table;
704 safi_t safi;
960035b2 705 struct bgp *bgp_path;
b54892e0 706 const struct prefix *p;
d62a17ae 707
708 if (BGP_DEBUG(nht, NHT)) {
709 char buf[PREFIX2STR_BUFFER];
987a720a 710 char bnc_buf[BNC_FLAG_DUMP_SIZE];
df2a41a9 711 char chg_buf[BNC_FLAG_DUMP_SIZE];
987a720a 712
d62a17ae 713 bnc_str(bnc, buf, PREFIX2STR_BUFFER);
714 zlog_debug(
df2a41a9 715 "NH update for %s(%u)(%s) - flags %s chgflags %s- evaluate paths",
987a720a 716 buf, bnc->srte_color, bnc->bgp->name_pretty,
df2a41a9
DS
717 bgp_nexthop_dump_bnc_flags(bnc, bnc_buf,
718 sizeof(bnc_buf)),
719 bgp_nexthop_dump_bnc_change_flags(bnc, chg_buf,
720 sizeof(bnc_buf)));
fb018d25
DS
721 }
722
a2addae8 723 LIST_FOREACH (path, &(bnc->paths), nh_thread) {
d62a17ae 724 if (!(path->type == ZEBRA_ROUTE_BGP
725 && ((path->sub_type == BGP_ROUTE_NORMAL)
960035b2
PZ
726 || (path->sub_type == BGP_ROUTE_STATIC)
727 || (path->sub_type == BGP_ROUTE_IMPORTED))))
d62a17ae 728 continue;
729
9bcb3eef
DS
730 dest = path->net;
731 assert(dest && bgp_dest_table(dest));
732 p = bgp_dest_get_prefix(dest);
b54892e0 733 afi = family2afi(p->family);
9bcb3eef 734 table = bgp_dest_table(dest);
d62a17ae 735 safi = table->safi;
736
960035b2
PZ
737 /*
738 * handle routes from other VRFs (they can have a
739 * nexthop in THIS VRF). bgp_path is the bgp instance
740 * that owns the route referencing this nexthop.
741 */
742 bgp_path = table->bgp;
743
744 /*
745 * Path becomes valid/invalid depending on whether the nexthop
d62a17ae 746 * reachable/unreachable.
960035b2
PZ
747 *
748 * In case of unicast routes that were imported from vpn
749 * and that have labels, they are valid only if there are
750 * nexthops with labels
d62a17ae 751 */
960035b2 752
34ea39b6 753 bool bnc_is_valid_nexthop = false;
754 bool path_valid = false;
960035b2
PZ
755
756 if (safi == SAFI_UNICAST &&
757 path->sub_type == BGP_ROUTE_IMPORTED &&
758 path->extra &&
759 path->extra->num_labels) {
760
761 bnc_is_valid_nexthop =
34ea39b6 762 bgp_isvalid_labeled_nexthop(bnc) ? true : false;
960035b2 763 } else {
e7cbe5e5
NT
764 if (bgp_update_martian_nexthop(
765 bnc->bgp, afi, safi, path->type,
9bcb3eef 766 path->sub_type, path->attr, dest)) {
e7cbe5e5
NT
767 if (BGP_DEBUG(nht, NHT))
768 zlog_debug(
56ca3b5b 769 "%s: prefix %pBD (vrf %s), ignoring path due to martian or self-next-hop",
9bcb3eef 770 __func__, dest, bgp_path->name);
e7cbe5e5
NT
771 } else
772 bnc_is_valid_nexthop =
34ea39b6 773 bgp_isvalid_nexthop(bnc) ? true : false;
960035b2
PZ
774 }
775
9e15d76a 776 if (BGP_DEBUG(nht, NHT)) {
777 char buf1[RD_ADDRSTRLEN];
960035b2 778
9bcb3eef
DS
779 if (dest->pdest) {
780 prefix_rd2str((struct prefix_rd *)bgp_dest_get_prefix(dest->pdest),
9e15d76a 781 buf1, sizeof(buf1));
782 zlog_debug(
56ca3b5b 783 "... eval path %d/%d %pBD RD %s %s flags 0x%x",
9bcb3eef 784 afi, safi, dest, buf1,
9e15d76a 785 bgp_path->name_pretty, path->flags);
786 } else
787 zlog_debug(
56ca3b5b 788 "... eval path %d/%d %pBD %s flags 0x%x",
9bcb3eef 789 afi, safi, dest, bgp_path->name_pretty,
9e15d76a 790 path->flags);
791 }
d62a17ae 792
0139efe0 793 /* Skip paths marked for removal or as history. */
794 if (CHECK_FLAG(path->flags, BGP_PATH_REMOVED)
795 || CHECK_FLAG(path->flags, BGP_PATH_HISTORY))
796 continue;
797
d62a17ae 798 /* Copy the metric to the path. Will be used for bestpath
799 * computation */
800 if (bgp_isvalid_nexthop(bnc) && bnc->metric)
18ee8310
DS
801 (bgp_path_info_extra_get(path))->igpmetric =
802 bnc->metric;
d62a17ae 803 else if (path->extra)
804 path->extra->igpmetric = 0;
805
806 if (CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_METRIC_CHANGED)
545aeef1
RW
807 || CHECK_FLAG(bnc->change_flags, BGP_NEXTHOP_CHANGED)
808 || path->attr->srte_color != 0)
1defdda8 809 SET_FLAG(path->flags, BGP_PATH_IGP_CHANGED);
d62a17ae 810
34ea39b6 811 path_valid = !!CHECK_FLAG(path->flags, BGP_PATH_VALID);
812 if (path_valid != bnc_is_valid_nexthop) {
813 if (path_valid) {
814 /* No longer valid, clear flag; also for EVPN
815 * routes, unimport from VRFs if needed.
816 */
817 bgp_aggregate_decrement(bgp_path, p, path, afi,
818 safi);
9bcb3eef 819 bgp_path_info_unset_flag(dest, path,
34ea39b6 820 BGP_PATH_VALID);
821 if (safi == SAFI_EVPN &&
9bcb3eef 822 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
34ea39b6 823 bgp_evpn_unimport_route(bgp_path,
9bcb3eef 824 afi, safi, bgp_dest_get_prefix(dest), path);
34ea39b6 825 } else {
826 /* Path becomes valid, set flag; also for EVPN
827 * routes, import from VRFs if needed.
828 */
9bcb3eef 829 bgp_path_info_set_flag(dest, path,
34ea39b6 830 BGP_PATH_VALID);
831 bgp_aggregate_increment(bgp_path, p, path, afi,
832 safi);
833 if (safi == SAFI_EVPN &&
9bcb3eef 834 bgp_evpn_is_prefix_nht_supported(bgp_dest_get_prefix(dest)))
34ea39b6 835 bgp_evpn_import_route(bgp_path,
9bcb3eef 836 afi, safi, bgp_dest_get_prefix(dest), path);
34ea39b6 837 }
7c312383
AD
838 }
839
9bcb3eef 840 bgp_process(bgp_path, dest, afi, safi);
d62a17ae 841 }
fc9a856f 842
1e91f1d1
DS
843 if (peer) {
844 int valid_nexthops = bgp_isvalid_nexthop(bnc);
845
846 if (valid_nexthops)
847 peer->last_reset = PEER_DOWN_WAITING_OPEN;
848 else
849 peer->last_reset = PEER_DOWN_WAITING_NHT;
850
851 if (!CHECK_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED)) {
852 if (BGP_DEBUG(nht, NHT))
15569c58
DA
853 zlog_debug(
854 "%s: Updating peer (%s(%s)) status with NHT",
855 __func__, peer->host,
856 peer->bgp->name_pretty);
f8dcd38d 857 bgp_fsm_nht_update(peer, !!valid_nexthops);
1e91f1d1
DS
858 SET_FLAG(bnc->flags, BGP_NEXTHOP_PEER_NOTIFIED);
859 }
d62a17ae 860 }
fc9a856f 861
d62a17ae 862 RESET_FLAG(bnc->change_flags);
fb018d25
DS
863}
864
865/**
866 * path_nh_map - make or break path-to-nexthop association.
867 * ARGUMENTS:
868 * path - pointer to the path structure
869 * bnc - pointer to the nexthop structure
870 * make - if set, make the association. if unset, just break the existing
871 * association.
872 */
7f040da1
DS
873void path_nh_map(struct bgp_path_info *path, struct bgp_nexthop_cache *bnc,
874 bool make)
fb018d25 875{
d62a17ae 876 if (path->nexthop) {
877 LIST_REMOVE(path, nh_thread);
878 path->nexthop->path_count--;
879 path->nexthop = NULL;
880 }
881 if (make) {
882 LIST_INSERT_HEAD(&(bnc->paths), path, nh_thread);
883 path->nexthop = bnc;
884 path->nexthop->path_count++;
885 }
fb018d25 886}
1ee0a2df
DS
887
888/*
889 * This function is called to register nexthops to zebra
890 * as that we may have tried to install the nexthops
891 * before we actually have a zebra connection
892 */
893void bgp_nht_register_nexthops(struct bgp *bgp)
894{
f663c581
RW
895 for (afi_t afi = AFI_IP; afi < AFI_MAX; afi++) {
896 struct bgp_nexthop_cache *bnc;
1ee0a2df 897
f663c581
RW
898 frr_each (bgp_nexthop_cache, &bgp->nexthop_cache_table[afi],
899 bnc) {
1ee0a2df
DS
900 register_zebra_rnh(bnc, 0);
901 }
902 }
903}
1ea03b90 904
b3a3290e 905void bgp_nht_reg_enhe_cap_intfs(struct peer *peer)
1ea03b90
DS
906{
907 struct bgp *bgp;
1ea03b90
DS
908 struct bgp_nexthop_cache *bnc;
909 struct nexthop *nhop;
910 struct interface *ifp;
911 struct prefix p;
912
913 if (peer->ifp)
914 return;
915
916 bgp = peer->bgp;
1ea03b90 917 if (!sockunion2hostprefix(&peer->su, &p)) {
b3a3290e
DS
918 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
919 __func__, peer->host);
1ea03b90
DS
920 return;
921 }
922
923 if (p.family != AF_INET6)
924 return;
1ea03b90 925
545aeef1 926 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0);
1ea03b90
DS
927 if (!bnc)
928 return;
929
930 if (peer != bnc->nht_info)
931 return;
932
933 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
8c9769e0
DS
934 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
935
936 if (!ifp)
937 continue;
938
1ea03b90
DS
939 zclient_send_interface_radv_req(zclient,
940 nhop->vrf_id,
941 ifp, true,
942 BGP_UNNUM_DEFAULT_RA_INTERVAL);
943 }
944}
b3a3290e
DS
945
946void bgp_nht_dereg_enhe_cap_intfs(struct peer *peer)
947{
948 struct bgp *bgp;
b3a3290e
DS
949 struct bgp_nexthop_cache *bnc;
950 struct nexthop *nhop;
951 struct interface *ifp;
952 struct prefix p;
953
954 if (peer->ifp)
955 return;
956
957 bgp = peer->bgp;
958
b3a3290e
DS
959 if (!sockunion2hostprefix(&peer->su, &p)) {
960 zlog_warn("%s: Unable to convert sockunion to prefix for %s",
961 __func__, peer->host);
962 return;
963 }
964
965 if (p.family != AF_INET6)
966 return;
967
545aeef1 968 bnc = bnc_find(&bgp->nexthop_cache_table[AFI_IP6], &p, 0);
b3a3290e
DS
969 if (!bnc)
970 return;
971
972 if (peer != bnc->nht_info)
973 return;
974
975 for (nhop = bnc->nexthop; nhop; nhop = nhop->next) {
976 ifp = if_lookup_by_index(nhop->ifindex, nhop->vrf_id);
977
68cecc3b
DS
978 if (!ifp)
979 continue;
980
b3a3290e
DS
981 zclient_send_interface_radv_req(zclient, nhop->vrf_id, ifp, 0,
982 0);
983 }
984}
c589d847
AK
985
986/****************************************************************************
987 * L3 NHGs are used for fast failover of nexthops in the dplane. These are
988 * the APIs for allocating L3 NHG ids. Management of the L3 NHG itself is
989 * left to the application using it.
990 * PS: Currently EVPN host routes is the only app using L3 NHG for fast
991 * failover of remote ES links.
992 ***************************************************************************/
993static bitfield_t bgp_nh_id_bitmap;
8bcb09a1 994static uint32_t bgp_l3nhg_start;
c589d847 995
8bcb09a1
AK
/*
 * Nexthop-group callbacks handed to nexthop_group_init().
 * XXX - currently all four are intentionally empty.
 */

/* A nexthop group was added: nothing to do. */
static void bgp_l3nhg_add_cb(const char *name)
{
}

/* A nexthop was added to a group: nothing to do. */
static void bgp_l3nhg_add_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}

/* A nexthop was removed from a group: nothing to do. */
static void bgp_l3nhg_del_nexthop_cb(const struct nexthop_group_cmd *nhgc,
				     const struct nexthop *nhop)
{
}

/* A nexthop group was deleted: nothing to do. */
static void bgp_l3nhg_del_cb(const char *name)
{
}
1011
8bcb09a1 1012static void bgp_l3nhg_zebra_init(void)
c589d847 1013{
8bcb09a1
AK
1014 static bool bgp_l3nhg_zebra_inited;
1015 if (bgp_l3nhg_zebra_inited)
c589d847
AK
1016 return;
1017
8bcb09a1
AK
1018 bgp_l3nhg_zebra_inited = true;
1019 bgp_l3nhg_start = zclient_get_nhg_start(ZEBRA_ROUTE_BGP);
1020 nexthop_group_init(bgp_l3nhg_add_cb, bgp_l3nhg_add_nexthop_cb,
1021 bgp_l3nhg_del_nexthop_cb, bgp_l3nhg_del_cb);
c589d847
AK
1022}
1023
8bcb09a1
AK
1024
1025#define min(A, B) ((A) < (B) ? (A) : (B))
c589d847
AK
1026void bgp_l3nhg_init(void)
1027{
8bcb09a1
AK
1028 uint32_t id_max;
1029
1030 id_max = min(ZEBRA_NHG_PROTO_SPACING - 1, 16 * 1024);
1031 bf_init(bgp_nh_id_bitmap, id_max);
c589d847 1032 bf_assign_zero_index(bgp_nh_id_bitmap);
8bcb09a1
AK
1033
1034 if (BGP_DEBUG(nht, NHT) || BGP_DEBUG(evpn_mh, EVPN_MH_ES))
1035 zlog_debug("bgp l3_nhg range %u - %u", bgp_l3nhg_start + 1,
1036 bgp_l3nhg_start + id_max);
c589d847
AK
1037}
1038
1039void bgp_l3nhg_finish(void)
1040{
1041 bf_free(bgp_nh_id_bitmap);
1042}
8bcb09a1
AK
1043
1044uint32_t bgp_l3nhg_id_alloc(void)
1045{
1046 uint32_t nhg_id = 0;
1047
1048 bgp_l3nhg_zebra_init();
1049 bf_assign_index(bgp_nh_id_bitmap, nhg_id);
1050 if (nhg_id)
1051 nhg_id += bgp_l3nhg_start;
1052
1053 return nhg_id;
1054}
1055
1056void bgp_l3nhg_id_free(uint32_t nhg_id)
1057{
1058 if (!nhg_id || (nhg_id <= bgp_l3nhg_start))
1059 return;
1060
1061 nhg_id -= bgp_l3nhg_start;
1062
1063 bf_release_index(bgp_nh_id_bitmap, nhg_id);
1064}