1 /* BGP Nexthop tracking
2 * Copyright (C) 2013 Cumulus Networks, Inc.
4 * This file is part of GNU Zebra.
6 * GNU Zebra is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License as published by the
8 * Free Software Foundation; either version 2, or (at your option) any
11 * GNU Zebra is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with GNU Zebra; see the file COPYING. If not, write to the Free
18 * Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
36 #include "bgpd/bgpd.h"
37 #include "bgpd/bgp_table.h"
38 #include "bgpd/bgp_route.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_nexthop.h"
41 #include "bgpd/bgp_debug.h"
42 #include "bgpd/bgp_nht.h"
43 #include "bgpd/bgp_fsm.h"
45 extern struct zclient
*zclient
;
47 static void register_zebra_rnh(struct bgp_nexthop_cache
*bnc
,
48 int is_bgp_static_route
);
49 static void unregister_zebra_rnh(struct bgp_nexthop_cache
*bnc
,
50 int is_bgp_static_route
);
51 static void evaluate_paths(struct bgp_nexthop_cache
*bnc
);
52 static int make_prefix(int afi
, struct bgp_info
*ri
, struct prefix
*p
);
53 static void path_nh_map(struct bgp_info
*path
, struct bgp_nexthop_cache
*bnc
,
/*
 * bgp_find_nexthop - report whether the nexthop cache entry attached to
 * a path is usable.
 *
 * path      - path whose cached nexthop entry (path->nexthop) is examined.
 * connected - when non-zero, the entry's BGP_NEXTHOP_CONNECTED flag is
 *             tested as well.
 *
 * The final result is the state of BGP_NEXTHOP_VALID in the entry's flags.
 * NOTE(review): when 'connected' is requested but the entry is not flagged
 * BGP_NEXTHOP_CONNECTED the function presumably reports failure -- confirm.
 */
57 bgp_find_nexthop (struct bgp_info
*path
, int connected
)
59 struct bgp_nexthop_cache
*bnc
= path
->nexthop
;
64 if (connected
&& !(CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
)))
67 return (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
));
/*
 * bgp_unlink_nexthop - detach a path from its nexthop cache entry.
 *
 * path - path whose association with path->nexthop is broken through
 *        path_nh_map().
 *
 * If the entry is then left with an empty path list and no nht_info
 * attached, it is unregistered from Zebra (as an import route when
 * BGP_STATIC_ROUTE is set in its flags), the info pointer of its table
 * node is cleared and the node is unlocked.
 */
71 bgp_unlink_nexthop (struct bgp_info
*path
)
73 struct bgp_nexthop_cache
*bnc
= path
->nexthop
;
/* Break the path <-> nexthop association only; nothing new is linked. */
78 path_nh_map(path
, NULL
, 0);
/* Last user gone: no paths reference the entry and no nht_info is set. */
80 if (LIST_EMPTY(&(bnc
->paths
)) && !bnc
->nht_info
)
82 if (BGP_DEBUG(nht
, NHT
))
84 char buf
[PREFIX2STR_BUFFER
];
85 zlog_debug("bgp_unlink_nexthop: freeing bnc %s",
86 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
));
88 unregister_zebra_rnh(bnc
, CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
));
89 bnc
->node
->info
= NULL
;
90 bgp_unlock_node(bnc
->node
);
/*
 * bgp_find_or_add_nexthop - find or create the nexthop cache entry that
 * tracks the nexthop of a path (ri) or the address of a peer, register it
 * with Zebra when needed, and report whether it is currently valid.
 *
 * bgp       - instance whose nexthop_cache_table / import_check_table
 *             is used.
 * afi       - address family; for non-static paths it is overridden from
 *             the path attribute via BGP_ATTR_NEXTHOP_AFI_IP6().
 * ri        - path to link to the entry.
 * peer      - peer whose address is tracked; stored in bnc->nht_info.
 *             NOTE(review): presumably only used when ri is NULL --
 *             confirm against callers.
 * connected - non-zero when 'connected' resolution is required.
 *
 * Flag changes that alter how the entry must be resolved (exact-match
 * toggling for static routes, connected toggling) clear
 * BGP_NEXTHOP_REGISTERED and BGP_NEXTHOP_VALID so that the entry is
 * registered with Zebra again.
 *
 * Returns the state of BGP_NEXTHOP_VALID in the entry's flags.
 */
96 bgp_find_or_add_nexthop (struct bgp
*bgp
, afi_t afi
, struct bgp_info
*ri
,
97 struct peer
*peer
, int connected
)
100 struct bgp_nexthop_cache
*bnc
;
102 int is_bgp_static_route
= 0;
106 is_bgp_static_route
= ((ri
->type
== ZEBRA_ROUTE_BGP
) &&
107 (ri
->sub_type
== BGP_ROUTE_STATIC
)) ? 1 : 0;
109 /* Since Extended Next-hop Encoding (RFC5549) support, we want to derive
110 address-family from the next-hop. */
111 if (!is_bgp_static_route
)
112 afi
= BGP_ATTR_NEXTHOP_AFI_IP6(ri
->attr
) ? AFI_IP6
: AFI_IP
;
114 /* This will return TRUE if the global IPv6 NH is a link local addr */
115 if (make_prefix(afi
, ri
, &p
) < 0)
123 p
.prefixlen
= IPV4_MAX_BITLEN
;
124 p
.u
.prefix4
= peer
->su
.sin
.sin_addr
;
126 else if (afi
== AFI_IP6
)
129 p
.prefixlen
= IPV6_MAX_BITLEN
;
130 p
.u
.prefix6
= peer
->su
.sin6
.sin6_addr
;
132 /* Don't register link local NH */
133 if (IN6_IS_ADDR_LINKLOCAL(&p
.u
.prefix6
))
138 if (BGP_DEBUG(nht
, NHT
))
140 zlog_debug("%s: Attempting to register with unknown AFI %d (not %d or %d)",
141 __FUNCTION__
, afi
, AFI_IP
, AFI_IP6
);
/* Static routes are tracked in the import check table, everything else
 * in the nexthop cache table. */
149 if (is_bgp_static_route
)
150 rn
= bgp_node_get (bgp
->import_check_table
[afi
], &p
);
152 rn
= bgp_node_get (bgp
->nexthop_cache_table
[afi
], &p
);
161 if (BGP_DEBUG(nht
, NHT
))
163 char buf
[PREFIX2STR_BUFFER
];
165 zlog_debug("Allocated bnc %s peer %p",
166 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
), peer
);
171 bgp_unlock_node (rn
);
173 if (is_bgp_static_route
)
175 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE
);
177 /* If we're toggling the type, re-register */
178 if ((bgp_flag_check(bgp
, BGP_FLAG_IMPORT_CHECK
)) &&
179 !CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
))
181 SET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
182 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
183 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
185 else if ((!bgp_flag_check(bgp
, BGP_FLAG_IMPORT_CHECK
)) &&
186 CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
))
188 UNSET_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
);
189 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
190 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
193 /* When nexthop is already known, but now requires 'connected' resolution,
194 * re-register it. The reverse scenario where the nexthop currently requires
195 * 'connected' resolution does not need a re-register (i.e., we treat
196 * 'connected-required' as an override) except in the scenario where this
197 * is actually a case of tracking a peer for connectivity (e.g., after
198 * disable connected-check).
199 * NOTE: We don't track the number of paths separately for 'connected-
200 * required' vs 'connected-not-required' as this change is not a common
203 else if (connected
&& ! CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
))
205 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
206 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
207 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
209 else if (peer
&& !connected
&& CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
))
211 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
);
212 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
213 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
);
216 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
))
217 register_zebra_rnh(bnc
, is_bgp_static_route
);
219 if (ri
&& ri
->nexthop
!= bnc
)
221 /* Unlink from existing nexthop cache, if any. This will also free
222 * the nexthop cache entry, if appropriate.
224 bgp_unlink_nexthop (ri
);
226 /* Link to new nexthop cache. */
227 path_nh_map(ri
, bnc
, 1);
/* A valid entry with a non-zero metric supplies the path's IGP metric. */
229 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
) && bnc
->metric
)
230 (bgp_info_extra_get(ri
))->igpmetric
= bnc
->metric
;
232 ri
->extra
->igpmetric
= 0;
/* Remember the peer tracked through this entry; evaluate_paths() reads
 * it back from nht_info. */
235 bnc
->nht_info
= (void *)peer
;
237 return (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
));
/*
 * bgp_delete_connected_nexthop - stop tracking a peer's address through
 * the nexthop cache.
 *
 * afi  - address family of the peer address (AFI_IP or AFI_IP6 are
 *        handled).
 * peer - peer whose address (peer->su) forms the looked-up prefix.
 *
 * The entry is looked up in peer->bgp->nexthop_cache_table.  A missing
 * entry, or one whose nht_info does not point at this peer, is only
 * reported through debug logging.  Otherwise nht_info is cleared and, if
 * no paths remain linked, the entry is unregistered from Zebra, detached
 * from its table node and the node is unlocked.
 * IPv6 link-local addresses are never registered, so they are
 * presumably skipped here as well (the action taken is not visible).
 */
241 bgp_delete_connected_nexthop (afi_t afi
, struct peer
*peer
)
244 struct bgp_nexthop_cache
*bnc
;
253 p
.prefixlen
= IPV4_MAX_BITLEN
;
254 p
.u
.prefix4
= peer
->su
.sin
.sin_addr
;
256 else if (afi
== AFI_IP6
)
259 p
.prefixlen
= IPV6_MAX_BITLEN
;
260 p
.u
.prefix6
= peer
->su
.sin6
.sin6_addr
;
262 /* We don't register link local address for NHT */
263 if (IN6_IS_ADDR_LINKLOCAL(&p
.u
.prefix6
))
269 rn
= bgp_node_lookup(peer
->bgp
->nexthop_cache_table
[family2afi(p
.family
)], &p
);
270 if (!rn
|| !rn
->info
)
272 if (BGP_DEBUG(nht
, NHT
))
273 zlog_debug("Cannot find connected NHT node for peer %s", peer
->host
);
275 bgp_unlock_node (rn
);
/* The entry must be the one tracking this very peer. */
282 if (bnc
->nht_info
!= peer
)
284 if (BGP_DEBUG(nht
, NHT
))
285 zlog_debug("Connected NHT %p node for peer %s points to %p",
286 bnc
, peer
->host
, bnc
->nht_info
);
290 bnc
->nht_info
= NULL
;
/* With the peer reference gone, an entry without paths has no users. */
292 if (LIST_EMPTY(&(bnc
->paths
)))
294 if (BGP_DEBUG(nht
, NHT
))
295 zlog_debug("Freeing connected NHT node %p for peer %s",
297 unregister_zebra_rnh(bnc
, 0);
298 bnc
->node
->info
= NULL
;
299 bgp_unlock_node(bnc
->node
);
/*
 * bgp_parse_nexthop_update - process a nexthop / import-check update
 * message for the BGP instance bound to vrf_id.
 *
 * command - ZEBRA_NEXTHOP_UPDATE selects the nexthop cache table,
 *           ZEBRA_IMPORT_CHECK_UPDATE selects the import check table.
 * vrf_id  - VRF used to look up the BGP instance; an unknown VRF is
 *           reported with zlog_err().
 *
 * Message fields are read from stream 's' in this order: family (word),
 * prefix length (byte), address (IPv4 or 16-byte IPv6), metric (long),
 * nexthop count (byte), then one typed record per nexthop.
 * A differing metric sets BGP_NEXTHOP_METRIC_CHANGED and a differing
 * nexthop count or nexthop set sets BGP_NEXTHOP_CHANGED in
 * bnc->change_flags.
 * NOTE(review): where 's' comes from is not visible here -- confirm.
 */
305 bgp_parse_nexthop_update (int command
, vrf_id_t vrf_id
)
308 struct bgp_node
*rn
= NULL
;
309 struct bgp_nexthop_cache
*bnc
;
310 struct nexthop
*nexthop
;
311 struct nexthop
*oldnh
;
312 struct nexthop
*nhlist_head
= NULL
;
313 struct nexthop
*nhlist_tail
= NULL
;
320 bgp
= bgp_lookup_by_vrf_id (vrf_id
);
323 zlog_err("parse nexthop update: instance not found for vrf_id %d", vrf_id
);
329 memset(&p
, 0, sizeof(struct prefix
));
330 p
.family
= stream_getw(s
);
331 p
.prefixlen
= stream_getc(s
);
335 p
.u
.prefix4
.s_addr
= stream_get_ipv4 (s
);
338 stream_get(&p
.u
.prefix6
, s
, 16);
/* The command decides which tracking table holds the entry. */
344 if (command
== ZEBRA_NEXTHOP_UPDATE
)
345 rn
= bgp_node_lookup(bgp
->nexthop_cache_table
[family2afi(p
.family
)], &p
);
346 else if (command
== ZEBRA_IMPORT_CHECK_UPDATE
)
347 rn
= bgp_node_lookup(bgp
->import_check_table
[family2afi(p
.family
)], &p
);
349 if (!rn
|| !rn
->info
)
351 if (BGP_DEBUG(nht
, NHT
))
353 char buf
[PREFIX2STR_BUFFER
];
354 prefix2str(&p
, buf
, sizeof(buf
));
355 zlog_debug("parse nexthop update(%s): rn not found", buf
);
358 bgp_unlock_node (rn
);
363 bgp_unlock_node (rn
);
364 bnc
->last_update
= bgp_clock();
365 bnc
->change_flags
= 0;
366 metric
= stream_getl (s
);
367 nexthop_num
= stream_getc (s
);
369 /* debug print the input */
370 if (BGP_DEBUG(nht
, NHT
))
372 char buf
[PREFIX2STR_BUFFER
];
373 prefix2str(&p
, buf
, sizeof (buf
));
374 zlog_debug("%d: NH update for %s - metric %d (cur %d) #nhops %d (cur %d)",
375 vrf_id
, buf
, metric
, bnc
->metric
, nexthop_num
, bnc
->nexthop_num
);
378 if (metric
!= bnc
->metric
)
379 bnc
->change_flags
|= BGP_NEXTHOP_METRIC_CHANGED
;
381 if(nexthop_num
!= bnc
->nexthop_num
)
382 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
386 /* notify bgp fsm if nbr ip goes from invalid->valid */
387 if (!bnc
->nexthop_num
)
388 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
390 bnc
->flags
|= BGP_NEXTHOP_VALID
;
391 bnc
->metric
= metric
;
392 bnc
->nexthop_num
= nexthop_num
;
/* Decode each nexthop record and append it to a temporary list. */
394 for (i
= 0; i
< nexthop_num
; i
++)
396 nexthop
= nexthop_new();
397 nexthop
->type
= stream_getc (s
);
398 switch (nexthop
->type
)
400 case NEXTHOP_TYPE_IPV4
:
401 nexthop
->gate
.ipv4
.s_addr
= stream_get_ipv4 (s
);
403 case NEXTHOP_TYPE_IFINDEX
:
404 nexthop
->ifindex
= stream_getl (s
);
406 case NEXTHOP_TYPE_IPV4_IFINDEX
:
407 nexthop
->gate
.ipv4
.s_addr
= stream_get_ipv4 (s
);
408 nexthop
->ifindex
= stream_getl (s
);
410 case NEXTHOP_TYPE_IPV6
:
411 stream_get (&nexthop
->gate
.ipv6
, s
, 16);
413 case NEXTHOP_TYPE_IPV6_IFINDEX
:
414 stream_get (&nexthop
->gate
.ipv6
, s
, 16);
415 nexthop
->ifindex
= stream_getl (s
);
422 if (BGP_DEBUG(nht
, NHT
))
424 char buf
[NEXTHOP_STRLEN
];
425 zlog_debug(" nhop via %s",
426 nexthop2str (nexthop
, buf
, sizeof (buf
)));
431 nhlist_tail
->next
= nexthop
;
432 nhlist_tail
= nexthop
;
436 nhlist_tail
= nexthop
;
437 nhlist_head
= nexthop
;
440 /* No need to evaluate the nexthop if we have already determined
441 * that there has been a change.
443 if (bnc
->change_flags
& BGP_NEXTHOP_CHANGED
)
446 for (oldnh
= bnc
->nexthop
; oldnh
; oldnh
= oldnh
->next
)
447 if (nexthop_same_no_recurse(oldnh
, nexthop
))
451 bnc
->change_flags
|= BGP_NEXTHOP_CHANGED
;
453 bnc_nexthop_free(bnc
);
454 bnc
->nexthop
= nhlist_head
;
458 bnc
->flags
&= ~BGP_NEXTHOP_VALID
;
459 bnc
->nexthop_num
= nexthop_num
;
461 /* notify bgp fsm if nbr ip goes from valid->invalid */
462 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
464 bnc_nexthop_free(bnc
);
472 * make_prefix - make a prefix structure from the path (essentially
/*
 * make_prefix - fill in a prefix structure from a path.
 *
 * afi - address family to build the prefix for.
 * ri  - path supplying either its own network prefix (ri->net->p) or its
 *       nexthop attribute.
 * p   - output prefix; zeroed before being filled in.
 *
 * Two sources are visible for each family: the path's own net prefix with
 * its prefix length, or the nexthop from the attribute (attr->nexthop for
 * IPv4, attr->extra->mp_nexthop_global for IPv6) with the maximum prefix
 * length.  NOTE(review): the choice is presumably driven by is_bgp_static
 * -- confirm.
 * For IPv6 a nexthop whose length is not BGP_ATTR_NHLEN_IPV6_GLOBAL, or
 * whose global address is link-local, is not registered.  The caller
 * treats a negative return value as "no prefix made".
 */
476 make_prefix (int afi
, struct bgp_info
*ri
, struct prefix
*p
)
479 int is_bgp_static
= ((ri
->type
== ZEBRA_ROUTE_BGP
) &&
480 (ri
->sub_type
== BGP_ROUTE_STATIC
)) ? 1 : 0;
482 memset (p
, 0, sizeof (struct prefix
));
489 p
->u
.prefix4
= ri
->net
->p
.u
.prefix4
;
490 p
->prefixlen
= ri
->net
->p
.prefixlen
;
494 p
->u
.prefix4
= ri
->attr
->nexthop
;
495 p
->prefixlen
= IPV4_MAX_BITLEN
;
500 /* We don't register link local NH */
501 if (ri
->attr
->extra
->mp_nexthop_len
!= BGP_ATTR_NHLEN_IPV6_GLOBAL
502 || IN6_IS_ADDR_LINKLOCAL (&ri
->attr
->extra
->mp_nexthop_global
))
505 p
->family
= AF_INET6
;
509 p
->u
.prefix6
= ri
->net
->p
.u
.prefix6
;
510 p
->prefixlen
= ri
->net
->p
.prefixlen
;
514 p
->u
.prefix6
= ri
->attr
->extra
->mp_nexthop_global
;
515 p
->prefixlen
= IPV6_MAX_BITLEN
;
520 if (BGP_DEBUG(nht
, NHT
))
522 zlog_debug("%s: Attempting to make prefix with unknown AFI %d (not %d or %d)",
523 __FUNCTION__
, afi
, AFI_IP
, AFI_IP6
);
531 * sendmsg_zebra_rnh -- Format and send a nexthop register/Unregister
534 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
535 * int command -- command to send to zebra
/*
 * sendmsg_zebra_rnh - build and send one register/unregister message for
 * a nexthop cache entry.
 *
 * bnc     - entry whose prefix and VRF (bnc->bgp->vrf_id) are encoded.
 * command - ZEBRA_NEXTHOP_REGISTER, ZEBRA_IMPORT_ROUTE_REGISTER,
 *           ZEBRA_NEXTHOP_UNREGISTER or ZEBRA_IMPORT_ROUTE_UNREGISTER.
 *
 * Nothing is attempted without a zclient with a valid socket, or when the
 * instance is not known to Zebra.  After the header the message carries
 * the prefix family (word), prefix length (byte) and the address; the
 * length at offset 0 is patched in once the message is complete.
 * A send failure is only logged.  BGP_NEXTHOP_REGISTERED is set for the
 * register commands and cleared for the unregister commands.
 * NOTE(review): what is written for entries flagged BGP_NEXTHOP_CONNECTED
 * or BGP_STATIC_ROUTE_EXACT_MATCH is not visible here -- confirm.
 */
540 sendmsg_zebra_rnh (struct bgp_nexthop_cache
*bnc
, int command
)
547 if (!zclient
|| zclient
->sock
< 0)
550 /* Don't try to register if Zebra doesn't know of this instance. */
551 if (!IS_BGP_INST_KNOWN_TO_ZEBRA(bnc
->bgp
))
557 zclient_create_header (s
, command
, bnc
->bgp
->vrf_id
);
558 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_CONNECTED
) ||
559 CHECK_FLAG(bnc
->flags
, BGP_STATIC_ROUTE_EXACT_MATCH
))
564 stream_putw(s
, PREFIX_FAMILY(p
));
565 stream_putc(s
, p
->prefixlen
);
566 switch (PREFIX_FAMILY(p
))
569 stream_put_in_addr (s
, &p
->u
.prefix4
);
573 stream_put(s
, &(p
->u
.prefix6
), 16);
/* Patch the final message length into the start of the stream. */
579 stream_putw_at (s
, 0, stream_get_endp (s
));
581 ret
= zclient_send_message(zclient
);
582 /* TBD: handle the failure */
584 zlog_warn("sendmsg_nexthop: zclient_send_message() failed");
/* Mirror the command in the entry's registration flag. */
586 if ((command
== ZEBRA_NEXTHOP_REGISTER
) ||
587 (command
== ZEBRA_IMPORT_ROUTE_REGISTER
))
588 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
589 else if ((command
== ZEBRA_NEXTHOP_UNREGISTER
) ||
590 (command
== ZEBRA_IMPORT_ROUTE_UNREGISTER
))
591 UNSET_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
);
596 * register_zebra_rnh - register a NH/route with Zebra for notification
597 * when the route or the route to the nexthop changes.
599 * struct bgp_nexthop_cache *bnc
/*
 * register_zebra_rnh - ask Zebra to track a nexthop or import route.
 *
 * bnc                 - nexthop cache entry to register.
 * is_bgp_import_route - non-zero selects ZEBRA_IMPORT_ROUTE_REGISTER,
 *                       otherwise ZEBRA_NEXTHOP_REGISTER is sent
 *                       (presumably as the else branch -- confirm).
 *
 * The BGP_NEXTHOP_REGISTERED flag is tested first so that an entry that
 * is already registered is presumably left alone.
 */
604 register_zebra_rnh (struct bgp_nexthop_cache
*bnc
, int is_bgp_import_route
)
606 /* Check if we have already registered */
607 if (bnc
->flags
& BGP_NEXTHOP_REGISTERED
)
609 if (is_bgp_import_route
)
610 sendmsg_zebra_rnh(bnc
, ZEBRA_IMPORT_ROUTE_REGISTER
);
612 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_REGISTER
);
616 * unregister_zebra_rnh -- Unregister the route/nexthop from Zebra.
618 * struct bgp_nexthop_cache *bnc
/*
 * unregister_zebra_rnh - ask Zebra to stop tracking a nexthop or import
 * route.
 *
 * bnc                 - nexthop cache entry to unregister.
 * is_bgp_import_route - non-zero selects ZEBRA_IMPORT_ROUTE_UNREGISTER,
 *                       otherwise ZEBRA_NEXTHOP_UNREGISTER is sent
 *                       (presumably as the else branch -- confirm).
 *
 * The BGP_NEXTHOP_REGISTERED flag is tested first; an entry that is not
 * registered presumably needs no message.
 */
623 unregister_zebra_rnh (struct bgp_nexthop_cache
*bnc
, int is_bgp_import_route
)
625 /* Nothing to unregister unless we are currently registered */
626 if (!CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_REGISTERED
))
629 if (is_bgp_import_route
)
630 sendmsg_zebra_rnh(bnc
, ZEBRA_IMPORT_ROUTE_UNREGISTER
);
632 sendmsg_zebra_rnh(bnc
, ZEBRA_NEXTHOP_UNREGISTER
);
636 * evaluate_paths - Evaluate the paths/nets associated with a nexthop.
638 * struct bgp_nexthop_cache *bnc -- the nexthop structure.
/*
 * evaluate_paths - re-evaluate every path linked to a nexthop cache entry
 * after the entry's state changed.
 *
 * bnc - the nexthop cache entry; its bgp instance and nht_info (peer) are
 *       used.
 *
 * Paths that are not ZEBRA_ROUTE_BGP with sub-type BGP_ROUTE_NORMAL or
 * BGP_ROUTE_STATIC are presumably skipped.  For the others:
 *   - BGP_INFO_VALID is brought in line with BGP_NEXTHOP_VALID, with the
 *     aggregate counters adjusted around the flag change;
 *   - the entry's metric becomes the path's igpmetric when the entry is
 *     valid and the metric is non-zero, otherwise an existing igpmetric
 *     is reset to 0;
 *   - BGP_INFO_IGP_CHANGED is set when the metric or nexthops changed;
 *   - the route node is handed to bgp_process() for SAFI_UNICAST.
 * A peer stored in nht_info is informed through bgp_fsm_nht_update()
 * unless BGP_NEXTHOP_PEER_NOTIFIED is already set.  change_flags is reset
 * at the end.
 */
643 evaluate_paths (struct bgp_nexthop_cache
*bnc
)
646 struct bgp_info
*path
;
647 struct bgp
*bgp
= bnc
->bgp
;
649 struct peer
*peer
= (struct peer
*)bnc
->nht_info
;
651 if (BGP_DEBUG(nht
, NHT
))
653 char buf
[PREFIX2STR_BUFFER
];
654 bnc_str(bnc
, buf
, PREFIX2STR_BUFFER
);
655 zlog_debug("NH update for %s - flags 0x%x chgflags 0x%x - evaluate paths",
656 buf
, bnc
->flags
, bnc
->change_flags
);
659 LIST_FOREACH(path
, &(bnc
->paths
), nh_thread
)
661 if (!(path
->type
== ZEBRA_ROUTE_BGP
&&
662 ((path
->sub_type
== BGP_ROUTE_NORMAL
) ||
663 (path
->sub_type
== BGP_ROUTE_STATIC
))))
667 afi
= family2afi(rn
->p
.family
);
669 /* Path becomes valid/invalid depending on whether the nexthop
670 * reachable/unreachable.
672 if ((CHECK_FLAG(path
->flags
, BGP_INFO_VALID
) ? 1 : 0) !=
673 (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
) ? 1 : 0))
675 if (CHECK_FLAG (path
->flags
, BGP_INFO_VALID
))
677 bgp_aggregate_decrement (bgp
, &rn
->p
, path
,
679 bgp_info_unset_flag (rn
, path
, BGP_INFO_VALID
);
683 bgp_info_set_flag (rn
, path
, BGP_INFO_VALID
);
684 bgp_aggregate_increment (bgp
, &rn
->p
, path
,
689 /* Copy the metric to the path. Will be used for bestpath computation */
690 if (CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
) && bnc
->metric
)
691 (bgp_info_extra_get(path
))->igpmetric
= bnc
->metric
;
692 else if (path
->extra
)
693 path
->extra
->igpmetric
= 0;
695 if (CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_METRIC_CHANGED
) ||
696 CHECK_FLAG(bnc
->change_flags
, BGP_NEXTHOP_CHANGED
))
697 SET_FLAG(path
->flags
, BGP_INFO_IGP_CHANGED
);
699 bgp_process(bgp
, rn
, afi
, SAFI_UNICAST
);
/* Tell the tracked peer about the new reachability state only once. */
702 if (peer
&& !CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
))
704 if (BGP_DEBUG(nht
, NHT
))
705 zlog_debug("%s: Updating peer (%s) status with NHT", __FUNCTION__
, peer
->host
);
706 bgp_fsm_nht_update(peer
, CHECK_FLAG(bnc
->flags
, BGP_NEXTHOP_VALID
));
707 SET_FLAG(bnc
->flags
, BGP_NEXTHOP_PEER_NOTIFIED
);
710 RESET_FLAG(bnc
->change_flags
);
714 * path_nh_map - make or break path-to-nexthop association.
716 * path - pointer to the path structure
717 * bnc - pointer to the nexthop structure
718 * make - if set, make the association. if unset, just break the existing
/*
 * path_nh_map - make or break the association between a path and a
 * nexthop cache entry.
 *
 * path - the path to (un)link.
 * bnc  - the nexthop cache entry whose path list receives the path.
 * make - callers pass 1 to link the path to bnc and 0 (with bnc NULL) to
 *        only break the existing association.
 *
 * Breaking removes the path from its current entry's list, decrements
 * that entry's path_count and clears path->nexthop.  Making inserts the
 * path at the head of bnc->paths and increments the path_count of the
 * entry reached through path->nexthop.
 * NOTE(review): the conditions guarding the two halves and the assignment
 * of path->nexthop on the "make" side are not visible here -- confirm.
 */
722 path_nh_map (struct bgp_info
*path
, struct bgp_nexthop_cache
*bnc
, int make
)
726 LIST_REMOVE(path
, nh_thread
);
727 path
->nexthop
->path_count
--;
728 path
->nexthop
= NULL
;
732 LIST_INSERT_HEAD(&(bnc
->paths
), path
, nh_thread
);
734 path
->nexthop
->path_count
++;