1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Copyright 2009-2016, LabN Consulting, L.L.C.
8 * Purpose: Handle import of routes from BGP to RFAPI
11 #include "lib/zebra.h"
12 #include "lib/prefix.h"
13 #include "lib/agg_table.h"
15 #include "lib/memory.h"
17 #include "lib/skiplist.h"
18 #include "lib/thread.h"
19 #include "lib/stream.h"
20 #include "lib/lib_errors.h"
22 #include "bgpd/bgpd.h"
23 #include "bgpd/bgp_ecommunity.h"
24 #include "bgpd/bgp_attr.h"
25 #include "bgpd/bgp_route.h"
26 #include "bgpd/bgp_mplsvpn.h" /* prefix_rd2str() */
27 #include "bgpd/bgp_vnc_types.h"
28 #include "bgpd/bgp_rd.h"
30 #include "bgpd/rfapi/rfapi.h"
31 #include "bgpd/rfapi/bgp_rfapi_cfg.h"
32 #include "bgpd/rfapi/rfapi_backend.h"
33 #include "bgpd/rfapi/rfapi_import.h"
34 #include "bgpd/rfapi/rfapi_private.h"
35 #include "bgpd/rfapi/rfapi_monitor.h"
36 #include "bgpd/rfapi/rfapi_nve_addr.h"
37 #include "bgpd/rfapi/rfapi_vty.h"
38 #include "bgpd/rfapi/vnc_export_bgp.h"
39 #include "bgpd/rfapi/vnc_export_bgp_p.h"
40 #include "bgpd/rfapi/vnc_zebra.h"
41 #include "bgpd/rfapi/vnc_import_bgp.h"
42 #include "bgpd/rfapi/vnc_import_bgp_p.h"
43 #include "bgpd/rfapi/rfapi_rib.h"
44 #include "bgpd/rfapi/rfapi_encap_tlv.h"
45 #include "bgpd/rfapi/vnc_debug.h"
47 #ifdef HAVE_GLIBC_BACKTRACE
48 /* for backtrace and friends */
50 #endif /* HAVE_GLIBC_BACKTRACE */
52 #undef DEBUG_MONITOR_MOVE_SHORTER
53 #undef DEBUG_RETURNED_NHL
54 #undef DEBUG_ROUTE_COUNTERS
55 #undef DEBUG_ENCAP_MONITOR
58 #undef DEBUG_BI_SEARCH
61 * Allocated for each withdraw timer instance; freed when the timer
62 * expires or is canceled
64 struct rfapi_withdraw
{
65 struct rfapi_import_table
*import_table
;
66 struct agg_node
*node
;
67 struct bgp_path_info
*info
;
68 safi_t safi
; /* used only for bulk operations */
70 * For import table node reference count checking (i.e., debugging).
71 * Normally when a timer expires, lockoffset should be 0. However, if
72 * the timer expiration function is called directly (e.g.,
73 * rfapiExpireVpnNow), the node could be locked by a preceding
74 * agg_route_top() or agg_route_next() in a loop, so we need to pass
82 * It's evil and fiendish. It's compiler-dependent.
83 * ? Might need LDFLAGS -rdynamic to produce all function names
/*
 * Debug helper: log the current call stack through
 * vnc_zlog_debug_verbose(), one line per captured frame. The capture code
 * below is conditional on HAVE_GLIBC_BACKTRACE.
 */
85 void rfapiDebugBacktrace(void)
87 #ifdef HAVE_GLIBC_BACKTRACE
/* Upper bound on the number of frames captured into buf. */
88 #define RFAPI_DEBUG_BACKTRACE_NENTRIES 200
89 void *buf
[RFAPI_DEBUG_BACKTRACE_NENTRIES
];
/* backtrace() fills buf and returns how many entries it stored. */
94 size
= backtrace(buf
, RFAPI_DEBUG_BACKTRACE_NENTRIES
);
/*
 * NOTE(review): backtrace_symbols() returns a malloc'd array, or NULL on
 * failure. Neither a NULL check nor a matching free(syms) is visible in
 * this excerpt -- confirm both exist in the full function.
 */
95 syms
= backtrace_symbols(buf
, size
);
/*
 * The loop is bounded by both the returned frame count and the buffer
 * capacity; the "%2zu" conversion expects i to be a size_t.
 */
97 for (i
= 0; i
< size
&& i
< RFAPI_DEBUG_BACKTRACE_NENTRIES
; ++i
) {
98 vnc_zlog_debug_verbose("backtrace[%2zu]: %s", i
, syms
[i
]);
108 * Count remote routes and compare with actively-maintained values.
109 * Abort if they disagree.
111 void rfapiCheckRouteCount(void)
113 struct bgp
*bgp
= bgp_get_default();
115 struct rfapi_import_table
*it
;
123 for (it
= h
->imports
; it
; it
= it
->next
) {
124 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
126 struct agg_table
*rt
;
129 int holddown_count
= 0;
131 int imported_count
= 0;
132 int remote_count
= 0;
134 rt
= it
->imported_vpn
[afi
];
136 for (rn
= agg_route_top(rt
); rn
;
137 rn
= agg_route_next(rn
)) {
138 struct bgp_path_info
*bpi
;
139 struct bgp_path_info
*next
;
141 for (bpi
= rn
->info
; bpi
; bpi
= next
) {
144 if (CHECK_FLAG(bpi
->flags
,
149 if (RFAPI_LOCAL_BI(bpi
)) {
152 if (RFAPI_DIRECT_IMPORT_BI(
163 if (it
->holddown_count
[afi
] != holddown_count
) {
164 vnc_zlog_debug_verbose(
165 "%s: it->holddown_count %d != holddown_count %d",
166 __func__
, it
->holddown_count
[afi
],
170 if (it
->remote_count
[afi
] != remote_count
) {
171 vnc_zlog_debug_verbose(
172 "%s: it->remote_count %d != remote_count %d",
173 __func__
, it
->remote_count
[afi
],
177 if (it
->imported_count
[afi
] != imported_count
) {
178 vnc_zlog_debug_verbose(
179 "%s: it->imported_count %d != imported_count %d",
180 __func__
, it
->imported_count
[afi
],
188 #ifdef DEBUG_ROUTE_COUNTERS
189 #define VNC_ITRCCK do {rfapiCheckRouteCount();} while (0)
195 * Validate reference count for a node in an import table
197 * Normally lockoffset is 0 for nodes in quiescent state. However,
198 * agg_unlock_node will delete the node if it is called when
199 * node->lock == 1, and we have to validate the refcount before
200 * the node is deleted. In this case, we specify lockoffset 1.
/*
 * Cross-check an import-table node's lock count against the references
 * that can be counted from the node itself: the path infos chained off
 * rn->info plus the monitor-related references, plus the caller-supplied
 * lockoffset. A mismatch is reported through vnc_zlog_debug_verbose().
 *
 * rn         node to validate
 * safi       selects which monitor structures are counted
 * lockoffset extra locks the caller knows are currently held on rn
 */
202 void rfapiCheckRefcount(struct agg_node
*rn
, safi_t safi
, int lockoffset
)
204 unsigned int count_bpi
= 0;
205 unsigned int count_monitor
= 0;
206 struct bgp_path_info
*bpi
;
207 struct rfapi_monitor_encap
*hme
;
208 struct rfapi_monitor_vpn
*hmv
;
/* Walk the chain of path infos attached to the node. */
210 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
)
215 ++count_monitor
; /* rfapi_it_extra */
/* Encap monitors attached to the node. */
222 for (hme
= RFAPI_MONITOR_ENCAP(rn
); hme
;
/* VPN monitors attached to the node. */
229 for (hmv
= RFAPI_MONITOR_VPN(rn
); hmv
; hmv
= hmv
->next
)
/*
 * The exterior-monitor skiplist, when present, is counted once for the
 * list itself and once more per entry.
 */
232 if (RFAPI_MONITOR_EXTERIOR(rn
)->source
) {
233 ++count_monitor
; /* sl */
235 for (rc
= skiplist_next(
236 RFAPI_MONITOR_EXTERIOR(rn
)->source
,
237 NULL
, NULL
, &cursor
);
240 RFAPI_MONITOR_EXTERIOR(rn
)->source
,
241 NULL
, NULL
, &cursor
)) {
243 ++count_monitor
; /* sl entry */
252 case SAFI_LABELED_UNICAST
:
255 assert(!"Passed in safi should be impossible");
/*
 * The two counters are unsigned, so lockoffset is converted to unsigned
 * for this sum before it is compared with the node's lock count.
 */
259 if (count_bpi
+ count_monitor
+ lockoffset
260 != agg_node_get_lock_count(rn
)) {
261 vnc_zlog_debug_verbose(
/* count_bpi and count_monitor are unsigned int, hence %u rather than %d. */
262 "%s: count_bpi=%u, count_monitor=%u, lockoffset=%d, rn->lock=%d",
263 __func__
, count_bpi
, count_monitor
, lockoffset
,
264 agg_node_get_lock_count(rn
));
270 * Perform deferred rfapi_close operations that were queued
273 static wq_item_status
rfapi_deferred_close_workfunc(struct work_queue
*q
,
276 struct rfapi_descriptor
*rfd
= data
;
277 struct rfapi
*h
= q
->spec
.data
;
279 assert(!(h
->flags
& RFAPI_INCALLBACK
));
281 vnc_zlog_debug_verbose("%s: completed deferred close on handle %p",
287 * Extract layer 2 option from Encap TLVS in BGP attrs
/*
 * Scan the VNC sub-TLVs of attr for an RFP option carrying a layer 2
 * address and copy its fields into *l2o.
 */
289 int rfapiGetL2o(struct attr
*attr
, struct rfapi_l2address_option
*l2o
)
292 struct bgp_attr_encap_subtlv
*pEncap
;
/* Walk every VNC sub-TLV attached to the attribute. */
294 for (pEncap
= bgp_attr_get_vnc_subtlvs(attr
); pEncap
;
295 pEncap
= pEncap
->next
) {
297 if (pEncap
->type
== BGP_VNC_SUBTLV_TYPE_RFPOPTION
) {
299 == RFAPI_VN_OPTION_TYPE_L2ADDR
) {
/*
 * value[1] is presumably the embedded option's own length byte; only the
 * 14-byte form is decoded -- TODO confirm against the encoder.
 *
 * NOTE(review): the decode below reads fixed offsets of pEncap->value (at
 * least up to index 14), and no comparison against the sub-TLV's length
 * field is visible in this excerpt. Confirm that a short sub-TLV cannot
 * cause a read past the end of value.
 */
301 if (pEncap
->value
[1] == 14) {
302 memcpy(l2o
->macaddr
.octet
,
/* The logical network ID is assembled from separate bytes of value. */
319 l2o
->logical_net_id
=
322 + ((pEncap
->value
[14]
325 + ((pEncap
->value
[13]
340 * Extract the lifetime from the Tunnel Encap attribute of a route in
/*
 * Read the route lifetime from the VNC sub-TLVs of attr into *lifetime.
 * *lifetime is preset to RFAPI_INFINITE_LIFETIME, so it holds that value
 * whenever no usable lifetime sub-TLV is found.
 */
343 int rfapiGetVncLifetime(struct attr
*attr
, uint32_t *lifetime
)
345 struct bgp_attr_encap_subtlv
*pEncap
;
347 *lifetime
= RFAPI_INFINITE_LIFETIME
; /* default to infinite */
351 for (pEncap
= bgp_attr_get_vnc_subtlvs(attr
); pEncap
;
352 pEncap
= pEncap
->next
) {
355 == BGP_VNC_SUBTLV_TYPE_LIFETIME
) { /* lifetime */
/* Only a value of exactly 4 bytes is accepted. */
356 if (pEncap
->length
== 4) {
/*
 * memcpy avoids an unaligned 32-bit load from the byte buffer; ntohl then
 * converts the value from network to host byte order.
 */
357 memcpy(lifetime
, pEncap
->value
, 4);
358 *lifetime
= ntohl(*lifetime
);
369 * Look for UN address in Encap attribute
371 int rfapiGetVncTunnelUnAddr(struct attr
*attr
, struct prefix
*p
)
373 struct bgp_attr_encap_subtlv
*pEncap
;
374 bgp_encap_types tun_type
= BGP_ENCAP_TYPE_MPLS
;/*Default tunnel type*/
376 bgp_attr_extcom_tunnel_type(attr
, &tun_type
);
377 if (tun_type
== BGP_ENCAP_TYPE_MPLS
) {
380 /* MPLS carries UN address in next hop */
381 rfapiNexthop2Prefix(attr
, p
);
382 if (p
->family
!= AF_UNSPEC
)
388 for (pEncap
= attr
->encap_subtlvs
; pEncap
;
389 pEncap
= pEncap
->next
) {
392 == BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT
) { /* un
395 switch (pEncap
->length
) {
399 p
->prefixlen
= IPV4_MAX_BITLEN
;
400 memcpy(p
->u
.val
, pEncap
->value
,
407 p
->family
= AF_INET6
;
408 p
->prefixlen
= IPV6_MAX_BITLEN
;
409 memcpy(p
->u
.val
, pEncap
->value
,
422 * Get UN address wherever it might be
424 int rfapiGetUnAddrOfVpnBi(struct bgp_path_info
*bpi
, struct prefix
*p
)
426 /* If it's in this route's VNC attribute, we're done */
427 if (!rfapiGetVncTunnelUnAddr(bpi
->attr
, p
))
430 * Otherwise, see if it's cached from a corresponding ENCAP SAFI
434 switch (bpi
->extra
->vnc
.import
.un_family
) {
437 p
->family
= bpi
->extra
->vnc
.import
.un_family
;
438 p
->u
.prefix4
= bpi
->extra
->vnc
.import
.un
.addr4
;
439 p
->prefixlen
= IPV4_MAX_BITLEN
;
444 p
->family
= bpi
->extra
->vnc
.import
.un_family
;
445 p
->u
.prefix6
= bpi
->extra
->vnc
.import
.un
.addr6
;
446 p
->prefixlen
= IPV6_MAX_BITLEN
;
451 p
->family
= AF_UNSPEC
;
452 #ifdef DEBUG_ENCAP_MONITOR
453 vnc_zlog_debug_verbose(
454 "%s: bpi->extra->vnc.import.un_family is 0, no UN addr",
466 * Make a new bgp_path_info from gathered parameters
468 static struct bgp_path_info
*rfapiBgpInfoCreate(struct attr
*attr
,
469 struct peer
*peer
, void *rfd
,
470 struct prefix_rd
*prd
,
471 uint8_t type
, uint8_t sub_type
,
474 struct bgp_path_info
*new;
476 new = info_make(type
, sub_type
, 0, peer
, attr
, NULL
);
478 new->attr
= bgp_attr_intern(attr
);
480 bgp_path_info_extra_get(new);
482 new->extra
->vnc
.import
.rd
= *prd
;
483 new->extra
->vnc
.import
.create_time
= monotime(NULL
);
486 encode_label(*label
, &new->extra
->label
[0]);
494 * Frees bgp_path_info as used in import tables (parts are not
495 * allocated exactly the way they are in the main RIBs)
/*
 * Release one import-table bgp_path_info: drop its peer reference, its
 * interned attribute and its extra block, then free the structure itself.
 * goner must not be used after this call.
 */
497 static void rfapiBgpInfoFree(struct bgp_path_info
*goner
)
503 vnc_zlog_debug_verbose("%s: calling peer_unlock(%p), #%d",
504 __func__
, goner
->peer
,
506 peer_unlock(goner
->peer
);
/* Takes the address of the field, so the callee can clear the pointer. */
509 bgp_attr_unintern(&goner
->attr
);
512 bgp_path_info_extra_free(&goner
->extra
);
/* Freed last: every step above still dereferences goner. */
513 XFREE(MTYPE_BGP_ROUTE
, goner
);
/*
 * Look up the L2 import table registered under logical network ID lni.
 * Unlike rfapiMacImportTableGet(), nothing is created here.
 */
516 struct rfapi_import_table
*rfapiMacImportTableGetNoAlloc(struct bgp
*bgp
,
520 struct rfapi_import_table
*it
= NULL
;
/* The LNI is widened to pointer size so it can be used as the skiplist key. */
521 uintptr_t lni_as_ptr
= lni
;
/* On a hit, skiplist_search() stores the table pointer into it. */
530 if (skiplist_search(h
->import_mac
, (void *)lni_as_ptr
, (void **)&it
))
/*
 * Look up the L2 import table for logical network ID lni, creating and
 * registering it on first use. The h->import_mac skiplist is itself
 * created lazily.
 */
536 struct rfapi_import_table
*rfapiMacImportTableGet(struct bgp
*bgp
, uint32_t lni
)
539 struct rfapi_import_table
*it
= NULL
;
/* The LNI is widened to pointer size so it can be used as the skiplist key. */
540 uintptr_t lni_as_ptr
= lni
;
545 if (!h
->import_mac
) {
546 /* default cmp is good enough for LNI */
547 h
->import_mac
= skiplist_new(0, NULL
, NULL
);
/* A nonzero search result means no table exists yet for this LNI. */
550 if (skiplist_search(h
->import_mac
, (void *)lni_as_ptr
, (void **)&it
)) {
552 struct ecommunity
*enew
;
553 struct ecommunity_val eval
;
556 it
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
557 sizeof(struct rfapi_import_table
));
558 /* set RT list of new import table based on LNI */
/*
 * The extended community value is zeroed, tagged with bytes 0x00 0x02, and
 * carries the low 24 bits of the LNI in val[5..7]; bits 24-31 of lni are
 * not encoded.
 */
559 memset((char *)&eval
, 0, sizeof(eval
));
560 eval
.val
[0] = 0; /* VNC L2VPN */
561 eval
.val
[1] = 2; /* VNC L2VPN */
562 eval
.val
[5] = (lni
>> 16) & 0xff;
563 eval
.val
[6] = (lni
>> 8) & 0xff;
564 eval
.val
[7] = (lni
>> 0) & 0xff;
/* The new table's import RT list holds just that one value. */
566 enew
= ecommunity_new();
567 ecommunity_add_val(enew
, &eval
, false, false);
568 it
->rt_import_list
= enew
;
/* One VPN table and one ENCAP table per address family. */
570 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
571 it
->imported_vpn
[afi
] = agg_table_init();
572 it
->imported_encap
[afi
] = agg_table_init();
575 it
->l2_logical_net_id
= lni
;
/* Register the table so later lookups for this LNI find it. */
577 skiplist_insert(h
->import_mac
, (void *)lni_as_ptr
, it
);
585 * Implement MONITOR_MOVE_SHORTER(original_node) from
586 * RFAPI-Import-Event-Handling.txt
588 * Returns pointer to the list of moved monitors
590 static struct rfapi_monitor_vpn
*
591 rfapiMonitorMoveShorter(struct agg_node
*original_vpn_node
, int lockoffset
)
593 struct bgp_path_info
*bpi
;
594 struct agg_node
*par
;
595 struct rfapi_monitor_vpn
*m
;
596 struct rfapi_monitor_vpn
*mlast
;
597 struct rfapi_monitor_vpn
*moved
;
599 int parent_already_refcounted
= 0;
601 RFAPI_CHECK_REFCOUNT(original_vpn_node
, SAFI_MPLS_VPN
, lockoffset
);
603 #ifdef DEBUG_MONITOR_MOVE_SHORTER
605 vnc_zlog_debug_verbose("%s: called with node pfx=%pFX",
606 __func__
, &original_vpn_node
->p
);
611 * 1. If there is at least one bpi (either regular route or
612 * route marked as withdrawn, with a pending timer) at
613 * original_node with a valid UN address, we're done. Return.
615 for (bpi
= original_vpn_node
->info
; bpi
; bpi
= bpi
->next
) {
618 if (!rfapiGetUnAddrOfVpnBi(bpi
, &pfx
)) {
619 #ifdef DEBUG_MONITOR_MOVE_SHORTER
620 vnc_zlog_debug_verbose(
621 "%s: have valid UN at original node, no change",
629 * 2. Travel up the tree (toward less-specific prefixes) from
630 * original_node to find the first node that has at least
631 * one route (even if it is only a withdrawn route) with a
632 * valid UN address. Call this node "Node P."
634 for (par
= agg_node_parent(original_vpn_node
); par
;
635 par
= agg_node_parent(par
)) {
636 for (bpi
= par
->info
; bpi
; bpi
= bpi
->next
) {
638 if (!rfapiGetUnAddrOfVpnBi(bpi
, &pfx
)) {
647 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
, 0);
651 * If no less-specific routes, try to use the 0/0 node
654 const struct prefix
*p
;
655 /* this isn't necessarily 0/0 */
656 par
= agg_route_table_top(original_vpn_node
);
659 p
= agg_node_get_prefix(par
);
661 * If we got the top node but it wasn't 0/0,
664 if (par
&& p
->prefixlen
) {
665 agg_unlock_node(par
); /* maybe free */
670 ++parent_already_refcounted
;
675 * Create 0/0 node if it isn't there
678 struct prefix pfx_default
;
679 const struct prefix
*p
= agg_node_get_prefix(original_vpn_node
);
681 memset(&pfx_default
, 0, sizeof(pfx_default
));
682 pfx_default
.family
= p
->family
;
684 /* creates default node if none exists */
685 par
= agg_node_get(agg_get_table(original_vpn_node
),
687 ++parent_already_refcounted
;
691 * 3. Move each of the monitors found at original_node to Node P.
692 * These are "Moved Monitors."
697 * Attach at end so that the list pointer we return points
698 * only to the moved routes
700 for (m
= RFAPI_MONITOR_VPN(par
), mlast
= NULL
; m
;
701 mlast
= m
, m
= m
->next
)
705 moved
= mlast
->next
= RFAPI_MONITOR_VPN(original_vpn_node
);
707 moved
= RFAPI_MONITOR_VPN_W_ALLOC(par
) =
708 RFAPI_MONITOR_VPN(original_vpn_node
);
710 if (RFAPI_MONITOR_VPN(
711 original_vpn_node
)) /* check agg, so not allocated */
712 RFAPI_MONITOR_VPN_W_ALLOC(original_vpn_node
) = NULL
;
715 * update the node pointers on the monitors
717 for (m
= moved
; m
; m
= m
->next
) {
722 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
,
723 parent_already_refcounted
- movecount
);
724 while (movecount
> parent_already_refcounted
) {
726 ++parent_already_refcounted
;
728 while (movecount
< parent_already_refcounted
) {
729 /* unlikely, but code defensively */
730 agg_unlock_node(par
);
731 --parent_already_refcounted
;
733 RFAPI_CHECK_REFCOUNT(original_vpn_node
, SAFI_MPLS_VPN
,
734 movecount
+ lockoffset
);
735 while (movecount
--) {
736 agg_unlock_node(original_vpn_node
);
739 #ifdef DEBUG_MONITOR_MOVE_SHORTER
741 vnc_zlog_debug_verbose("%s: moved to node pfx=%pFX", __func__
,
751 * Implement MONITOR_MOVE_LONGER(new_node) from
752 * RFAPI-Import-Event-Handling.txt
754 static void rfapiMonitorMoveLonger(struct agg_node
*new_vpn_node
)
756 struct rfapi_monitor_vpn
*monitor
;
757 struct rfapi_monitor_vpn
*mlast
;
758 struct bgp_path_info
*bpi
;
759 struct agg_node
*par
;
760 const struct prefix
*new_vpn_node_p
= agg_node_get_prefix(new_vpn_node
);
762 RFAPI_CHECK_REFCOUNT(new_vpn_node
, SAFI_MPLS_VPN
, 0);
765 * Make sure we have at least one valid route at the new node
767 for (bpi
= new_vpn_node
->info
; bpi
; bpi
= bpi
->next
) {
769 if (!rfapiGetUnAddrOfVpnBi(bpi
, &pfx
))
774 vnc_zlog_debug_verbose(
775 "%s: no valid routes at node %p, so not attempting moves",
776 __func__
, new_vpn_node
);
781 * Find first parent node that has monitors
783 for (par
= agg_node_parent(new_vpn_node
); par
;
784 par
= agg_node_parent(par
)) {
785 if (RFAPI_MONITOR_VPN(par
))
790 vnc_zlog_debug_verbose(
791 "%s: no parent nodes with monitors, done", __func__
);
796 * Check each of these monitors to see of their longest-match
797 * is now the updated node. Move any such monitors to the more-
798 * specific updated node
800 for (mlast
= NULL
, monitor
= RFAPI_MONITOR_VPN(par
); monitor
;) {
802 * If new longest match for monitor prefix is the new
803 * route's prefix, move monitor to new route's prefix
805 if (prefix_match(new_vpn_node_p
, &monitor
->p
)) {
808 mlast
->next
= monitor
->next
;
810 RFAPI_MONITOR_VPN_W_ALLOC(par
) = monitor
->next
;
815 monitor
->next
= RFAPI_MONITOR_VPN(new_vpn_node
);
816 RFAPI_MONITOR_VPN_W_ALLOC(new_vpn_node
) = monitor
;
817 monitor
->node
= new_vpn_node
;
819 agg_lock_node(new_vpn_node
); /* incr refcount */
821 monitor
= mlast
? mlast
->next
: RFAPI_MONITOR_VPN(par
);
823 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
, 1);
824 /* decr refcount after we're done with par as this might
826 agg_unlock_node(par
);
831 monitor
= monitor
->next
;
834 RFAPI_CHECK_REFCOUNT(new_vpn_node
, SAFI_MPLS_VPN
, 0);
/*
 * Free path infos starting at bpi. Any pending withdraw timer on an entry
 * is cancelled and its control block released before the entry is freed.
 */
838 static void rfapiBgpInfoChainFree(struct bgp_path_info
*bpi
)
840 struct bgp_path_info
*next
;
845 * If there is a timer waiting to delete this bpi, cancel
846 * the timer and delete immediately
/*
 * NOTE(review): bpi->extra is dereferenced here with no NULL check visible
 * in this excerpt; confirm that entries flagged BGP_PATH_REMOVED always
 * have an extra block.
 */
848 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)
849 && bpi
->extra
->vnc
.import
.timer
) {
/* The control block is fetched from the timer before the timer is cancelled. */
850 struct rfapi_withdraw
*wcb
=
851 THREAD_ARG(bpi
->extra
->vnc
.import
.timer
);
853 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
854 THREAD_OFF(bpi
->extra
->vnc
.import
.timer
);
859 rfapiBgpInfoFree(bpi
);
864 static void rfapiImportTableFlush(struct rfapi_import_table
*it
)
871 ecommunity_free(&it
->rt_import_list
);
872 it
->rt_import_list
= NULL
;
874 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
878 for (rn
= agg_route_top(it
->imported_vpn
[afi
]); rn
;
879 rn
= agg_route_next(rn
)) {
881 * Each route_node has:
882 * aggregate: points to rfapi_it_extra with monitor
884 * info: points to chain of bgp_path_info
886 /* free bgp_path_info and its children */
887 rfapiBgpInfoChainFree(rn
->info
);
890 rfapiMonitorExtraFlush(SAFI_MPLS_VPN
, rn
);
893 for (rn
= agg_route_top(it
->imported_encap
[afi
]); rn
;
894 rn
= agg_route_next(rn
)) {
895 /* free bgp_path_info and its children */
896 rfapiBgpInfoChainFree(rn
->info
);
899 rfapiMonitorExtraFlush(SAFI_ENCAP
, rn
);
902 agg_table_finish(it
->imported_vpn
[afi
]);
903 agg_table_finish(it
->imported_encap
[afi
]);
905 if (it
->monitor_exterior_orphans
) {
906 skiplist_free(it
->monitor_exterior_orphans
);
910 void rfapiImportTableRefDelByIt(struct bgp
*bgp
,
911 struct rfapi_import_table
*it_target
)
914 struct rfapi_import_table
*it
;
915 struct rfapi_import_table
*prev
= NULL
;
922 for (it
= h
->imports
; it
; prev
= it
, it
= it
->next
) {
928 assert(it
->refcount
);
934 prev
->next
= it
->next
;
936 h
->imports
= it
->next
;
938 rfapiImportTableFlush(it
);
939 XFREE(MTYPE_RFAPI_IMPORTTABLE
, it
);
943 #ifdef RFAPI_REQUIRE_ENCAP_BEEC
945 * Look for magic BGP Encapsulation Extended Community value
946 * Format in RFC 5512 Sect. 4.5
/*
 * Search an extended community set for a BGP Encapsulation Extended
 * Community whose tunnel type equals type.
 *
 * ecom  extended community set to scan
 * type  tunnel type to look for
 */
948 static int rfapiEcommunitiesMatchBeec(struct ecommunity
*ecom
,
949 bgp_encap_types type
)
/* i is a byte offset into the packed value array, one entry per step. */
956 for (i
= 0; i
< (ecom
->size
* ECOMMUNITY_SIZE
); i
+= ECOMMUNITY_SIZE
) {
/*
 * Bytes 0 and 1 identify the community; bytes 6 and 7 are compared with
 * the tunnel type, high byte first. The high byte needs a bitwise AND: the
 * previous logical "&&" yielded 0 or 1, which shifted right by 8 is always
 * 0, so any type >= 0x100 was compared against a zero high byte.
 */
962 if (ep
[0] == ECOMMUNITY_ENCODE_OPAQUE
963 && ep
[1] == ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP
964 && ep
[6] == ((type
& 0xff00) >> 8)
965 && ep
[7] == (type
& 0xff)) {
/*
 * Test whether extended community sets e1 and e2 share at least one
 * entry, by comparing every entry of e1 against every entry of e2.
 */
974 int rfapiEcommunitiesIntersect(struct ecommunity
*e1
, struct ecommunity
*e2
)
/*
 * The display strings exist only for the debug line below and are freed
 * immediately afterwards.
 */
983 s1
= ecommunity_ecom2str(e1
, ECOMMUNITY_FORMAT_DISPLAY
, 0);
984 s2
= ecommunity_ecom2str(e2
, ECOMMUNITY_FORMAT_DISPLAY
, 0);
985 vnc_zlog_debug_verbose("%s: e1[%s], e2[%s]", __func__
, s1
, s2
);
986 XFREE(MTYPE_ECOMMUNITY_STR
, s1
);
987 XFREE(MTYPE_ECOMMUNITY_STR
, s2
);
/*
 * Pairwise scan, size(e1) * size(e2) comparisons. Entries are packed
 * back to back, ECOMMUNITY_SIZE bytes apart.
 */
990 for (i
= 0; i
< e1
->size
; ++i
) {
991 for (j
= 0; j
< e2
->size
; ++j
) {
992 if (!memcmp(e1
->val
+ (i
* ECOMMUNITY_SIZE
),
993 e2
->val
+ (j
* ECOMMUNITY_SIZE
),
/*
 * Find the extended community that encodes a logical network ID and
 * store the decoded ID in *lni.
 */
1003 int rfapiEcommunityGetLNI(struct ecommunity
*ecom
, uint32_t *lni
)
1008 for (i
= 0; i
< ecom
->size
; ++i
) {
/* p addresses entry i of the packed value array. */
1009 uint8_t *p
= ecom
->val
+ (i
* ECOMMUNITY_SIZE
);
/*
 * Bytes 0x00 0x02 are the same two leading bytes that
 * rfapiMacImportTableGet() writes when it builds the RT for an LNI.
 */
1011 if ((*(p
+ 0) == 0x00) && (*(p
+ 1) == 0x02)) {
/* The LNI is reassembled high byte first, starting at byte 5 of the entry. */
1013 *lni
= (*(p
+ 5) << 16) | (*(p
+ 6) << 8)
/*
 * Extract an Ethernet tag from the route-target extended communities in
 * ecom. *tag_id is preset to 0, which the code treats as "untagged". Only
 * a route target whose AS number equals the default BGP instance's AS
 * supplies the tag.
 */
1022 int rfapiEcommunityGetEthernetTag(struct ecommunity
*ecom
, uint16_t *tag_id
)
/*
 * NOTE(review): the result of bgp_get_default() is dereferenced further
 * down (bgp->as) and no NULL check is visible in this excerpt -- confirm
 * a default instance always exists when this is called.
 */
1024 struct bgp
*bgp
= bgp_get_default();
1025 *tag_id
= 0; /* default to untagged */
1029 for (i
= 0; i
< ecom
->size
; ++i
) {
/* p is a cursor that advances through the bytes of entry i. */
1032 const uint8_t *p
= ecom
->val
+ (i
* ECOMMUNITY_SIZE
);
1034 /* High-order octet of type. */
1037 if (*p
++ == ECOMMUNITY_ROUTE_TARGET
) {
/* The AS number is read according to the entry's encoding. */
1038 if (encode
== ECOMMUNITY_ENCODE_AS4
) {
1039 p
= ptr_get_be32(p
, &as
);
1040 } else if (encode
== ECOMMUNITY_ENCODE_AS
) {
1043 p
+= 2; /* skip next two, tag/vid
1044 always in lowest bytes */
/* Only a route target carrying our own AS contributes the tag. */
1046 if (as
== bgp
->as
) {
/* High byte of the 16-bit tag; the cursor moves on to the next byte. */
1047 *tag_id
= *p
++ << 8;
1057 static int rfapiVpnBiNhEqualsPt(struct bgp_path_info
*bpi
,
1058 struct rfapi_ip_addr
*hpt
)
1065 family
= BGP_MP_NEXTHOP_FAMILY(bpi
->attr
->mp_nexthop_len
);
1067 if (hpt
->addr_family
!= family
)
1072 if (bpi
->attr
->mp_nexthop_global_in
.s_addr
1073 != hpt
->addr
.v4
.s_addr
)
1078 if (IPV6_ADDR_CMP(&bpi
->attr
->mp_nexthop_global
, &hpt
->addr
.v6
))
1091 * Compare 2 VPN BIs. Return true if they have the same VN and UN addresses
1093 static int rfapiVpnBiSamePtUn(struct bgp_path_info
*bpi1
,
1094 struct bgp_path_info
*bpi2
)
1096 struct prefix pfx_un1
;
1097 struct prefix pfx_un2
;
1103 * VN address comparisons
1106 if (BGP_MP_NEXTHOP_FAMILY(bpi1
->attr
->mp_nexthop_len
)
1107 != BGP_MP_NEXTHOP_FAMILY(bpi2
->attr
->mp_nexthop_len
)) {
1111 switch (BGP_MP_NEXTHOP_FAMILY(bpi1
->attr
->mp_nexthop_len
)) {
1113 if (bpi1
->attr
->mp_nexthop_global_in
.s_addr
1114 != bpi2
->attr
->mp_nexthop_global_in
.s_addr
)
1119 if (IPV6_ADDR_CMP(&bpi1
->attr
->mp_nexthop_global
,
1120 &bpi2
->attr
->mp_nexthop_global
))
1128 memset(&pfx_un1
, 0, sizeof(pfx_un1
));
1129 memset(&pfx_un2
, 0, sizeof(pfx_un2
));
1132 * UN address comparisons
1134 if (rfapiGetVncTunnelUnAddr(bpi1
->attr
, &pfx_un1
)) {
1136 pfx_un1
.family
= bpi1
->extra
->vnc
.import
.un_family
;
1137 switch (bpi1
->extra
->vnc
.import
.un_family
) {
1140 bpi1
->extra
->vnc
.import
.un
.addr4
;
1144 bpi1
->extra
->vnc
.import
.un
.addr6
;
1147 pfx_un1
.family
= AF_UNSPEC
;
1153 if (rfapiGetVncTunnelUnAddr(bpi2
->attr
, &pfx_un2
)) {
1155 pfx_un2
.family
= bpi2
->extra
->vnc
.import
.un_family
;
1156 switch (bpi2
->extra
->vnc
.import
.un_family
) {
1159 bpi2
->extra
->vnc
.import
.un
.addr4
;
1163 bpi2
->extra
->vnc
.import
.un
.addr6
;
1166 pfx_un2
.family
= AF_UNSPEC
;
1172 if (pfx_un1
.family
== AF_UNSPEC
|| pfx_un2
.family
== AF_UNSPEC
)
1175 if (pfx_un1
.family
!= pfx_un2
.family
)
1178 switch (pfx_un1
.family
) {
1180 if (!IPV4_ADDR_SAME(&pfx_un1
.u
.prefix4
, &pfx_un2
.u
.prefix4
))
1184 if (!IPV6_ADDR_SAME(&pfx_un1
.u
.prefix6
, &pfx_un2
.u
.prefix6
))
/*
 * Derive an 8-bit RFP cost from the LOCAL_PREF attribute of attr: a
 * higher local preference yields a lower cost.
 */
1193 uint8_t rfapiRfpCost(struct attr
*attr
)
/* The conversion applies only when LOCAL_PREF is present on the attribute. */
1195 if (attr
->flag
& ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF
)) {
/* Preferences above 255 take their own branch, so the subtraction below stays within 0..255. */
1196 if (attr
->local_pref
> 255) {
1199 return 255 - attr
->local_pref
;
1205 /*------------------------------------------
1208 * Find Layer 2 options in an option chain
1214 * l2o layer 2 options extracted
1218 * 1 no options found
1220 --------------------------------------------*/
/*
 * Walk the option chain pHop and decode the first layer 2 address option
 * that passes the length test into *l2o.
 */
1221 int rfapi_extract_l2o(
1222 struct bgp_tea_options
*pHop
, /* chain of options */
1223 struct rfapi_l2address_option
*l2o
) /* return extracted value */
1225 struct bgp_tea_options
*p
;
1227 for (p
= pHop
; p
; p
= p
->next
) {
/*
 * NOTE(review): the guard accepts any option of 8 bytes or more, but the
 * decode below reads v[8], v[10] and v[11]..v[13]. If v points at the
 * option's value (its definition is not visible in this excerpt), an
 * option of 8 to 13 bytes is read past its end; the guard would need to
 * be length >= 14. Confirm against the encoder before changing it.
 */
1228 if ((p
->type
== RFAPI_VN_OPTION_TYPE_L2ADDR
)
1229 && (p
->length
>= 8)) {
/* Bytes 0..5: MAC address. */
1233 memcpy(&l2o
->macaddr
, v
, 6);
/* Bytes 6..8: 20-bit label, the low nibble of byte 8 is unused. */
1235 l2o
->label
= ((v
[6] << 12) & 0xff000)
1236 + ((v
[7] << 4) & 0xff0)
1237 + ((v
[8] >> 4) & 0xf);
1239 l2o
->local_nve_id
= (uint8_t)v
[10];
/*
 * NOTE(review): these shifts are not masked. The (uint8_t) cast applied
 * to v[10] above suggests v may point at plain char; if char is signed, a
 * byte >= 0x80 would sign-extend and corrupt the sum -- confirm the
 * element type of v.
 */
1241 l2o
->logical_net_id
=
1242 (v
[11] << 16) + (v
[12] << 8) + (v
[13] << 0);
1250 static struct rfapi_next_hop_entry
*
1251 rfapiRouteInfo2NextHopEntry(struct rfapi_ip_prefix
*rprefix
,
1252 struct bgp_path_info
*bpi
, /* route to encode */
1253 uint32_t lifetime
, /* use this in nhe */
1254 struct agg_node
*rn
) /* req for L2 eth addr */
1256 struct rfapi_next_hop_entry
*new;
1257 int have_vnc_tunnel_un
= 0;
1258 const struct prefix
*p
= agg_node_get_prefix(rn
);
1260 #ifdef DEBUG_ENCAP_MONITOR
1261 vnc_zlog_debug_verbose("%s: entry, bpi %p, rn %p", __func__
, bpi
, rn
);
1264 new = XCALLOC(MTYPE_RFAPI_NEXTHOP
, sizeof(struct rfapi_next_hop_entry
));
1266 new->prefix
= *rprefix
;
1269 && decode_rd_type(bpi
->extra
->vnc
.import
.rd
.val
)
1270 == RD_TYPE_VNC_ETH
) {
1273 struct rfapi_vn_option
*vo
;
1275 vo
= XCALLOC(MTYPE_RFAPI_VN_OPTION
,
1276 sizeof(struct rfapi_vn_option
));
1278 vo
->type
= RFAPI_VN_OPTION_TYPE_L2ADDR
;
1280 memcpy(&vo
->v
.l2addr
.macaddr
, &p
->u
.prefix_eth
.octet
, ETH_ALEN
);
1281 /* only low 3 bytes of this are significant */
1282 (void)rfapiEcommunityGetLNI(bgp_attr_get_ecommunity(bpi
->attr
),
1283 &vo
->v
.l2addr
.logical_net_id
);
1284 (void)rfapiEcommunityGetEthernetTag(
1285 bgp_attr_get_ecommunity(bpi
->attr
),
1286 &vo
->v
.l2addr
.tag_id
);
1288 /* local_nve_id comes from lower byte of RD type */
1289 vo
->v
.l2addr
.local_nve_id
= bpi
->extra
->vnc
.import
.rd
.val
[1];
1291 /* label comes from MP_REACH_NLRI label */
1292 vo
->v
.l2addr
.label
= decode_label(&bpi
->extra
->label
[0]);
1294 new->vn_options
= vo
;
1297 * If there is an auxiliary prefix (i.e., host IP address),
1298 * use it as the nexthop prefix instead of the query prefix
1300 if (bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
1301 rfapiQprefix2Rprefix(&bpi
->extra
->vnc
.import
.aux_prefix
,
1306 bgp_encap_types tun_type
= BGP_ENCAP_TYPE_MPLS
; /*Default*/
1307 new->prefix
.cost
= rfapiRfpCost(bpi
->attr
);
1309 struct bgp_attr_encap_subtlv
*pEncap
;
1311 switch (BGP_MP_NEXTHOP_FAMILY(bpi
->attr
->mp_nexthop_len
)) {
1313 new->vn_address
.addr_family
= AF_INET
;
1314 new->vn_address
.addr
.v4
= bpi
->attr
->mp_nexthop_global_in
;
1318 new->vn_address
.addr_family
= AF_INET6
;
1319 new->vn_address
.addr
.v6
= bpi
->attr
->mp_nexthop_global
;
1323 zlog_warn("%s: invalid vpn nexthop length: %d", __func__
,
1324 bpi
->attr
->mp_nexthop_len
);
1325 rfapi_free_next_hop_list(new);
1329 for (pEncap
= bgp_attr_get_vnc_subtlvs(bpi
->attr
); pEncap
;
1330 pEncap
= pEncap
->next
) {
1331 switch (pEncap
->type
) {
1332 case BGP_VNC_SUBTLV_TYPE_LIFETIME
:
1333 /* use configured lifetime, not attr lifetime */
1337 zlog_warn("%s: unknown VNC option type %d", __func__
,
1344 bgp_attr_extcom_tunnel_type(bpi
->attr
, &tun_type
);
1345 if (tun_type
== BGP_ENCAP_TYPE_MPLS
) {
1347 /* MPLS carries UN address in next hop */
1348 rfapiNexthop2Prefix(bpi
->attr
, &p
);
1349 if (p
.family
!= AF_UNSPEC
) {
1350 rfapiQprefix2Raddr(&p
, &new->un_address
);
1351 have_vnc_tunnel_un
= 1;
1355 for (pEncap
= bpi
->attr
->encap_subtlvs
; pEncap
; pEncap
= pEncap
->next
) {
1356 switch (pEncap
->type
) {
1357 case BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT
:
1359 * Overrides ENCAP UN address, if any
1361 switch (pEncap
->length
) {
1364 new->un_address
.addr_family
= AF_INET
;
1365 memcpy(&new->un_address
.addr
.v4
, pEncap
->value
,
1367 have_vnc_tunnel_un
= 1;
1371 new->un_address
.addr_family
= AF_INET6
;
1372 memcpy(&new->un_address
.addr
.v6
, pEncap
->value
,
1374 have_vnc_tunnel_un
= 1;
1379 "%s: invalid tunnel subtlv UN addr length (%d) for bpi %p",
1380 __func__
, pEncap
->length
, bpi
);
1385 zlog_warn("%s: unknown Encap Attribute option type %d",
1386 __func__
, pEncap
->type
);
1391 new->un_options
= rfapi_encap_tlv_to_un_option(bpi
->attr
);
1393 #ifdef DEBUG_ENCAP_MONITOR
1394 vnc_zlog_debug_verbose("%s: line %d: have_vnc_tunnel_un=%d", __func__
,
1395 __LINE__
, have_vnc_tunnel_un
);
1398 if (!have_vnc_tunnel_un
&& bpi
->extra
) {
1400 * use cached UN address from ENCAP route
1402 new->un_address
.addr_family
= bpi
->extra
->vnc
.import
.un_family
;
1403 switch (new->un_address
.addr_family
) {
1405 new->un_address
.addr
.v4
=
1406 bpi
->extra
->vnc
.import
.un
.addr4
;
1409 new->un_address
.addr
.v6
=
1410 bpi
->extra
->vnc
.import
.un
.addr6
;
1413 zlog_warn("%s: invalid UN addr family (%d) for bpi %p",
1414 __func__
, new->un_address
.addr_family
, bpi
);
1415 rfapi_free_next_hop_list(new);
1420 new->lifetime
= lifetime
;
/*
 * Check whether node rn holds at least one usable route: a path info that
 * is not flagged BGP_PATH_REMOVED and for which a UN address can be
 * obtained.
 */
1424 int rfapiHasNonRemovedRoutes(struct agg_node
*rn
)
1426 struct bgp_path_info
*bpi
;
1428 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
) {
/*
 * bpi->extra is tested before the call because rfapiGetUnAddrOfVpnBi()
 * reads bpi->extra->vnc.import; that function returns 0 when it found a
 * UN address.
 */
1431 if (!CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)
1432 && (bpi
->extra
&& !rfapiGetUnAddrOfVpnBi(bpi
, &pfx
))) {
1440 #ifdef DEBUG_IT_NODES
1444 void rfapiDumpNode(struct agg_node
*rn
)
1446 struct bgp_path_info
*bpi
;
1448 vnc_zlog_debug_verbose("%s: rn=%p", __func__
, rn
);
1449 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
) {
1451 int ctrc
= rfapiGetUnAddrOfVpnBi(bpi
, &pfx
);
1454 if (!CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)
1455 && (bpi
->extra
&& !ctrc
)) {
1462 vnc_zlog_debug_verbose(
1463 " bpi=%p, nr=%d, flags=0x%x, extra=%p, ctrc=%d", bpi
,
1464 nr
, bpi
->flags
, bpi
->extra
, ctrc
);
1469 static int rfapiNhlAddNodeRoutes(
1470 struct agg_node
*rn
, /* in */
1471 struct rfapi_ip_prefix
*rprefix
, /* in */
1472 uint32_t lifetime
, /* in */
1473 int removed
, /* in */
1474 struct rfapi_next_hop_entry
**head
, /* in/out */
1475 struct rfapi_next_hop_entry
**tail
, /* in/out */
1476 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1477 struct agg_node
*rfd_rib_node
, /* preload this NVE rib node */
1478 struct prefix
*pfx_target_original
) /* query target */
1480 struct bgp_path_info
*bpi
;
1481 struct rfapi_next_hop_entry
*new;
1482 struct prefix pfx_un
;
1483 struct skiplist
*seen_nexthops
;
1485 const struct prefix
*p
= agg_node_get_prefix(rn
);
1486 int is_l2
= (p
->family
== AF_ETHERNET
);
1489 struct agg_table
*atable
= agg_get_table(rfd_rib_node
);
1490 struct rfapi_descriptor
*rfd
;
1493 rfd
= agg_get_table_info(atable
);
1495 if (rfapiRibFTDFilterRecentPrefix(rfd
, rn
,
1496 pfx_target_original
))
1502 skiplist_new(0, vnc_prefix_cmp
, prefix_free_lists
);
1504 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
) {
1506 struct prefix pfx_vn
;
1507 struct prefix
*newpfx
;
1509 if (removed
&& !CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)) {
1510 #ifdef DEBUG_RETURNED_NHL
1511 vnc_zlog_debug_verbose(
1512 "%s: want holddown, this route not holddown, skip",
1517 if (!removed
&& CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)) {
1526 * Check for excluded VN address
1528 if (rfapiVpnBiNhEqualsPt(bpi
, exclude_vnaddr
))
1532 * Check for VN address (nexthop) copied already
1535 /* L2 routes: semantic nexthop in aux_prefix; VN addr
1537 pfx_vn
= bpi
->extra
->vnc
.import
.aux_prefix
;
1539 rfapiNexthop2Prefix(bpi
->attr
, &pfx_vn
);
1541 if (!skiplist_search(seen_nexthops
, &pfx_vn
, NULL
)) {
1542 #ifdef DEBUG_RETURNED_NHL
1543 vnc_zlog_debug_verbose(
1544 "%s: already put VN/nexthop %pFX, skip",
1550 if (rfapiGetUnAddrOfVpnBi(bpi
, &pfx_un
)) {
1551 #ifdef DEBUG_ENCAP_MONITOR
1552 vnc_zlog_debug_verbose(
1553 "%s: failed to get UN address of this VPN bpi",
1559 newpfx
= prefix_new();
1561 skiplist_insert(seen_nexthops
, newpfx
, newpfx
);
1563 new = rfapiRouteInfo2NextHopEntry(rprefix
, bpi
, lifetime
, rn
);
1565 if (rfapiRibPreloadBi(rfd_rib_node
, &pfx_vn
, &pfx_un
,
1567 /* duplicate filtered by RIB */
1568 rfapi_free_next_hop_list(new);
1575 (*tail
)->next
= new;
1584 skiplist_free(seen_nexthops
);
1593 * omit_node is meant for the situation where we are adding a subtree
1594 * of a parent of some original requested node. The response already
1595 * contains the original requested node, and we don't want to duplicate
1596 * its routes in the list, so we skip it if the right or left node
1597 * matches (of course, we still travel down its child subtrees).
1599 static int rfapiNhlAddSubtree(
1600 struct agg_node
*rn
, /* in */
1601 uint32_t lifetime
, /* in */
1602 struct rfapi_next_hop_entry
**head
, /* in/out */
1603 struct rfapi_next_hop_entry
**tail
, /* in/out */
1604 struct agg_node
*omit_node
, /* in */
1605 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1606 struct agg_table
*rfd_rib_table
, /* preload here */
1607 struct prefix
*pfx_target_original
) /* query target */
1609 struct rfapi_ip_prefix rprefix
;
1612 /* FIXME: need to find a better way here to work without sticking our
1613 * hands in node->link */
1614 if (agg_node_left(rn
) && agg_node_left(rn
) != omit_node
) {
1615 if (agg_node_left(rn
)->info
) {
1616 const struct prefix
*p
=
1617 agg_node_get_prefix(agg_node_left(rn
));
1619 struct agg_node
*rib_rn
= NULL
;
1621 rfapiQprefix2Rprefix(p
, &rprefix
);
1623 rib_rn
= agg_node_get(rfd_rib_table
, p
);
1625 count
= rfapiNhlAddNodeRoutes(
1626 agg_node_left(rn
), &rprefix
, lifetime
, 0, head
,
1627 tail
, exclude_vnaddr
, rib_rn
,
1628 pfx_target_original
);
1630 count
= rfapiNhlAddNodeRoutes(
1631 agg_node_left(rn
), &rprefix
, lifetime
,
1632 1, head
, tail
, exclude_vnaddr
, rib_rn
,
1633 pfx_target_original
);
1637 agg_unlock_node(rib_rn
);
1641 if (agg_node_right(rn
) && agg_node_right(rn
) != omit_node
) {
1642 if (agg_node_right(rn
)->info
) {
1643 const struct prefix
*p
=
1644 agg_node_get_prefix(agg_node_right(rn
));
1646 struct agg_node
*rib_rn
= NULL
;
1648 rfapiQprefix2Rprefix(p
, &rprefix
);
1650 rib_rn
= agg_node_get(rfd_rib_table
, p
);
1652 count
= rfapiNhlAddNodeRoutes(
1653 agg_node_right(rn
), &rprefix
, lifetime
, 0, head
,
1654 tail
, exclude_vnaddr
, rib_rn
,
1655 pfx_target_original
);
1657 count
= rfapiNhlAddNodeRoutes(
1658 agg_node_right(rn
), &rprefix
, lifetime
,
1659 1, head
, tail
, exclude_vnaddr
, rib_rn
,
1660 pfx_target_original
);
1664 agg_unlock_node(rib_rn
);
1668 if (agg_node_left(rn
)) {
1669 rcount
+= rfapiNhlAddSubtree(
1670 agg_node_left(rn
), lifetime
, head
, tail
, omit_node
,
1671 exclude_vnaddr
, rfd_rib_table
, pfx_target_original
);
1673 if (agg_node_right(rn
)) {
1674 rcount
+= rfapiNhlAddSubtree(
1675 agg_node_right(rn
), lifetime
, head
, tail
, omit_node
,
1676 exclude_vnaddr
, rfd_rib_table
, pfx_target_original
);
1683 * Implementation of ROUTE_LIST(node) from RFAPI-Import-Event-Handling.txt
1685 * Construct an rfapi nexthop list based on the routes attached to
1686 * the specified node.
1688 * If there are any routes that do NOT have BGP_PATH_REMOVED set,
1689 * return those only. If there are ONLY routes with BGP_PATH_REMOVED,
1690 * then return those, and also include all the non-removed routes from the
1691 * next less-specific node (i.e., this node's parent) at the end.
1693 struct rfapi_next_hop_entry
*rfapiRouteNode2NextHopList(
1694 struct agg_node
*rn
, uint32_t lifetime
, /* put into nexthop entries */
1695 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1696 struct agg_table
*rfd_rib_table
, /* preload here */
1697 struct prefix
*pfx_target_original
) /* query target */
1699 struct rfapi_ip_prefix rprefix
;
1700 struct rfapi_next_hop_entry
*answer
= NULL
;
1701 struct rfapi_next_hop_entry
*last
= NULL
;
1702 struct agg_node
*parent
;
1703 const struct prefix
*p
= agg_node_get_prefix(rn
);
1705 struct agg_node
*rib_rn
;
1707 #ifdef DEBUG_RETURNED_NHL
1708 vnc_zlog_debug_verbose("%s: called with node pfx=%rRN", __func__
, rn
);
1709 rfapiDebugBacktrace();
1712 rfapiQprefix2Rprefix(p
, &rprefix
);
1714 rib_rn
= rfd_rib_table
? agg_node_get(rfd_rib_table
, p
) : NULL
;
1717 * Add non-withdrawn routes at this node
1719 count
= rfapiNhlAddNodeRoutes(rn
, &rprefix
, lifetime
, 0, &answer
, &last
,
1720 exclude_vnaddr
, rib_rn
,
1721 pfx_target_original
);
1724 * If the list has at least one entry, it's finished
1727 count
+= rfapiNhlAddSubtree(rn
, lifetime
, &answer
, &last
, NULL
,
1728 exclude_vnaddr
, rfd_rib_table
,
1729 pfx_target_original
);
1730 vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__
,
1732 #ifdef DEBUG_RETURNED_NHL
1733 rfapiPrintNhl(NULL
, answer
);
1736 agg_unlock_node(rib_rn
);
1741 * Add withdrawn routes at this node
1743 count
= rfapiNhlAddNodeRoutes(rn
, &rprefix
, lifetime
, 1, &answer
, &last
,
1744 exclude_vnaddr
, rib_rn
,
1745 pfx_target_original
);
1747 agg_unlock_node(rib_rn
);
1749 // rfapiPrintNhl(NULL, answer);
1752 * walk up the tree until we find a node with non-deleted
1753 * routes, then add them
1755 for (parent
= agg_node_parent(rn
); parent
;
1756 parent
= agg_node_parent(parent
)) {
1757 if (rfapiHasNonRemovedRoutes(parent
)) {
1763 * Add non-withdrawn routes from less-specific prefix
1766 const struct prefix
*p
= agg_node_get_prefix(parent
);
1768 rib_rn
= rfd_rib_table
? agg_node_get(rfd_rib_table
, p
) : NULL
;
1769 rfapiQprefix2Rprefix(p
, &rprefix
);
1770 count
+= rfapiNhlAddNodeRoutes(parent
, &rprefix
, lifetime
, 0,
1771 &answer
, &last
, exclude_vnaddr
,
1772 rib_rn
, pfx_target_original
);
1773 count
+= rfapiNhlAddSubtree(parent
, lifetime
, &answer
, &last
,
1774 rn
, exclude_vnaddr
, rfd_rib_table
,
1775 pfx_target_original
);
1777 agg_unlock_node(rib_rn
);
1780 * There is no parent with non-removed routes. Still need to
1781 * add subtree of original node if it contributed routes to the
1785 count
+= rfapiNhlAddSubtree(rn
, lifetime
, &answer
,
1786 &last
, rn
, exclude_vnaddr
,
1788 pfx_target_original
);
1791 vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__
, count
,
1793 #ifdef DEBUG_RETURNED_NHL
1794 rfapiPrintNhl(NULL
, answer
);
1800 * Construct nexthop list of all routes in table
1802 struct rfapi_next_hop_entry
*rfapiRouteTable2NextHopList(
1803 struct agg_table
*rt
, uint32_t lifetime
, /* put into nexthop entries */
1804 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1805 struct agg_table
*rfd_rib_table
, /* preload this NVE rib table */
1806 struct prefix
*pfx_target_original
) /* query target */
1808 struct agg_node
*rn
;
1809 struct rfapi_next_hop_entry
*biglist
= NULL
;
1810 struct rfapi_next_hop_entry
*nhl
;
1811 struct rfapi_next_hop_entry
*tail
= NULL
;
1814 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
1816 nhl
= rfapiRouteNode2NextHopList(rn
, lifetime
, exclude_vnaddr
,
1818 pfx_target_original
);
1820 tail
= biglist
= nhl
;
1827 while (tail
->next
) {
1834 vnc_zlog_debug_verbose("%s: returning %d routes", __func__
, count
);
/*
 * Construct an rfapi nexthop list from the routes attached to one node
 * of an L2 (ethernet) import table.
 *
 * Non-holddown routes are returned when there are any; otherwise the
 * holddown routes are returned.  rfd_rib_table may be NULL.
 *
 * NOTE(review): the first pass hands NULL to rfapiNhlAddNodeRoutes()
 * where the second pass hands exclude_vnaddr; this asymmetry is what
 * the listing shows -- confirm it is intentional.
 */
struct rfapi_next_hop_entry *rfapiEthRouteNode2NextHopList(
	struct agg_node *rn, struct rfapi_ip_prefix *rprefix,
	uint32_t lifetime,		      /* put into nexthop entries */
	struct rfapi_ip_addr *exclude_vnaddr, /* omit routes to same NVE */
	struct agg_table *rfd_rib_table,      /* preload NVE rib table */
	struct prefix *pfx_target_original)   /* query target */
{
	int count = 0;
	struct rfapi_next_hop_entry *answer = NULL;
	struct rfapi_next_hop_entry *last = NULL;
	struct agg_node *rib_rn;

	/* locked by agg_node_get(); released before returning */
	rib_rn = rfd_rib_table
			 ? agg_node_get(rfd_rib_table, agg_node_get_prefix(rn))
			 : NULL;

	count = rfapiNhlAddNodeRoutes(rn, rprefix, lifetime, 0, &answer, &last,
				      NULL, rib_rn, pfx_target_original);

#ifdef DEBUG_ENCAP_MONITOR
	vnc_zlog_debug_verbose("%s: node %p: %d non-holddown routes", __func__,
			       rn, count);
#endif

	if (!count) {
		count = rfapiNhlAddNodeRoutes(rn, rprefix, lifetime, 1, &answer,
					      &last, exclude_vnaddr, rib_rn,
					      pfx_target_original);
		vnc_zlog_debug_verbose("%s: node %p: %d holddown routes",
				       __func__, rn, count);
	}

	if (rib_rn)
		agg_unlock_node(rib_rn);

#ifdef DEBUG_RETURNED_NHL
	rfapiPrintNhl(NULL, answer);
#endif

	return answer;
}
1882 * Construct nexthop list of all routes in table
1884 struct rfapi_next_hop_entry
*rfapiEthRouteTable2NextHopList(
1885 uint32_t logical_net_id
, struct rfapi_ip_prefix
*rprefix
,
1886 uint32_t lifetime
, /* put into nexthop entries */
1887 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1888 struct agg_table
*rfd_rib_table
, /* preload NVE rib node */
1889 struct prefix
*pfx_target_original
) /* query target */
1891 struct rfapi_import_table
*it
;
1892 struct bgp
*bgp
= bgp_get_default();
1893 struct agg_table
*rt
;
1894 struct agg_node
*rn
;
1895 struct rfapi_next_hop_entry
*biglist
= NULL
;
1896 struct rfapi_next_hop_entry
*nhl
;
1897 struct rfapi_next_hop_entry
*tail
= NULL
;
1901 it
= rfapiMacImportTableGet(bgp
, logical_net_id
);
1902 rt
= it
->imported_vpn
[AFI_L2VPN
];
1904 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
1906 nhl
= rfapiEthRouteNode2NextHopList(
1907 rn
, rprefix
, lifetime
, exclude_vnaddr
, rfd_rib_table
,
1908 pfx_target_original
);
1910 tail
= biglist
= nhl
;
1917 while (tail
->next
) {
1924 vnc_zlog_debug_verbose("%s: returning %d routes", __func__
, count
);
1929 * Insert a new bpi to the imported route table node,
1930 * keeping the list of BPIs sorted best route first
1932 static void rfapiBgpInfoAttachSorted(struct agg_node
*rn
,
1933 struct bgp_path_info
*info_new
, afi_t afi
,
1937 struct bgp_path_info
*prev
;
1938 struct bgp_path_info
*next
;
1939 char pfx_buf
[PREFIX2STR_BUFFER
];
1942 bgp
= bgp_get_default(); /* assume 1 instance for now */
1944 if (VNC_DEBUG(IMPORT_BI_ATTACH
)) {
1945 vnc_zlog_debug_verbose("%s: info_new->peer=%p", __func__
,
1947 vnc_zlog_debug_verbose("%s: info_new->peer->su_remote=%p",
1948 __func__
, info_new
->peer
->su_remote
);
1951 for (prev
= NULL
, next
= rn
->info
; next
;
1952 prev
= next
, next
= next
->next
) {
1953 enum bgp_path_selection_reason reason
;
1956 || (!CHECK_FLAG(info_new
->flags
, BGP_PATH_REMOVED
)
1957 && CHECK_FLAG(next
->flags
, BGP_PATH_REMOVED
))
1958 || bgp_path_info_cmp_compatible(bgp
, info_new
, next
,
1961 == -1) { /* -1 if 1st is better */
1965 vnc_zlog_debug_verbose("%s: prev=%p, next=%p", __func__
, prev
, next
);
1967 prev
->next
= info_new
;
1969 rn
->info
= info_new
;
1971 info_new
->prev
= prev
;
1972 info_new
->next
= next
;
1974 next
->prev
= info_new
;
1975 bgp_attr_intern(info_new
->attr
);
1978 static void rfapiBgpInfoDetach(struct agg_node
*rn
, struct bgp_path_info
*bpi
)
1981 * Remove the route (doubly-linked)
1983 // bgp_attr_unintern (&bpi->attr);
1985 bpi
->next
->prev
= bpi
->prev
;
1987 bpi
->prev
->next
= bpi
->next
;
1989 rn
->info
= bpi
->next
;
1993 * For L3-indexed import tables
1995 static int rfapi_bi_peer_rd_cmp(const void *b1
, const void *b2
)
1997 const struct bgp_path_info
*bpi1
= b1
;
1998 const struct bgp_path_info
*bpi2
= b2
;
2003 if (bpi1
->peer
< bpi2
->peer
)
2005 if (bpi1
->peer
> bpi2
->peer
)
2011 return vnc_prefix_cmp(
2012 (const struct prefix
*)&bpi1
->extra
->vnc
.import
.rd
,
2013 (const struct prefix
*)&bpi2
->extra
->vnc
.import
.rd
);
2017 * For L2-indexed import tables
2018 * The BPIs in these tables should ALWAYS have an aux_prefix set because
2019 * they arrive via IPv4 or IPv6 advertisements.
2021 static int rfapi_bi_peer_rd_aux_cmp(const void *b1
, const void *b2
)
2023 const struct bgp_path_info
*bpi1
= b1
;
2024 const struct bgp_path_info
*bpi2
= b2
;
2030 if (bpi1
->peer
< bpi2
->peer
)
2032 if (bpi1
->peer
> bpi2
->peer
)
2038 rc
= vnc_prefix_cmp((struct prefix
*)&bpi1
->extra
->vnc
.import
.rd
,
2039 (struct prefix
*)&bpi2
->extra
->vnc
.import
.rd
);
2045 * L2 import tables can have multiple entries with the
2046 * same MAC address, same RD, but different L3 addresses.
2048 * Use presence of aux_prefix with AF=ethernet and prefixlen=1
2049 * as magic value to signify explicit wildcarding of the aux_prefix.
2050 * This magic value will not appear in bona fide bpi entries in
2051 * the import table, but is allowed in the "fake" bpi used to
2052 * probe the table when searching. (We have to test both b1 and b2
2053 * because there is no guarantee of the order the test key and
2054 * the real key will be passed)
2056 if ((bpi1
->extra
->vnc
.import
.aux_prefix
.family
== AF_ETHERNET
2057 && (bpi1
->extra
->vnc
.import
.aux_prefix
.prefixlen
== 1))
2058 || (bpi2
->extra
->vnc
.import
.aux_prefix
.family
== AF_ETHERNET
2059 && (bpi2
->extra
->vnc
.import
.aux_prefix
.prefixlen
== 1))) {
2062 * wildcard aux address specified
2067 return vnc_prefix_cmp(&bpi1
->extra
->vnc
.import
.aux_prefix
,
2068 &bpi2
->extra
->vnc
.import
.aux_prefix
);
2073 * Index on RD and Peer
2075 static void rfapiItBiIndexAdd(struct agg_node
*rn
, /* Import table VPN node */
2076 struct bgp_path_info
*bpi
) /* new BPI */
2078 struct skiplist
*sl
;
2079 const struct prefix
*p
;
2085 vnc_zlog_debug_verbose("%s: bpi %p, peer %p, rd %pRDP", __func__
, bpi
,
2086 bpi
->peer
, &bpi
->extra
->vnc
.import
.rd
);
2088 sl
= RFAPI_RDINDEX_W_ALLOC(rn
);
2090 p
= agg_node_get_prefix(rn
);
2091 if (AF_ETHERNET
== p
->family
) {
2092 sl
= skiplist_new(0, rfapi_bi_peer_rd_aux_cmp
, NULL
);
2094 sl
= skiplist_new(0, rfapi_bi_peer_rd_cmp
, NULL
);
2096 RFAPI_IT_EXTRA_GET(rn
)->u
.vpn
.idx_rd
= sl
;
2097 agg_lock_node(rn
); /* for skiplist */
2099 assert(!skiplist_insert(sl
, (void *)bpi
, (void *)bpi
));
2100 agg_lock_node(rn
); /* for skiplist entry */
2102 /* NB: BPIs in import tables are not refcounted */
2105 static void rfapiItBiIndexDump(struct agg_node
*rn
)
2107 struct skiplist
*sl
;
2108 void *cursor
= NULL
;
2109 struct bgp_path_info
*k
;
2110 struct bgp_path_info
*v
;
2113 sl
= RFAPI_RDINDEX(rn
);
2117 for (rc
= skiplist_next(sl
, (void **)&k
, (void **)&v
, &cursor
); !rc
;
2118 rc
= skiplist_next(sl
, (void **)&k
, (void **)&v
, &cursor
)) {
2120 char buf
[RD_ADDRSTRLEN
];
2121 char buf_aux_pfx
[PREFIX_STRLEN
];
2124 &k
->extra
->vnc
.import
.rd
, buf
, sizeof(buf
),
2125 bgp_get_asnotation(k
->peer
? k
->peer
->bgp
: NULL
));
2126 if (k
->extra
->vnc
.import
.aux_prefix
.family
) {
2127 prefix2str(&k
->extra
->vnc
.import
.aux_prefix
,
2128 buf_aux_pfx
, sizeof(buf_aux_pfx
));
2130 strlcpy(buf_aux_pfx
, "(none)", sizeof(buf_aux_pfx
));
2132 vnc_zlog_debug_verbose("bpi %p, peer %p, rd %s, aux_prefix %s",
2133 k
, k
->peer
, buf
, buf_aux_pfx
);
2137 static struct bgp_path_info
*rfapiItBiIndexSearch(
2138 struct agg_node
*rn
, /* Import table VPN node */
2139 struct prefix_rd
*prd
, struct peer
*peer
,
2140 const struct prefix
*aux_prefix
) /* optional L3 addr for L2 ITs */
2142 struct skiplist
*sl
;
2144 struct bgp_path_info bpi_fake
= {0};
2145 struct bgp_path_info_extra bpi_extra
= {0};
2146 struct bgp_path_info
*bpi_result
;
2148 sl
= RFAPI_RDINDEX(rn
);
2152 #ifdef DEBUG_BI_SEARCH
2154 char buf_aux_pfx
[PREFIX_STRLEN
];
2157 prefix2str(aux_prefix
, buf_aux_pfx
,
2158 sizeof(buf_aux_pfx
));
2160 strlcpy(buf_aux_pfx
, "(nil)", sizeof(buf_aux_pfx
));
2162 vnc_zlog_debug_verbose(
2163 "%s want prd=%pRDP, peer=%p, aux_prefix=%s", __func__
,
2164 prd
, peer
, buf_aux_pfx
);
2165 rfapiItBiIndexDump(rn
);
2169 /* threshold is a WAG */
2170 if (sl
->count
< 3) {
2171 #ifdef DEBUG_BI_SEARCH
2172 vnc_zlog_debug_verbose("%s: short list algorithm", __func__
);
2174 /* if short list, linear search might be faster */
2175 for (bpi_result
= rn
->info
; bpi_result
;
2176 bpi_result
= bpi_result
->next
) {
2177 #ifdef DEBUG_BI_SEARCH
2178 vnc_zlog_debug_verbose(
2179 "%s: bpi has prd=%pRDP, peer=%p", __func__
,
2180 &bpi_result
->extra
->vnc
.import
.rd
,
2183 if (peer
== bpi_result
->peer
2184 && !prefix_cmp((struct prefix
*)&bpi_result
->extra
2186 (struct prefix
*)prd
)) {
2188 #ifdef DEBUG_BI_SEARCH
2189 vnc_zlog_debug_verbose(
2190 "%s: peer and RD same, doing aux_prefix check",
2196 &bpi_result
->extra
->vnc
.import
2199 #ifdef DEBUG_BI_SEARCH
2200 vnc_zlog_debug_verbose("%s: match",
2210 bpi_fake
.peer
= peer
;
2211 bpi_fake
.extra
= &bpi_extra
;
2212 bpi_fake
.extra
->vnc
.import
.rd
= *prd
;
2214 bpi_fake
.extra
->vnc
.import
.aux_prefix
= *aux_prefix
;
2217 bpi_fake
.extra
->vnc
.import
.aux_prefix
.family
= AF_ETHERNET
;
2218 bpi_fake
.extra
->vnc
.import
.aux_prefix
.prefixlen
= 1;
2221 rc
= skiplist_search(sl
, (void *)&bpi_fake
, (void *)&bpi_result
);
2224 #ifdef DEBUG_BI_SEARCH
2225 vnc_zlog_debug_verbose("%s: no match", __func__
);
2230 #ifdef DEBUG_BI_SEARCH
2231 vnc_zlog_debug_verbose("%s: matched bpi=%p", __func__
, bpi_result
);
2237 static void rfapiItBiIndexDel(struct agg_node
*rn
, /* Import table VPN node */
2238 struct bgp_path_info
*bpi
) /* old BPI */
2240 struct skiplist
*sl
;
2243 vnc_zlog_debug_verbose("%s: bpi %p, peer %p, rd %pRDP", __func__
, bpi
,
2244 bpi
->peer
, &bpi
->extra
->vnc
.import
.rd
);
2246 sl
= RFAPI_RDINDEX(rn
);
2249 rc
= skiplist_delete(sl
, (void *)(bpi
), (void *)bpi
);
2251 rfapiItBiIndexDump(rn
);
2255 agg_unlock_node(rn
); /* for skiplist entry */
2257 /* NB: BPIs in import tables are not refcounted */
2261 * Add a backreference at the ENCAP node to the VPN route that
2265 rfapiMonitorEncapAdd(struct rfapi_import_table
*import_table
,
2266 struct prefix
*p
, /* VN address */
2267 struct agg_node
*vpn_rn
, /* VPN node */
2268 struct bgp_path_info
*vpn_bpi
) /* VPN bpi/route */
2270 afi_t afi
= family2afi(p
->family
);
2271 struct agg_node
*rn
;
2272 struct rfapi_monitor_encap
*m
;
2275 rn
= agg_node_get(import_table
->imported_encap
[afi
], p
); /* locks rn */
2278 m
= XCALLOC(MTYPE_RFAPI_MONITOR_ENCAP
,
2279 sizeof(struct rfapi_monitor_encap
));
2285 /* insert to encap node's list */
2286 m
->next
= RFAPI_MONITOR_ENCAP(rn
);
2289 RFAPI_MONITOR_ENCAP_W_ALLOC(rn
) = m
;
2291 /* for easy lookup when deleting vpn route */
2292 vpn_bpi
->extra
->vnc
.import
.hme
= m
;
2294 vnc_zlog_debug_verbose(
2295 "%s: it=%p, vpn_bpi=%p, afi=%d, encap rn=%p, setting vpn_bpi->extra->vnc.import.hme=%p",
2296 __func__
, import_table
, vpn_bpi
, afi
, rn
, m
);
2298 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, 0);
2299 bgp_attr_intern(vpn_bpi
->attr
);
2302 static void rfapiMonitorEncapDelete(struct bgp_path_info
*vpn_bpi
)
2305 * Remove encap monitor
2307 vnc_zlog_debug_verbose("%s: vpn_bpi=%p", __func__
, vpn_bpi
);
2308 if (vpn_bpi
->extra
) {
2309 struct rfapi_monitor_encap
*hme
=
2310 vpn_bpi
->extra
->vnc
.import
.hme
;
2314 vnc_zlog_debug_verbose("%s: hme=%p", __func__
, hme
);
2316 /* Refcount checking takes too long here */
2317 // RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 0);
2319 hme
->next
->prev
= hme
->prev
;
2321 hme
->prev
->next
= hme
->next
;
2323 RFAPI_MONITOR_ENCAP_W_ALLOC(hme
->rn
) =
2325 /* Refcount checking takes too long here */
2326 // RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 1);
2328 /* see if the struct rfapi_it_extra is empty and can be
2330 rfapiMonitorExtraPrune(SAFI_ENCAP
, hme
->rn
);
2332 agg_unlock_node(hme
->rn
); /* decr ref count */
2333 XFREE(MTYPE_RFAPI_MONITOR_ENCAP
, hme
);
2334 vpn_bpi
->extra
->vnc
.import
.hme
= NULL
;
2340 * Timer callback for withdraw
2342 static void rfapiWithdrawTimerVPN(struct thread
*t
)
2344 struct rfapi_withdraw
*wcb
= THREAD_ARG(t
);
2345 struct bgp_path_info
*bpi
= wcb
->info
;
2346 struct bgp
*bgp
= bgp_get_default();
2347 const struct prefix
*p
;
2348 struct rfapi_monitor_vpn
*moved
;
2350 bool early_exit
= false;
2353 vnc_zlog_debug_verbose(
2354 "%s: NULL BGP pointer, assume shutdown race condition!!!",
2358 if (bgp
&& CHECK_FLAG(bgp
->flags
, BGP_FLAG_DELETE_IN_PROGRESS
)) {
2359 vnc_zlog_debug_verbose(
2360 "%s: BGP delete in progress, assume shutdown race condition!!!",
2365 /* This callback is responsible for the withdraw object's memory */
2367 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
2373 assert(wcb
->import_table
);
2376 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_MPLS_VPN
, wcb
->lockoffset
);
2378 vnc_zlog_debug_verbose("%s: removing bpi %p at prefix %pRN", __func__
,
2382 * Remove the route (doubly-linked)
2384 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_VALID
)
2385 && VALID_INTERIOR_TYPE(bpi
->type
))
2386 RFAPI_MONITOR_EXTERIOR(wcb
->node
)->valid_interior_count
--;
2388 p
= agg_node_get_prefix(wcb
->node
);
2389 afi
= family2afi(p
->family
);
2390 wcb
->import_table
->holddown_count
[afi
] -= 1; /* keep count consistent */
2391 rfapiItBiIndexDel(wcb
->node
, bpi
);
2392 rfapiBgpInfoDetach(wcb
->node
, bpi
); /* with removed bpi */
2394 vnc_import_bgp_exterior_del_route_interior(bgp
, wcb
->import_table
,
2399 * If VNC is configured to send response remove messages, AND
2400 * if the removed route had a UN address, do response removal
2403 if (!(bgp
->rfapi_cfg
->flags
2404 & BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE
)) {
2406 int has_valid_duplicate
= 0;
2407 struct bgp_path_info
*bpii
;
2410 * First check if there are any OTHER routes at this node
2411 * that have the same nexthop and a valid UN address. If
2412 * there are (e.g., from other peers), then the route isn't
2413 * really gone, so skip sending a response removal message.
2415 for (bpii
= wcb
->node
->info
; bpii
; bpii
= bpii
->next
) {
2416 if (rfapiVpnBiSamePtUn(bpi
, bpii
)) {
2417 has_valid_duplicate
= 1;
2422 vnc_zlog_debug_verbose("%s: has_valid_duplicate=%d", __func__
,
2423 has_valid_duplicate
);
2425 if (!has_valid_duplicate
) {
2426 rfapiRibPendingDeleteRoute(bgp
, wcb
->import_table
, afi
,
2431 rfapiMonitorEncapDelete(bpi
);
2434 * If there are no VPN monitors at this VPN Node A,
2437 if (!RFAPI_MONITOR_VPN(wcb
->node
)) {
2438 vnc_zlog_debug_verbose("%s: no VPN monitors at this node",
2444 * rfapiMonitorMoveShorter only moves monitors if there are
2445 * no remaining valid routes at the current node
2447 moved
= rfapiMonitorMoveShorter(wcb
->node
, 1);
2450 rfapiMonitorMovedUp(wcb
->import_table
, wcb
->node
, moved
->node
,
2458 rfapiBgpInfoFree(bpi
);
2462 * If route count at this node has gone to 0, withdraw exported prefix
2464 if (!wcb
->node
->info
) {
2465 /* see if the struct rfapi_it_extra is empty and can be freed */
2466 rfapiMonitorExtraPrune(SAFI_MPLS_VPN
, wcb
->node
);
2467 vnc_direct_bgp_del_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2468 vnc_zebra_del_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2471 * nexthop change event
2472 * vnc_direct_bgp_add_prefix() will recompute the VN addr
2475 vnc_direct_bgp_add_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2478 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_MPLS_VPN
, 1 + wcb
->lockoffset
);
2479 agg_unlock_node(wcb
->node
); /* decr ref count */
2480 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
2484 * This works for multiprotocol extension, but not for plain ol'
2485 * unicast IPv4 because that nexthop is stored in attr->nexthop
2487 void rfapiNexthop2Prefix(struct attr
*attr
, struct prefix
*p
)
2492 memset(p
, 0, sizeof(struct prefix
));
2494 switch (p
->family
= BGP_MP_NEXTHOP_FAMILY(attr
->mp_nexthop_len
)) {
2496 p
->u
.prefix4
= attr
->mp_nexthop_global_in
;
2497 p
->prefixlen
= IPV4_MAX_BITLEN
;
2501 p
->u
.prefix6
= attr
->mp_nexthop_global
;
2502 p
->prefixlen
= IPV6_MAX_BITLEN
;
2506 vnc_zlog_debug_verbose("%s: Family is unknown = %d", __func__
,
2511 void rfapiUnicastNexthop2Prefix(afi_t afi
, struct attr
*attr
, struct prefix
*p
)
2513 if (afi
== AFI_IP
) {
2514 p
->family
= AF_INET
;
2515 p
->prefixlen
= IPV4_MAX_BITLEN
;
2516 p
->u
.prefix4
= attr
->nexthop
;
2518 rfapiNexthop2Prefix(attr
, p
);
2522 static int rfapiAttrNexthopAddrDifferent(struct prefix
*p1
, struct prefix
*p2
)
2525 vnc_zlog_debug_verbose("%s: p1 or p2 is NULL", __func__
);
2530 * Are address families the same?
2532 if (p1
->family
!= p2
->family
) {
2536 switch (p1
->family
) {
2538 if (IPV4_ADDR_SAME(&p1
->u
.prefix4
, &p2
->u
.prefix4
))
2543 if (IPV6_ADDR_SAME(&p1
->u
.prefix6
, &p2
->u
.prefix6
))
2554 static void rfapiCopyUnEncap2VPN(struct bgp_path_info
*encap_bpi
,
2555 struct bgp_path_info
*vpn_bpi
)
2557 if (!vpn_bpi
|| !vpn_bpi
->extra
) {
2558 zlog_warn("%s: no vpn bpi attr/extra, can't copy UN address",
2563 switch (BGP_MP_NEXTHOP_FAMILY(encap_bpi
->attr
->mp_nexthop_len
)) {
2567 * instrumentation to debug segfault of 091127
2569 vnc_zlog_debug_verbose("%s: vpn_bpi=%p", __func__
, vpn_bpi
);
2570 vnc_zlog_debug_verbose("%s: vpn_bpi->extra=%p", __func__
,
2573 vpn_bpi
->extra
->vnc
.import
.un_family
= AF_INET
;
2574 vpn_bpi
->extra
->vnc
.import
.un
.addr4
=
2575 encap_bpi
->attr
->mp_nexthop_global_in
;
2579 vpn_bpi
->extra
->vnc
.import
.un_family
= AF_INET6
;
2580 vpn_bpi
->extra
->vnc
.import
.un
.addr6
=
2581 encap_bpi
->attr
->mp_nexthop_global
;
2585 zlog_warn("%s: invalid encap nexthop length: %d", __func__
,
2586 encap_bpi
->attr
->mp_nexthop_len
);
2587 vpn_bpi
->extra
->vnc
.import
.un_family
= AF_UNSPEC
;
2593 * returns 0 on success, nonzero on error
2596 rfapiWithdrawEncapUpdateCachedUn(struct rfapi_import_table
*import_table
,
2597 struct bgp_path_info
*encap_bpi
,
2598 struct agg_node
*vpn_rn
,
2599 struct bgp_path_info
*vpn_bpi
)
2604 * clear cached UN address
2606 if (!vpn_bpi
|| !vpn_bpi
->extra
) {
2608 "%s: missing VPN bpi/extra, can't clear UN addr",
2612 vpn_bpi
->extra
->vnc
.import
.un_family
= AF_UNSPEC
;
2613 memset(&vpn_bpi
->extra
->vnc
.import
.un
, 0,
2614 sizeof(vpn_bpi
->extra
->vnc
.import
.un
));
2615 if (CHECK_FLAG(vpn_bpi
->flags
, BGP_PATH_VALID
)) {
2616 if (rfapiGetVncTunnelUnAddr(vpn_bpi
->attr
, NULL
)) {
2617 UNSET_FLAG(vpn_bpi
->flags
, BGP_PATH_VALID
);
2618 if (VALID_INTERIOR_TYPE(vpn_bpi
->type
))
2619 RFAPI_MONITOR_EXTERIOR(vpn_rn
)
2620 ->valid_interior_count
--;
2621 /* signal interior route withdrawal to
2622 * import-exterior */
2623 vnc_import_bgp_exterior_del_route_interior(
2624 bgp_get_default(), import_table
, vpn_rn
,
2631 zlog_warn("%s: missing VPN bpi, can't clear UN addr",
2635 rfapiCopyUnEncap2VPN(encap_bpi
, vpn_bpi
);
2636 if (!CHECK_FLAG(vpn_bpi
->flags
, BGP_PATH_VALID
)) {
2637 SET_FLAG(vpn_bpi
->flags
, BGP_PATH_VALID
);
2638 if (VALID_INTERIOR_TYPE(vpn_bpi
->type
))
2639 RFAPI_MONITOR_EXTERIOR(vpn_rn
)
2640 ->valid_interior_count
++;
2641 /* signal interior route withdrawal to import-exterior
2643 vnc_import_bgp_exterior_add_route_interior(
2644 bgp_get_default(), import_table
, vpn_rn
,
2651 static void rfapiWithdrawTimerEncap(struct thread
*t
)
2653 struct rfapi_withdraw
*wcb
= THREAD_ARG(t
);
2654 struct bgp_path_info
*bpi
= wcb
->info
;
2655 int was_first_route
= 0;
2656 struct rfapi_monitor_encap
*em
;
2657 struct skiplist
*vpn_node_sl
= skiplist_new(0, NULL
, NULL
);
2661 assert(wcb
->import_table
);
2663 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_ENCAP
, 0);
2665 if (wcb
->node
->info
== bpi
)
2666 was_first_route
= 1;
2669 * Remove the route/bpi and free it
2671 rfapiBgpInfoDetach(wcb
->node
, bpi
);
2672 rfapiBgpInfoFree(bpi
);
2674 if (!was_first_route
)
2677 for (em
= RFAPI_MONITOR_ENCAP(wcb
->node
); em
; em
= em
->next
) {
2680 * Update monitoring VPN BPIs with new encap info at the
2681 * head of the encap bpi chain (which could be NULL after
2682 * removing the expiring bpi above)
2684 if (rfapiWithdrawEncapUpdateCachedUn(wcb
->import_table
,
2685 wcb
->node
->info
, em
->node
,
2690 * Build a list of unique VPN nodes referenced by these
2692 * Use a skiplist for speed.
2694 skiplist_insert(vpn_node_sl
, em
->node
, em
->node
);
2699 * for each VPN node referenced in the ENCAP monitors:
2701 struct agg_node
*rn
;
2702 while (!skiplist_first(vpn_node_sl
, (void **)&rn
, NULL
)) {
2703 if (!wcb
->node
->info
) {
2704 struct rfapi_monitor_vpn
*moved
;
2706 moved
= rfapiMonitorMoveShorter(rn
, 0);
2708 // rfapiDoRouteCallback(wcb->import_table,
2709 // moved->node, moved);
2710 rfapiMonitorMovedUp(wcb
->import_table
, rn
,
2711 moved
->node
, moved
);
2714 // rfapiDoRouteCallback(wcb->import_table, rn, NULL);
2715 rfapiMonitorItNodeChanged(wcb
->import_table
, rn
, NULL
);
2717 skiplist_delete_first(vpn_node_sl
);
2721 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_ENCAP
, 1);
2722 agg_unlock_node(wcb
->node
); /* decr ref count */
2723 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
2724 skiplist_free(vpn_node_sl
);
2729 * Works for both VPN and ENCAP routes; timer_service_func is different
2733 rfapiBiStartWithdrawTimer(struct rfapi_import_table
*import_table
,
2734 struct agg_node
*rn
, struct bgp_path_info
*bpi
,
2735 afi_t afi
, safi_t safi
,
2736 void (*timer_service_func
)(struct thread
*))
2739 struct rfapi_withdraw
*wcb
;
2741 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)) {
2743 * Already on the path to being withdrawn,
2744 * should already have a timer set up to
2747 vnc_zlog_debug_verbose(
2748 "%s: already being withdrawn, do nothing", __func__
);
2752 rfapiGetVncLifetime(bpi
->attr
, &lifetime
);
2753 vnc_zlog_debug_verbose("%s: VNC lifetime is %u", __func__
, lifetime
);
2756 * withdrawn routes get to hang around for a while
2758 SET_FLAG(bpi
->flags
, BGP_PATH_REMOVED
);
2760 /* set timer to remove the route later */
2761 lifetime
= rfapiGetHolddownFromLifetime(lifetime
);
2762 vnc_zlog_debug_verbose("%s: using timeout %u", __func__
, lifetime
);
2765 * Stash import_table, node, and info for use by timer
2766 * service routine, which is supposed to free the wcb.
2768 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
2771 wcb
->import_table
= import_table
;
2772 bgp_attr_intern(bpi
->attr
);
2774 if (VNC_DEBUG(VERBOSE
)) {
2775 vnc_zlog_debug_verbose(
2776 "%s: wcb values: node=%p, info=%p, import_table=%p (bpi follows)",
2777 __func__
, wcb
->node
, wcb
->info
, wcb
->import_table
);
2778 rfapiPrintBi(NULL
, bpi
);
2783 if (lifetime
> UINT32_MAX
/ 1001) {
2784 /* sub-optimal case, but will probably never happen */
2785 bpi
->extra
->vnc
.import
.timer
= NULL
;
2786 thread_add_timer(bm
->master
, timer_service_func
, wcb
, lifetime
,
2787 &bpi
->extra
->vnc
.import
.timer
);
2789 static uint32_t jitter
;
2790 uint32_t lifetime_msec
;
2793 * the goal here is to spread out the timers so they are
2794 * sortable in the skip list
2796 if (++jitter
>= 1000)
2799 lifetime_msec
= (lifetime
* 1000) + jitter
;
2801 bpi
->extra
->vnc
.import
.timer
= NULL
;
2802 thread_add_timer_msec(bm
->master
, timer_service_func
, wcb
,
2804 &bpi
->extra
->vnc
.import
.timer
);
2807 /* re-sort route list (BGP_PATH_REMOVED routes are last) */
2808 if (((struct bgp_path_info
*)rn
->info
)->next
) {
2809 rfapiBgpInfoDetach(rn
, bpi
);
2810 rfapiBgpInfoAttachSorted(rn
, bpi
, afi
, safi
);
2815 typedef void(rfapi_bi_filtered_import_f
)(struct rfapi_import_table
*table
,
2816 int action
, struct peer
*peer
,
2817 void *rfd
, const struct prefix
*prefix
,
2818 const struct prefix
*aux_prefix
,
2819 afi_t afi
, struct prefix_rd
*prd
,
2820 struct attr
*attr
, uint8_t type
,
2821 uint8_t sub_type
, uint32_t *label
);
2824 static void rfapiExpireEncapNow(struct rfapi_import_table
*it
,
2825 struct agg_node
*rn
, struct bgp_path_info
*bpi
)
2827 struct rfapi_withdraw
*wcb
;
2831 * pretend we're an expiring timer
2833 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
2836 wcb
->import_table
= it
;
2837 memset(&t
, 0, sizeof(t
));
2839 rfapiWithdrawTimerEncap(&t
); /* frees wcb */
2842 static int rfapiGetNexthop(struct attr
*attr
, struct prefix
*prefix
)
2844 switch (BGP_MP_NEXTHOP_FAMILY(attr
->mp_nexthop_len
)) {
2846 prefix
->family
= AF_INET
;
2847 prefix
->prefixlen
= IPV4_MAX_BITLEN
;
2848 prefix
->u
.prefix4
= attr
->mp_nexthop_global_in
;
2851 prefix
->family
= AF_INET6
;
2852 prefix
->prefixlen
= IPV6_MAX_BITLEN
;
2853 prefix
->u
.prefix6
= attr
->mp_nexthop_global
;
2856 vnc_zlog_debug_verbose("%s: unknown attr->mp_nexthop_len %d",
2857 __func__
, attr
->mp_nexthop_len
);
/*
 * import a bgp_path_info if its route target list intersects with the
 * import table's route target list
 *
 * Applies one ENCAP-SAFI event (FIF_ACTION_UPDATE, FIF_ACTION_WITHDRAW or
 * FIF_ACTION_KILL) for prefix p to a single import table.
 *
 * - UPDATE: requires attr with an ecommunity list that intersects
 *   import_table->rt_import_list and a usable nexthop; an existing entry
 *   with the same RD and peer is replaced by a newly created one.
 * - WITHDRAW: starts the withdraw timer on the matching entry.
 * - KILL: removes the matching entry immediately by invoking the ENCAP
 *   withdraw-timer handler directly.
 *
 * If the nexthop of the first route at the node changes as a result of an
 * update, the VPN nodes referenced by the ENCAP monitors at this node are
 * revisited.
 *
 * aux_prefix and label are accepted only to match the common import
 * function signature; this function does not read them.
 */
static void rfapiBgpInfoFilteredImportEncap(
	struct rfapi_import_table *import_table, int action, struct peer *peer,
	void *rfd, /* set for looped back routes */
	const struct prefix *p,
	const struct prefix *aux_prefix, /* Unused for encap routes */
	afi_t afi, struct prefix_rd *prd,
	struct attr *attr, /* part of bgp_path_info */
	uint8_t type,	   /* part of bgp_path_info */
	uint8_t sub_type,  /* part of bgp_path_info */
	uint32_t *label)   /* part of bgp_path_info */
{
	struct agg_table *rt = NULL;
	struct agg_node *rn;
	struct bgp_path_info *info_new;
	struct bgp_path_info *bpi;
	struct bgp_path_info *next;
	char buf[BUFSIZ];

	/* nexthop of the first bpi at the node, before and after the change */
	struct prefix p_firstbpi_old;
	struct prefix p_firstbpi_new;
	int replacing = 0; /* set when an existing bpi was detached and freed */
	const char *action_str = NULL;
	struct prefix un_prefix;

	struct bgp *bgp;
	bgp = bgp_get_default(); /* assume 1 instance for now */

	/* action_str is used only for the debug output below */
	switch (action) {
	case FIF_ACTION_UPDATE:
		action_str = "update";
		break;
	case FIF_ACTION_WITHDRAW:
		action_str = "withdraw";
		break;
	case FIF_ACTION_KILL:
		action_str = "kill";
		break;
	default:
		assert(0);
		break;
	}

	vnc_zlog_debug_verbose(
		"%s: entry: %s: prefix %s/%d", __func__, action_str,
		inet_ntop(p->family, &p->u.prefix, buf, sizeof(buf)),
		p->prefixlen);

	memset(&p_firstbpi_old, 0, sizeof(p_firstbpi_old));
	memset(&p_firstbpi_new, 0, sizeof(p_firstbpi_new));

	if (action == FIF_ACTION_UPDATE) {
		/*
		 * Compare rt lists. If no intersection, don't import this route
		 * On a withdraw, peer and RD are sufficient to determine if
		 * we should act.
		 */
		if (!attr || !bgp_attr_get_ecommunity(attr)) {

			vnc_zlog_debug_verbose(
				"%s: attr, extra, or ecommunity missing, not importing",
				__func__);
			return;
		}
#ifdef RFAPI_REQUIRE_ENCAP_BEEC
		if (!rfapiEcommunitiesMatchBeec(
			    bgp_attr_get_ecommunity(attr))) {
			vnc_zlog_debug_verbose(
				"%s: it=%p: no match for BGP Encapsulation ecommunity",
				__func__, import_table);
			return;
		}
#endif
		if (!rfapiEcommunitiesIntersect(
			    import_table->rt_import_list,
			    bgp_attr_get_ecommunity(attr))) {

			vnc_zlog_debug_verbose(
				"%s: it=%p: no ecommunity intersection",
				__func__, import_table);
			return;
		}

		/*
		 * Updates must also have a nexthop address
		 */
		memset(&un_prefix, 0,
		       sizeof(un_prefix)); /* keep valgrind happy */
		if (rfapiGetNexthop(attr, &un_prefix)) {
			vnc_zlog_debug_verbose("%s: missing nexthop address",
					       __func__);
			return;
		}
	}

	/*
	 * Figure out which radix tree the route would go into
	 */
	switch (afi) {
	case AFI_IP:
	case AFI_IP6:
		rt = import_table->imported_encap[afi];
		break;

	case AFI_UNSPEC:
	case AFI_L2VPN:
	case AFI_MAX:
		flog_err(EC_LIB_DEVELOPMENT, "%s: bad afi %d", __func__, afi);
		return;
	}

	/*
	 * agg_node_lookup returns a node only if there is at least
	 * one route attached.
	 */
	rn = agg_node_lookup(rt, p);

#ifdef DEBUG_ENCAP_MONITOR
	vnc_zlog_debug_verbose("%s: initial encap lookup(it=%p) rn=%p",
			       __func__, import_table, rn);
#endif

	if (rn) {

		RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, 1);
		agg_unlock_node(rn); /* undo lock in agg_node_lookup */


		/*
		 * capture nexthop of first bpi
		 */
		if (rn->info) {
			rfapiNexthop2Prefix(
				((struct bgp_path_info *)(rn->info))->attr,
				&p_firstbpi_old);
		}

		for (bpi = rn->info; bpi; bpi = bpi->next) {

			/*
			 * Does this bgp_path_info refer to the same route
			 * as we are trying to add?
			 */
			vnc_zlog_debug_verbose("%s: comparing BPI %p", __func__,
					       bpi);


			/*
			 * Compare RDs
			 *
			 * RD of import table bpi is in
			 * bpi->extra->vnc.import.rd RD of info_orig is in prd
			 */
			if (!bpi->extra) {
				vnc_zlog_debug_verbose("%s: no bpi->extra",
						       __func__);
				continue;
			}
			if (prefix_cmp(
				    (struct prefix *)&bpi->extra->vnc.import.rd,
				    (struct prefix *)prd)) {

				vnc_zlog_debug_verbose("%s: prd does not match",
						       __func__);
				continue;
			}

			/*
			 * Compare peers
			 */
			if (bpi->peer != peer) {
				vnc_zlog_debug_verbose(
					"%s: peer does not match", __func__);
				continue;
			}

			vnc_zlog_debug_verbose("%s: found matching bpi",
					       __func__);

			/* Same route. Delete this bpi, replace with new one */

			if (action == FIF_ACTION_WITHDRAW) {

				vnc_zlog_debug_verbose(
					"%s: withdrawing at prefix %pRN",
					__func__, rn);

				rfapiBiStartWithdrawTimer(
					import_table, rn, bpi, afi, SAFI_ENCAP,
					rfapiWithdrawTimerEncap);

			} else {
				vnc_zlog_debug_verbose(
					"%s: %s at prefix %pRN", __func__,
					((action == FIF_ACTION_KILL)
						 ? "killing"
						 : "replacing"),
					rn);

				/*
				 * If this route is waiting to be deleted
				 * because of a previous withdraw, we must
				 * cancel its timer and release the control
				 * block that was attached to it.
				 */
				if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
				    && bpi->extra->vnc.import.timer) {
					struct rfapi_withdraw *wcb = THREAD_ARG(
						bpi->extra->vnc.import.timer);

					XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
					THREAD_OFF(
						bpi->extra->vnc.import.timer);
				}

				if (action == FIF_ACTION_UPDATE) {
					/* replace: drop old bpi, no export */
					rfapiBgpInfoDetach(rn, bpi);
					rfapiBgpInfoFree(bpi);
					replacing = 1;
				} else {
					/*
					 * Kill: do export stuff when removing
					 * bpi
					 */
					struct rfapi_withdraw *wcb;
					struct thread t;

					/*
					 * pretend we're an expiring timer
					 */
					wcb = XCALLOC(
						MTYPE_RFAPI_WITHDRAW,
						sizeof(struct rfapi_withdraw));
					wcb->info = bpi;
					wcb->node = rn;
					wcb->import_table = import_table;
					memset(&t, 0, sizeof(t));
					t.arg = wcb;
					rfapiWithdrawTimerEncap(
						&t); /* frees wcb */
				}
			}

			/* at most one entry matches RD + peer; stop here */
			break;
		}
	}

	if (rn)
		RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, replacing ? 1 : 0);

	/* withdraw and kill never add a route */
	if (action == FIF_ACTION_WITHDRAW || action == FIF_ACTION_KILL)
		return;

	/* label is not passed on for ENCAP routes */
	info_new =
		rfapiBgpInfoCreate(attr, peer, rfd, prd, type, sub_type, NULL);

	if (rn) {
		if (!replacing)
			agg_lock_node(rn); /* incr ref count for new BPI */
	} else {
		rn = agg_node_get(rt, p);
	}

	vnc_zlog_debug_verbose("%s: (afi=%d, rn=%p) inserting at prefix %pRN",
			       __func__, afi, rn, rn);

	rfapiBgpInfoAttachSorted(rn, info_new, afi, SAFI_ENCAP);

	/*
	 * Delete holddown routes from same NVE. See details in
	 * rfapiBgpInfoFilteredImportVPN()
	 */
	for (bpi = info_new->next; bpi; bpi = next) {

		struct prefix pfx_un;
		int un_match = 0;

		/*
		 * fetch the successor first; presumably bpi can be removed
		 * from the list by rfapiExpireEncapNow() below
		 */
		next = bpi->next;
		if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED))
			continue;

		/*
		 * We already match the VN address (it is the prefix
		 * of the route node)
		 */

		if (!rfapiGetNexthop(bpi->attr, &pfx_un)
		    && prefix_same(&pfx_un, &un_prefix)) {

			un_match = 1;
		}

		if (!un_match)
			continue;

		vnc_zlog_debug_verbose(
			"%s: removing holddown bpi matching NVE of new route",
			__func__);
		if (bpi->extra->vnc.import.timer) {
			struct rfapi_withdraw *wcb =
				THREAD_ARG(bpi->extra->vnc.import.timer);

			XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
			THREAD_OFF(bpi->extra->vnc.import.timer);
		}
		rfapiExpireEncapNow(import_table, rn, bpi);
	}

	/* nexthop of whichever bpi is first at the node now */
	rfapiNexthop2Prefix(((struct bgp_path_info *)(rn->info))->attr,
			    &p_firstbpi_new);

	/*
	 * If the nexthop address of the selected Encap route (i.e.,
	 * the UN address) has changed, then we must update the VPN
	 * routes that refer to this Encap route and possibly force
	 * rfapi callbacks.
	 */
	if (rfapiAttrNexthopAddrDifferent(&p_firstbpi_old, &p_firstbpi_new)) {

		struct rfapi_monitor_encap *m;
		struct rfapi_monitor_encap *mnext;

		struct agg_node *referenced_vpn_prefix;

		/*
		 * Optimized approach: build radix tree on the fly to
		 * hold list of VPN nodes referenced by the ENCAP monitors
		 *
		 * The nodes in this table correspond to prefixes of VPN routes.
		 * The "info" pointer of the node points to a chain of
		 * struct rfapi_monitor_encap, each of which refers to a
		 * specific VPN node.
		 */
		struct agg_table *referenced_vpn_table;

		referenced_vpn_table = agg_table_init();

		/*
		 * iterate over the set of monitors at this ENCAP node.
		 */
#ifdef DEBUG_ENCAP_MONITOR
		vnc_zlog_debug_verbose("%s: examining monitors at rn=%p",
				       __func__, rn);
#endif
		for (m = RFAPI_MONITOR_ENCAP(rn); m; m = m->next) {
			const struct prefix *p;

			/*
			 * For each referenced bpi/route, copy the ENCAP route's
			 * nexthop to the VPN route's cached UN address field
			 * and set
			 * the address family of the cached UN address field.
			 */
			rfapiCopyUnEncap2VPN(info_new, m->bpi);
			if (!CHECK_FLAG(m->bpi->flags, BGP_PATH_VALID)) {
				SET_FLAG(m->bpi->flags, BGP_PATH_VALID);
				if (VALID_INTERIOR_TYPE(m->bpi->type))
					RFAPI_MONITOR_EXTERIOR(m->node)
						->valid_interior_count++;
				vnc_import_bgp_exterior_add_route_interior(
					bgp, import_table, m->node, m->bpi);
			}

			/*
			 * Build a list of unique VPN nodes referenced by these
			 * monitors.
			 *
			 * There could be more than one VPN node here with a
			 * given
			 * prefix. Those are currently in an unsorted linear
			 * list
			 * per prefix.
			 */
			p = agg_node_get_prefix(m->node);
			referenced_vpn_prefix =
				agg_node_get(referenced_vpn_table, p);
			assert(referenced_vpn_prefix);
			for (mnext = referenced_vpn_prefix->info; mnext;
			     mnext = mnext->next) {

				if (mnext->node == m->node)
					break;
			}

			if (mnext) {
				/*
				 * already have an entry for this VPN node
				 */
				agg_unlock_node(referenced_vpn_prefix);
			} else {
				/* new VPN node: push it on this prefix's chain */
				mnext = XCALLOC(
					MTYPE_RFAPI_MONITOR_ENCAP,
					sizeof(struct rfapi_monitor_encap));
				mnext->node = m->node;
				mnext->next = referenced_vpn_prefix->info;
				referenced_vpn_prefix->info = mnext;
			}
		}

		/*
		 * for each VPN node referenced in the ENCAP monitors:
		 */
		for (referenced_vpn_prefix =
			     agg_route_top(referenced_vpn_table);
		     referenced_vpn_prefix;
		     referenced_vpn_prefix =
			     agg_route_next(referenced_vpn_prefix)) {

			/* drain the chain, freeing each temporary entry */
			while ((m = referenced_vpn_prefix->info)) {

				struct agg_node *n;

				rfapiMonitorMoveLonger(m->node);
				for (n = m->node; n; n = agg_node_parent(n)) {
					// rfapiDoRouteCallback(import_table, n,
					// NULL);
				}
				rfapiMonitorItNodeChanged(import_table, m->node,
							  NULL);

				referenced_vpn_prefix->info = m->next;
				agg_unlock_node(referenced_vpn_prefix);
				XFREE(MTYPE_RFAPI_MONITOR_ENCAP, m);
			}
		}
		agg_table_finish(referenced_vpn_table);
	}

	RFAPI_CHECK_REFCOUNT(rn, SAFI_ENCAP, 0);
}
3297 static void rfapiExpireVpnNow(struct rfapi_import_table
*it
,
3298 struct agg_node
*rn
, struct bgp_path_info
*bpi
,
3301 struct rfapi_withdraw
*wcb
;
3305 * pretend we're an expiring timer
3307 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
3310 wcb
->import_table
= it
;
3311 wcb
->lockoffset
= lockoffset
;
3312 memset(&t
, 0, sizeof(t
));
3314 rfapiWithdrawTimerVPN(&t
); /* frees wcb */
/*
 * import a bgp_path_info if its route target list intersects with the
 * import table's route target list
 *
 * Applies one VPN-SAFI event (FIF_ACTION_UPDATE, FIF_ACTION_WITHDRAW or
 * FIF_ACTION_KILL) for prefix p to a single import table.
 *
 * - UPDATE: requires attr with an ecommunity list and a usable nexthop.
 *   The route-target intersection test is skipped when import_table is
 *   the CE table (bgp->rfapi->it_ce). An existing entry found by
 *   (RD, peer, aux_prefix) is replaced.
 * - WITHDRAW: puts the matching entry into holddown via the withdraw
 *   timer; in the CE table a withdraw is converted to a kill.
 * - KILL: removes the matching entry immediately.
 *
 * aux_prefix is stored in the new bpi only when afi is AFI_L2VPN.
 */
void rfapiBgpInfoFilteredImportVPN(
	struct rfapi_import_table *import_table, int action, struct peer *peer,
	void *rfd, /* set for looped back routes */
	const struct prefix *p,
	const struct prefix *aux_prefix, /* AFI_L2VPN: optional IP */
	afi_t afi, struct prefix_rd *prd,
	struct attr *attr, /* part of bgp_path_info */
	uint8_t type,	   /* part of bgp_path_info */
	uint8_t sub_type,  /* part of bgp_path_info */
	uint32_t *label)   /* part of bgp_path_info */
{
	struct agg_table *rt = NULL;
	struct agg_node *rn;
	struct agg_node *n;
	struct bgp_path_info *info_new;
	struct bgp_path_info *bpi;
	struct bgp_path_info *next;
	char buf[BUFSIZ];
	struct prefix vn_prefix; /* nexthop of the VPN route (VN address) */
	struct prefix un_prefix; /* UN address of the new route, if known */
	int un_prefix_valid = 0;
	struct agg_node *ern;
	int replacing = 0; /* set when an existing bpi was detached and freed */
	int original_had_routes = 0;
	struct prefix original_nexthop;
	const char *action_str = NULL;
	int is_it_ce = 0;

	struct bgp *bgp;
	bgp = bgp_get_default(); /* assume 1 instance for now */

	/* action_str is used only for the debug output below */
	switch (action) {
	case FIF_ACTION_UPDATE:
		action_str = "update";
		break;
	case FIF_ACTION_WITHDRAW:
		action_str = "withdraw";
		break;
	case FIF_ACTION_KILL:
		action_str = "kill";
		break;
	default:
		assert(0);
		break;
	}

	if (import_table == bgp->rfapi->it_ce)
		is_it_ce = 1;

	vnc_zlog_debug_verbose("%s: entry: %s%s: prefix %s/%d: it %p, afi %s",
			       __func__, (is_it_ce ? "CE-IT " : ""), action_str,
			       rfapi_ntop(p->family, &p->u.prefix, buf, BUFSIZ),
			       p->prefixlen, import_table, afi2str(afi));

	VNC_ITRCCK;

	/*
	 * Compare rt lists. If no intersection, don't import this route
	 * On a withdraw, peer and RD are sufficient to determine if
	 * we should act.
	 */
	if (action == FIF_ACTION_UPDATE) {
		if (!attr || !bgp_attr_get_ecommunity(attr)) {

			vnc_zlog_debug_verbose(
				"%s: attr, extra, or ecommunity missing, not importing",
				__func__);
			return;
		}
		/* the CE table takes every route regardless of RT list */
		if ((import_table != bgp->rfapi->it_ce) &&
		    !rfapiEcommunitiesIntersect(
			    import_table->rt_import_list,
			    bgp_attr_get_ecommunity(attr))) {

			vnc_zlog_debug_verbose(
				"%s: it=%p: no ecommunity intersection",
				__func__, import_table);
			return;
		}

		memset(&vn_prefix, 0,
		       sizeof(vn_prefix)); /* keep valgrind happy */
		if (rfapiGetNexthop(attr, &vn_prefix)) {
			/* missing nexthop address would be a bad, bad thing */
			vnc_zlog_debug_verbose("%s: missing nexthop", __func__);
			return;
		}
	}

	/*
	 * Figure out which radix tree the route would go into
	 */
	switch (afi) {
	case AFI_IP:
	case AFI_IP6:
	case AFI_L2VPN:
		rt = import_table->imported_vpn[afi];
		break;

	case AFI_UNSPEC:
	case AFI_MAX:
		flog_err(EC_LIB_DEVELOPMENT, "%s: bad afi %d", __func__, afi);
		return;
	}

	/* clear it */
	memset(&original_nexthop, 0, sizeof(original_nexthop));

	/*
	 * agg_node_lookup returns a node only if there is at least
	 * one route attached.
	 */
	rn = agg_node_lookup(rt, p);

	vnc_zlog_debug_verbose("%s: rn=%p", __func__, rn);

	if (rn) {

		RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 1);
		agg_unlock_node(rn); /* undo lock in agg_node_lookup */

		if (rn->info)
			original_had_routes = 1;

		if (VNC_DEBUG(VERBOSE)) {
			vnc_zlog_debug_verbose("%s: showing IT node on entry",
					       __func__);
			rfapiShowItNode(NULL, rn); /* debug */
		}

		/*
		 * Look for same route (will have same RD and peer)
		 */
		bpi = rfapiItBiIndexSearch(rn, prd, peer, aux_prefix);

		if (bpi) {

			/*
			 * This was an old test when we iterated over the
			 * BPIs linearly. Since we're now looking up with
			 * RD and peer, comparing types should not be
			 * needed. Changed to assertion.
			 *
			 * Compare types. Doing so prevents a RFP-originated
			 * route from matching an imported route, for example.
			 */
			if (VNC_DEBUG(VERBOSE) && bpi->type != type)
				/* should be handled by RDs, but warn for now */
				zlog_warn("%s: type mismatch! (bpi=%d, arg=%d)",
					  __func__, bpi->type, type);

			vnc_zlog_debug_verbose("%s: found matching bpi",
					       __func__);

			/*
			 * In the special CE table, withdrawals occur without
			 * holddown
			 */
			if (import_table == bgp->rfapi->it_ce) {
				vnc_direct_bgp_del_route_ce(bgp, rn, bpi);
				if (action == FIF_ACTION_WITHDRAW)
					action = FIF_ACTION_KILL;
			}

			if (action == FIF_ACTION_WITHDRAW) {

				int washolddown = CHECK_FLAG(bpi->flags,
							     BGP_PATH_REMOVED);

				vnc_zlog_debug_verbose(
					"%s: withdrawing at prefix %pRN%s",
					__func__, rn,
					(washolddown
						 ? " (already being withdrawn)"
						 : ""));

				VNC_ITRCCK;
				/* start holddown only once per route */
				if (!washolddown) {
					rfapiBiStartWithdrawTimer(
						import_table, rn, bpi, afi,
						SAFI_MPLS_VPN,
						rfapiWithdrawTimerVPN);

					RFAPI_UPDATE_ITABLE_COUNT(
						bpi, import_table, afi, -1);
					import_table->holddown_count[afi] += 1;
				}
				VNC_ITRCCK;
			} else {
				vnc_zlog_debug_verbose(
					"%s: %s at prefix %pRN", __func__,
					((action == FIF_ACTION_KILL)
						 ? "killing"
						 : "replacing"),
					rn);

				/*
				 * If this route is waiting to be deleted
				 * because of a previous withdraw, we must
				 * cancel its timer and undo the counter
				 * changes made when holddown started.
				 */
				if (CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED)
				    && bpi->extra->vnc.import.timer) {
					struct rfapi_withdraw *wcb = THREAD_ARG(
						bpi->extra->vnc.import.timer);

					XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
					THREAD_OFF(
						bpi->extra->vnc.import.timer);

					import_table->holddown_count[afi] -= 1;
					RFAPI_UPDATE_ITABLE_COUNT(
						bpi, import_table, afi, 1);
				}
				/*
				 * decrement remote count (if route is remote)
				 * because
				 * we are going to remove it below
				 */
				RFAPI_UPDATE_ITABLE_COUNT(bpi, import_table,
							  afi, -1);
				if (action == FIF_ACTION_UPDATE) {
					replacing = 1;

					/*
					 * make copy of original nexthop so we
					 * can see if it changed
					 */
					rfapiGetNexthop(bpi->attr,
							&original_nexthop);

					/*
					 * remove bpi without doing any export
					 * processing
					 */
					if (CHECK_FLAG(bpi->flags,
						       BGP_PATH_VALID)
					    && VALID_INTERIOR_TYPE(bpi->type))
						RFAPI_MONITOR_EXTERIOR(rn)
							->valid_interior_count--;
					rfapiItBiIndexDel(rn, bpi);
					rfapiBgpInfoDetach(rn, bpi);
					rfapiMonitorEncapDelete(bpi);
					vnc_import_bgp_exterior_del_route_interior(
						bgp, import_table, rn, bpi);
					rfapiBgpInfoFree(bpi);
				} else {
					/* Kill */
					/*
					 * remove bpi and do export processing
					 */
					import_table->holddown_count[afi] += 1;
					rfapiExpireVpnNow(import_table, rn, bpi,
							  1);
				}
			}
		}
	}

	if (rn)
		RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, replacing ? 1 : 0);

	/* withdraw and kill never add a route */
	if (action == FIF_ACTION_WITHDRAW || action == FIF_ACTION_KILL) {
		VNC_ITRCCK;
		return;
	}

	info_new =
		rfapiBgpInfoCreate(attr, peer, rfd, prd, type, sub_type, label);

	/*
	 * lookup un address in encap table
	 */
	ern = agg_node_match(import_table->imported_encap[afi], &vn_prefix);
	if (ern) {
		rfapiCopyUnEncap2VPN(ern->info, info_new);
		agg_unlock_node(ern); /* undo lock in route_note_match */
	} else {
		/* Not a big deal, just means VPN route got here first */
		vnc_zlog_debug_verbose("%s: no encap route for vn addr %pFX",
				       __func__, &vn_prefix);
		info_new->extra->vnc.import.un_family = AF_UNSPEC;
	}

	if (rn) {
		if (!replacing)
			agg_lock_node(rn);
	} else {
		/*
		 * No need to increment reference count, so only "get"
		 * if the node is not there already
		 */
		rn = agg_node_get(rt, p);
	}

	/*
	 * For ethernet routes, if there is an accompanying IP address,
	 * save it in the bpi
	 */
	if ((AFI_L2VPN == afi) && aux_prefix) {

		vnc_zlog_debug_verbose("%s: setting BPI's aux_prefix",
				       __func__);
		info_new->extra->vnc.import.aux_prefix = *aux_prefix;
	}

	vnc_zlog_debug_verbose("%s: inserting bpi %p at prefix %pRN #%d",
			       __func__, info_new, rn,
			       agg_node_get_lock_count(rn));

	rfapiBgpInfoAttachSorted(rn, info_new, afi, SAFI_MPLS_VPN);
	rfapiItBiIndexAdd(rn, info_new);
	/* a zero return here means a UN address is available for the route */
	if (!rfapiGetUnAddrOfVpnBi(info_new, NULL)) {
		if (VALID_INTERIOR_TYPE(info_new->type))
			RFAPI_MONITOR_EXTERIOR(rn)->valid_interior_count++;
		SET_FLAG(info_new->flags, BGP_PATH_VALID);
	}
	RFAPI_UPDATE_ITABLE_COUNT(info_new, import_table, afi, 1);
	vnc_import_bgp_exterior_add_route_interior(bgp, import_table, rn,
						   info_new);

	if (import_table == bgp->rfapi->it_ce)
		vnc_direct_bgp_add_route_ce(bgp, rn, info_new);

	if (VNC_DEBUG(VERBOSE)) {
		vnc_zlog_debug_verbose("%s: showing IT node", __func__);
		rfapiShowItNode(NULL, rn); /* debug */
	}

	rfapiMonitorEncapAdd(import_table, &vn_prefix, rn, info_new);

	if (!rfapiGetUnAddrOfVpnBi(info_new, &un_prefix)) {

		/*
		 * if we have a valid UN address (either via Encap route
		 * or via tunnel attribute), then we should attempt
		 * to move any monitors at less-specific nodes to this node
		 */
		rfapiMonitorMoveLonger(rn);

		un_prefix_valid = 1;
	}

	/*
	 * 101129 Enhancement: if we add a route (implication: it is not
	 * in holddown), delete all other routes from this nve at this
	 * node that are in holddown, regardless of peer.
	 *
	 * Reasons it's OK to do that:
	 *
	 * - if the holddown route being deleted originally came from BGP VPN,
	 *   it is already gone from BGP (implication of holddown), so there
	 *   won't be any added inconsistency with the BGP RIB.
	 *
	 * - once a fresh route is added at a prefix, any routes in holddown
	 *   at that prefix will not show up in RFP responses, so deleting
	 *   the holddown routes won't affect the contents of responses.
	 *
	 * - lifetimes are supposed to be consistent, so there should not
	 *   be a case where the fresh route has a shorter lifetime than
	 *   the holddown route, so we don't expect the fresh route to
	 *   disappear and complete its holddown time before the existing
	 *   holddown routes time out. Therefore, we won't have a situation
	 *   where we expect the existing holddown routes to be hidden and
	 *   then to reappear sometime later (as holddown routes) in a
	 *   RFP response.
	 *
	 * Among other things, this would enable us to skirt the problem
	 * of local holddown routes that refer to NVE descriptors that
	 * have already been closed (if the same NVE triggers a subsequent
	 * rfapi_open(), the new peer is different and doesn't match the
	 * peer of the holddown route, so the stale holddown route still
	 * hangs around until it times out instead of just being replaced
	 * by the fresh route).
	 */
	/*
	 * We know that the new bpi will have been inserted before any routes
	 * in holddown, so we can skip any that came before it
	 */
	for (bpi = info_new->next; bpi; bpi = next) {

		struct prefix pfx_vn;
		struct prefix pfx_un;
		int un_match = 0;
		int remote_peer_match = 0;

		/*
		 * fetch the successor first; presumably bpi can be removed
		 * from the list by rfapiExpireVpnNow() below
		 */
		next = bpi->next;

		/*
		 * Must be holddown
		 */
		if (!CHECK_FLAG(bpi->flags, BGP_PATH_REMOVED))
			continue;

		/*
		 * Must match VN address (nexthop of VPN route)
		 */
		if (rfapiGetNexthop(bpi->attr, &pfx_vn))
			continue;
		if (!prefix_same(&pfx_vn, &vn_prefix))
			continue;

		if (un_prefix_valid && /* new route UN addr */
		    !rfapiGetUnAddrOfVpnBi(bpi, &pfx_un)
		    && /* old route UN addr */
		    prefix_same(&pfx_un, &un_prefix)) { /* compare */
			un_match = 1;
		}
		if (!RFAPI_LOCAL_BI(bpi) && !RFAPI_LOCAL_BI(info_new)
		    && sockunion_same(&bpi->peer->su, &info_new->peer->su)) {
			/* old & new are both remote, same peer */
			remote_peer_match = 1;
		}

		/* keep the holddown route unless it matches on UN or peer */
		if (!un_match && !remote_peer_match)
			continue;

		vnc_zlog_debug_verbose(
			"%s: removing holddown bpi matching NVE of new route",
			__func__);
		if (bpi->extra->vnc.import.timer) {
			struct rfapi_withdraw *wcb =
				THREAD_ARG(bpi->extra->vnc.import.timer);

			XFREE(MTYPE_RFAPI_WITHDRAW, wcb);
			THREAD_OFF(bpi->extra->vnc.import.timer);
		}
		rfapiExpireVpnNow(import_table, rn, bpi, 0);
	}

	if (!original_had_routes) {
		/*
		 * We went from 0 usable routes to 1 usable route. Perform the
		 * "Adding a Route" export process.
		 */
		vnc_direct_bgp_add_prefix(bgp, import_table, rn);
		vnc_zebra_add_prefix(bgp, import_table, rn);
	} else {
		/*
		 * Check for nexthop change event
		 * Note: the prefix_same() test below detects two situations:
		 * 1. route is replaced, new route has different nexthop
		 * 2. new route is added (original_nexthop is 0)
		 */
		struct prefix new_nexthop;

		rfapiGetNexthop(attr, &new_nexthop);
		if (!prefix_same(&original_nexthop, &new_nexthop)) {
			/*
			 * nexthop change event
			 * vnc_direct_bgp_add_prefix() will recompute VN addr
			 * ecommunity
			 */
			vnc_direct_bgp_add_prefix(bgp, import_table, rn);
		}
	}

	if (!(bgp->rfapi_cfg->flags & BGP_VNC_CONFIG_CALLBACK_DISABLE)) {
		for (n = rn; n; n = agg_node_parent(n)) {
			// rfapiDoRouteCallback(import_table, n, NULL);
		}
		rfapiMonitorItNodeChanged(import_table, rn, NULL);
	}
	RFAPI_CHECK_REFCOUNT(rn, SAFI_MPLS_VPN, 0);
	VNC_ITRCCK;
}
3790 static void rfapiBgpInfoFilteredImportBadSafi(
3791 struct rfapi_import_table
*import_table
, int action
, struct peer
*peer
,
3792 void *rfd
, /* set for looped back routes */
3793 const struct prefix
*p
,
3794 const struct prefix
*aux_prefix
, /* AFI_L2VPN: optional IP */
3795 afi_t afi
, struct prefix_rd
*prd
,
3796 struct attr
*attr
, /* part of bgp_path_info */
3797 uint8_t type
, /* part of bgp_path_info */
3798 uint8_t sub_type
, /* part of bgp_path_info */
3799 uint32_t *label
) /* part of bgp_path_info */
3801 vnc_zlog_debug_verbose("%s: Error, bad safi", __func__
);
3804 static rfapi_bi_filtered_import_f
*
3805 rfapiBgpInfoFilteredImportFunction(safi_t safi
)
3809 return rfapiBgpInfoFilteredImportVPN
;
3812 return rfapiBgpInfoFilteredImportEncap
;
3816 case SAFI_MULTICAST
:
3818 case SAFI_LABELED_UNICAST
:
3822 flog_err(EC_LIB_DEVELOPMENT
, "%s: bad safi %d", __func__
, safi
);
3823 return rfapiBgpInfoFilteredImportBadSafi
;
3826 assert(!"Reached end of function when we were not expecting to");
3829 void rfapiProcessUpdate(struct peer
*peer
,
3830 void *rfd
, /* set when looped from RFP/RFAPI */
3831 const struct prefix
*p
, struct prefix_rd
*prd
,
3832 struct attr
*attr
, afi_t afi
, safi_t safi
, uint8_t type
,
3833 uint8_t sub_type
, uint32_t *label
)
3837 struct rfapi_import_table
*it
;
3838 int has_ip_route
= 1;
3841 bgp
= bgp_get_default(); /* assume 1 instance for now */
3848 * look at high-order byte of RD. FF means MAC
3849 * address is present (VNC L2VPN)
3851 if ((safi
== SAFI_MPLS_VPN
)
3852 && (decode_rd_type(prd
->val
) == RD_TYPE_VNC_ETH
)) {
3853 struct prefix pfx_mac_buf
;
3854 struct prefix pfx_nexthop_buf
;
3858 * Set flag if prefix and nexthop are the same - don't
3859 * add the route to normal IP-based import tables
3861 if (!rfapiGetNexthop(attr
, &pfx_nexthop_buf
)) {
3862 if (!prefix_cmp(&pfx_nexthop_buf
, p
)) {
3867 memset(&pfx_mac_buf
, 0, sizeof(pfx_mac_buf
));
3868 pfx_mac_buf
.family
= AF_ETHERNET
;
3869 pfx_mac_buf
.prefixlen
= 48;
3870 memcpy(&pfx_mac_buf
.u
.prefix_eth
.octet
, prd
->val
+ 2, 6);
3873 * Find rt containing LNI (Logical Network ID), which
3874 * _should_ always be present when mac address is present
3876 rc
= rfapiEcommunityGetLNI(bgp_attr_get_ecommunity(attr
), &lni
);
3878 vnc_zlog_debug_verbose(
3879 "%s: rfapiEcommunityGetLNI returned %d, lni=%d, attr=%p",
3880 __func__
, rc
, lni
, attr
);
3882 it
= rfapiMacImportTableGet(bgp
, lni
);
3884 rfapiBgpInfoFilteredImportVPN(
3885 it
, FIF_ACTION_UPDATE
, peer
, rfd
,
3886 &pfx_mac_buf
, /* prefix */
3887 p
, /* aux prefix: IP addr */
3888 AFI_L2VPN
, prd
, attr
, type
, sub_type
, label
);
3896 * Iterate over all import tables; do a filtered import
3897 * for the afi/safi combination
3899 for (it
= h
->imports
; it
; it
= it
->next
) {
3900 (*rfapiBgpInfoFilteredImportFunction(safi
))(
3901 it
, FIF_ACTION_UPDATE
, peer
, rfd
, p
, /* prefix */
3902 NULL
, afi
, prd
, attr
, type
, sub_type
, label
);
3905 if (safi
== SAFI_MPLS_VPN
) {
3906 vnc_direct_bgp_rh_add_route(bgp
, afi
, p
, peer
, attr
);
3907 rfapiBgpInfoFilteredImportVPN(
3908 bgp
->rfapi
->it_ce
, FIF_ACTION_UPDATE
, peer
, rfd
,
3910 NULL
, afi
, prd
, attr
, type
, sub_type
, label
);
3915 void rfapiProcessWithdraw(struct peer
*peer
, void *rfd
, const struct prefix
*p
,
3916 struct prefix_rd
*prd
, struct attr
*attr
, afi_t afi
,
3917 safi_t safi
, uint8_t type
, int kill
)
3921 struct rfapi_import_table
*it
;
3923 bgp
= bgp_get_default(); /* assume 1 instance for now */
3930 * look at high-order byte of RD. FF means MAC
3931 * address is present (VNC L2VPN)
3933 if (h
->import_mac
!= NULL
&& safi
== SAFI_MPLS_VPN
3934 && decode_rd_type(prd
->val
) == RD_TYPE_VNC_ETH
) {
3935 struct prefix pfx_mac_buf
;
3936 void *cursor
= NULL
;
3939 memset(&pfx_mac_buf
, 0, sizeof(pfx_mac_buf
));
3940 pfx_mac_buf
.family
= AF_ETHERNET
;
3941 pfx_mac_buf
.prefixlen
= 48;
3942 memcpy(&pfx_mac_buf
.u
.prefix_eth
, prd
->val
+ 2, 6);
3945 * withdraw does not contain attrs, so we don't have
3946 * access to the route's LNI, which would ordinarily
3947 * select the specific mac-based import table. Instead,
3948 * we must iterate over all mac-based tables and rely
3949 * on the RD to match.
3951 * If this approach is too slow, add an index where
3952 * key is {RD, peer} and value is the import table
3954 for (rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
3956 rc
== 0; rc
= skiplist_next(h
->import_mac
, NULL
,
3957 (void **)&it
, &cursor
)) {
3959 #ifdef DEBUG_L2_EXTRA
3960 vnc_zlog_debug_verbose(
3961 "%s: calling rfapiBgpInfoFilteredImportVPN(it=%p, afi=AFI_L2VPN)",
3965 rfapiBgpInfoFilteredImportVPN(
3967 (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
),
3968 peer
, rfd
, &pfx_mac_buf
, /* prefix */
3969 p
, /* aux_prefix: IP */
3970 AFI_L2VPN
, prd
, attr
, type
, 0,
3971 NULL
); /* sub_type & label unused for withdraw
3977 * XXX For the case where the withdraw involves an L2
3978 * route with no IP information, we rely on the lack
3979 * of RT-list intersection to filter out the withdraw
3980 * from the IP-based import tables below
3984 * Iterate over all import tables; do a filtered import
3985 * for the afi/safi combination
3988 for (it
= h
->imports
; it
; it
= it
->next
) {
3989 (*rfapiBgpInfoFilteredImportFunction(safi
))(
3990 it
, (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
),
3991 peer
, rfd
, p
, /* prefix */
3992 NULL
, afi
, prd
, attr
, type
, 0,
3993 NULL
); /* sub_type & label unused for withdraw */
3996 /* TBD the deletion should happen after the lifetime expires */
3997 if (safi
== SAFI_MPLS_VPN
)
3998 vnc_direct_bgp_rh_del_route(bgp
, afi
, p
, peer
);
4000 if (safi
== SAFI_MPLS_VPN
) {
4001 rfapiBgpInfoFilteredImportVPN(
4003 (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
), peer
,
4004 rfd
, p
, /* prefix */
4005 NULL
, afi
, prd
, attr
, type
, 0,
4006 NULL
); /* sub_type & label unused for withdraw */
/*
 * TBD optimized withdraw timer algorithm for case of many
 * routes expiring at the same time due to peer drop.
 */
/*
 * 1. Visit all BPIs in all ENCAP import tables.
 *
 *    a. If a bpi's peer is the failed peer, remove the bpi.
 *    b. If the removed ENCAP bpi was first in the list of
 *       BPIs at this ENCAP node, loop over all monitors
 *       at this node:
 *
 *       (1) for each ENCAP monitor, loop over all its
 *           VPN node monitors and set their RFAPI_MON_FLAG_NEEDCALLBACK
 *           flags.
 *
 * 2. Visit all BPIs in all VPN import tables.
 *    a. If a bpi's peer is the failed peer, remove the bpi.
 *    b. loop over all the VPN node monitors and set their
 *       RFAPI_MON_FLAG_NEEDCALLBACK flags
 *    c. If there are no BPIs left at this VPN node,
 *
 */
4035 /* surprise, this gets called from peer_delete(), from rfapi_close() */
4036 static void rfapiProcessPeerDownRt(struct peer
*peer
,
4037 struct rfapi_import_table
*import_table
,
4038 afi_t afi
, safi_t safi
)
4040 struct agg_node
*rn
;
4041 struct bgp_path_info
*bpi
;
4042 struct agg_table
*rt
= NULL
;
4043 void (*timer_service_func
)(struct thread
*) = NULL
;
4045 assert(afi
== AFI_IP
|| afi
== AFI_IP6
);
4051 rt
= import_table
->imported_vpn
[afi
];
4052 timer_service_func
= rfapiWithdrawTimerVPN
;
4055 rt
= import_table
->imported_encap
[afi
];
4056 timer_service_func
= rfapiWithdrawTimerEncap
;
4060 case SAFI_MULTICAST
:
4062 case SAFI_LABELED_UNICAST
:
4065 /* Suppress uninitialized variable warning */
4067 timer_service_func
= NULL
;
4071 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
4072 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
) {
4073 if (bpi
->peer
== peer
) {
4075 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)) {
4076 /* already in holddown, skip */
4080 if (safi
== SAFI_MPLS_VPN
) {
4081 RFAPI_UPDATE_ITABLE_COUNT(
4082 bpi
, import_table
, afi
, -1);
4083 import_table
->holddown_count
[afi
] += 1;
4085 rfapiBiStartWithdrawTimer(import_table
, rn
, bpi
,
4087 timer_service_func
);
4095 * This gets called when a peer connection drops. We have to remove
4096 * all the routes from this peer.
4098 * Current approach is crude. TBD Optimize by setting fewer timers and
4099 * grouping withdrawn routes so we can generate callbacks more
4102 void rfapiProcessPeerDown(struct peer
*peer
)
4106 struct rfapi_import_table
*it
;
4109 * If this peer is a "dummy" peer structure atached to a RFAPI
4110 * nve_descriptor, we don't need to walk the import tables
4111 * because the routes are already withdrawn by rfapi_close()
4113 if (CHECK_FLAG(peer
->flags
, PEER_FLAG_IS_RFAPI_HD
))
4117 * 1. Visit all BPIs in all ENCAP import tables.
4118 * Start withdraw timer on the BPIs that match peer.
4120 * 2. Visit All BPIs in all VPN import tables.
4121 * Start withdraw timer on the BPIs that match peer.
4124 bgp
= bgp_get_default(); /* assume 1 instance for now */
4131 for (it
= h
->imports
; it
; it
= it
->next
) {
4132 rfapiProcessPeerDownRt(peer
, it
, AFI_IP
, SAFI_ENCAP
);
4133 rfapiProcessPeerDownRt(peer
, it
, AFI_IP6
, SAFI_ENCAP
);
4134 rfapiProcessPeerDownRt(peer
, it
, AFI_IP
, SAFI_MPLS_VPN
);
4135 rfapiProcessPeerDownRt(peer
, it
, AFI_IP6
, SAFI_MPLS_VPN
);
4139 rfapiProcessPeerDownRt(peer
, h
->it_ce
, AFI_IP
, SAFI_MPLS_VPN
);
4140 rfapiProcessPeerDownRt(peer
, h
->it_ce
, AFI_IP6
, SAFI_MPLS_VPN
);
4145 * Import an entire RIB (for an afi/safi) to an import table RIB,
4146 * filtered according to the import table's RT list
4148 * TBD: does this function need additions to match rfapiProcessUpdate()
4149 * for, e.g., L2 handling?
4151 static void rfapiBgpTableFilteredImport(struct bgp
*bgp
,
4152 struct rfapi_import_table
*it
,
4153 afi_t afi
, safi_t safi
)
4155 struct bgp_dest
*dest1
;
4156 struct bgp_dest
*dest2
;
4158 /* Only these SAFIs have 2-level RIBS */
4159 assert(safi
== SAFI_MPLS_VPN
|| safi
== SAFI_ENCAP
);
4162 * Now visit all the rd nodes and the nodes of all the
4163 * route tables attached to them, and import the routes
4164 * if they have matching route targets
4166 for (dest1
= bgp_table_top(bgp
->rib
[afi
][safi
]); dest1
;
4167 dest1
= bgp_route_next(dest1
)) {
4169 if (bgp_dest_has_bgp_path_info_data(dest1
)) {
4171 for (dest2
= bgp_table_top(
4172 bgp_dest_get_bgp_table_info(dest1
));
4173 dest2
; dest2
= bgp_route_next(dest2
)) {
4175 struct bgp_path_info
*bpi
;
4177 for (bpi
= bgp_dest_get_bgp_path_info(dest2
);
4178 bpi
; bpi
= bpi
->next
) {
4181 if (CHECK_FLAG(bpi
->flags
,
4186 label
= decode_label(
4187 &bpi
->extra
->label
[0]);
4188 (*rfapiBgpInfoFilteredImportFunction(
4190 it
, /* which import table */
4191 FIF_ACTION_UPDATE
, bpi
->peer
,
4193 bgp_dest_get_prefix(dest2
),
4195 (struct prefix_rd
*)
4196 bgp_dest_get_prefix(
4198 bpi
->attr
, bpi
->type
,
4199 bpi
->sub_type
, &label
);
4207 /* per-bgp-instance rfapi data */
4208 struct rfapi
*bgp_rfapi_new(struct bgp
*bgp
)
4212 struct rfapi_rfp_cfg
*cfg
= NULL
;
4213 struct rfapi_rfp_cb_methods
*cbm
= NULL
;
4215 assert(bgp
->rfapi_cfg
== NULL
);
4217 h
= XCALLOC(MTYPE_RFAPI
, sizeof(struct rfapi
));
4219 for (afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
4220 h
->un
[afi
] = agg_table_init();
4224 * initialize the ce import table
4226 h
->it_ce
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
4227 sizeof(struct rfapi_import_table
));
4228 h
->it_ce
->imported_vpn
[AFI_IP
] = agg_table_init();
4229 h
->it_ce
->imported_vpn
[AFI_IP6
] = agg_table_init();
4230 h
->it_ce
->imported_encap
[AFI_IP
] = agg_table_init();
4231 h
->it_ce
->imported_encap
[AFI_IP6
] = agg_table_init();
4232 rfapiBgpTableFilteredImport(bgp
, h
->it_ce
, AFI_IP
, SAFI_MPLS_VPN
);
4233 rfapiBgpTableFilteredImport(bgp
, h
->it_ce
, AFI_IP6
, SAFI_MPLS_VPN
);
4236 * Set up work queue for deferred rfapi_close operations
4238 h
->deferred_close_q
=
4239 work_queue_new(bm
->master
, "rfapi deferred close");
4240 h
->deferred_close_q
->spec
.workfunc
= rfapi_deferred_close_workfunc
;
4241 h
->deferred_close_q
->spec
.data
= h
;
4243 h
->rfp
= rfp_start(bm
->master
, &cfg
, &cbm
);
4244 bgp
->rfapi_cfg
= bgp_rfapi_cfg_new(cfg
);
4246 h
->rfp_methods
= *cbm
;
/*
 * bgp_rfapi_destroy: release the resources held by an rfapi instance.
 *
 * Visible steps, in order:
 *  - tests bgp and h for NULL (the action taken is on a line not shown
 *    here; presumably an early return -- confirm)
 *  - frees the resolve_nve_nexthop skiplist when present
 *  - finishes the four agg tables of the CE import table
 *  - for each import table stored in the import_mac skiplist, calls
 *    rfapiImportTableFlush() and frees the table, then frees the skiplist
 *  - frees the deferred-close work queue
 *  - finishes every h->un[] table
 *  - frees h->it_ce and finally h itself
 *
 * NOTE(review): this listing omits some original lines (declarations of
 * afi, rc and the skiplist cursor, braces); comments describe only the
 * statements that are visible.
 */
4251 void bgp_rfapi_destroy(struct bgp
*bgp
, struct rfapi
*h
)
4255 if (bgp
== NULL
|| h
== NULL
)
4258 if (h
->resolve_nve_nexthop
) {
4259 skiplist_free(h
->resolve_nve_nexthop
);
4260 h
->resolve_nve_nexthop
= NULL
;
/* Tear down the agg tables created for the CE import table. */
4263 agg_table_finish(h
->it_ce
->imported_vpn
[AFI_IP
]);
4264 agg_table_finish(h
->it_ce
->imported_vpn
[AFI_IP6
]);
4265 agg_table_finish(h
->it_ce
->imported_encap
[AFI_IP
]);
4266 agg_table_finish(h
->it_ce
->imported_encap
[AFI_IP6
]);
4268 if (h
->import_mac
) {
4269 struct rfapi_import_table
*it
;
/* Walk import_mac; the loop continues while skiplist_next() returns 0. */
4274 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4276 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
/* Each import table is flushed and then freed before the list itself is freed. */
4279 rfapiImportTableFlush(it
);
4280 XFREE(MTYPE_RFAPI_IMPORTTABLE
, it
);
4282 skiplist_free(h
->import_mac
);
4283 h
->import_mac
= NULL
;
4286 work_queue_free_and_null(&h
->deferred_close_q
);
4291 for (afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
4292 agg_table_finish(h
->un
[afi
]);
/* The CE import table is freed before the rfapi structure that points to it. */
4295 XFREE(MTYPE_RFAPI_IMPORTTABLE
, h
->it_ce
);
4296 XFREE(MTYPE_RFAPI
, h
);
/*
 * rfapiImportTableRefAdd: find or create the import table for a given
 * route-target import list.
 *
 * Visible steps:
 *  - scans h->imports, testing each table's rt_import_list against
 *    rt_import_list with ecommunity_cmp()
 *  - logs the pointer of the table found by the scan
 *  - allocates a new struct rfapi_import_table, links it in front of
 *    h->imports, duplicates rt_import_list into it and creates its
 *    monitor_exterior_orphans skiplist
 *  - for every AFI, creates the vpn and encap agg tables, runs
 *    rfapiBgpTableFilteredImport() for them and calls
 *    vnc_import_bgp_exterior_redist_enable_it()
 *
 * NOTE(review): this listing omits some original lines. Not visible here:
 * how h and afi are declared/obtained, the condition under which a new
 * table is allocated (presumably only when the scan found none), how rfg
 * is used, any reference-count update suggested by the function name, and
 * the return statement (presumably "it") -- confirm against the full file.
 */
4299 struct rfapi_import_table
*
4300 rfapiImportTableRefAdd(struct bgp
*bgp
, struct ecommunity
*rt_import_list
,
4301 struct rfapi_nve_group_cfg
*rfg
)
4304 struct rfapi_import_table
*it
;
/* Look for an existing table with this RT import list. */
4310 for (it
= h
->imports
; it
; it
= it
->next
) {
4311 if (ecommunity_cmp(it
->rt_import_list
, rt_import_list
))
4315 vnc_zlog_debug_verbose("%s: matched it=%p", __func__
, it
);
/* Create a new table and link it in front of the existing list. */
4318 it
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
4319 sizeof(struct rfapi_import_table
));
4320 it
->next
= h
->imports
;
/* The table keeps its own copy of the RT list (ecommunity_dup). */
4323 it
->rt_import_list
= ecommunity_dup(rt_import_list
);
4325 it
->monitor_exterior_orphans
=
4326 skiplist_new(0, NULL
, prefix_free_lists
);
4329 * fill import route tables from RIBs
4331 * Potential area for optimization. If this occurs when
4332 * tables are large (e.g., the operator adds a nve group
4333 * with a new RT list to a running system), it could take
4337 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4339 it
->imported_vpn
[afi
] = agg_table_init();
4340 it
->imported_encap
[afi
] = agg_table_init();
/* Two filtered imports per AFI; the SAFI of the first call is on a line not shown. */
4342 rfapiBgpTableFilteredImport(bgp
, it
, afi
,
4344 rfapiBgpTableFilteredImport(bgp
, it
, afi
, SAFI_ENCAP
);
4346 vnc_import_bgp_exterior_redist_enable_it(bgp
, afi
, it
);
4356 * skiplist element free function
/*
 * delete_rem_pfx_na_free: element destructor passed to skiplist_new() for
 * the uniq_*_nves lists built by rfapiDeleteRemotePrefixes().
 *
 * na	element being released; treated as a struct rfapi_nve_addr whose
 *	info field holds a pointer to a uint32_t counter
 *
 * NOTE(review): the statement that uses pCounter is on a line not shown in
 * this listing. A comment further down the file says this function "counts
 * as it deletes", so presumably *pCounter is incremented before the element
 * is freed -- confirm against the full file.
 */
4358 static void delete_rem_pfx_na_free(void *na
)
4360 uint32_t *pCounter
= ((struct rfapi_nve_addr
*)na
)->info
;
4363 XFREE(MTYPE_RFAPI_NVE_ADDR
, na
);
4367 * Common deleter for IP and MAC import tables
/*
 * rfapiDeleteRemotePrefixesIt: delete matching routes from one import table.
 *
 * bgp			bgp instance, passed on to vnc_direct_bgp_rh_del_route()
 * it			import table whose imported_vpn[] tables are scanned
 * un			optional filter, matched with prefix_match() against
 *			the address obtained from rfapiGetUnAddrOfVpnBi()
 * vn			optional filter, matched with prefix_match() against
 *			the address obtained from rfapiGetNexthop()
 * p			optional filter on the route prefix: AFIs other than
 *			family2afi(p->family) and nodes for which
 *			prefix_cmp(p, node prefix) is non-zero are tested
 *			against it
 * delete_active	)  select which routes are eligible; routes flagged
 * delete_holddown	)  BGP_PATH_REMOVED are tested against delete_holddown
 * pARcount..pHHcount	counter outputs
 * uniq_active_nves	)  skiplists keyed by struct rfapi_nve_addr; one of
 * uniq_holddown_nves	)  them is searched/extended per deleted route
 *
 * For each route that passes the filters the visible code: cancels a
 * pending withdraw timer (freeing its rfapi_withdraw block) when the route
 * is flagged BGP_PATH_REMOVED, records the route's NVE in the matching
 * uniq list when both addresses were obtained, calls
 * vnc_direct_bgp_rh_del_route(), moves the route from the table's regular
 * count to holddown_count[afi], and calls rfapiExpireVpnNow().
 *
 * NOTE(review): this listing omits many original lines (local declarations,
 * the continue statements after each filter test, how is_active, qct_valid
 * and qpt_valid are derived, and the statements that update the p*count
 * outputs). The description above covers only what is visible; verify any
 * detail against the full file before changing this function.
 */
4369 static void rfapiDeleteRemotePrefixesIt(
4370 struct bgp
*bgp
, struct rfapi_import_table
*it
, struct prefix
*un
,
4371 struct prefix
*vn
, struct prefix
*p
, int delete_active
,
4372 int delete_holddown
, uint32_t *pARcount
, uint32_t *pAHcount
,
4373 uint32_t *pHRcount
, uint32_t *pHHcount
,
4374 struct skiplist
*uniq_active_nves
, struct skiplist
*uniq_holddown_nves
)
4378 #ifdef DEBUG_L2_EXTRA
4380 char buf_pfx
[PREFIX_STRLEN
];
4383 prefix2str(p
, buf_pfx
, sizeof(buf_pfx
));
4389 vnc_zlog_debug_verbose(
4390 "%s: entry, p=%s, delete_active=%d, delete_holddown=%d",
4391 __func__
, buf_pfx
, delete_active
, delete_holddown
);
4395 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4397 struct agg_table
*rt
;
4398 struct agg_node
*rn
;
/* When a prefix filter is given, its address family is compared with this AFI. */
4400 if (p
&& (family2afi(p
->family
) != afi
)) {
4404 rt
= it
->imported_vpn
[afi
];
4408 vnc_zlog_debug_verbose("%s: scanning rt for afi=%d", __func__
,
4411 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
4412 struct bgp_path_info
*bpi
;
4413 struct bgp_path_info
*next
;
4414 const struct prefix
*rn_p
= agg_node_get_prefix(rn
);
4416 if (p
&& VNC_DEBUG(IMPORT_DEL_REMOTE
))
4417 vnc_zlog_debug_any("%s: want %pFX, have %pRN",
4420 if (p
&& prefix_cmp(p
, rn_p
))
4423 vnc_zlog_debug_verbose("%s: rn pfx=%pRN", __func__
, rn
);
4425 /* TBD is this valid for afi == AFI_L2VPN? */
4426 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, 1);
/*
 * The loop advances through "next" rather than bpi->next; presumably next
 * is saved at the top of the body because the current bpi may be removed
 * below -- confirm (the assignment is on a line not shown).
 */
4428 for (bpi
= rn
->info
; bpi
; bpi
= next
) {
4437 vnc_zlog_debug_verbose("%s: examining bpi %p",
/* VN filter: compare against the address returned by rfapiGetNexthop(). */
4440 if (!rfapiGetNexthop(bpi
->attr
, &qpt
))
4445 || !prefix_match(vn
, &qpt
)) {
4446 #ifdef DEBUG_L2_EXTRA
4447 vnc_zlog_debug_verbose(
4448 "%s: continue at vn && !qpt_valid || !prefix_match(vn, &qpt)",
/* UN filter: compare against the address returned by rfapiGetUnAddrOfVpnBi(). */
4455 if (!rfapiGetUnAddrOfVpnBi(bpi
, &qct
))
4460 || !prefix_match(un
, &qct
)) {
4461 #ifdef DEBUG_L2_EXTRA
4462 vnc_zlog_debug_verbose(
4463 "%s: continue at un && !qct_valid || !prefix_match(un, &qct)",
4475 * If this route is waiting to be deleted
4477 * a previous withdraw, we must cancel its
4480 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)) {
4481 if (!delete_holddown
)
4483 if (bpi
->extra
->vnc
.import
.timer
) {
4484 struct rfapi_withdraw
*wcb
=
4491 ->holddown_count
[afi
] -=
4493 RFAPI_UPDATE_ITABLE_COUNT(
4494 bpi
, wcb
->import_table
,
4496 XFREE(MTYPE_RFAPI_WITHDRAW
,
4499 bpi
->extra
->vnc
.import
4508 vnc_zlog_debug_verbose(
4509 "%s: deleting bpi %p (qct_valid=%d, qpt_valid=%d, delete_holddown=%d, delete_active=%d)",
4510 __func__
, bpi
, qct_valid
, qpt_valid
,
4511 delete_holddown
, delete_active
);
/* Record the NVE only when both addresses were obtained for this route. */
4517 if (qct_valid
&& qpt_valid
) {
4519 struct rfapi_nve_addr na
;
4520 struct rfapi_nve_addr
*nap
;
4522 memset(&na
, 0, sizeof(na
));
4523 assert(!rfapiQprefix2Raddr(&qct
,
4525 assert(!rfapiQprefix2Raddr(&qpt
,
/* A non-zero skiplist_search() result leads to allocating and inserting a new entry. */
4528 if (skiplist_search(
4531 : uniq_holddown_nves
),
4532 &na
, (void **)&nap
)) {
4536 MTYPE_RFAPI_NVE_ADDR
,
4540 nap
->info
= is_active
4546 : uniq_holddown_nves
),
4549 rfapiNveAddr2Str(nap
, line
,
4554 vnc_direct_bgp_rh_del_route(bgp
, afi
, rn_p
,
/* Drop the route from the table's regular count and account for it as holddown. */
4557 RFAPI_UPDATE_ITABLE_COUNT(bpi
, it
, afi
, -1);
4558 it
->holddown_count
[afi
] += 1;
4559 rfapiExpireVpnNow(it
, rn
, bpi
, 1);
4561 vnc_zlog_debug_verbose(
4562 "%s: incrementing count (is_active=%d)",
4563 __func__
, is_active
);
4576 * For use by the "clear vnc prefixes" command
4578 /*------------------------------------------
4579 * rfapiDeleteRemotePrefixes
4581 * UI helper: For use by the "clear vnc prefixes" command
4584 * un if set, tunnel must match this prefix
4585 * vn if set, nexthop prefix must match this prefix
4586 * p if set, prefix must match this prefix
4587 * it if set, only look in this import table
4590 * pARcount number of active routes deleted
4591 * pAHcount number of active nves deleted
4592 * pHRcount number of holddown routes deleted
4593 * pHHcount number of holddown nves deleted
4597 --------------------------------------------*/
/*
 * rfapiDeleteRemotePrefixes: run rfapiDeleteRemotePrefixesIt() over the
 * import tables of the default bgp instance and report totals.
 *
 * Visible steps:
 *  - obtains the default bgp instance with bgp_get_default()
 *  - creates two skiplists (uniq_holddown_nves, uniq_active_nves) ordered
 *    by rfapi_nve_addr_cmp and released by delete_rem_pfx_na_free
 *  - calls rfapiDeleteRemotePrefixesIt() for the IP import tables and,
 *    when h->import_mac exists and p is either NULL or of family
 *    AF_ETHERNET, for every table in h->import_mac
 *  - frees both skiplists, then stores the four totals through
 *    pARcount, pAHcount, pHRcount and pHHcount
 *
 * NOTE(review): this listing omits some original lines. Not visible here:
 * the remaining parameter(s) on original line 4599 (p is used below), the
 * declarations of bgp, h and rc, the early exit when no bgp instance
 * exists, the loop header for the IP import tables and its use of arg_it,
 * and any NULL checks around the output stores -- confirm against the
 * full file.
 */
4598 void rfapiDeleteRemotePrefixes(struct prefix
*un
, struct prefix
*vn
,
4600 struct rfapi_import_table
*arg_it
,
4601 int delete_active
, int delete_holddown
,
4602 uint32_t *pARcount
, uint32_t *pAHcount
,
4603 uint32_t *pHRcount
, uint32_t *pHHcount
)
4607 struct rfapi_import_table
*it
;
4608 uint32_t deleted_holddown_route_count
= 0;
4609 uint32_t deleted_active_route_count
= 0;
4610 uint32_t deleted_holddown_nve_count
= 0;
4611 uint32_t deleted_active_nve_count
= 0;
4612 struct skiplist
*uniq_holddown_nves
;
4613 struct skiplist
*uniq_active_nves
;
4617 bgp
= bgp_get_default(); /* assume 1 instance for now */
4618 /* If no bgp instantiated yet, no vnc prefixes exist */
/* Both lists use delete_rem_pfx_na_free as their element destructor. */
4625 uniq_holddown_nves
=
4626 skiplist_new(0, rfapi_nve_addr_cmp
, delete_rem_pfx_na_free
);
4628 skiplist_new(0, rfapi_nve_addr_cmp
, delete_rem_pfx_na_free
);
4631 * Iterate over all import tables; do a filtered import
4632 * for the afi/safi combination
/*
 * NOTE(review): the text on original lines 4631-4632 mentions a "filtered
 * import", but the call that follows deletes prefixes; the wording looks
 * copied from another function.
 */
4641 vnc_zlog_debug_verbose(
4642 "%s: calling rfapiDeleteRemotePrefixesIt() on (IP) import %p",
4645 rfapiDeleteRemotePrefixesIt(
4646 bgp
, it
, un
, vn
, p
, delete_active
, delete_holddown
,
4647 &deleted_active_route_count
, &deleted_active_nve_count
,
4648 &deleted_holddown_route_count
,
4649 &deleted_holddown_nve_count
, uniq_active_nves
,
4650 uniq_holddown_nves
);
4659 * Now iterate over L2 import tables
/* The L2 tables are visited unless p is set with a family other than AF_ETHERNET. */
4661 if (h
->import_mac
&& !(p
&& (p
->family
!= AF_ETHERNET
))) {
4663 void *cursor
= NULL
;
4667 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4669 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4672 vnc_zlog_debug_verbose(
4673 "%s: calling rfapiDeleteRemotePrefixesIt() on import_mac %p",
4676 rfapiDeleteRemotePrefixesIt(
4677 bgp
, it
, un
, vn
, p
, delete_active
,
4678 delete_holddown
, &deleted_active_route_count
,
4679 &deleted_active_nve_count
,
4680 &deleted_holddown_route_count
,
4681 &deleted_holddown_nve_count
, uniq_active_nves
,
4682 uniq_holddown_nves
);
4687 * our custom element freeing function above counts as it deletes
/* The lists are freed before the totals are copied out below. */
4689 skiplist_free(uniq_holddown_nves
);
4690 skiplist_free(uniq_active_nves
);
4693 *pARcount
= deleted_active_route_count
;
4695 *pAHcount
= deleted_active_nve_count
;
4697 *pHRcount
= deleted_holddown_route_count
;
4699 *pHHcount
= deleted_holddown_nve_count
;
4704 /*------------------------------------------
4705 * rfapiCountAllItRoutes
4707 * UI helper: count VRF routes from BGP side
4712 * pALRcount count of active local routes
4713 * pARRcount count of active remote routes
4714 * pHRcount count of holddown routes
4715 * pIRcount count of direct imported routes
4719 --------------------------------------------*/
/*
 * rfapiCountAllItRoutes: sum the per-table route counters of the default
 * bgp instance's import tables.
 *
 * For every table on h->imports and every AFI, and for every table in
 * h->import_mac (AFI_L2VPN only), the visible code adds local_count,
 * remote_count, holddown_count and imported_count into four running
 * totals, which are then stored through the output pointers:
 *
 * pALRcount	receives the total of local_count
 * pARRcount	receives the total of remote_count
 * pHRcount	receives the total of holddown_count
 * pIRcount	receives the total of imported_count
 *
 * NOTE(review): this listing omits some original lines. Not visible here:
 * the declarations of bgp, h, afi, rc and the skiplist cursor, the
 * handling of a missing bgp instance, and any NULL checks around the
 * output stores -- confirm against the full file.
 */
4720 void rfapiCountAllItRoutes(int *pALRcount
, /* active local routes */
4721 int *pARRcount
, /* active remote routes */
4722 int *pHRcount
, /* holddown routes */
4723 int *pIRcount
) /* imported routes */
4727 struct rfapi_import_table
*it
;
4730 int total_active_local
= 0;
4731 int total_active_remote
= 0;
4732 int total_holddown
= 0;
4733 int total_imported
= 0;
4735 bgp
= bgp_get_default(); /* assume 1 instance for now */
4742 * Iterate over all import tables; do a filtered import
4743 * for the afi/safi combination
/*
 * NOTE(review): the text on original lines 4742-4743 mentions a "filtered
 * import", but the loop below only reads counters; the wording looks
 * copied from another function.
 */
4746 for (it
= h
->imports
; it
; it
= it
->next
) {
4748 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4750 total_active_local
+= it
->local_count
[afi
];
4751 total_active_remote
+= it
->remote_count
[afi
];
4752 total_holddown
+= it
->holddown_count
[afi
];
4753 total_imported
+= it
->imported_count
[afi
];
/* Tables kept in import_mac contribute only their AFI_L2VPN counters. */
4760 if (h
->import_mac
) {
4762 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4764 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4767 total_active_local
+= it
->local_count
[AFI_L2VPN
];
4768 total_active_remote
+= it
->remote_count
[AFI_L2VPN
];
4769 total_holddown
+= it
->holddown_count
[AFI_L2VPN
];
4770 total_imported
+= it
->imported_count
[AFI_L2VPN
];
4776 *pALRcount
= total_active_local
;
4779 *pARRcount
= total_active_remote
;
4782 *pHRcount
= total_holddown
;
4785 *pIRcount
= total_imported
;
4789 /*------------------------------------------
4790 * rfapiGetHolddownFromLifetime
4792 * calculate holddown value based on lifetime
4798 * Holddown value based on lifetime, holddown_factor,
4799 * and RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
4801 --------------------------------------------*/
4802 /* hold down time maxes out at RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY */
4803 uint32_t rfapiGetHolddownFromLifetime(uint32_t lifetime
)
4808 bgp
= bgp_get_default();
4809 if (bgp
&& bgp
->rfapi_cfg
)
4810 factor
= bgp
->rfapi_cfg
->rfp_cfg
.holddown_factor
;
4812 factor
= RFAPI_RFP_CFG_DEFAULT_HOLDDOWN_FACTOR
;
4814 if (factor
< 100 || lifetime
< RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
4815 lifetime
= lifetime
* factor
/ 100;
4816 if (lifetime
< RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
4819 return RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
;