3 * Copyright 2009-2016, LabN Consulting, L.L.C.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 * File: rfapi_import.c
23 * Purpose: Handle import of routes from BGP to RFAPI
28 #include "lib/zebra.h"
29 #include "lib/prefix.h"
30 #include "lib/agg_table.h"
32 #include "lib/memory.h"
34 #include "lib/skiplist.h"
35 #include "lib/thread.h"
36 #include "lib/stream.h"
37 #include "lib/lib_errors.h"
39 #include "bgpd/bgpd.h"
40 #include "bgpd/bgp_ecommunity.h"
41 #include "bgpd/bgp_attr.h"
42 #include "bgpd/bgp_route.h"
43 #include "bgpd/bgp_mplsvpn.h" /* prefix_rd2str() */
44 #include "bgpd/bgp_vnc_types.h"
45 #include "bgpd/bgp_rd.h"
47 #include "bgpd/rfapi/rfapi.h"
48 #include "bgpd/rfapi/bgp_rfapi_cfg.h"
49 #include "bgpd/rfapi/rfapi_backend.h"
50 #include "bgpd/rfapi/rfapi_import.h"
51 #include "bgpd/rfapi/rfapi_private.h"
52 #include "bgpd/rfapi/rfapi_monitor.h"
53 #include "bgpd/rfapi/rfapi_nve_addr.h"
54 #include "bgpd/rfapi/rfapi_vty.h"
55 #include "bgpd/rfapi/vnc_export_bgp.h"
56 #include "bgpd/rfapi/vnc_export_bgp_p.h"
57 #include "bgpd/rfapi/vnc_zebra.h"
58 #include "bgpd/rfapi/vnc_import_bgp.h"
59 #include "bgpd/rfapi/vnc_import_bgp_p.h"
60 #include "bgpd/rfapi/rfapi_rib.h"
61 #include "bgpd/rfapi/rfapi_encap_tlv.h"
62 #include "bgpd/rfapi/vnc_debug.h"
64 #ifdef HAVE_GLIBC_BACKTRACE
65 /* for backtrace and friends */
67 #endif /* HAVE_GLIBC_BACKTRACE */
69 #undef DEBUG_MONITOR_MOVE_SHORTER
70 #undef DEBUG_RETURNED_NHL
71 #undef DEBUG_ROUTE_COUNTERS
72 #undef DEBUG_ENCAP_MONITOR
75 #undef DEBUG_BI_SEARCH
78 * Allocated for each withdraw timer instance; freed when the timer
79 * expires or is canceled
81 struct rfapi_withdraw
{
82 struct rfapi_import_table
*import_table
;
83 struct agg_node
*node
;
84 struct bgp_info
*info
;
85 safi_t safi
; /* used only for bulk operations */
87 * For import table node reference count checking (i.e., debugging).
88 * Normally when a timer expires, lockoffset should be 0. However, if
89 * the timer expiration function is called directly (e.g.,
90 * rfapiExpireVpnNow), the node could be locked by a preceding
91 * agg_route_top() or agg_route_next() in a loop, so we need to pass
99 * It's evil and fiendish. It's compiler-dependent.
100 * ? Might need LDFLAGS -rdynamic to produce all function names
102 void rfapiDebugBacktrace(void)
104 #ifdef HAVE_GLIBC_BACKTRACE
105 #define RFAPI_DEBUG_BACKTRACE_NENTRIES 200
106 void *buf
[RFAPI_DEBUG_BACKTRACE_NENTRIES
];
111 size
= backtrace(buf
, RFAPI_DEBUG_BACKTRACE_NENTRIES
);
112 syms
= backtrace_symbols(buf
, size
);
114 for (i
= 0; i
< size
&& i
< RFAPI_DEBUG_BACKTRACE_NENTRIES
; ++i
) {
115 vnc_zlog_debug_verbose("backtrace[%2zu]: %s", i
, syms
[i
]);
125 * Count remote routes and compare with actively-maintained values.
126 * Abort if they disagree.
128 void rfapiCheckRouteCount()
130 struct bgp
*bgp
= bgp_get_default();
132 struct rfapi_import_table
*it
;
140 for (it
= h
->imports
; it
; it
= it
->next
) {
141 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
143 struct agg_table
*rt
;
146 int holddown_count
= 0;
148 int imported_count
= 0;
149 int remote_count
= 0;
151 rt
= it
->imported_vpn
[afi
];
153 for (rn
= agg_route_top(rt
); rn
;
154 rn
= agg_route_next(rn
)) {
156 struct bgp_info
*next
;
158 for (bi
= rn
->info
; bi
; bi
= next
) {
161 if (CHECK_FLAG(bi
->flags
,
166 if (RFAPI_LOCAL_BI(bi
)) {
169 if (RFAPI_DIRECT_IMPORT_BI(
180 if (it
->holddown_count
[afi
] != holddown_count
) {
181 vnc_zlog_debug_verbose(
182 "%s: it->holddown_count %d != holddown_count %d",
183 __func__
, it
->holddown_count
[afi
],
187 if (it
->remote_count
[afi
] != remote_count
) {
188 vnc_zlog_debug_verbose(
189 "%s: it->remote_count %d != remote_count %d",
190 __func__
, it
->remote_count
[afi
],
194 if (it
->imported_count
[afi
] != imported_count
) {
195 vnc_zlog_debug_verbose(
196 "%s: it->imported_count %d != imported_count %d",
197 __func__
, it
->imported_count
[afi
],
205 #if DEBUG_ROUTE_COUNTERS
206 #define VNC_ITRCCK do {rfapiCheckRouteCount();} while (0)
212 * Validate reference count for a node in an import table
214 * Normally lockoffset is 0 for nodes in quiescent state. However,
215 * agg_unlock_node will delete the node if it is called when
216 * node->lock == 1, and we have to validate the refcount before
217 * the node is deleted. In this case, we specify lockoffset 1.
219 void rfapiCheckRefcount(struct agg_node
*rn
, safi_t safi
, int lockoffset
)
221 unsigned int count_bi
= 0;
222 unsigned int count_monitor
= 0;
224 struct rfapi_monitor_encap
*hme
;
225 struct rfapi_monitor_vpn
*hmv
;
227 for (bi
= rn
->info
; bi
; bi
= bi
->next
)
232 ++count_monitor
; /* rfapi_it_extra */
239 for (hme
= RFAPI_MONITOR_ENCAP(rn
); hme
;
246 for (hmv
= RFAPI_MONITOR_VPN(rn
); hmv
; hmv
= hmv
->next
)
249 if (RFAPI_MONITOR_EXTERIOR(rn
)->source
) {
250 ++count_monitor
; /* sl */
252 for (rc
= skiplist_next(
253 RFAPI_MONITOR_EXTERIOR(rn
)->source
,
254 NULL
, NULL
, &cursor
);
257 RFAPI_MONITOR_EXTERIOR(rn
)->source
,
258 NULL
, NULL
, &cursor
)) {
260 ++count_monitor
; /* sl entry */
270 if (count_bi
+ count_monitor
+ lockoffset
!= rn
->lock
) {
271 vnc_zlog_debug_verbose(
272 "%s: count_bi=%d, count_monitor=%d, lockoffset=%d, rn->lock=%d",
273 __func__
, count_bi
, count_monitor
, lockoffset
,
280 * Perform deferred rfapi_close operations that were queued
283 static wq_item_status
rfapi_deferred_close_workfunc(struct work_queue
*q
,
286 struct rfapi_descriptor
*rfd
= data
;
287 struct rfapi
*h
= q
->spec
.data
;
289 assert(!(h
->flags
& RFAPI_INCALLBACK
));
291 vnc_zlog_debug_verbose("%s: completed deferred close on handle %p",
297 * Extract layer 2 option from Encap TLVs in BGP attrs
299 int rfapiGetL2o(struct attr
*attr
, struct rfapi_l2address_option
*l2o
)
303 struct bgp_attr_encap_subtlv
*pEncap
;
305 for (pEncap
= attr
->vnc_subtlvs
; pEncap
;
306 pEncap
= pEncap
->next
) {
308 if (pEncap
->type
== BGP_VNC_SUBTLV_TYPE_RFPOPTION
) {
310 == RFAPI_VN_OPTION_TYPE_L2ADDR
) {
312 if (pEncap
->value
[1] == 14) {
313 memcpy(l2o
->macaddr
.octet
,
330 l2o
->logical_net_id
=
333 + ((pEncap
->value
[14]
336 + ((pEncap
->value
[13]
351 * Extract the lifetime from the Tunnel Encap attribute of a route in
354 int rfapiGetVncLifetime(struct attr
*attr
, uint32_t *lifetime
)
356 struct bgp_attr_encap_subtlv
*pEncap
;
358 *lifetime
= RFAPI_INFINITE_LIFETIME
; /* default to infinite */
362 for (pEncap
= attr
->vnc_subtlvs
; pEncap
;
363 pEncap
= pEncap
->next
) {
366 == BGP_VNC_SUBTLV_TYPE_LIFETIME
) { /* lifetime */
367 if (pEncap
->length
== 4) {
368 memcpy(lifetime
, pEncap
->value
, 4);
369 *lifetime
= ntohl(*lifetime
);
380 * Extract the tunnel type from the extended community
382 int rfapiGetTunnelType(struct attr
*attr
, bgp_encap_types
*type
)
384 *type
= BGP_ENCAP_TYPE_MPLS
; /* default to MPLS */
385 if (attr
&& attr
->ecommunity
) {
386 struct ecommunity
*ecom
= attr
->ecommunity
;
389 for (i
= 0; i
< (ecom
->size
* ECOMMUNITY_SIZE
);
390 i
+= ECOMMUNITY_SIZE
) {
394 if (ep
[0] == ECOMMUNITY_ENCODE_OPAQUE
395 && ep
[1] == ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP
) {
396 *type
= (ep
[6] << 8) + ep
[7];
407 * Look for UN address in Encap attribute
409 int rfapiGetVncTunnelUnAddr(struct attr
*attr
, struct prefix
*p
)
411 struct bgp_attr_encap_subtlv
*pEncap
;
412 bgp_encap_types tun_type
;
414 rfapiGetTunnelType(attr
, &tun_type
);
415 if (tun_type
== BGP_ENCAP_TYPE_MPLS
) {
418 /* MPLS carries UN address in next hop */
419 rfapiNexthop2Prefix(attr
, p
);
426 for (pEncap
= attr
->encap_subtlvs
; pEncap
;
427 pEncap
= pEncap
->next
) {
430 == BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT
) { /* un
433 switch (pEncap
->length
) {
438 memcpy(p
->u
.val
, pEncap
->value
,
445 p
->family
= AF_INET6
;
447 memcpy(p
->u
.val
, pEncap
->value
,
460 * Get UN address wherever it might be
462 int rfapiGetUnAddrOfVpnBi(struct bgp_info
*bi
, struct prefix
*p
)
464 /* If it's in this route's VNC attribute, we're done */
465 if (!rfapiGetVncTunnelUnAddr(bi
->attr
, p
))
468 * Otherwise, see if it's cached from a corresponding ENCAP SAFI
472 switch (bi
->extra
->vnc
.import
.un_family
) {
475 p
->family
= bi
->extra
->vnc
.import
.un_family
;
476 p
->u
.prefix4
= bi
->extra
->vnc
.import
.un
.addr4
;
482 p
->family
= bi
->extra
->vnc
.import
.un_family
;
483 p
->u
.prefix6
= bi
->extra
->vnc
.import
.un
.addr6
;
490 #if DEBUG_ENCAP_MONITOR
491 vnc_zlog_debug_verbose(
492 "%s: bi->extra->vnc.import.un_family is 0, no UN addr",
504 * Make a new bgp_info from gathered parameters
506 static struct bgp_info
*rfapiBgpInfoCreate(struct attr
*attr
, struct peer
*peer
,
507 void *rfd
, struct prefix_rd
*prd
,
508 uint8_t type
, uint8_t sub_type
,
511 struct bgp_info
*new;
513 new = bgp_info_new();
518 new->attr
= bgp_attr_intern(attr
);
520 bgp_info_extra_get(new);
522 new->extra
->vnc
.import
.rd
= *prd
;
523 rfapi_time(&new->extra
->vnc
.import
.create_time
);
526 encode_label(*label
, &new->extra
->label
[0]);
528 new->sub_type
= sub_type
;
536 * Frees bgp_info as used in import tables (parts are not
537 * allocated exactly the way they are in the main RIBs)
539 static void rfapiBgpInfoFree(struct bgp_info
*goner
)
545 vnc_zlog_debug_verbose("%s: calling peer_unlock(%p), #%d",
546 __func__
, goner
->peer
,
548 peer_unlock(goner
->peer
);
552 bgp_attr_unintern(&goner
->attr
);
555 assert(!goner
->extra
->damp_info
); /* Not used in import tbls */
556 XFREE(MTYPE_BGP_ROUTE_EXTRA
, goner
->extra
);
559 XFREE(MTYPE_BGP_ROUTE
, goner
);
562 struct rfapi_import_table
*rfapiMacImportTableGetNoAlloc(struct bgp
*bgp
,
566 struct rfapi_import_table
*it
= NULL
;
567 uintptr_t lni_as_ptr
= lni
;
576 if (skiplist_search(h
->import_mac
, (void *)lni_as_ptr
, (void **)&it
))
582 struct rfapi_import_table
*rfapiMacImportTableGet(struct bgp
*bgp
, uint32_t lni
)
585 struct rfapi_import_table
*it
= NULL
;
586 uintptr_t lni_as_ptr
= lni
;
591 if (!h
->import_mac
) {
592 /* default cmp is good enough for LNI */
593 h
->import_mac
= skiplist_new(0, NULL
, NULL
);
596 if (skiplist_search(h
->import_mac
, (void *)lni_as_ptr
, (void **)&it
)) {
598 struct ecommunity
*enew
;
599 struct ecommunity_val eval
;
602 it
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
603 sizeof(struct rfapi_import_table
));
604 /* set RT list of new import table based on LNI */
605 memset((char *)&eval
, 0, sizeof(eval
));
606 eval
.val
[0] = 0; /* VNC L2VPN */
607 eval
.val
[1] = 2; /* VNC L2VPN */
608 eval
.val
[5] = (lni
>> 16) & 0xff;
609 eval
.val
[6] = (lni
>> 8) & 0xff;
610 eval
.val
[7] = (lni
>> 0) & 0xff;
612 enew
= ecommunity_new();
613 ecommunity_add_val(enew
, &eval
);
614 it
->rt_import_list
= enew
;
616 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
617 it
->imported_vpn
[afi
] = agg_table_init();
618 it
->imported_encap
[afi
] = agg_table_init();
621 it
->l2_logical_net_id
= lni
;
623 skiplist_insert(h
->import_mac
, (void *)lni_as_ptr
, it
);
631 * Implement MONITOR_MOVE_SHORTER(original_node) from
632 * RFAPI-Import-Event-Handling.txt
634 * Returns pointer to the list of moved monitors
636 static struct rfapi_monitor_vpn
*
637 rfapiMonitorMoveShorter(struct agg_node
*original_vpn_node
, int lockoffset
)
640 struct agg_node
*par
;
641 struct rfapi_monitor_vpn
*m
;
642 struct rfapi_monitor_vpn
*mlast
;
643 struct rfapi_monitor_vpn
*moved
;
645 int parent_already_refcounted
= 0;
647 RFAPI_CHECK_REFCOUNT(original_vpn_node
, SAFI_MPLS_VPN
, lockoffset
);
649 #if DEBUG_MONITOR_MOVE_SHORTER
651 char buf
[PREFIX_STRLEN
];
653 prefix2str(&original_vpn_node
->p
, buf
, sizeof(buf
));
654 vnc_zlog_debug_verbose("%s: called with node pfx=%s", __func__
,
660 * 1. If there is at least one bi (either regular route or
661 * route marked as withdrawn, with a pending timer) at
662 * original_node with a valid UN address, we're done. Return.
664 for (bi
= original_vpn_node
->info
; bi
; bi
= bi
->next
) {
667 if (!rfapiGetUnAddrOfVpnBi(bi
, &pfx
)) {
668 #if DEBUG_MONITOR_MOVE_SHORTER
669 vnc_zlog_debug_verbose(
670 "%s: have valid UN at original node, no change",
678 * 2. Travel up the tree (toward less-specific prefixes) from
679 * original_node to find the first node that has at least
680 * one route (even if it is only a withdrawn route) with a
681 * valid UN address. Call this node "Node P."
683 for (par
= agg_node_parent(original_vpn_node
); par
;
684 par
= agg_node_parent(par
)) {
685 for (bi
= par
->info
; bi
; bi
= bi
->next
) {
687 if (!rfapiGetUnAddrOfVpnBi(bi
, &pfx
)) {
696 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
, 0);
700 * If no less-specific routes, try to use the 0/0 node
703 /* this isn't necessarily 0/0 */
704 par
= agg_route_table_top(original_vpn_node
);
707 * If we got the top node but it wasn't 0/0,
710 if (par
&& par
->p
.prefixlen
) {
711 agg_unlock_node(par
); /* maybe free */
716 ++parent_already_refcounted
;
721 * Create 0/0 node if it isn't there
724 struct prefix pfx_default
;
726 memset(&pfx_default
, 0, sizeof(pfx_default
));
727 pfx_default
.family
= original_vpn_node
->p
.family
;
729 /* creates default node if none exists */
730 par
= agg_node_get(agg_get_table(original_vpn_node
),
732 ++parent_already_refcounted
;
736 * 3. Move each of the monitors found at original_node to Node P.
737 * These are "Moved Monitors."
742 * Attach at end so that the list pointer we return points
743 * only to the moved routes
745 for (m
= RFAPI_MONITOR_VPN(par
), mlast
= NULL
; m
;
746 mlast
= m
, m
= m
->next
)
750 moved
= mlast
->next
= RFAPI_MONITOR_VPN(original_vpn_node
);
752 moved
= RFAPI_MONITOR_VPN_W_ALLOC(par
) =
753 RFAPI_MONITOR_VPN(original_vpn_node
);
755 if (RFAPI_MONITOR_VPN(
756 original_vpn_node
)) /* check agg, so not allocated */
757 RFAPI_MONITOR_VPN_W_ALLOC(original_vpn_node
) = NULL
;
760 * update the node pointers on the monitors
762 for (m
= moved
; m
; m
= m
->next
) {
767 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
,
768 parent_already_refcounted
- movecount
);
769 while (movecount
> parent_already_refcounted
) {
771 ++parent_already_refcounted
;
773 while (movecount
< parent_already_refcounted
) {
774 /* unlikely, but code defensively */
775 agg_unlock_node(par
);
776 --parent_already_refcounted
;
778 RFAPI_CHECK_REFCOUNT(original_vpn_node
, SAFI_MPLS_VPN
,
779 movecount
+ lockoffset
);
780 while (movecount
--) {
781 agg_unlock_node(original_vpn_node
);
784 #if DEBUG_MONITOR_MOVE_SHORTER
786 char buf
[PREFIX_STRLEN
];
788 prefix2str(&par
->p
, buf
, sizeof(buf
));
789 vnc_zlog_debug_verbose("%s: moved to node pfx=%s", __func__
,
799 * Implement MONITOR_MOVE_LONGER(new_node) from
800 * RFAPI-Import-Event-Handling.txt
802 static void rfapiMonitorMoveLonger(struct agg_node
*new_vpn_node
)
804 struct rfapi_monitor_vpn
*monitor
;
805 struct rfapi_monitor_vpn
*mlast
;
807 struct agg_node
*par
;
809 RFAPI_CHECK_REFCOUNT(new_vpn_node
, SAFI_MPLS_VPN
, 0);
812 * Make sure we have at least one valid route at the new node
814 for (bi
= new_vpn_node
->info
; bi
; bi
= bi
->next
) {
816 if (!rfapiGetUnAddrOfVpnBi(bi
, &pfx
))
821 vnc_zlog_debug_verbose(
822 "%s: no valid routes at node %p, so not attempting moves",
823 __func__
, new_vpn_node
);
828 * Find first parent node that has monitors
830 for (par
= agg_node_parent(new_vpn_node
); par
;
831 par
= agg_node_parent(par
)) {
832 if (RFAPI_MONITOR_VPN(par
))
837 vnc_zlog_debug_verbose(
838 "%s: no parent nodes with monitors, done", __func__
);
843 * Check each of these monitors to see if their longest-match
844 * is now the updated node. Move any such monitors to the more-
845 * specific updated node
847 for (mlast
= NULL
, monitor
= RFAPI_MONITOR_VPN(par
); monitor
;) {
850 * If new longest match for monitor prefix is the new
851 * route's prefix, move monitor to new route's prefix
853 if (prefix_match(&new_vpn_node
->p
, &monitor
->p
)) {
856 mlast
->next
= monitor
->next
;
858 RFAPI_MONITOR_VPN_W_ALLOC(par
) = monitor
->next
;
863 monitor
->next
= RFAPI_MONITOR_VPN(new_vpn_node
);
864 RFAPI_MONITOR_VPN_W_ALLOC(new_vpn_node
) = monitor
;
865 monitor
->node
= new_vpn_node
;
867 agg_lock_node(new_vpn_node
); /* incr refcount */
869 monitor
= mlast
? mlast
->next
: RFAPI_MONITOR_VPN(par
);
871 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
, 1);
872 /* decr refcount after we're done with par as this might
874 agg_unlock_node(par
);
879 monitor
= monitor
->next
;
882 RFAPI_CHECK_REFCOUNT(new_vpn_node
, SAFI_MPLS_VPN
, 0);
886 static void rfapiBgpInfoChainFree(struct bgp_info
*bi
)
888 struct bgp_info
*next
;
893 * If there is a timer waiting to delete this bi, cancel
894 * the timer and delete immediately
896 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
897 && bi
->extra
->vnc
.import
.timer
) {
900 (struct thread
*)bi
->extra
->vnc
.import
.timer
;
901 struct rfapi_withdraw
*wcb
= t
->arg
;
903 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
909 rfapiBgpInfoFree(bi
);
914 static void rfapiImportTableFlush(struct rfapi_import_table
*it
)
921 ecommunity_free(&it
->rt_import_list
);
922 it
->rt_import_list
= NULL
;
924 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
928 for (rn
= agg_route_top(it
->imported_vpn
[afi
]); rn
;
929 rn
= agg_route_next(rn
)) {
931 * Each route_node has:
932 * aggregate: points to rfapi_it_extra with monitor
934 * info: points to chain of bgp_info
936 /* free bgp_info and its children */
937 rfapiBgpInfoChainFree(rn
->info
);
940 rfapiMonitorExtraFlush(SAFI_MPLS_VPN
, rn
);
943 for (rn
= agg_route_top(it
->imported_encap
[afi
]); rn
;
944 rn
= agg_route_next(rn
)) {
945 /* free bgp_info and its children */
946 rfapiBgpInfoChainFree(rn
->info
);
949 rfapiMonitorExtraFlush(SAFI_ENCAP
, rn
);
952 agg_table_finish(it
->imported_vpn
[afi
]);
953 agg_table_finish(it
->imported_encap
[afi
]);
955 if (it
->monitor_exterior_orphans
) {
956 skiplist_free(it
->monitor_exterior_orphans
);
960 void rfapiImportTableRefDelByIt(struct bgp
*bgp
,
961 struct rfapi_import_table
*it_target
)
964 struct rfapi_import_table
*it
;
965 struct rfapi_import_table
*prev
= NULL
;
972 for (it
= h
->imports
; it
; prev
= it
, it
= it
->next
) {
978 assert(it
->refcount
);
984 prev
->next
= it
->next
;
986 h
->imports
= it
->next
;
988 rfapiImportTableFlush(it
);
989 XFREE(MTYPE_RFAPI_IMPORTTABLE
, it
);
#if RFAPI_REQUIRE_ENCAP_BEEC
/*
 * Look for magic BGP Encapsulation Extended Community value
 * Format in RFC 5512 Sect. 4.5
 *
 * Walks every extended community in ecom and looks for an opaque
 * "encapsulation" community whose 16-bit tunnel type (octets 6 and 7,
 * high octet first) equals type.
 *
 * ecom		extended community list to scan
 * type		tunnel type to look for
 *
 * Returns 1 if a matching community is present, 0 otherwise.
 */
static int rfapiEcommunitiesMatchBeec(struct ecommunity *ecom,
				      bgp_encap_types type)
{
	int i;

	for (i = 0; i < (ecom->size * ECOMMUNITY_SIZE); i += ECOMMUNITY_SIZE) {
		uint8_t *ep = ecom->val + i;

		/*
		 * The high octet of the tunnel type must be isolated with a
		 * bitwise AND.  A logical AND reduces the mask to 0 or 1, so
		 * after the shift the comparison was always against 0 and
		 * only tunnel types below 256 could ever match.
		 */
		if (ep[0] == ECOMMUNITY_ENCODE_OPAQUE
		    && ep[1] == ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP
		    && ep[6] == ((type & 0xff00) >> 8)
		    && ep[7] == (type & 0xff)) {
			return 1;
		}
	}

	return 0;
}
#endif /* RFAPI_REQUIRE_ENCAP_BEEC */
1024 int rfapiEcommunitiesIntersect(struct ecommunity
*e1
, struct ecommunity
*e2
)
1033 s1
= ecommunity_ecom2str(e1
, ECOMMUNITY_FORMAT_DISPLAY
, 0);
1034 s2
= ecommunity_ecom2str(e2
, ECOMMUNITY_FORMAT_DISPLAY
, 0);
1035 vnc_zlog_debug_verbose("%s: e1[%s], e2[%s]", __func__
, s1
, s2
);
1036 XFREE(MTYPE_ECOMMUNITY_STR
, s1
);
1037 XFREE(MTYPE_ECOMMUNITY_STR
, s2
);
1040 for (i
= 0; i
< e1
->size
; ++i
) {
1041 for (j
= 0; j
< e2
->size
; ++j
) {
1042 if (!memcmp(e1
->val
+ (i
* ECOMMUNITY_SIZE
),
1043 e2
->val
+ (j
* ECOMMUNITY_SIZE
),
1053 int rfapiEcommunityGetLNI(struct ecommunity
*ecom
, uint32_t *lni
)
1057 for (i
= 0; i
< ecom
->size
; ++i
) {
1058 uint8_t *p
= ecom
->val
+ (i
* ECOMMUNITY_SIZE
);
1060 if ((*(p
+ 0) == 0x00) && (*(p
+ 1) == 0x02)) {
1062 *lni
= (*(p
+ 5) << 16) | (*(p
+ 6) << 8)
1071 int rfapiEcommunityGetEthernetTag(struct ecommunity
*ecom
, uint16_t *tag_id
)
1073 struct bgp
*bgp
= bgp_get_default();
1074 *tag_id
= 0; /* default to untagged */
1077 for (i
= 0; i
< ecom
->size
; ++i
) {
1080 uint8_t *p
= ecom
->val
+ (i
* ECOMMUNITY_SIZE
);
1082 /* High-order octet of type. */
1085 if (*p
++ == ECOMMUNITY_ROUTE_TARGET
) {
1086 if (encode
== ECOMMUNITY_ENCODE_AS4
) {
1087 p
= ptr_get_be32(p
, &as
);
1088 } else if (encode
== ECOMMUNITY_ENCODE_AS
) {
1091 p
+= 2; /* skip next two, tag/vid
1092 always in lowest bytes */
1094 if (as
== bgp
->as
) {
1095 *tag_id
= *p
++ << 8;
1105 static int rfapiVpnBiNhEqualsPt(struct bgp_info
*bi
, struct rfapi_ip_addr
*hpt
)
1112 family
= BGP_MP_NEXTHOP_FAMILY(bi
->attr
->mp_nexthop_len
);
1114 if (hpt
->addr_family
!= family
)
1119 if (bi
->attr
->mp_nexthop_global_in
.s_addr
1120 != hpt
->addr
.v4
.s_addr
)
1125 if (IPV6_ADDR_CMP(&bi
->attr
->mp_nexthop_global
, &hpt
->addr
.v6
))
1139 * Compare 2 VPN BIs. Return true if they have the same VN and UN addresses
1141 static int rfapiVpnBiSamePtUn(struct bgp_info
*bi1
, struct bgp_info
*bi2
)
1143 struct prefix pfx_un1
;
1144 struct prefix pfx_un2
;
1149 if (!bi1
->attr
|| !bi2
->attr
)
1153 * VN address comparisons
1156 if (BGP_MP_NEXTHOP_FAMILY(bi1
->attr
->mp_nexthop_len
)
1157 != BGP_MP_NEXTHOP_FAMILY(bi2
->attr
->mp_nexthop_len
)) {
1161 switch (BGP_MP_NEXTHOP_FAMILY(bi1
->attr
->mp_nexthop_len
)) {
1163 if (bi1
->attr
->mp_nexthop_global_in
.s_addr
1164 != bi2
->attr
->mp_nexthop_global_in
.s_addr
)
1169 if (IPV6_ADDR_CMP(&bi1
->attr
->mp_nexthop_global
,
1170 &bi2
->attr
->mp_nexthop_global
))
1180 * UN address comparisons
1182 if (rfapiGetVncTunnelUnAddr(bi1
->attr
, &pfx_un1
)) {
1184 pfx_un1
.family
= bi1
->extra
->vnc
.import
.un_family
;
1185 switch (bi1
->extra
->vnc
.import
.un_family
) {
1188 bi1
->extra
->vnc
.import
.un
.addr4
;
1192 bi1
->extra
->vnc
.import
.un
.addr6
;
1201 if (rfapiGetVncTunnelUnAddr(bi2
->attr
, &pfx_un2
)) {
1203 pfx_un2
.family
= bi2
->extra
->vnc
.import
.un_family
;
1204 switch (bi2
->extra
->vnc
.import
.un_family
) {
1207 bi2
->extra
->vnc
.import
.un
.addr4
;
1211 bi2
->extra
->vnc
.import
.un
.addr6
;
1220 if (!pfx_un1
.family
|| !pfx_un2
.family
)
1223 if (pfx_un1
.family
!= pfx_un2
.family
)
1226 switch (pfx_un1
.family
) {
1228 if (!IPV4_ADDR_SAME(&pfx_un1
.u
.prefix4
, &pfx_un2
.u
.prefix4
))
1232 if (!IPV6_ADDR_SAME(&pfx_un1
.u
.prefix6
, &pfx_un2
.u
.prefix6
))
1241 uint8_t rfapiRfpCost(struct attr
*attr
)
1243 if (attr
->flag
& ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF
)) {
1244 if (attr
->local_pref
> 255) {
1247 return 255 - attr
->local_pref
;
1253 /*------------------------------------------
1256 * Find Layer 2 options in an option chain
1262 * l2o layer 2 options extracted
1266 * 1 no options found
1268 --------------------------------------------*/
1269 int rfapi_extract_l2o(
1270 struct bgp_tea_options
*pHop
, /* chain of options */
1271 struct rfapi_l2address_option
*l2o
) /* return extracted value */
1273 struct bgp_tea_options
*p
;
1275 for (p
= pHop
; p
; p
= p
->next
) {
1276 if ((p
->type
== RFAPI_VN_OPTION_TYPE_L2ADDR
)
1277 && (p
->length
>= 8)) {
1281 memcpy(&l2o
->macaddr
, v
, 6);
1283 l2o
->label
= ((v
[6] << 12) & 0xff000)
1284 + ((v
[7] << 4) & 0xff0)
1285 + ((v
[8] >> 4) & 0xf);
1287 l2o
->local_nve_id
= (uint8_t)v
[10];
1289 l2o
->logical_net_id
=
1290 (v
[11] << 16) + (v
[12] << 8) + (v
[13] << 0);
1298 static struct rfapi_next_hop_entry
*
1299 rfapiRouteInfo2NextHopEntry(struct rfapi_ip_prefix
*rprefix
,
1300 struct bgp_info
*bi
, /* route to encode */
1301 uint32_t lifetime
, /* use this in nhe */
1302 struct agg_node
*rn
) /* req for L2 eth addr */
1304 struct rfapi_next_hop_entry
*new;
1305 int have_vnc_tunnel_un
= 0;
1307 #if DEBUG_ENCAP_MONITOR
1308 vnc_zlog_debug_verbose("%s: entry, bi %p, rn %p", __func__
, bi
, rn
);
1311 new = XCALLOC(MTYPE_RFAPI_NEXTHOP
, sizeof(struct rfapi_next_hop_entry
));
1314 new->prefix
= *rprefix
;
1317 && decode_rd_type(bi
->extra
->vnc
.import
.rd
.val
)
1318 == RD_TYPE_VNC_ETH
) {
1321 struct rfapi_vn_option
*vo
;
1323 vo
= XCALLOC(MTYPE_RFAPI_VN_OPTION
,
1324 sizeof(struct rfapi_vn_option
));
1327 vo
->type
= RFAPI_VN_OPTION_TYPE_L2ADDR
;
1329 memcpy(&vo
->v
.l2addr
.macaddr
, &rn
->p
.u
.prefix_eth
.octet
,
1331 /* only low 3 bytes of this are significant */
1333 (void)rfapiEcommunityGetLNI(
1334 bi
->attr
->ecommunity
,
1335 &vo
->v
.l2addr
.logical_net_id
);
1336 (void)rfapiEcommunityGetEthernetTag(
1337 bi
->attr
->ecommunity
, &vo
->v
.l2addr
.tag_id
);
1340 /* local_nve_id comes from lower byte of RD type */
1341 vo
->v
.l2addr
.local_nve_id
= bi
->extra
->vnc
.import
.rd
.val
[1];
1343 /* label comes from MP_REACH_NLRI label */
1344 vo
->v
.l2addr
.label
= decode_label(&bi
->extra
->label
[0]);
1346 new->vn_options
= vo
;
1349 * If there is an auxiliary prefix (i.e., host IP address),
1350 * use it as the nexthop prefix instead of the query prefix
1352 if (bi
->extra
->vnc
.import
.aux_prefix
.family
) {
1353 rfapiQprefix2Rprefix(&bi
->extra
->vnc
.import
.aux_prefix
,
1359 bgp_encap_types tun_type
;
1360 new->prefix
.cost
= rfapiRfpCost(bi
->attr
);
1362 struct bgp_attr_encap_subtlv
*pEncap
;
1364 switch (BGP_MP_NEXTHOP_FAMILY(bi
->attr
->mp_nexthop_len
)) {
1366 new->vn_address
.addr_family
= AF_INET
;
1367 new->vn_address
.addr
.v4
=
1368 bi
->attr
->mp_nexthop_global_in
;
1372 new->vn_address
.addr_family
= AF_INET6
;
1373 new->vn_address
.addr
.v6
= bi
->attr
->mp_nexthop_global
;
1377 zlog_warn("%s: invalid vpn nexthop length: %d",
1378 __func__
, bi
->attr
->mp_nexthop_len
);
1379 rfapi_free_next_hop_list(new);
1383 for (pEncap
= bi
->attr
->vnc_subtlvs
; pEncap
;
1384 pEncap
= pEncap
->next
) {
1385 switch (pEncap
->type
) {
1386 case BGP_VNC_SUBTLV_TYPE_LIFETIME
:
1387 /* use configured lifetime, not attr lifetime */
1391 zlog_warn("%s: unknown VNC option type %d",
1392 __func__
, pEncap
->type
);
1399 rfapiGetTunnelType(bi
->attr
, &tun_type
);
1400 if (tun_type
== BGP_ENCAP_TYPE_MPLS
) {
1402 /* MPLS carries UN address in next hop */
1403 rfapiNexthop2Prefix(bi
->attr
, &p
);
1404 if (p
.family
!= 0) {
1405 rfapiQprefix2Raddr(&p
, &new->un_address
);
1406 have_vnc_tunnel_un
= 1;
1410 for (pEncap
= bi
->attr
->encap_subtlvs
; pEncap
;
1411 pEncap
= pEncap
->next
) {
1412 switch (pEncap
->type
) {
1413 case BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT
:
1415 * Overrides ENCAP UN address, if any
1417 switch (pEncap
->length
) {
1420 new->un_address
.addr_family
= AF_INET
;
1421 memcpy(&new->un_address
.addr
.v4
,
1423 have_vnc_tunnel_un
= 1;
1427 new->un_address
.addr_family
= AF_INET6
;
1428 memcpy(&new->un_address
.addr
.v6
,
1430 have_vnc_tunnel_un
= 1;
1435 "%s: invalid tunnel subtlv UN addr length (%d) for bi %p",
1436 __func__
, pEncap
->length
, bi
);
1442 "%s: unknown Encap Attribute option type %d",
1443 __func__
, pEncap
->type
);
1450 new->un_options
= rfapi_encap_tlv_to_un_option(bi
->attr
);
1452 #if DEBUG_ENCAP_MONITOR
1453 vnc_zlog_debug_verbose("%s: line %d: have_vnc_tunnel_un=%d",
1454 __func__
, __LINE__
, have_vnc_tunnel_un
);
1457 if (!have_vnc_tunnel_un
&& bi
->extra
) {
1459 * use cached UN address from ENCAP route
1461 new->un_address
.addr_family
=
1462 bi
->extra
->vnc
.import
.un_family
;
1463 switch (new->un_address
.addr_family
) {
1465 new->un_address
.addr
.v4
=
1466 bi
->extra
->vnc
.import
.un
.addr4
;
1469 new->un_address
.addr
.v6
=
1470 bi
->extra
->vnc
.import
.un
.addr6
;
1474 "%s: invalid UN addr family (%d) for bi %p",
1475 __func__
, new->un_address
.addr_family
,
1477 rfapi_free_next_hop_list(new);
1484 new->lifetime
= lifetime
;
1488 int rfapiHasNonRemovedRoutes(struct agg_node
*rn
)
1490 struct bgp_info
*bi
;
1492 for (bi
= rn
->info
; bi
; bi
= bi
->next
) {
1495 if (!CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
1496 && (bi
->extra
&& !rfapiGetUnAddrOfVpnBi(bi
, &pfx
))) {
1508 void rfapiDumpNode(struct agg_node
*rn
)
1510 struct bgp_info
*bi
;
1512 vnc_zlog_debug_verbose("%s: rn=%p", __func__
, rn
);
1513 for (bi
= rn
->info
; bi
; bi
= bi
->next
) {
1515 int ctrc
= rfapiGetUnAddrOfVpnBi(bi
, &pfx
);
1518 if (!CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
1519 && (bi
->extra
&& !ctrc
)) {
1526 vnc_zlog_debug_verbose(
1527 " bi=%p, nr=%d, flags=0x%x, extra=%p, ctrc=%d", bi
, nr
,
1528 bi
->flags
, bi
->extra
, ctrc
);
1533 static int rfapiNhlAddNodeRoutes(
1534 struct agg_node
*rn
, /* in */
1535 struct rfapi_ip_prefix
*rprefix
, /* in */
1536 uint32_t lifetime
, /* in */
1537 int removed
, /* in */
1538 struct rfapi_next_hop_entry
**head
, /* in/out */
1539 struct rfapi_next_hop_entry
**tail
, /* in/out */
1540 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1541 struct agg_node
*rfd_rib_node
, /* preload this NVE rib node */
1542 struct prefix
*pfx_target_original
) /* query target */
1544 struct bgp_info
*bi
;
1545 struct rfapi_next_hop_entry
*new;
1546 struct prefix pfx_un
;
1547 struct skiplist
*seen_nexthops
;
1549 int is_l2
= (rn
->p
.family
== AF_ETHERNET
);
1552 struct agg_table
*atable
= agg_get_table(rfd_rib_node
);
1553 struct rfapi_descriptor
*rfd
;
1556 rfd
= agg_get_table_info(atable
);
1558 if (rfapiRibFTDFilterRecentPrefix(rfd
, rn
,
1559 pfx_target_original
))
1565 skiplist_new(0, vnc_prefix_cmp
, (void (*)(void *))prefix_free
);
1567 for (bi
= rn
->info
; bi
; bi
= bi
->next
) {
1569 struct prefix pfx_vn
;
1570 struct prefix
*newpfx
;
1572 if (removed
&& !CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)) {
1573 #if DEBUG_RETURNED_NHL
1574 vnc_zlog_debug_verbose(
1575 "%s: want holddown, this route not holddown, skip",
1580 if (!removed
&& CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)) {
1589 * Check for excluded VN address
1591 if (rfapiVpnBiNhEqualsPt(bi
, exclude_vnaddr
))
1595 * Check for VN address (nexthop) copied already
1598 /* L2 routes: semantic nexthop in aux_prefix; VN addr
1600 pfx_vn
= bi
->extra
->vnc
.import
.aux_prefix
;
1602 rfapiNexthop2Prefix(bi
->attr
, &pfx_vn
);
1604 if (!skiplist_search(seen_nexthops
, &pfx_vn
, NULL
)) {
1605 #if DEBUG_RETURNED_NHL
1606 char buf
[PREFIX_STRLEN
];
1608 prefix2str(&pfx_vn
, buf
, sizeof(buf
));
1609 vnc_zlog_debug_verbose(
1610 "%s: already put VN/nexthop %s, skip", __func__
,
1616 if (rfapiGetUnAddrOfVpnBi(bi
, &pfx_un
)) {
1617 #if DEBUG_ENCAP_MONITOR
1618 vnc_zlog_debug_verbose(
1619 "%s: failed to get UN address of this VPN bi",
1625 newpfx
= prefix_new();
1627 skiplist_insert(seen_nexthops
, newpfx
, newpfx
);
1629 new = rfapiRouteInfo2NextHopEntry(rprefix
, bi
, lifetime
, rn
);
1631 if (rfapiRibPreloadBi(rfd_rib_node
, &pfx_vn
, &pfx_un
,
1633 /* duplicate filtered by RIB */
1634 rfapi_free_next_hop_list(new);
1641 (*tail
)->next
= new;
1650 skiplist_free(seen_nexthops
);
1659 * omit_node is meant for the situation where we are adding a subtree
1660 * of a parent of some original requested node. The response already
1661 * contains the original requested node, and we don't want to duplicate
1662 * its routes in the list, so we skip it if the right or left node
1663 * matches (of course, we still travel down its child subtrees).
1665 static int rfapiNhlAddSubtree(
1666 struct agg_node
*rn
, /* in */
1667 uint32_t lifetime
, /* in */
1668 struct rfapi_next_hop_entry
**head
, /* in/out */
1669 struct rfapi_next_hop_entry
**tail
, /* in/out */
1670 struct agg_node
*omit_node
, /* in */
1671 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1672 struct agg_table
*rfd_rib_table
, /* preload here */
1673 struct prefix
*pfx_target_original
) /* query target */
1675 struct rfapi_ip_prefix rprefix
;
1678 /* FIXME: need to find a better way here to work without sticking our
1679 * hands in node->link */
1680 if (agg_node_left(rn
) && agg_node_left(rn
) != omit_node
) {
1681 if (agg_node_left(rn
)->info
) {
1683 struct agg_node
*rib_rn
= NULL
;
1685 rfapiQprefix2Rprefix(&agg_node_left(rn
)->p
, &rprefix
);
1686 if (rfd_rib_table
) {
1687 rib_rn
= agg_node_get(rfd_rib_table
,
1688 &agg_node_left(rn
)->p
);
1691 count
= rfapiNhlAddNodeRoutes(
1692 agg_node_left(rn
), &rprefix
, lifetime
, 0, head
,
1693 tail
, exclude_vnaddr
, rib_rn
,
1694 pfx_target_original
);
1696 count
= rfapiNhlAddNodeRoutes(
1697 agg_node_left(rn
), &rprefix
, lifetime
,
1698 1, head
, tail
, exclude_vnaddr
, rib_rn
,
1699 pfx_target_original
);
1703 agg_unlock_node(rib_rn
);
1707 if (agg_node_right(rn
) && agg_node_right(rn
) != omit_node
) {
1708 if (agg_node_right(rn
)->info
) {
1710 struct agg_node
*rib_rn
= NULL
;
1712 rfapiQprefix2Rprefix(&agg_node_right(rn
)->p
, &rprefix
);
1713 if (rfd_rib_table
) {
1714 rib_rn
= agg_node_get(rfd_rib_table
,
1715 &agg_node_right(rn
)->p
);
1717 count
= rfapiNhlAddNodeRoutes(
1718 agg_node_right(rn
), &rprefix
, lifetime
, 0, head
,
1719 tail
, exclude_vnaddr
, rib_rn
,
1720 pfx_target_original
);
1722 count
= rfapiNhlAddNodeRoutes(
1723 agg_node_right(rn
), &rprefix
, lifetime
,
1724 1, head
, tail
, exclude_vnaddr
, rib_rn
,
1725 pfx_target_original
);
1729 agg_unlock_node(rib_rn
);
1733 if (agg_node_left(rn
)) {
1734 rcount
+= rfapiNhlAddSubtree(
1735 agg_node_left(rn
), lifetime
, head
, tail
, omit_node
,
1736 exclude_vnaddr
, rfd_rib_table
, pfx_target_original
);
1738 if (agg_node_right(rn
)) {
1739 rcount
+= rfapiNhlAddSubtree(
1740 agg_node_right(rn
), lifetime
, head
, tail
, omit_node
,
1741 exclude_vnaddr
, rfd_rib_table
, pfx_target_original
);
1748 * Implementation of ROUTE_LIST(node) from RFAPI-Import-Event-Handling.txt
1750 * Construct an rfapi nexthop list based on the routes attached to
1751 * the specified node.
1753 * If there are any routes that do NOT have BGP_INFO_REMOVED set,
1754 * return those only. If there are ONLY routes with BGP_INFO_REMOVED,
1755 * then return those, and also include all the non-removed routes from the
1756 * next less-specific node (i.e., this node's parent) at the end.
1758 struct rfapi_next_hop_entry
*rfapiRouteNode2NextHopList(
1759 struct agg_node
*rn
, uint32_t lifetime
, /* put into nexthop entries */
1760 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1761 struct agg_table
*rfd_rib_table
, /* preload here */
1762 struct prefix
*pfx_target_original
) /* query target */
1764 struct rfapi_ip_prefix rprefix
;
1765 struct rfapi_next_hop_entry
*answer
= NULL
;
1766 struct rfapi_next_hop_entry
*last
= NULL
;
1767 struct agg_node
*parent
;
1769 struct agg_node
*rib_rn
;
1771 #if DEBUG_RETURNED_NHL
1773 char buf
[PREFIX_STRLEN
];
1775 prefix2str(&rn
->p
, buf
, sizeof(buf
));
1776 vnc_zlog_debug_verbose("%s: called with node pfx=%s", __func__
,
1779 rfapiDebugBacktrace();
1782 rfapiQprefix2Rprefix(&rn
->p
, &rprefix
);
1784 rib_rn
= rfd_rib_table
? agg_node_get(rfd_rib_table
, &rn
->p
) : NULL
;
1787 * Add non-withdrawn routes at this node
1789 count
= rfapiNhlAddNodeRoutes(rn
, &rprefix
, lifetime
, 0, &answer
, &last
,
1790 exclude_vnaddr
, rib_rn
,
1791 pfx_target_original
);
1794 * If the list has at least one entry, it's finished
1797 count
+= rfapiNhlAddSubtree(rn
, lifetime
, &answer
, &last
, NULL
,
1798 exclude_vnaddr
, rfd_rib_table
,
1799 pfx_target_original
);
1800 vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__
,
1802 #if DEBUG_RETURNED_NHL
1803 rfapiPrintNhl(NULL
, answer
);
1806 agg_unlock_node(rib_rn
);
1811 * Add withdrawn routes at this node
1813 count
= rfapiNhlAddNodeRoutes(rn
, &rprefix
, lifetime
, 1, &answer
, &last
,
1814 exclude_vnaddr
, rib_rn
,
1815 pfx_target_original
);
1817 agg_unlock_node(rib_rn
);
1819 // rfapiPrintNhl(NULL, answer);
1822 * walk up the tree until we find a node with non-deleted
1823 * routes, then add them
1825 for (parent
= agg_node_parent(rn
); parent
;
1826 parent
= agg_node_parent(parent
)) {
1827 if (rfapiHasNonRemovedRoutes(parent
)) {
1833 * Add non-withdrawn routes from less-specific prefix
1836 rib_rn
= rfd_rib_table
? agg_node_get(rfd_rib_table
, &parent
->p
)
1838 rfapiQprefix2Rprefix(&parent
->p
, &rprefix
);
1839 count
+= rfapiNhlAddNodeRoutes(parent
, &rprefix
, lifetime
, 0,
1840 &answer
, &last
, exclude_vnaddr
,
1841 rib_rn
, pfx_target_original
);
1842 count
+= rfapiNhlAddSubtree(parent
, lifetime
, &answer
, &last
,
1843 rn
, exclude_vnaddr
, rfd_rib_table
,
1844 pfx_target_original
);
1846 agg_unlock_node(rib_rn
);
1849 * There is no parent with non-removed routes. Still need to
1850 * add subtree of original node if it contributed routes to the
1854 count
+= rfapiNhlAddSubtree(rn
, lifetime
, &answer
,
1855 &last
, rn
, exclude_vnaddr
,
1857 pfx_target_original
);
1860 vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__
, count
,
1862 #if DEBUG_RETURNED_NHL
1863 rfapiPrintNhl(NULL
, answer
);
1869 * Construct nexthop list of all routes in table
1871 struct rfapi_next_hop_entry
*rfapiRouteTable2NextHopList(
1872 struct agg_table
*rt
, uint32_t lifetime
, /* put into nexthop entries */
1873 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1874 struct agg_table
*rfd_rib_table
, /* preload this NVE rib table */
1875 struct prefix
*pfx_target_original
) /* query target */
1877 struct agg_node
*rn
;
1878 struct rfapi_next_hop_entry
*biglist
= NULL
;
1879 struct rfapi_next_hop_entry
*nhl
;
1880 struct rfapi_next_hop_entry
*tail
= NULL
;
1883 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
1885 nhl
= rfapiRouteNode2NextHopList(rn
, lifetime
, exclude_vnaddr
,
1887 pfx_target_original
);
1889 tail
= biglist
= nhl
;
1896 while (tail
->next
) {
1903 vnc_zlog_debug_verbose("%s: returning %d routes", __func__
, count
);
1907 struct rfapi_next_hop_entry
*rfapiEthRouteNode2NextHopList(
1908 struct agg_node
*rn
, struct rfapi_ip_prefix
*rprefix
,
1909 uint32_t lifetime
, /* put into nexthop entries */
1910 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1911 struct agg_table
*rfd_rib_table
, /* preload NVE rib table */
1912 struct prefix
*pfx_target_original
) /* query target */
1915 struct rfapi_next_hop_entry
*answer
= NULL
;
1916 struct rfapi_next_hop_entry
*last
= NULL
;
1917 struct agg_node
*rib_rn
;
1919 rib_rn
= rfd_rib_table
? agg_node_get(rfd_rib_table
, &rn
->p
) : NULL
;
1921 count
= rfapiNhlAddNodeRoutes(rn
, rprefix
, lifetime
, 0, &answer
, &last
,
1922 NULL
, rib_rn
, pfx_target_original
);
1924 #if DEBUG_ENCAP_MONITOR
1925 vnc_zlog_debug_verbose("%s: node %p: %d non-holddown routes", __func__
,
1930 count
= rfapiNhlAddNodeRoutes(rn
, rprefix
, lifetime
, 1, &answer
,
1931 &last
, exclude_vnaddr
, rib_rn
,
1932 pfx_target_original
);
1933 vnc_zlog_debug_verbose("%s: node %p: %d holddown routes",
1934 __func__
, rn
, count
);
1938 agg_unlock_node(rib_rn
);
1940 #if DEBUG_RETURNED_NHL
1941 rfapiPrintNhl(NULL
, answer
);
1949 * Construct nexthop list of all routes in table
1951 struct rfapi_next_hop_entry
*rfapiEthRouteTable2NextHopList(
1952 uint32_t logical_net_id
, struct rfapi_ip_prefix
*rprefix
,
1953 uint32_t lifetime
, /* put into nexthop entries */
1954 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1955 struct agg_table
*rfd_rib_table
, /* preload NVE rib node */
1956 struct prefix
*pfx_target_original
) /* query target */
1958 struct rfapi_import_table
*it
;
1959 struct bgp
*bgp
= bgp_get_default();
1960 struct agg_table
*rt
;
1961 struct agg_node
*rn
;
1962 struct rfapi_next_hop_entry
*biglist
= NULL
;
1963 struct rfapi_next_hop_entry
*nhl
;
1964 struct rfapi_next_hop_entry
*tail
= NULL
;
1968 it
= rfapiMacImportTableGet(bgp
, logical_net_id
);
1969 rt
= it
->imported_vpn
[AFI_L2VPN
];
1971 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
1973 nhl
= rfapiEthRouteNode2NextHopList(
1974 rn
, rprefix
, lifetime
, exclude_vnaddr
, rfd_rib_table
,
1975 pfx_target_original
);
1977 tail
= biglist
= nhl
;
1984 while (tail
->next
) {
1991 vnc_zlog_debug_verbose("%s: returning %d routes", __func__
, count
);
1996 * Insert a new bi to the imported route table node,
1997 * keeping the list of BIs sorted best route first
1999 static void rfapiBgpInfoAttachSorted(struct agg_node
*rn
,
2000 struct bgp_info
*info_new
, afi_t afi
,
2004 struct bgp_info
*prev
;
2005 struct bgp_info
*next
;
2006 char pfx_buf
[PREFIX2STR_BUFFER
];
2009 bgp
= bgp_get_default(); /* assume 1 instance for now */
2011 if (VNC_DEBUG(IMPORT_BI_ATTACH
)) {
2012 vnc_zlog_debug_verbose("%s: info_new->peer=%p", __func__
,
2014 vnc_zlog_debug_verbose("%s: info_new->peer->su_remote=%p",
2015 __func__
, info_new
->peer
->su_remote
);
2018 for (prev
= NULL
, next
= rn
->info
; next
;
2019 prev
= next
, next
= next
->next
) {
2020 if (!bgp
|| (!CHECK_FLAG(info_new
->flags
, BGP_INFO_REMOVED
)
2021 && CHECK_FLAG(next
->flags
, BGP_INFO_REMOVED
))
2022 || bgp_info_cmp_compatible(bgp
, info_new
, next
, pfx_buf
,
2024 == -1) { /* -1 if 1st is better */
2028 vnc_zlog_debug_verbose("%s: prev=%p, next=%p", __func__
, prev
, next
);
2030 prev
->next
= info_new
;
2032 rn
->info
= info_new
;
2034 info_new
->prev
= prev
;
2035 info_new
->next
= next
;
2037 next
->prev
= info_new
;
2038 bgp_attr_intern(info_new
->attr
);
2041 static void rfapiBgpInfoDetach(struct agg_node
*rn
, struct bgp_info
*bi
)
2044 * Remove the route (doubly-linked)
2046 // bgp_attr_unintern (&bi->attr);
2048 bi
->next
->prev
= bi
->prev
;
2050 bi
->prev
->next
= bi
->next
;
2052 rn
->info
= bi
->next
;
2056 * For L3-indexed import tables
2058 static int rfapi_bi_peer_rd_cmp(void *b1
, void *b2
)
2060 struct bgp_info
*bi1
= b1
;
2061 struct bgp_info
*bi2
= b2
;
2066 if (bi1
->peer
< bi2
->peer
)
2068 if (bi1
->peer
> bi2
->peer
)
2074 return vnc_prefix_cmp((struct prefix
*)&bi1
->extra
->vnc
.import
.rd
,
2075 (struct prefix
*)&bi2
->extra
->vnc
.import
.rd
);
2079 * For L2-indexed import tables
2080 * The BIs in these tables should ALWAYS have an aux_prefix set because
2081 * they arrive via IPv4 or IPv6 advertisements.
2083 static int rfapi_bi_peer_rd_aux_cmp(void *b1
, void *b2
)
2085 struct bgp_info
*bi1
= b1
;
2086 struct bgp_info
*bi2
= b2
;
2092 if (bi1
->peer
< bi2
->peer
)
2094 if (bi1
->peer
> bi2
->peer
)
2100 rc
= vnc_prefix_cmp((struct prefix
*)&bi1
->extra
->vnc
.import
.rd
,
2101 (struct prefix
*)&bi2
->extra
->vnc
.import
.rd
);
2107 * L2 import tables can have multiple entries with the
2108 * same MAC address, same RD, but different L3 addresses.
2110 * Use presence of aux_prefix with AF=ethernet and prefixlen=1
2111 * as magic value to signify explicit wildcarding of the aux_prefix.
2112 * This magic value will not appear in bona fide bi entries in
2113 * the import table, but is allowed in the "fake" bi used to
2114 * probe the table when searching. (We have to test both b1 and b2
2115 * because there is no guarantee of the order the test key and
2116 * the real key will be passed)
2118 if ((bi1
->extra
->vnc
.import
.aux_prefix
.family
== AF_ETHERNET
2119 && (bi1
->extra
->vnc
.import
.aux_prefix
.prefixlen
== 1))
2120 || (bi2
->extra
->vnc
.import
.aux_prefix
.family
== AF_ETHERNET
2121 && (bi2
->extra
->vnc
.import
.aux_prefix
.prefixlen
== 1))) {
2124 * wildcard aux address specified
2129 return vnc_prefix_cmp(&bi1
->extra
->vnc
.import
.aux_prefix
,
2130 &bi2
->extra
->vnc
.import
.aux_prefix
);
2135 * Index on RD and Peer
2137 static void rfapiItBiIndexAdd(struct agg_node
*rn
, /* Import table VPN node */
2138 struct bgp_info
*bi
) /* new BI */
2140 struct skiplist
*sl
;
2147 char buf
[RD_ADDRSTRLEN
];
2149 vnc_zlog_debug_verbose("%s: bi %p, peer %p, rd %s", __func__
,
2151 prefix_rd2str(&bi
->extra
->vnc
.import
.rd
,
2155 sl
= RFAPI_RDINDEX_W_ALLOC(rn
);
2157 if (AF_ETHERNET
== rn
->p
.family
) {
2158 sl
= skiplist_new(0, rfapi_bi_peer_rd_aux_cmp
, NULL
);
2160 sl
= skiplist_new(0, rfapi_bi_peer_rd_cmp
, NULL
);
2162 RFAPI_IT_EXTRA_GET(rn
)->u
.vpn
.idx_rd
= sl
;
2163 agg_lock_node(rn
); /* for skiplist */
2165 assert(!skiplist_insert(sl
, (void *)bi
, (void *)bi
));
2166 agg_lock_node(rn
); /* for skiplist entry */
2168 /* NB: BIs in import tables are not refcounted */
2171 static void rfapiItBiIndexDump(struct agg_node
*rn
)
2173 struct skiplist
*sl
;
2174 void *cursor
= NULL
;
2179 sl
= RFAPI_RDINDEX(rn
);
2183 for (rc
= skiplist_next(sl
, (void **)&k
, (void **)&v
, &cursor
); !rc
;
2184 rc
= skiplist_next(sl
, (void **)&k
, (void **)&v
, &cursor
)) {
2186 char buf
[RD_ADDRSTRLEN
];
2187 char buf_aux_pfx
[PREFIX_STRLEN
];
2189 prefix_rd2str(&k
->extra
->vnc
.import
.rd
, buf
, sizeof(buf
));
2190 if (k
->extra
->vnc
.import
.aux_prefix
.family
) {
2191 prefix2str(&k
->extra
->vnc
.import
.aux_prefix
,
2192 buf_aux_pfx
, sizeof(buf_aux_pfx
));
2194 strncpy(buf_aux_pfx
, "(none)", PREFIX_STRLEN
);
2196 vnc_zlog_debug_verbose("bi %p, peer %p, rd %s, aux_prefix %s",
2197 k
, k
->peer
, buf
, buf_aux_pfx
);
2201 static struct bgp_info
*rfapiItBiIndexSearch(
2202 struct agg_node
*rn
, /* Import table VPN node */
2203 struct prefix_rd
*prd
, struct peer
*peer
,
2204 struct prefix
*aux_prefix
) /* optional L3 addr for L2 ITs */
2206 struct skiplist
*sl
;
2208 struct bgp_info bi_fake
;
2209 struct bgp_info_extra bi_extra
;
2210 struct bgp_info
*bi_result
;
2212 sl
= RFAPI_RDINDEX(rn
);
2218 char buf
[RD_ADDRSTRLEN
];
2219 char buf_aux_pfx
[PREFIX_STRLEN
];
2222 prefix2str(aux_prefix
, buf_aux_pfx
,
2223 sizeof(buf_aux_pfx
));
2225 strncpy(buf_aux_pfx
, "(nil)", sizeof(buf_aux_pfx
));
2227 vnc_zlog_debug_verbose("%s want prd=%s, peer=%p, aux_prefix=%s",
2229 prefix_rd2str(prd
, buf
, sizeof(buf
)),
2231 rfapiItBiIndexDump(rn
);
2235 /* threshold is a WAG */
2236 if (sl
->count
< 3) {
2238 vnc_zlog_debug_verbose("%s: short list algorithm", __func__
);
2240 /* if short list, linear search might be faster */
2241 for (bi_result
= rn
->info
; bi_result
;
2242 bi_result
= bi_result
->next
) {
2245 char buf
[RD_ADDRSTRLEN
];
2247 vnc_zlog_debug_verbose(
2248 "%s: bi has prd=%s, peer=%p", __func__
,
2249 prefix_rd2str(&bi_result
->extra
->vnc
2255 if (peer
== bi_result
->peer
2256 && !prefix_cmp((struct prefix
*)&bi_result
->extra
2258 (struct prefix
*)prd
)) {
2261 vnc_zlog_debug_verbose(
2262 "%s: peer and RD same, doing aux_prefix check",
2266 || !prefix_cmp(aux_prefix
,
2267 &bi_result
->extra
->vnc
.import
2271 vnc_zlog_debug_verbose("%s: match",
2281 bi_fake
.peer
= peer
;
2282 bi_fake
.extra
= &bi_extra
;
2283 bi_fake
.extra
->vnc
.import
.rd
= *(struct prefix_rd
*)prd
;
2285 bi_fake
.extra
->vnc
.import
.aux_prefix
= *aux_prefix
;
2288 bi_fake
.extra
->vnc
.import
.aux_prefix
.family
= AF_ETHERNET
;
2289 bi_fake
.extra
->vnc
.import
.aux_prefix
.prefixlen
= 1;
2292 rc
= skiplist_search(sl
, (void *)&bi_fake
, (void *)&bi_result
);
2296 vnc_zlog_debug_verbose("%s: no match", __func__
);
2302 vnc_zlog_debug_verbose("%s: matched bi=%p", __func__
, bi_result
);
2308 static void rfapiItBiIndexDel(struct agg_node
*rn
, /* Import table VPN node */
2309 struct bgp_info
*bi
) /* old BI */
2311 struct skiplist
*sl
;
2315 char buf
[RD_ADDRSTRLEN
];
2317 vnc_zlog_debug_verbose("%s: bi %p, peer %p, rd %s", __func__
,
2319 prefix_rd2str(&bi
->extra
->vnc
.import
.rd
,
2323 sl
= RFAPI_RDINDEX(rn
);
2326 rc
= skiplist_delete(sl
, (void *)(bi
), (void *)bi
);
2328 rfapiItBiIndexDump(rn
);
2332 agg_unlock_node(rn
); /* for skiplist entry */
2334 /* NB: BIs in import tables are not refcounted */
2338 * Add a backreference at the ENCAP node to the VPN route that
2341 static void rfapiMonitorEncapAdd(struct rfapi_import_table
*import_table
,
2342 struct prefix
*p
, /* VN address */
2343 struct agg_node
*vpn_rn
, /* VPN node */
2344 struct bgp_info
*vpn_bi
) /* VPN bi/route */
2346 afi_t afi
= family2afi(p
->family
);
2347 struct agg_node
*rn
;
2348 struct rfapi_monitor_encap
*m
;
2351 rn
= agg_node_get(import_table
->imported_encap
[afi
], p
); /* locks rn */
2354 m
= XCALLOC(MTYPE_RFAPI_MONITOR_ENCAP
,
2355 sizeof(struct rfapi_monitor_encap
));
2362 /* insert to encap node's list */
2363 m
->next
= RFAPI_MONITOR_ENCAP(rn
);
2366 RFAPI_MONITOR_ENCAP_W_ALLOC(rn
) = m
;
2368 /* for easy lookup when deleting vpn route */
2369 vpn_bi
->extra
->vnc
.import
.hme
= m
;
2371 vnc_zlog_debug_verbose(
2372 "%s: it=%p, vpn_bi=%p, afi=%d, encap rn=%p, setting vpn_bi->extra->vnc.import.hme=%p",
2373 __func__
, import_table
, vpn_bi
, afi
, rn
, m
);
2375 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, 0);
2376 bgp_attr_intern(vpn_bi
->attr
);
2379 static void rfapiMonitorEncapDelete(struct bgp_info
*vpn_bi
)
2382 * Remove encap monitor
2384 vnc_zlog_debug_verbose("%s: vpn_bi=%p", __func__
, vpn_bi
);
2385 if (vpn_bi
->extra
) {
2386 struct rfapi_monitor_encap
*hme
= vpn_bi
->extra
->vnc
.import
.hme
;
2390 vnc_zlog_debug_verbose("%s: hme=%p", __func__
, hme
);
2392 /* Refcount checking takes too long here */
2393 // RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 0);
2395 hme
->next
->prev
= hme
->prev
;
2397 hme
->prev
->next
= hme
->next
;
2399 RFAPI_MONITOR_ENCAP_W_ALLOC(hme
->rn
) =
2401 /* Refcount checking takes too long here */
2402 // RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 1);
2404 /* see if the struct rfapi_it_extra is empty and can be
2406 rfapiMonitorExtraPrune(SAFI_ENCAP
, hme
->rn
);
2408 agg_unlock_node(hme
->rn
); /* decr ref count */
2409 XFREE(MTYPE_RFAPI_MONITOR_ENCAP
, hme
);
2410 vpn_bi
->extra
->vnc
.import
.hme
= NULL
;
2416 * quagga lib/thread.h says this must return int even though
2417 * it doesn't do anything with the return value
2419 static int rfapiWithdrawTimerVPN(struct thread
*t
)
2421 struct rfapi_withdraw
*wcb
= t
->arg
;
2422 struct bgp_info
*bi
= wcb
->info
;
2423 struct bgp
*bgp
= bgp_get_default();
2425 struct rfapi_monitor_vpn
*moved
;
2430 assert(wcb
->import_table
);
2433 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_MPLS_VPN
, wcb
->lockoffset
);
2438 vnc_zlog_debug_verbose(
2439 "%s: removing bi %p at prefix %s/%d", __func__
, bi
,
2440 rfapi_ntop(wcb
->node
->p
.family
, &wcb
->node
->p
.u
.prefix
,
2442 wcb
->node
->p
.prefixlen
);
2446 * Remove the route (doubly-linked)
2448 if (CHECK_FLAG(bi
->flags
, BGP_INFO_VALID
)
2449 && VALID_INTERIOR_TYPE(bi
->type
))
2450 RFAPI_MONITOR_EXTERIOR(wcb
->node
)->valid_interior_count
--;
2452 afi
= family2afi(wcb
->node
->p
.family
);
2453 wcb
->import_table
->holddown_count
[afi
] -= 1; /* keep count consistent */
2454 rfapiItBiIndexDel(wcb
->node
, bi
);
2455 rfapiBgpInfoDetach(wcb
->node
, bi
); /* with removed bi */
2457 vnc_import_bgp_exterior_del_route_interior(bgp
, wcb
->import_table
,
2462 * If VNC is configured to send response remove messages, AND
2463 * if the removed route had a UN address, do response removal
2466 if (!(bgp
->rfapi_cfg
->flags
2467 & BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE
)) {
2469 int has_valid_duplicate
= 0;
2470 struct bgp_info
*bii
;
2473 * First check if there are any OTHER routes at this node
2474 * that have the same nexthop and a valid UN address. If
2475 * there are (e.g., from other peers), then the route isn't
2476 * really gone, so skip sending a response removal message.
2478 for (bii
= wcb
->node
->info
; bii
; bii
= bii
->next
) {
2479 if (rfapiVpnBiSamePtUn(bi
, bii
)) {
2480 has_valid_duplicate
= 1;
2485 vnc_zlog_debug_verbose("%s: has_valid_duplicate=%d", __func__
,
2486 has_valid_duplicate
);
2488 if (!has_valid_duplicate
) {
2489 rfapiRibPendingDeleteRoute(bgp
, wcb
->import_table
, afi
,
2494 rfapiMonitorEncapDelete(bi
);
2497 * If there are no VPN monitors at this VPN Node A,
2500 if (!RFAPI_MONITOR_VPN(wcb
->node
)) {
2501 vnc_zlog_debug_verbose("%s: no VPN monitors at this node",
2507 * rfapiMonitorMoveShorter only moves monitors if there are
2508 * no remaining valid routes at the current node
2510 moved
= rfapiMonitorMoveShorter(wcb
->node
, 1);
2513 rfapiMonitorMovedUp(wcb
->import_table
, wcb
->node
, moved
->node
,
2521 rfapiBgpInfoFree(bi
);
2525 * If route count at this node has gone to 0, withdraw exported prefix
2527 if (!wcb
->node
->info
) {
2528 /* see if the struct rfapi_it_extra is empty and can be freed */
2529 rfapiMonitorExtraPrune(SAFI_MPLS_VPN
, wcb
->node
);
2530 vnc_direct_bgp_del_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2531 vnc_zebra_del_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2534 * nexthop change event
2535 * vnc_direct_bgp_add_prefix() will recompute the VN addr
2538 vnc_direct_bgp_add_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2541 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_MPLS_VPN
, 1 + wcb
->lockoffset
);
2542 agg_unlock_node(wcb
->node
); /* decr ref count */
2543 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
2548 * This works for multiprotocol extension, but not for plain ol'
2549 * unicast IPv4 because that nexthop is stored in attr->nexthop
2551 void rfapiNexthop2Prefix(struct attr
*attr
, struct prefix
*p
)
2556 memset(p
, 0, sizeof(struct prefix
));
2558 switch (p
->family
= BGP_MP_NEXTHOP_FAMILY(attr
->mp_nexthop_len
)) {
2560 p
->u
.prefix4
= attr
->mp_nexthop_global_in
;
2565 p
->u
.prefix6
= attr
->mp_nexthop_global
;
2570 vnc_zlog_debug_verbose("%s: Family is unknown = %d", __func__
,
2575 void rfapiUnicastNexthop2Prefix(afi_t afi
, struct attr
*attr
, struct prefix
*p
)
2577 if (afi
== AFI_IP
) {
2578 p
->family
= AF_INET
;
2580 p
->u
.prefix4
= attr
->nexthop
;
2582 rfapiNexthop2Prefix(attr
, p
);
2586 static int rfapiAttrNexthopAddrDifferent(struct prefix
*p1
, struct prefix
*p2
)
2589 vnc_zlog_debug_verbose("%s: p1 or p2 is NULL", __func__
);
2594 * Are address families the same?
2596 if (p1
->family
!= p2
->family
) {
2600 switch (p1
->family
) {
2602 if (IPV4_ADDR_SAME(&p1
->u
.prefix4
, &p2
->u
.prefix4
))
2607 if (IPV6_ADDR_SAME(&p1
->u
.prefix6
, &p2
->u
.prefix6
))
2618 static void rfapiCopyUnEncap2VPN(struct bgp_info
*encap_bi
,
2619 struct bgp_info
*vpn_bi
)
2621 if (!encap_bi
->attr
) {
2622 zlog_warn("%s: no encap bi attr/extra, can't copy UN address",
2627 if (!vpn_bi
|| !vpn_bi
->extra
) {
2628 zlog_warn("%s: no vpn bi attr/extra, can't copy UN address",
2633 switch (BGP_MP_NEXTHOP_FAMILY(encap_bi
->attr
->mp_nexthop_len
)) {
2637 * instrumentation to debug segfault of 091127
2639 vnc_zlog_debug_verbose("%s: vpn_bi=%p", __func__
, vpn_bi
);
2641 vnc_zlog_debug_verbose("%s: vpn_bi->extra=%p", __func__
,
2645 vpn_bi
->extra
->vnc
.import
.un_family
= AF_INET
;
2646 vpn_bi
->extra
->vnc
.import
.un
.addr4
=
2647 encap_bi
->attr
->mp_nexthop_global_in
;
2651 vpn_bi
->extra
->vnc
.import
.un_family
= AF_INET6
;
2652 vpn_bi
->extra
->vnc
.import
.un
.addr6
=
2653 encap_bi
->attr
->mp_nexthop_global
;
2657 zlog_warn("%s: invalid encap nexthop length: %d", __func__
,
2658 encap_bi
->attr
->mp_nexthop_len
);
2659 vpn_bi
->extra
->vnc
.import
.un_family
= 0;
2665 * returns 0 on success, nonzero on error
2667 static int rfapiWithdrawEncapUpdateCachedUn(
2668 struct rfapi_import_table
*import_table
, struct bgp_info
*encap_bi
,
2669 struct agg_node
*vpn_rn
, struct bgp_info
*vpn_bi
)
2674 * clear cached UN address
2676 if (!vpn_bi
|| !vpn_bi
->extra
) {
2678 "%s: missing VPN bi/extra, can't clear UN addr",
2682 vpn_bi
->extra
->vnc
.import
.un_family
= 0;
2683 memset(&vpn_bi
->extra
->vnc
.import
.un
, 0,
2684 sizeof(vpn_bi
->extra
->vnc
.import
.un
));
2685 if (CHECK_FLAG(vpn_bi
->flags
, BGP_INFO_VALID
)) {
2686 if (rfapiGetVncTunnelUnAddr(vpn_bi
->attr
, NULL
)) {
2687 UNSET_FLAG(vpn_bi
->flags
, BGP_INFO_VALID
);
2688 if (VALID_INTERIOR_TYPE(vpn_bi
->type
))
2689 RFAPI_MONITOR_EXTERIOR(vpn_rn
)
2690 ->valid_interior_count
--;
2691 /* signal interior route withdrawal to
2692 * import-exterior */
2693 vnc_import_bgp_exterior_del_route_interior(
2694 bgp_get_default(), import_table
, vpn_rn
,
2701 zlog_warn("%s: missing VPN bi, can't clear UN addr",
2705 rfapiCopyUnEncap2VPN(encap_bi
, vpn_bi
);
2706 if (!CHECK_FLAG(vpn_bi
->flags
, BGP_INFO_VALID
)) {
2707 SET_FLAG(vpn_bi
->flags
, BGP_INFO_VALID
);
2708 if (VALID_INTERIOR_TYPE(vpn_bi
->type
))
2709 RFAPI_MONITOR_EXTERIOR(vpn_rn
)
2710 ->valid_interior_count
++;
2711 /* signal interior route withdrawal to import-exterior
2713 vnc_import_bgp_exterior_add_route_interior(
2714 bgp_get_default(), import_table
, vpn_rn
,
2721 static int rfapiWithdrawTimerEncap(struct thread
*t
)
2723 struct rfapi_withdraw
*wcb
= t
->arg
;
2724 struct bgp_info
*bi
= wcb
->info
;
2725 int was_first_route
= 0;
2726 struct rfapi_monitor_encap
*em
;
2727 struct skiplist
*vpn_node_sl
= skiplist_new(0, NULL
, NULL
);
2731 assert(wcb
->import_table
);
2733 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_ENCAP
, 0);
2735 if (wcb
->node
->info
== bi
)
2736 was_first_route
= 1;
2739 * Remove the route/bi and free it
2741 rfapiBgpInfoDetach(wcb
->node
, bi
);
2742 rfapiBgpInfoFree(bi
);
2744 if (!was_first_route
)
2747 for (em
= RFAPI_MONITOR_ENCAP(wcb
->node
); em
; em
= em
->next
) {
2750 * Update monitoring VPN BIs with new encap info at the
2751 * head of the encap bi chain (which could be NULL after
2752 * removing the expiring bi above)
2754 if (rfapiWithdrawEncapUpdateCachedUn(wcb
->import_table
,
2755 wcb
->node
->info
, em
->node
,
2760 * Build a list of unique VPN nodes referenced by these
2762 * Use a skiplist for speed.
2764 skiplist_insert(vpn_node_sl
, em
->node
, em
->node
);
2769 * for each VPN node referenced in the ENCAP monitors:
2771 struct agg_node
*rn
;
2772 while (!skiplist_first(vpn_node_sl
, (void **)&rn
, NULL
)) {
2773 if (!wcb
->node
->info
) {
2774 struct rfapi_monitor_vpn
*moved
;
2776 moved
= rfapiMonitorMoveShorter(rn
, 0);
2778 // rfapiDoRouteCallback(wcb->import_table,
2779 // moved->node, moved);
2780 rfapiMonitorMovedUp(wcb
->import_table
, rn
,
2781 moved
->node
, moved
);
2784 // rfapiDoRouteCallback(wcb->import_table, rn, NULL);
2785 rfapiMonitorItNodeChanged(wcb
->import_table
, rn
, NULL
);
2787 skiplist_delete_first(vpn_node_sl
);
2791 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_ENCAP
, 1);
2792 agg_unlock_node(wcb
->node
); /* decr ref count */
2793 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
2794 skiplist_free(vpn_node_sl
);
2800 * Works for both VPN and ENCAP routes; timer_service_func is different
2804 rfapiBiStartWithdrawTimer(struct rfapi_import_table
*import_table
,
2805 struct agg_node
*rn
, struct bgp_info
*bi
, afi_t afi
,
2807 int (*timer_service_func
)(struct thread
*))
2810 struct rfapi_withdraw
*wcb
;
2812 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)) {
2814 * Already on the path to being withdrawn,
2815 * should already have a timer set up to
2818 vnc_zlog_debug_verbose(
2819 "%s: already being withdrawn, do nothing", __func__
);
2823 rfapiGetVncLifetime(bi
->attr
, &lifetime
);
2824 vnc_zlog_debug_verbose("%s: VNC lifetime is %u", __func__
, lifetime
);
2827 * withdrawn routes get to hang around for a while
2829 SET_FLAG(bi
->flags
, BGP_INFO_REMOVED
);
2831 /* set timer to remove the route later */
2832 lifetime
= rfapiGetHolddownFromLifetime(lifetime
);
2833 vnc_zlog_debug_verbose("%s: using timeout %u", __func__
, lifetime
);
2836 * Stash import_table, node, and info for use by timer
2837 * service routine, which is supposed to free the wcb.
2839 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
2843 wcb
->import_table
= import_table
;
2844 bgp_attr_intern(bi
->attr
);
2846 if (VNC_DEBUG(VERBOSE
)) {
2847 vnc_zlog_debug_verbose(
2848 "%s: wcb values: node=%p, info=%p, import_table=%p (bi follows)",
2849 __func__
, wcb
->node
, wcb
->info
, wcb
->import_table
);
2850 rfapiPrintBi(NULL
, bi
);
2855 if (lifetime
> UINT32_MAX
/ 1001) {
2856 /* sub-optimal case, but will probably never happen */
2857 bi
->extra
->vnc
.import
.timer
= NULL
;
2858 thread_add_timer(bm
->master
, timer_service_func
, wcb
, lifetime
,
2859 &bi
->extra
->vnc
.import
.timer
);
2861 static uint32_t jitter
;
2862 uint32_t lifetime_msec
;
2865 * the goal here is to spread out the timers so they are
2866 * sortable in the skip list
2868 if (++jitter
>= 1000)
2871 lifetime_msec
= (lifetime
* 1000) + jitter
;
2873 bi
->extra
->vnc
.import
.timer
= NULL
;
2874 thread_add_timer_msec(bm
->master
, timer_service_func
, wcb
,
2876 &bi
->extra
->vnc
.import
.timer
);
2879 /* re-sort route list (BGP_INFO_REMOVED routes are last) */
2880 if (((struct bgp_info
*)rn
->info
)->next
) {
2881 rfapiBgpInfoDetach(rn
, bi
);
2882 rfapiBgpInfoAttachSorted(rn
, bi
, afi
, safi
);
2887 typedef void(rfapi_bi_filtered_import_f
)(struct rfapi_import_table
*, int,
2888 struct peer
*, void *, struct prefix
*,
2889 struct prefix
*, afi_t
,
2890 struct prefix_rd
*, struct attr
*,
2891 uint8_t, uint8_t, uint32_t *);
2894 static void rfapiExpireEncapNow(struct rfapi_import_table
*it
,
2895 struct agg_node
*rn
, struct bgp_info
*bi
)
2897 struct rfapi_withdraw
*wcb
;
2901 * pretend we're an expiring timer
2903 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
2906 wcb
->import_table
= it
;
2907 memset(&t
, 0, sizeof(t
));
2909 rfapiWithdrawTimerEncap(&t
); /* frees wcb */
2912 static int rfapiGetNexthop(struct attr
*attr
, struct prefix
*prefix
)
2914 switch (BGP_MP_NEXTHOP_FAMILY(attr
->mp_nexthop_len
)) {
2916 prefix
->family
= AF_INET
;
2917 prefix
->prefixlen
= 32;
2918 prefix
->u
.prefix4
= attr
->mp_nexthop_global_in
;
2921 prefix
->family
= AF_INET6
;
2922 prefix
->prefixlen
= 128;
2923 prefix
->u
.prefix6
= attr
->mp_nexthop_global
;
2926 vnc_zlog_debug_verbose("%s: unknown attr->mp_nexthop_len %d",
2927 __func__
, attr
->mp_nexthop_len
);
2934 * import a bgp_info if its route target list intersects with the
2935 * import table's route target list
2937 static void rfapiBgpInfoFilteredImportEncap(
2938 struct rfapi_import_table
*import_table
, int action
, struct peer
*peer
,
2939 void *rfd
, /* set for looped back routes */
2941 struct prefix
*aux_prefix
, /* Unused for encap routes */
2942 afi_t afi
, struct prefix_rd
*prd
,
2943 struct attr
*attr
, /* part of bgp_info */
2944 uint8_t type
, /* part of bgp_info */
2945 uint8_t sub_type
, /* part of bgp_info */
2946 uint32_t *label
) /* part of bgp_info */
2948 struct agg_table
*rt
= NULL
;
2949 struct agg_node
*rn
;
2950 struct bgp_info
*info_new
;
2951 struct bgp_info
*bi
;
2952 struct bgp_info
*next
;
2955 struct prefix p_firstbi_old
;
2956 struct prefix p_firstbi_new
;
2958 const char *action_str
= NULL
;
2959 struct prefix un_prefix
;
2962 bgp
= bgp_get_default(); /* assume 1 instance for now */
2965 case FIF_ACTION_UPDATE
:
2966 action_str
= "update";
2968 case FIF_ACTION_WITHDRAW
:
2969 action_str
= "withdraw";
2971 case FIF_ACTION_KILL
:
2972 action_str
= "kill";
2979 vnc_zlog_debug_verbose(
2980 "%s: entry: %s: prefix %s/%d", __func__
, action_str
,
2981 inet_ntop(p
->family
, &p
->u
.prefix
, buf
, BUFSIZ
), p
->prefixlen
);
2983 memset(&p_firstbi_old
, 0, sizeof(p_firstbi_old
));
2984 memset(&p_firstbi_new
, 0, sizeof(p_firstbi_new
));
2986 if (action
== FIF_ACTION_UPDATE
) {
2988 * Compare rt lists. If no intersection, don't import this route
2989 * On a withdraw, peer and RD are sufficient to determine if
2992 if (!attr
|| !attr
->ecommunity
) {
2994 vnc_zlog_debug_verbose(
2995 "%s: attr, extra, or ecommunity missing, not importing",
2999 #if RFAPI_REQUIRE_ENCAP_BEEC
3000 if (!rfapiEcommunitiesMatchBeec(attr
->ecommunity
)) {
3001 vnc_zlog_debug_verbose(
3002 "%s: it=%p: no match for BGP Encapsulation ecommunity",
3003 __func__
, import_table
);
3007 if (!rfapiEcommunitiesIntersect(import_table
->rt_import_list
,
3008 attr
->ecommunity
)) {
3010 vnc_zlog_debug_verbose(
3011 "%s: it=%p: no ecommunity intersection",
3012 __func__
, import_table
);
3017 * Updates must also have a nexthop address
3019 memset(&un_prefix
, 0,
3020 sizeof(un_prefix
)); /* keep valgrind happy */
3021 if (rfapiGetNexthop(attr
, &un_prefix
)) {
3022 vnc_zlog_debug_verbose("%s: missing nexthop address",
3029 * Figure out which radix tree the route would go into
3034 rt
= import_table
->imported_encap
[afi
];
3038 flog_err(LIB_ERR_DEVELOPMENT
, "%s: bad afi %d", __func__
, afi
);
3043 * agg_node_lookup returns a node only if there is at least
3044 * one route attached.
3046 rn
= agg_node_lookup(rt
, p
);
3048 #if DEBUG_ENCAP_MONITOR
3049 vnc_zlog_debug_verbose("%s: initial encap lookup(it=%p) rn=%p",
3050 __func__
, import_table
, rn
);
3055 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, 1);
3056 agg_unlock_node(rn
); /* undo lock in agg_node_lookup */
3060 * capture nexthop of first bi
3063 rfapiNexthop2Prefix(
3064 ((struct bgp_info
*)(rn
->info
))->attr
,
3068 for (bi
= rn
->info
; bi
; bi
= bi
->next
) {
3071 * Does this bgp_info refer to the same route
3072 * as we are trying to add?
3074 vnc_zlog_debug_verbose("%s: comparing BI %p", __func__
,
3081 * RD of import table bi is in bi->extra->vnc.import.rd
3082 * RD of info_orig is in prd
3085 vnc_zlog_debug_verbose("%s: no bi->extra",
3090 (struct prefix
*)&bi
->extra
->vnc
.import
.rd
,
3091 (struct prefix
*)prd
)) {
3093 vnc_zlog_debug_verbose("%s: prd does not match",
3101 if (bi
->peer
!= peer
) {
3102 vnc_zlog_debug_verbose(
3103 "%s: peer does not match", __func__
);
3107 vnc_zlog_debug_verbose("%s: found matching bi",
3110 /* Same route. Delete this bi, replace with new one */
3112 if (action
== FIF_ACTION_WITHDRAW
) {
3114 vnc_zlog_debug_verbose(
3115 "%s: withdrawing at prefix %s/%d",
3117 inet_ntop(rn
->p
.family
, &rn
->p
.u
.prefix
,
3121 rfapiBiStartWithdrawTimer(
3122 import_table
, rn
, bi
, afi
, SAFI_ENCAP
,
3123 rfapiWithdrawTimerEncap
);
3126 vnc_zlog_debug_verbose(
3127 "%s: %s at prefix %s/%d", __func__
,
3128 ((action
== FIF_ACTION_KILL
)
3131 inet_ntop(rn
->p
.family
, &rn
->p
.u
.prefix
,
3136 * If this route is waiting to be deleted
3138 * a previous withdraw, we must cancel its
3141 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
3142 && bi
->extra
->vnc
.import
.timer
) {
3145 (struct thread
*)bi
->extra
->vnc
3147 struct rfapi_withdraw
*wcb
= t
->arg
;
3149 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3153 if (action
== FIF_ACTION_UPDATE
) {
3154 rfapiBgpInfoDetach(rn
, bi
);
3155 rfapiBgpInfoFree(bi
);
3159 * Kill: do export stuff when removing
3162 struct rfapi_withdraw
*wcb
;
3166 * pretend we're an expiring timer
3169 MTYPE_RFAPI_WITHDRAW
,
3170 sizeof(struct rfapi_withdraw
));
3173 wcb
->import_table
= import_table
;
3174 memset(&t
, 0, sizeof(t
));
3176 rfapiWithdrawTimerEncap(
3177 &t
); /* frees wcb */
3186 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, replacing
? 1 : 0);
3188 if (action
== FIF_ACTION_WITHDRAW
|| action
== FIF_ACTION_KILL
)
3192 rfapiBgpInfoCreate(attr
, peer
, rfd
, prd
, type
, sub_type
, NULL
);
3196 agg_lock_node(rn
); /* incr ref count for new BI */
3198 rn
= agg_node_get(rt
, p
);
3201 vnc_zlog_debug_verbose(
3202 "%s: (afi=%d, rn=%p) inserting at prefix %s/%d", __func__
, afi
,
3203 rn
, inet_ntop(rn
->p
.family
, &rn
->p
.u
.prefix
, buf
, BUFSIZ
),
3206 rfapiBgpInfoAttachSorted(rn
, info_new
, afi
, SAFI_ENCAP
);
3209 * Delete holddown routes from same NVE. See details in
3210 * rfapiBgpInfoFilteredImportVPN()
3212 for (bi
= info_new
->next
; bi
; bi
= next
) {
3214 struct prefix pfx_un
;
3218 if (!CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
))
3222 * We already match the VN address (it is the prefix
3223 * of the route node)
3226 if (!rfapiGetNexthop(bi
->attr
, &pfx_un
)
3227 && prefix_same(&pfx_un
, &un_prefix
)) {
3235 vnc_zlog_debug_verbose(
3236 "%s: removing holddown bi matching NVE of new route",
3238 if (bi
->extra
->vnc
.import
.timer
) {
3240 (struct thread
*)bi
->extra
->vnc
.import
.timer
;
3241 struct rfapi_withdraw
*wcb
= t
->arg
;
3243 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3246 rfapiExpireEncapNow(import_table
, rn
, bi
);
3249 rfapiNexthop2Prefix(((struct bgp_info
*)(rn
->info
))->attr
,
3253 * If the nexthop address of the selected Encap route (i.e.,
3254 * the UN address) has changed, then we must update the VPN
3255 * routes that refer to this Encap route and possibly force
3258 if (rfapiAttrNexthopAddrDifferent(&p_firstbi_old
, &p_firstbi_new
)) {
3260 struct rfapi_monitor_encap
*m
;
3261 struct rfapi_monitor_encap
*mnext
;
3263 struct agg_node
*referenced_vpn_prefix
;
3266 * Optimized approach: build radix tree on the fly to
3267 * hold list of VPN nodes referenced by the ENCAP monitors
3269 * The nodes in this table correspond to prefixes of VPN routes.
3270 * The "info" pointer of the node points to a chain of
3271 * struct rfapi_monitor_encap, each of which refers to a
3272 * specific VPN node.
3274 struct agg_table
*referenced_vpn_table
;
3276 referenced_vpn_table
= agg_table_init();
3277 assert(referenced_vpn_table
);
3280 * iterate over the set of monitors at this ENCAP node.
3282 #if DEBUG_ENCAP_MONITOR
3283 vnc_zlog_debug_verbose("%s: examining monitors at rn=%p",
3286 for (m
= RFAPI_MONITOR_ENCAP(rn
); m
; m
= m
->next
) {
3289 * For each referenced bi/route, copy the ENCAP route's
3290 * nexthop to the VPN route's cached UN address field
3292 * the address family of the cached UN address field.
3294 rfapiCopyUnEncap2VPN(info_new
, m
->bi
);
3295 if (!CHECK_FLAG(m
->bi
->flags
, BGP_INFO_VALID
)) {
3296 SET_FLAG(m
->bi
->flags
, BGP_INFO_VALID
);
3297 if (VALID_INTERIOR_TYPE(m
->bi
->type
))
3298 RFAPI_MONITOR_EXTERIOR(m
->node
)
3299 ->valid_interior_count
++;
3300 vnc_import_bgp_exterior_add_route_interior(
3301 bgp
, import_table
, m
->node
, m
->bi
);
3305 * Build a list of unique VPN nodes referenced by these
3308 * There could be more than one VPN node here with a
3310 * prefix. Those are currently in an unsorted linear
3315 referenced_vpn_prefix
=
3316 agg_node_get(referenced_vpn_table
, &m
->node
->p
);
3317 assert(referenced_vpn_prefix
);
3318 for (mnext
= referenced_vpn_prefix
->info
; mnext
;
3319 mnext
= mnext
->next
) {
3321 if (mnext
->node
== m
->node
)
3327 * already have an entry for this VPN node
3329 agg_unlock_node(referenced_vpn_prefix
);
3332 MTYPE_RFAPI_MONITOR_ENCAP
,
3333 sizeof(struct rfapi_monitor_encap
));
3335 mnext
->node
= m
->node
;
3336 mnext
->next
= referenced_vpn_prefix
->info
;
3337 referenced_vpn_prefix
->info
= mnext
;
3342 * for each VPN node referenced in the ENCAP monitors:
3344 for (referenced_vpn_prefix
=
3345 agg_route_top(referenced_vpn_table
);
3346 referenced_vpn_prefix
;
3347 referenced_vpn_prefix
=
3348 agg_route_next(referenced_vpn_prefix
)) {
3350 while ((m
= referenced_vpn_prefix
->info
)) {
3354 rfapiMonitorMoveLonger(m
->node
);
3355 for (n
= m
->node
; n
; n
= agg_node_parent(n
)) {
3356 // rfapiDoRouteCallback(import_table, n,
3359 rfapiMonitorItNodeChanged(import_table
, m
->node
,
3362 referenced_vpn_prefix
->info
= m
->next
;
3363 agg_unlock_node(referenced_vpn_prefix
);
3364 XFREE(MTYPE_RFAPI_MONITOR_ENCAP
, m
);
3367 agg_table_finish(referenced_vpn_table
);
3370 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, 0);
3373 static void rfapiExpireVpnNow(struct rfapi_import_table
*it
,
3374 struct agg_node
*rn
, struct bgp_info
*bi
,
3377 struct rfapi_withdraw
*wcb
;
3381 * pretend we're an expiring timer
3383 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
3386 wcb
->import_table
= it
;
3387 wcb
->lockoffset
= lockoffset
;
3388 memset(&t
, 0, sizeof(t
));
3390 rfapiWithdrawTimerVPN(&t
); /* frees wcb */
3395 * import a bgp_info if its route target list intersects with the
3396 * import table's route target list
3398 void rfapiBgpInfoFilteredImportVPN(
3399 struct rfapi_import_table
*import_table
, int action
, struct peer
*peer
,
3400 void *rfd
, /* set for looped back routes */
3402 struct prefix
*aux_prefix
, /* AFI_L2VPN: optional IP */
3403 afi_t afi
, struct prefix_rd
*prd
,
3404 struct attr
*attr
, /* part of bgp_info */
3405 uint8_t type
, /* part of bgp_info */
3406 uint8_t sub_type
, /* part of bgp_info */
3407 uint32_t *label
) /* part of bgp_info */
3409 struct agg_table
*rt
= NULL
;
3410 struct agg_node
*rn
;
3412 struct bgp_info
*info_new
;
3413 struct bgp_info
*bi
;
3414 struct bgp_info
*next
;
3416 struct prefix vn_prefix
;
3417 struct prefix un_prefix
;
3418 int un_prefix_valid
= 0;
3419 struct agg_node
*ern
;
3421 int original_had_routes
= 0;
3422 struct prefix original_nexthop
;
3423 const char *action_str
= NULL
;
3427 bgp
= bgp_get_default(); /* assume 1 instance for now */
3430 case FIF_ACTION_UPDATE
:
3431 action_str
= "update";
3433 case FIF_ACTION_WITHDRAW
:
3434 action_str
= "withdraw";
3436 case FIF_ACTION_KILL
:
3437 action_str
= "kill";
3444 if (import_table
== bgp
->rfapi
->it_ce
)
3447 vnc_zlog_debug_verbose("%s: entry: %s%s: prefix %s/%d: it %p, afi %s",
3448 __func__
, (is_it_ce
? "CE-IT " : ""), action_str
,
3449 rfapi_ntop(p
->family
, &p
->u
.prefix
, buf
, BUFSIZ
),
3450 p
->prefixlen
, import_table
, afi2str(afi
));
3455 * Compare rt lists. If no intersection, don't import this route
3456 * On a withdraw, peer and RD are sufficient to determine if
3459 if (action
== FIF_ACTION_UPDATE
) {
3460 if (!attr
|| !attr
->ecommunity
) {
3462 vnc_zlog_debug_verbose(
3463 "%s: attr, extra, or ecommunity missing, not importing",
3467 if ((import_table
!= bgp
->rfapi
->it_ce
)
3468 && !rfapiEcommunitiesIntersect(import_table
->rt_import_list
,
3469 attr
->ecommunity
)) {
3471 vnc_zlog_debug_verbose(
3472 "%s: it=%p: no ecommunity intersection",
3473 __func__
, import_table
);
3477 memset(&vn_prefix
, 0,
3478 sizeof(vn_prefix
)); /* keep valgrind happy */
3479 if (rfapiGetNexthop(attr
, &vn_prefix
)) {
3480 /* missing nexthop address would be a bad, bad thing */
3481 vnc_zlog_debug_verbose("%s: missing nexthop", __func__
);
3487 * Figure out which radix tree the route would go into
3493 rt
= import_table
->imported_vpn
[afi
];
3497 flog_err(LIB_ERR_DEVELOPMENT
, "%s: bad afi %d", __func__
, afi
);
3502 memset(&original_nexthop
, 0, sizeof(original_nexthop
));
3505 * agg_node_lookup returns a node only if there is at least
3506 * one route attached.
3508 rn
= agg_node_lookup(rt
, p
);
3510 vnc_zlog_debug_verbose("%s: rn=%p", __func__
, rn
);
3514 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, 1);
3515 agg_unlock_node(rn
); /* undo lock in agg_node_lookup */
3518 original_had_routes
= 1;
3520 if (VNC_DEBUG(VERBOSE
)) {
3521 vnc_zlog_debug_verbose("%s: showing IT node on entry",
3523 rfapiShowItNode(NULL
, rn
); /* debug */
3527 * Look for same route (will have same RD and peer)
3529 bi
= rfapiItBiIndexSearch(rn
, prd
, peer
, aux_prefix
);
3534 * This was an old test when we iterated over the
3535 * BIs linearly. Since we're now looking up with
3536 * RD and peer, comparing types should not be
3537 * needed. Changed to assertion.
3539 * Compare types. Doing so prevents a RFP-originated
3540 * route from matching an imported route, for example.
3542 if (VNC_DEBUG(VERBOSE
) && bi
->type
!= type
)
3543 /* should be handled by RDs, but warn for now */
3544 zlog_warn("%s: type mismatch! (bi=%d, arg=%d)",
3545 __func__
, bi
->type
, type
);
3547 vnc_zlog_debug_verbose("%s: found matching bi",
3551 * In the special CE table, withdrawals occur without
3554 if (import_table
== bgp
->rfapi
->it_ce
) {
3555 vnc_direct_bgp_del_route_ce(bgp
, rn
, bi
);
3556 if (action
== FIF_ACTION_WITHDRAW
)
3557 action
= FIF_ACTION_KILL
;
3560 if (action
== FIF_ACTION_WITHDRAW
) {
3563 CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
);
3565 vnc_zlog_debug_verbose(
3566 "%s: withdrawing at prefix %s/%d%s",
3567 __func__
, rfapi_ntop(rn
->p
.family
,
3572 ? " (already being withdrawn)"
3577 rfapiBiStartWithdrawTimer(
3578 import_table
, rn
, bi
, afi
,
3580 rfapiWithdrawTimerVPN
);
3582 RFAPI_UPDATE_ITABLE_COUNT(
3583 bi
, import_table
, afi
, -1);
3584 import_table
->holddown_count
[afi
] += 1;
3588 vnc_zlog_debug_verbose(
3589 "%s: %s at prefix %s/%d", __func__
,
3590 ((action
== FIF_ACTION_KILL
)
3593 rfapi_ntop(rn
->p
.family
,
3594 &rn
->p
.u
.prefix
, buf
,
3599 * If this route is waiting to be deleted
3601 * a previous withdraw, we must cancel its
3604 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
3605 && bi
->extra
->vnc
.import
.timer
) {
3608 (struct thread
*)bi
->extra
->vnc
3610 struct rfapi_withdraw
*wcb
= t
->arg
;
3612 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3615 import_table
->holddown_count
[afi
] -= 1;
3616 RFAPI_UPDATE_ITABLE_COUNT(
3617 bi
, import_table
, afi
, 1);
3620 * decrement remote count (if route is remote)
3622 * we are going to remove it below
3624 RFAPI_UPDATE_ITABLE_COUNT(bi
, import_table
, afi
,
3626 if (action
== FIF_ACTION_UPDATE
) {
3630 * make copy of original nexthop so we
3631 * can see if it changed
3633 rfapiGetNexthop(bi
->attr
,
3637 * remove bi without doing any export
3640 if (CHECK_FLAG(bi
->flags
,
3642 && VALID_INTERIOR_TYPE(bi
->type
))
3643 RFAPI_MONITOR_EXTERIOR(rn
)
3644 ->valid_interior_count
--;
3645 rfapiItBiIndexDel(rn
, bi
);
3646 rfapiBgpInfoDetach(rn
, bi
);
3647 rfapiMonitorEncapDelete(bi
);
3648 vnc_import_bgp_exterior_del_route_interior(
3649 bgp
, import_table
, rn
, bi
);
3650 rfapiBgpInfoFree(bi
);
3654 * remove bi and do export processing
3656 import_table
->holddown_count
[afi
] += 1;
3657 rfapiExpireVpnNow(import_table
, rn
, bi
,
3665 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, replacing
? 1 : 0);
3667 if (action
== FIF_ACTION_WITHDRAW
|| action
== FIF_ACTION_KILL
) {
3673 rfapiBgpInfoCreate(attr
, peer
, rfd
, prd
, type
, sub_type
, label
);
3676 * lookup un address in encap table
3678 ern
= agg_node_match(import_table
->imported_encap
[afi
], &vn_prefix
);
3680 rfapiCopyUnEncap2VPN(ern
->info
, info_new
);
3681 agg_unlock_node(ern
); /* undo lock in route_note_match */
3683 char buf
[PREFIX_STRLEN
];
3685 prefix2str(&vn_prefix
, buf
, sizeof(buf
));
3686 /* Not a big deal, just means VPN route got here first */
3687 vnc_zlog_debug_verbose("%s: no encap route for vn addr %s",
3689 info_new
->extra
->vnc
.import
.un_family
= 0;
3697 * No need to increment reference count, so only "get"
3698 * if the node is not there already
3700 rn
= agg_node_get(rt
, p
);
3704 * For ethernet routes, if there is an accompanying IP address,
3707 if ((AFI_L2VPN
== afi
) && aux_prefix
) {
3709 vnc_zlog_debug_verbose("%s: setting BI's aux_prefix", __func__
);
3710 info_new
->extra
->vnc
.import
.aux_prefix
= *aux_prefix
;
3713 vnc_zlog_debug_verbose(
3714 "%s: inserting bi %p at prefix %s/%d #%d", __func__
, info_new
,
3715 rfapi_ntop(rn
->p
.family
, &rn
->p
.u
.prefix
, buf
, BUFSIZ
),
3716 rn
->p
.prefixlen
, rn
->lock
);
3718 rfapiBgpInfoAttachSorted(rn
, info_new
, afi
, SAFI_MPLS_VPN
);
3719 rfapiItBiIndexAdd(rn
, info_new
);
3720 if (!rfapiGetUnAddrOfVpnBi(info_new
, NULL
)) {
3721 if (VALID_INTERIOR_TYPE(info_new
->type
))
3722 RFAPI_MONITOR_EXTERIOR(rn
)->valid_interior_count
++;
3723 SET_FLAG(info_new
->flags
, BGP_INFO_VALID
);
3725 RFAPI_UPDATE_ITABLE_COUNT(info_new
, import_table
, afi
, 1);
3726 vnc_import_bgp_exterior_add_route_interior(bgp
, import_table
, rn
,
3729 if (import_table
== bgp
->rfapi
->it_ce
)
3730 vnc_direct_bgp_add_route_ce(bgp
, rn
, info_new
);
3732 if (VNC_DEBUG(VERBOSE
)) {
3733 vnc_zlog_debug_verbose("%s: showing IT node", __func__
);
3734 rfapiShowItNode(NULL
, rn
); /* debug */
3737 rfapiMonitorEncapAdd(import_table
, &vn_prefix
, rn
, info_new
);
3739 if (!rfapiGetUnAddrOfVpnBi(info_new
, &un_prefix
)) {
3742 * if we have a valid UN address (either via Encap route
3743 * or via tunnel attribute), then we should attempt
3744 * to move any monitors at less-specific nodes to this node
3746 rfapiMonitorMoveLonger(rn
);
3748 un_prefix_valid
= 1;
3752 * 101129 Enhancement: if we add a route (implication: it is not
3753 * in holddown), delete all other routes from this nve at this
3754 * node that are in holddown, regardless of peer.
3756 * Reasons it's OK to do that:
3758 * - if the holddown route being deleted originally came from BGP VPN,
3759 * it is already gone from BGP (implication of holddown), so there
3760 * won't be any added inconsistency with the BGP RIB.
3762 * - once a fresh route is added at a prefix, any routes in holddown
3763 * at that prefix will not show up in RFP responses, so deleting
3764 * the holddown routes won't affect the contents of responses.
3766 * - lifetimes are supposed to be consistent, so there should not
3767 * be a case where the fresh route has a shorter lifetime than
3768 * the holddown route, so we don't expect the fresh route to
3769 * disappear and complete its holddown time before the existing
3770 * holddown routes time out. Therefore, we won't have a situation
3771 * where we expect the existing holddown routes to be hidden and
3772 * then to reappear sometime later (as holddown routes) in a
3775 * Among other things, this would enable us to skirt the problem
3776 * of local holddown routes that refer to NVE descriptors that
3777 * have already been closed (if the same NVE triggers a subsequent
3778 * rfapi_open(), the new peer is different and doesn't match the
3779 * peer of the holddown route, so the stale holddown route still
3780 * hangs around until it times out instead of just being replaced
3781 * by the fresh route).
3784 * We know that the new bi will have been inserted before any routes
3785 * in holddown, so we can skip any that came before it
3787 for (bi
= info_new
->next
; bi
; bi
= next
) {
3789 struct prefix pfx_vn
;
3790 struct prefix pfx_un
;
3792 int remote_peer_match
= 0;
3799 if (!CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
))
3803 * Must match VN address (nexthop of VPN route)
3805 if (rfapiGetNexthop(bi
->attr
, &pfx_vn
))
3807 if (!prefix_same(&pfx_vn
, &vn_prefix
))
3810 if (un_prefix_valid
&& /* new route UN addr */
3811 !rfapiGetUnAddrOfVpnBi(bi
, &pfx_un
)
3812 && /* old route UN addr */
3813 prefix_same(&pfx_un
, &un_prefix
)) { /* compare */
3816 if (!RFAPI_LOCAL_BI(bi
) && !RFAPI_LOCAL_BI(info_new
)
3817 && sockunion_same(&bi
->peer
->su
, &info_new
->peer
->su
)) {
3818 /* old & new are both remote, same peer */
3819 remote_peer_match
= 1;
3822 if (!un_match
& !remote_peer_match
)
3825 vnc_zlog_debug_verbose(
3826 "%s: removing holddown bi matching NVE of new route",
3828 if (bi
->extra
->vnc
.import
.timer
) {
3830 (struct thread
*)bi
->extra
->vnc
.import
.timer
;
3831 struct rfapi_withdraw
*wcb
= t
->arg
;
3833 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3836 rfapiExpireVpnNow(import_table
, rn
, bi
, 0);
3839 if (!original_had_routes
) {
3841 * We went from 0 usable routes to 1 usable route. Perform the
3842 * "Adding a Route" export process.
3844 vnc_direct_bgp_add_prefix(bgp
, import_table
, rn
);
3845 vnc_zebra_add_prefix(bgp
, import_table
, rn
);
3848 * Check for nexthop change event
3849 * Note: the prefix_same() test below detects two situations:
3850 * 1. route is replaced, new route has different nexthop
3851 * 2. new route is added (original_nexthop is 0)
3853 struct prefix new_nexthop
;
3855 rfapiGetNexthop(attr
, &new_nexthop
);
3856 if (!prefix_same(&original_nexthop
, &new_nexthop
)) {
3858 * nexthop change event
3859 * vnc_direct_bgp_add_prefix() will recompute VN addr
3862 vnc_direct_bgp_add_prefix(bgp
, import_table
, rn
);
3866 if (!(bgp
->rfapi_cfg
->flags
& BGP_VNC_CONFIG_CALLBACK_DISABLE
)) {
3867 for (n
= rn
; n
; n
= agg_node_parent(n
)) {
3868 // rfapiDoRouteCallback(import_table, n, NULL);
3870 rfapiMonitorItNodeChanged(import_table
, rn
, NULL
);
3872 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, 0);
3876 static void rfapiBgpInfoFilteredImportBadSafi(
3877 struct rfapi_import_table
*import_table
, int action
, struct peer
*peer
,
3878 void *rfd
, /* set for looped back routes */
3880 struct prefix
*aux_prefix
, /* AFI_L2VPN: optional IP */
3881 afi_t afi
, struct prefix_rd
*prd
,
3882 struct attr
*attr
, /* part of bgp_info */
3883 uint8_t type
, /* part of bgp_info */
3884 uint8_t sub_type
, /* part of bgp_info */
3885 uint32_t *label
) /* part of bgp_info */
3887 vnc_zlog_debug_verbose("%s: Error, bad safi", __func__
);
3890 static rfapi_bi_filtered_import_f
*
3891 rfapiBgpInfoFilteredImportFunction(safi_t safi
)
3895 return rfapiBgpInfoFilteredImportVPN
;
3898 return rfapiBgpInfoFilteredImportEncap
;
3902 flog_err(LIB_ERR_DEVELOPMENT
, "%s: bad safi %d", __func__
,
3904 return rfapiBgpInfoFilteredImportBadSafi
;
3908 void rfapiProcessUpdate(struct peer
*peer
,
3909 void *rfd
, /* set when looped from RFP/RFAPI */
3910 struct prefix
*p
, struct prefix_rd
*prd
,
3911 struct attr
*attr
, afi_t afi
, safi_t safi
, uint8_t type
,
3912 uint8_t sub_type
, uint32_t *label
)
3916 struct rfapi_import_table
*it
;
3917 int has_ip_route
= 1;
3920 bgp
= bgp_get_default(); /* assume 1 instance for now */
3927 * look at high-order byte of RD. FF means MAC
3928 * address is present (VNC L2VPN)
3930 if ((safi
== SAFI_MPLS_VPN
)
3931 && (decode_rd_type(prd
->val
) == RD_TYPE_VNC_ETH
)) {
3932 struct prefix pfx_mac_buf
;
3933 struct prefix pfx_nexthop_buf
;
3937 * Set flag if prefix and nexthop are the same - don't
3938 * add the route to normal IP-based import tables
3940 if (!rfapiGetNexthop(attr
, &pfx_nexthop_buf
)) {
3941 if (!prefix_cmp(&pfx_nexthop_buf
, p
)) {
3946 memset(&pfx_mac_buf
, 0, sizeof(pfx_mac_buf
));
3947 pfx_mac_buf
.family
= AF_ETHERNET
;
3948 pfx_mac_buf
.prefixlen
= 48;
3949 memcpy(&pfx_mac_buf
.u
.prefix_eth
.octet
, prd
->val
+ 2, 6);
3952 * Find rt containing LNI (Logical Network ID), which
3953 * _should_ always be present when mac address is present
3955 rc
= rfapiEcommunityGetLNI(attr
->ecommunity
, &lni
);
3957 vnc_zlog_debug_verbose(
3958 "%s: rfapiEcommunityGetLNI returned %d, lni=%d, attr=%p",
3959 __func__
, rc
, lni
, attr
);
3961 it
= rfapiMacImportTableGet(bgp
, lni
);
3963 rfapiBgpInfoFilteredImportVPN(
3964 it
, FIF_ACTION_UPDATE
, peer
, rfd
,
3965 &pfx_mac_buf
, /* prefix */
3966 p
, /* aux prefix: IP addr */
3967 AFI_L2VPN
, prd
, attr
, type
, sub_type
, label
);
3975 * Iterate over all import tables; do a filtered import
3976 * for the afi/safi combination
3978 for (it
= h
->imports
; it
; it
= it
->next
) {
3979 (*rfapiBgpInfoFilteredImportFunction(safi
))(
3980 it
, FIF_ACTION_UPDATE
, peer
, rfd
, p
, /* prefix */
3981 NULL
, afi
, prd
, attr
, type
, sub_type
, label
);
3984 if (safi
== SAFI_MPLS_VPN
) {
3985 vnc_direct_bgp_rh_add_route(bgp
, afi
, p
, peer
, attr
);
3986 rfapiBgpInfoFilteredImportVPN(
3987 bgp
->rfapi
->it_ce
, FIF_ACTION_UPDATE
, peer
, rfd
,
3989 NULL
, afi
, prd
, attr
, type
, sub_type
, label
);
3994 void rfapiProcessWithdraw(struct peer
*peer
, void *rfd
, struct prefix
*p
,
3995 struct prefix_rd
*prd
, struct attr
*attr
, afi_t afi
,
3996 safi_t safi
, uint8_t type
, int kill
)
4000 struct rfapi_import_table
*it
;
4002 bgp
= bgp_get_default(); /* assume 1 instance for now */
4009 * look at high-order byte of RD. FF means MAC
4010 * address is present (VNC L2VPN)
4012 if (h
->import_mac
!= NULL
&& safi
== SAFI_MPLS_VPN
4013 && decode_rd_type(prd
->val
) == RD_TYPE_VNC_ETH
) {
4014 struct prefix pfx_mac_buf
;
4015 void *cursor
= NULL
;
4018 memset(&pfx_mac_buf
, 0, sizeof(pfx_mac_buf
));
4019 pfx_mac_buf
.family
= AF_ETHERNET
;
4020 pfx_mac_buf
.prefixlen
= 48;
4021 memcpy(&pfx_mac_buf
.u
.prefix_eth
, prd
->val
+ 2, 6);
4024 * withdraw does not contain attrs, so we don't have
4025 * access to the route's LNI, which would ordinarily
4026 * select the specific mac-based import table. Instead,
4027 * we must iterate over all mac-based tables and rely
4028 * on the RD to match.
4030 * If this approach is too slow, add an index where
4031 * key is {RD, peer} and value is the import table
4033 for (rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4035 rc
== 0; rc
= skiplist_next(h
->import_mac
, NULL
,
4036 (void **)&it
, &cursor
)) {
4039 vnc_zlog_debug_verbose(
4040 "%s: calling rfapiBgpInfoFilteredImportVPN(it=%p, afi=AFI_L2VPN)",
4044 rfapiBgpInfoFilteredImportVPN(
4046 (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
),
4047 peer
, rfd
, &pfx_mac_buf
, /* prefix */
4048 p
, /* aux_prefix: IP */
4049 AFI_L2VPN
, prd
, attr
, type
, 0,
4050 NULL
); /* sub_type & label unused for withdraw
4056 * XXX For the case where the withdraw involves an L2
4057 * route with no IP information, we rely on the lack
4058 * of RT-list intersection to filter out the withdraw
4059 * from the IP-based import tables below
4063 * Iterate over all import tables; do a filtered import
4064 * for the afi/safi combination
4067 for (it
= h
->imports
; it
; it
= it
->next
) {
4068 (*rfapiBgpInfoFilteredImportFunction(safi
))(
4069 it
, (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
),
4070 peer
, rfd
, p
, /* prefix */
4071 NULL
, afi
, prd
, attr
, type
, 0,
4072 NULL
); /* sub_type & label unused for withdraw */
4075 /* TBD the deletion should happen after the lifetime expires */
4076 if (safi
== SAFI_MPLS_VPN
)
4077 vnc_direct_bgp_rh_del_route(bgp
, afi
, p
, peer
);
4079 if (safi
== SAFI_MPLS_VPN
) {
4080 rfapiBgpInfoFilteredImportVPN(
4082 (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
), peer
,
4083 rfd
, p
, /* prefix */
4084 NULL
, afi
, prd
, attr
, type
, 0,
4085 NULL
); /* sub_type & label unused for withdraw */
4090 * TBD optimized withdraw timer algorithm for case of many
4091 * routes expiring at the same time due to peer drop.
4094 * 1. Visit all BIs in all ENCAP import tables.
4096 * a. If a bi's peer is the failed peer, remove the bi.
4097 * b. If the removed ENCAP bi was first in the list of
4098 * BIs at this ENCAP node, loop over all monitors
4101 * (1) for each ENCAP monitor, loop over all its
4102 * VPN node monitors and set their RFAPI_MON_FLAG_NEEDCALLBACK
4105 * 2. Visit all BIs in all VPN import tables.
4106 * a. If a bi's peer is the failed peer, remove the bi.
4107 * b. loop over all the VPN node monitors and set their
4108 * RFAPI_MON_FLAG_NEEDCALLBACK flags
4109 * c. If there are no BIs left at this VPN node,
4114 /* surprise, this gets called from peer_delete(), from rfapi_close() */
4115 static void rfapiProcessPeerDownRt(struct peer
*peer
,
4116 struct rfapi_import_table
*import_table
,
4117 afi_t afi
, safi_t safi
)
4119 struct agg_node
*rn
;
4120 struct bgp_info
*bi
;
4121 struct agg_table
*rt
;
4122 int (*timer_service_func
)(struct thread
*);
4124 assert(afi
== AFI_IP
|| afi
== AFI_IP6
);
4130 rt
= import_table
->imported_vpn
[afi
];
4131 timer_service_func
= rfapiWithdrawTimerVPN
;
4134 rt
= import_table
->imported_encap
[afi
];
4135 timer_service_func
= rfapiWithdrawTimerEncap
;
4142 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
4143 for (bi
= rn
->info
; bi
; bi
= bi
->next
) {
4144 if (bi
->peer
== peer
) {
4146 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)) {
4147 /* already in holddown, skip */
4151 if (safi
== SAFI_MPLS_VPN
) {
4152 RFAPI_UPDATE_ITABLE_COUNT(
4153 bi
, import_table
, afi
, -1);
4154 import_table
->holddown_count
[afi
] += 1;
4156 rfapiBiStartWithdrawTimer(import_table
, rn
, bi
,
4158 timer_service_func
);
4166 * This gets called when a peer connection drops. We have to remove
4167 * all the routes from this peer.
4169 * Current approach is crude. TBD Optimize by setting fewer timers and
4170 * grouping withdrawn routes so we can generate callbacks more
4173 void rfapiProcessPeerDown(struct peer
*peer
)
4177 struct rfapi_import_table
*it
;
4180 * If this peer is a "dummy" peer structure atached to a RFAPI
4181 * nve_descriptor, we don't need to walk the import tables
4182 * because the routes are already withdrawn by rfapi_close()
4184 if (CHECK_FLAG(peer
->flags
, PEER_FLAG_IS_RFAPI_HD
))
4188 * 1. Visit all BIs in all ENCAP import tables.
4189 * Start withdraw timer on the BIs that match peer.
4191 * 2. Visit All BIs in all VPN import tables.
4192 * Start withdraw timer on the BIs that match peer.
4195 bgp
= bgp_get_default(); /* assume 1 instance for now */
4202 for (it
= h
->imports
; it
; it
= it
->next
) {
4203 rfapiProcessPeerDownRt(peer
, it
, AFI_IP
, SAFI_ENCAP
);
4204 rfapiProcessPeerDownRt(peer
, it
, AFI_IP6
, SAFI_ENCAP
);
4205 rfapiProcessPeerDownRt(peer
, it
, AFI_IP
, SAFI_MPLS_VPN
);
4206 rfapiProcessPeerDownRt(peer
, it
, AFI_IP6
, SAFI_MPLS_VPN
);
4210 rfapiProcessPeerDownRt(peer
, h
->it_ce
, AFI_IP
, SAFI_MPLS_VPN
);
4211 rfapiProcessPeerDownRt(peer
, h
->it_ce
, AFI_IP6
, SAFI_MPLS_VPN
);
4216 * Import an entire RIB (for an afi/safi) to an import table RIB,
4217 * filtered according to the import table's RT list
4219 * TBD: does this function need additions to match rfapiProcessUpdate()
4220 * for, e.g., L2 handling?
4222 static void rfapiBgpTableFilteredImport(struct bgp
*bgp
,
4223 struct rfapi_import_table
*it
,
4224 afi_t afi
, safi_t safi
)
4226 struct bgp_node
*rn1
;
4227 struct bgp_node
*rn2
;
4229 /* Only these SAFIs have 2-level RIBS */
4230 assert(safi
== SAFI_MPLS_VPN
|| safi
== SAFI_ENCAP
);
4233 * Now visit all the rd nodes and the nodes of all the
4234 * route tables attached to them, and import the routes
4235 * if they have matching route targets
4237 for (rn1
= bgp_table_top(bgp
->rib
[afi
][safi
]); rn1
;
4238 rn1
= bgp_route_next(rn1
)) {
4241 for (rn2
= bgp_table_top(rn1
->info
); rn2
;
4242 rn2
= bgp_route_next(rn2
)) {
4244 struct bgp_info
*bi
;
4246 for (bi
= rn2
->info
; bi
; bi
= bi
->next
) {
4249 if (CHECK_FLAG(bi
->flags
,
4254 label
= decode_label(
4255 &bi
->extra
->label
[0]);
4256 (*rfapiBgpInfoFilteredImportFunction(
4258 it
, /* which import table */
4259 FIF_ACTION_UPDATE
, bi
->peer
,
4260 NULL
, &rn2
->p
, /* prefix */
4262 (struct prefix_rd
*)&rn1
->p
,
4264 bi
->sub_type
, &label
);
4272 /* per-bgp-instance rfapi data */
4273 struct rfapi
*bgp_rfapi_new(struct bgp
*bgp
)
4277 struct rfapi_rfp_cfg
*cfg
= NULL
;
4278 struct rfapi_rfp_cb_methods
*cbm
= NULL
;
4280 assert(bgp
->rfapi_cfg
== NULL
);
4282 h
= (struct rfapi
*)XCALLOC(MTYPE_RFAPI
, sizeof(struct rfapi
));
4284 for (afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
4285 h
->un
[afi
] = agg_table_init();
4289 * initialize the ce import table
4291 h
->it_ce
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
4292 sizeof(struct rfapi_import_table
));
4293 h
->it_ce
->imported_vpn
[AFI_IP
] = agg_table_init();
4294 h
->it_ce
->imported_vpn
[AFI_IP6
] = agg_table_init();
4295 h
->it_ce
->imported_encap
[AFI_IP
] = agg_table_init();
4296 h
->it_ce
->imported_encap
[AFI_IP6
] = agg_table_init();
4297 rfapiBgpTableFilteredImport(bgp
, h
->it_ce
, AFI_IP
, SAFI_MPLS_VPN
);
4298 rfapiBgpTableFilteredImport(bgp
, h
->it_ce
, AFI_IP6
, SAFI_MPLS_VPN
);
4301 * Set up work queue for deferred rfapi_close operations
4303 h
->deferred_close_q
=
4304 work_queue_new(bm
->master
, "rfapi deferred close");
4305 h
->deferred_close_q
->spec
.workfunc
= rfapi_deferred_close_workfunc
;
4306 h
->deferred_close_q
->spec
.data
= h
;
4308 h
->rfp
= rfp_start(bm
->master
, &cfg
, &cbm
);
4309 bgp
->rfapi_cfg
= bgp_rfapi_cfg_new(cfg
);
4311 h
->rfp_methods
= *cbm
;
4316 void bgp_rfapi_destroy(struct bgp
*bgp
, struct rfapi
*h
)
4320 if (bgp
== NULL
|| h
== NULL
)
4323 if (h
->resolve_nve_nexthop
) {
4324 skiplist_free(h
->resolve_nve_nexthop
);
4325 h
->resolve_nve_nexthop
= NULL
;
4328 agg_table_finish(h
->it_ce
->imported_vpn
[AFI_IP
]);
4329 agg_table_finish(h
->it_ce
->imported_vpn
[AFI_IP6
]);
4330 agg_table_finish(h
->it_ce
->imported_encap
[AFI_IP
]);
4331 agg_table_finish(h
->it_ce
->imported_encap
[AFI_IP6
]);
4333 if (h
->import_mac
) {
4334 struct rfapi_import_table
*it
;
4339 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4341 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4344 rfapiImportTableFlush(it
);
4345 XFREE(MTYPE_RFAPI_IMPORTTABLE
, it
);
4347 skiplist_free(h
->import_mac
);
4348 h
->import_mac
= NULL
;
4351 work_queue_free_and_null(&h
->deferred_close_q
);
4356 for (afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
4357 agg_table_finish(h
->un
[afi
]);
4360 XFREE(MTYPE_RFAPI_IMPORTTABLE
, h
->it_ce
);
4361 XFREE(MTYPE_RFAPI
, h
);
/*
 * rfapiImportTableRefAdd
 *
 * Obtain the import table that corresponds to an RT import list.
 * The existing tables on h->imports are compared against
 * rt_import_list with ecommunity_cmp(); the visible code also contains
 * a path that allocates a fresh table, links it at the head of
 * h->imports, gives it a private copy of the RT list and fills its
 * per-AFI route tables from BGP.
 *
 * bgp			BGP instance whose tables are imported from
 * rt_import_list	RT list to look up; duplicated (ecommunity_dup)
 *			when a new table is created, so the caller
 *			keeps ownership of its own copy
 * rfg			NVE group configuration; not referenced in the
 *			lines visible here
 *
 * NOTE(review): the embedded line numbering skips.  The branch that
 * separates the "matched" path from the allocation path, the
 * definition of `h`, any reference counting and the return statement
 * are on lines not shown -- confirm against the full source.
 */
4364 struct rfapi_import_table
*
4365 rfapiImportTableRefAdd(struct bgp
*bgp
, struct ecommunity
*rt_import_list
,
4366 struct rfapi_nve_group_cfg
*rfg
)
4369 struct rfapi_import_table
*it
;
/* Search the existing import tables for one with this RT list */
4375 for (it
= h
->imports
; it
; it
= it
->next
) {
4376 if (ecommunity_cmp(it
->rt_import_list
, rt_import_list
))
4380 vnc_zlog_debug_verbose("%s: matched it=%p", __func__
, it
);
/* New table: zero-initialised by XCALLOC */
4383 it
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
4384 sizeof(struct rfapi_import_table
));
/* Link it in front of the current list head */
4386 it
->next
= h
->imports
;
4389 it
->rt_import_list
= ecommunity_dup(rt_import_list
);
/*
 * Skiplist created with a NULL compare function and prefix_free
 * (cast to a void(*)(void *)) as the element free function.
 */
4391 it
->monitor_exterior_orphans
=
4392 skiplist_new(0, NULL
, (void (*)(void *))prefix_free
);
4395 * fill import route tables from RIBs
4397 * Potential area for optimization. If this occurs when
4398 * tables are large (e.g., the operator adds a nve group
4399 * with a new RT list to a running system), it could take
/*
 * Per AFI: create empty VPN and ENCAP tables, import matching routes
 * from BGP (two rfapiBgpTableFilteredImport calls; the second is for
 * SAFI_ENCAP, the SAFI argument of the first is on a line not shown),
 * then enable exterior redistribution for this table.
 */
4403 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4405 it
->imported_vpn
[afi
] = agg_table_init();
4406 it
->imported_encap
[afi
] = agg_table_init();
4408 rfapiBgpTableFilteredImport(bgp
, it
, afi
,
4410 rfapiBgpTableFilteredImport(bgp
, it
, afi
, SAFI_ENCAP
);
4412 vnc_import_bgp_exterior_redist_enable_it(bgp
, afi
, it
);
4422 * skiplist element free function
/*
 * Element free function for the skiplists of struct rfapi_nve_addr
 * built by rfapiDeleteRemotePrefixes().
 *
 * na	element being released; treated as a struct rfapi_nve_addr whose
 *	info field holds a pointer to a uint32_t counter
 *
 * NOTE(review): the statement that uses pCounter sits on a line not
 * visible here.  A remark in rfapiDeleteRemotePrefixes() says this
 * function "counts as it deletes", so the counter is presumably
 * incremented before the element is freed -- confirm.
 */
4424 static void delete_rem_pfx_na_free(void *na
)
/* Fetch the counter pointer before the element's memory goes away */
4426 uint32_t *pCounter
= ((struct rfapi_nve_addr
*)na
)->info
;
4429 XFREE(MTYPE_RFAPI_NVE_ADDR
, na
);
4433 * Common deleter for IP and MAC import tables
/*
 * rfapiDeleteRemotePrefixesIt
 *
 * Scan the imported_vpn route tables of ONE import table and expire
 * the routes that pass the caller's filters.
 *
 * bgp			passed on to vnc_direct_bgp_rh_del_route()
 * it			import table to scan
 * un			if set, a route's UN address (as returned by
 *			rfapiGetUnAddrOfVpnBi) is tested with
 *			prefix_match(un, ...)
 * vn			if set, a route's nexthop (rfapiGetNexthop) is
 *			tested with prefix_match(vn, ...)
 * p			if set, compared against each node's prefix with
 *			prefix_cmp(), and against the AFI being scanned
 * delete_active,
 * delete_holddown	select which classes of route are eligible
 * pARcount, pAHcount,
 * pHRcount, pHHcount	caller-owned uint32_t counters
 * uniq_active_nves,
 * uniq_holddown_nves	skiplists searched with a struct rfapi_nve_addr
 *			key, used to track the distinct NVEs seen
 *
 * NOTE(review): the embedded line numbering skips a great deal in this
 * function (declarations of qpt/qct/is_active, every `continue`, the
 * counter updates, closing braces).  The comments below are limited to
 * what the visible tokens establish; anything marked "presumably"
 * needs checking against the full source.
 */
4435 static void rfapiDeleteRemotePrefixesIt(
4436 struct bgp
*bgp
, struct rfapi_import_table
*it
, struct prefix
*un
,
4437 struct prefix
*vn
, struct prefix
*p
, int delete_active
,
4438 int delete_holddown
, uint32_t *pARcount
, uint32_t *pAHcount
,
4439 uint32_t *pHRcount
, uint32_t *pHHcount
,
4440 struct skiplist
*uniq_active_nves
, struct skiplist
*uniq_holddown_nves
)
/* Printable form of p for the entry trace below */
4446 char buf_pfx
[PREFIX_STRLEN
];
4449 prefix2str(p
, buf_pfx
, sizeof(buf_pfx
));
4455 vnc_zlog_debug_verbose(
4456 "%s: entry, p=%s, delete_active=%d, delete_holddown=%d",
4457 __func__
, buf_pfx
, delete_active
, delete_holddown
);
/* One pass per address family */
4461 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4463 struct agg_table
*rt
;
4464 struct agg_node
*rn
;
/*
 * A filter prefix of a different family cannot match anything in this
 * AFI's table (body of the `if` not shown; presumably skips the AFI).
 */
4466 if (p
&& (family2afi(p
->family
) != afi
)) {
4470 rt
= it
->imported_vpn
[afi
];
4474 vnc_zlog_debug_verbose("%s: scanning rt for afi=%d", __func__
,
/* Visit every node of this AFI's VPN table */
4477 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
4478 struct bgp_info
*bi
;
4479 struct bgp_info
*next
;
/* Optional trace of wanted vs. current prefix */
4481 if (VNC_DEBUG(IMPORT_DEL_REMOTE
)) {
4482 char p1line
[PREFIX_STRLEN
];
4483 char p2line
[PREFIX_STRLEN
];
4485 prefix2str(p
, p1line
, sizeof(p1line
));
4486 prefix2str(&rn
->p
, p2line
, sizeof(p2line
));
4487 vnc_zlog_debug_any("%s: want %s, have %s",
4488 __func__
, p1line
, p2line
);
/*
 * Prefix filter: prefix_cmp() is non-zero when the prefixes differ
 * (statement controlled by this `if` not shown).
 */
4491 if (p
&& prefix_cmp(p
, &rn
->p
))
4495 char buf_pfx
[PREFIX_STRLEN
];
4497 prefix2str(&rn
->p
, buf_pfx
, sizeof(buf_pfx
));
4498 vnc_zlog_debug_verbose("%s: rn pfx=%s",
4502 /* TBD is this valid for afi == AFI_L2VPN? */
4503 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, 1);
/*
 * Walk the routes attached to this node.  The loop advances through
 * `next` rather than bi->next -- presumably because bi may be expired
 * (rfapiExpireVpnNow) before the step expression runs.
 */
4505 for (bi
= rn
->info
; bi
; bi
= next
) {
4514 vnc_zlog_debug_verbose("%s: examining bi %p",
/* VN filter: obtain the route's nexthop into qpt, test against vn */
4518 if (!rfapiGetNexthop(bi
->attr
, &qpt
))
4523 || !prefix_match(vn
, &qpt
)) {
4525 vnc_zlog_debug_verbose(
4526 "%s: continue at vn && !qpt_valid || !prefix_match(vn, &qpt)",
/* UN filter: obtain the route's UN address into qct, test against un */
4533 if (!rfapiGetUnAddrOfVpnBi(bi
, &qct
))
4538 || !prefix_match(un
, &qct
)) {
4540 vnc_zlog_debug_verbose(
4541 "%s: continue at un && !qct_valid || !prefix_match(un, &qct)",
4553 * If this route is waiting to be deleted
4555 * a previous withdraw, we must cancel its
/*
 * Route already marked BGP_INFO_REMOVED: delete_holddown is consulted,
 * and if a timer is pending on the route its rfapi_withdraw control
 * block (wcb) is used to adjust holddown_count and the table counters
 * before being freed.
 */
4558 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)) {
4559 if (!delete_holddown
)
4561 if (bi
->extra
->vnc
.import
.timer
) {
4567 struct rfapi_withdraw
*wcb
=
4571 ->holddown_count
[afi
] -=
4573 RFAPI_UPDATE_ITABLE_COUNT(
4574 bi
, wcb
->import_table
,
4576 XFREE(MTYPE_RFAPI_WITHDRAW
,
4586 vnc_zlog_debug_verbose(
4587 "%s: deleting bi %p (qct_valid=%d, qpt_valid=%d, delete_holddown=%d, delete_active=%d)",
4588 __func__
, bi
, qct_valid
, qpt_valid
,
4589 delete_holddown
, delete_active
);
/*
 * With both the UN and VN address known, build an rfapi_nve_addr key
 * and look it up in one of the two uniqueness skiplists.  The
 * MTYPE_RFAPI_NVE_ADDR allocation below appears to create the entry
 * when the search does not find one; nap->info is set depending on
 * is_active (right-hand side not shown).
 */
4595 if (qct_valid
&& qpt_valid
) {
4597 struct rfapi_nve_addr na
;
4598 struct rfapi_nve_addr
*nap
;
4600 memset(&na
, 0, sizeof(na
));
4601 assert(!rfapiQprefix2Raddr(&qct
,
4603 assert(!rfapiQprefix2Raddr(&qpt
,
4606 if (skiplist_search(
4609 : uniq_holddown_nves
),
4610 &na
, (void **)&nap
)) {
4614 MTYPE_RFAPI_NVE_ADDR
,
4619 nap
->info
= is_active
4625 : uniq_holddown_nves
),
4628 rfapiNveAddr2Str(nap
, line
,
/*
 * Removal proper: notify the BGP-direct export code, decrement the
 * import-table count for bi, add one to holddown_count, then expire
 * the route immediately.
 */
4633 vnc_direct_bgp_rh_del_route(bgp
, afi
, &rn
->p
,
4636 RFAPI_UPDATE_ITABLE_COUNT(bi
, it
, afi
, -1);
4637 it
->holddown_count
[afi
] += 1;
4638 rfapiExpireVpnNow(it
, rn
, bi
, 1);
4640 vnc_zlog_debug_verbose(
4641 "%s: incrementing count (is_active=%d)",
4642 __func__
, is_active
);
4655 * For use by the "clear vnc prefixes" command
4657 /*------------------------------------------
4658 * rfapiDeleteRemotePrefixes
4660 * UI helper: For use by the "clear vnc prefixes" command
4663 * un if set, tunnel must match this prefix
4664 * vn if set, nexthop prefix must match this prefix
4665 * p if set, prefix must match this prefix
4666 * it if set, only look in this import table
4669 * pARcount number of active routes deleted
4670 * pAHcount number of active nves deleted
4671 * pHRcount number of holddown routes deleted
4672 * pHHcount number of holddown nves deleted
4676 --------------------------------------------*/
/*
 * Driver for rfapiDeleteRemotePrefixesIt(): calls it on import tables
 * (logged as "(IP) import") and on the L2 tables stored in
 * h->import_mac, then reports four totals to the caller.
 *
 * un, vn, delete_active, delete_holddown
 *		handed unchanged to rfapiDeleteRemotePrefixesIt()
 * p		also handed through; its declaration is on a parameter
 *		line not visible in this view
 * arg_it	not referenced in the visible lines; the header comment
 *		above describes an "it" argument that restricts the scan
 *		to one import table -- presumably this parameter
 * pARcount, pAHcount, pHRcount, pHHcount
 *		receive the active-route, active-NVE, holddown-route and
 *		holddown-NVE totals
 *
 * NOTE(review): the embedded line numbering skips.  The definition of
 * `h`, the handling of a missing BGP instance, the loop header over
 * the IP import tables and whatever guards the output pointers are on
 * lines not shown.
 */
4677 void rfapiDeleteRemotePrefixes(struct prefix
*un
, struct prefix
*vn
,
4679 struct rfapi_import_table
*arg_it
,
4680 int delete_active
, int delete_holddown
,
4681 uint32_t *pARcount
, uint32_t *pAHcount
,
4682 uint32_t *pHRcount
, uint32_t *pHHcount
)
4686 struct rfapi_import_table
*it
;
/* Running totals, copied to the caller's pointers at the end */
4687 uint32_t deleted_holddown_route_count
= 0;
4688 uint32_t deleted_active_route_count
= 0;
4689 uint32_t deleted_holddown_nve_count
= 0;
4690 uint32_t deleted_active_nve_count
= 0;
4691 struct skiplist
*uniq_holddown_nves
;
4692 struct skiplist
*uniq_active_nves
;
4696 bgp
= bgp_get_default(); /* assume 1 instance for now */
4697 /* If no bgp instantiated yet, no vnc prefixes exist */
/*
 * Two scratch skiplists ordered by rfapi_nve_addr_cmp, with
 * delete_rem_pfx_na_free as element free function.  (The assignment
 * target of the second skiplist_new is on a line not shown;
 * presumably uniq_active_nves.)
 */
4704 uniq_holddown_nves
=
4705 skiplist_new(0, rfapi_nve_addr_cmp
, delete_rem_pfx_na_free
);
4707 skiplist_new(0, rfapi_nve_addr_cmp
, delete_rem_pfx_na_free
);
4710 * Iterate over all import tables; do a filtered import
4711 * for the afi/safi combination
4720 vnc_zlog_debug_verbose(
4721 "%s: calling rfapiDeleteRemotePrefixesIt() on (IP) import %p",
4724 rfapiDeleteRemotePrefixesIt(
4725 bgp
, it
, un
, vn
, p
, delete_active
, delete_holddown
,
4726 &deleted_active_route_count
, &deleted_active_nve_count
,
4727 &deleted_holddown_route_count
,
4728 &deleted_holddown_nve_count
, uniq_active_nves
,
4729 uniq_holddown_nves
);
4738 * Now iterate over L2 import tables
/*
 * The L2 tables are visited only when import_mac exists and p is
 * either unset or of family AF_ETHERNET.
 */
4740 if (h
->import_mac
&& !(p
&& (p
->family
!= AF_ETHERNET
))) {
4742 void *cursor
= NULL
;
4746 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4748 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4751 vnc_zlog_debug_verbose(
4752 "%s: calling rfapiDeleteRemotePrefixesIt() on import_mac %p",
4755 rfapiDeleteRemotePrefixesIt(
4756 bgp
, it
, un
, vn
, p
, delete_active
,
4757 delete_holddown
, &deleted_active_route_count
,
4758 &deleted_active_nve_count
,
4759 &deleted_holddown_route_count
,
4760 &deleted_holddown_nve_count
, uniq_active_nves
,
4761 uniq_holddown_nves
);
4766 * our custom element freeing function above counts as it deletes
/*
 * The lists must be freed BEFORE the totals are copied out: per the
 * remark above, the element free function does the NVE counting.
 */
4768 skiplist_free(uniq_holddown_nves
);
4769 skiplist_free(uniq_active_nves
);
/*
 * Copy the totals to the caller.  Each assignment is preceded by a
 * line not shown here -- presumably a NULL check on the pointer.
 */
4772 *pARcount
= deleted_active_route_count
;
4774 *pAHcount
= deleted_active_nve_count
;
4776 *pHRcount
= deleted_holddown_route_count
;
4778 *pHHcount
= deleted_holddown_nve_count
;
4783 /*------------------------------------------
4784 * rfapiCountAllItRoutes
4786 * UI helper: count VRF routes from BGP side
4791 * pALRcount count of active local routes
4792 * pARRcount count of active remote routes
4793 * pHRcount count of holddown routes
4794 * pIRcount count of direct imported routes
4798 --------------------------------------------*/
/*
 * Sum the per-AFI route counters kept in every import table -- the
 * tables chained on h->imports plus the L2 tables stored in
 * h->import_mac -- and store the four totals through the caller's
 * int pointers.  Nothing is modified in the tables themselves by the
 * statements shown.
 *
 * NOTE(review): the embedded line numbering skips.  The definition of
 * `h`, the handling of a missing BGP instance and whatever guards the
 * output pointers are on lines not visible in this view.
 */
4799 void rfapiCountAllItRoutes(int *pALRcount
, /* active local routes */
4800 int *pARRcount
, /* active remote routes */
4801 int *pHRcount
, /* holddown routes */
4802 int *pIRcount
) /* imported routes */
4806 struct rfapi_import_table
*it
;
4809 int total_active_local
= 0;
4810 int total_active_remote
= 0;
4811 int total_holddown
= 0;
4812 int total_imported
= 0;
4814 bgp
= bgp_get_default(); /* assume 1 instance for now */
/*
 * NOTE(review): the "filtered import" wording just below does not
 * describe this loop, which only adds up the local/remote/holddown/
 * imported counters of each table for every AFI.
 */
4821 * Iterate over all import tables; do a filtered import
4822 * for the afi/safi combination
4825 for (it
= h
->imports
; it
; it
= it
->next
) {
4827 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4829 total_active_local
+= it
->local_count
[afi
];
4830 total_active_remote
+= it
->remote_count
[afi
];
4831 total_holddown
+= it
->holddown_count
[afi
];
4832 total_imported
+= it
->imported_count
[afi
];
/* L2 import tables: only the AFI_L2VPN slot of each counter is added */
4839 if (h
->import_mac
) {
4841 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4843 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4846 total_active_local
+= it
->local_count
[AFI_L2VPN
];
4847 total_active_remote
+= it
->remote_count
[AFI_L2VPN
];
4848 total_holddown
+= it
->holddown_count
[AFI_L2VPN
];
4849 total_imported
+= it
->imported_count
[AFI_L2VPN
];
/*
 * Hand the totals back.  Each assignment is preceded by lines not
 * shown here -- presumably a NULL check on the pointer.
 */
4855 *pALRcount
= total_active_local
;
4858 *pARRcount
= total_active_remote
;
4861 *pHRcount
= total_holddown
;
4864 *pIRcount
= total_imported
;
4868 /*------------------------------------------
4869 * rfapiGetHolddownFromLifetime
4871 * calculate holddown value based on lifetime
4877 * Holddown value based on lifetime, holddown_factor,
4878 * and RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
4880 --------------------------------------------*/
4881 /* hold down time maxes out at RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY */
4882 uint32_t rfapiGetHolddownFromLifetime(uint32_t lifetime
)
4887 bgp
= bgp_get_default();
4888 if (bgp
&& bgp
->rfapi_cfg
)
4889 factor
= bgp
->rfapi_cfg
->rfp_cfg
.holddown_factor
;
4891 factor
= RFAPI_RFP_CFG_DEFAULT_HOLDDOWN_FACTOR
;
4893 if (factor
< 100 || lifetime
< RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
4894 lifetime
= lifetime
* factor
/ 100;
4895 if (lifetime
< RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
4898 return RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
;