3 * Copyright 2009-2016, LabN Consulting, L.L.C.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 * File: rfapi_import.c
23 * Purpose: Handle import of routes from BGP to RFAPI
28 #include "lib/zebra.h"
29 #include "lib/prefix.h"
30 #include "lib/table.h"
32 #include "lib/memory.h"
34 #include "lib/skiplist.h"
35 #include "lib/thread.h"
36 #include "lib/stream.h"
38 #include "bgpd/bgpd.h"
39 #include "bgpd/bgp_ecommunity.h"
40 #include "bgpd/bgp_attr.h"
41 #include "bgpd/bgp_route.h"
42 #include "bgpd/bgp_mplsvpn.h" /* prefix_rd2str() */
43 #include "bgpd/bgp_vnc_types.h"
44 #include "bgpd/bgp_rd.h"
46 #include "bgpd/rfapi/rfapi.h"
47 #include "bgpd/rfapi/bgp_rfapi_cfg.h"
48 #include "bgpd/rfapi/rfapi_backend.h"
49 #include "bgpd/rfapi/rfapi_import.h"
50 #include "bgpd/rfapi/rfapi_private.h"
51 #include "bgpd/rfapi/rfapi_monitor.h"
52 #include "bgpd/rfapi/rfapi_nve_addr.h"
53 #include "bgpd/rfapi/rfapi_vty.h"
54 #include "bgpd/rfapi/vnc_export_bgp.h"
55 #include "bgpd/rfapi/vnc_export_bgp_p.h"
56 #include "bgpd/rfapi/vnc_zebra.h"
57 #include "bgpd/rfapi/vnc_import_bgp.h"
58 #include "bgpd/rfapi/vnc_import_bgp_p.h"
59 #include "bgpd/rfapi/rfapi_rib.h"
60 #include "bgpd/rfapi/rfapi_encap_tlv.h"
61 #include "bgpd/rfapi/vnc_debug.h"
63 #ifdef HAVE_GLIBC_BACKTRACE
64 /* for backtrace and friends */
66 #endif /* HAVE_GLIBC_BACKTRACE */
68 #undef DEBUG_MONITOR_MOVE_SHORTER
69 #undef DEBUG_RETURNED_NHL
70 #undef DEBUG_ROUTE_COUNTERS
71 #undef DEBUG_ENCAP_MONITOR
74 #undef DEBUG_BI_SEARCH
77 * Allocated for each withdraw timer instance; freed when the timer
78 * expires or is canceled
80 struct rfapi_withdraw
{
81 struct rfapi_import_table
*import_table
;
82 struct route_node
*node
;
83 struct bgp_info
*info
;
84 safi_t safi
; /* used only for bulk operations */
86 * For import table node reference count checking (i.e., debugging).
87 * Normally when a timer expires, lockoffset should be 0. However, if
88 * the timer expiration function is called directly (e.g.,
89 * rfapiExpireVpnNow), the node could be locked by a preceding
90 * route_top() or route_next() in a loop, so we need to pass this
98 * It's evil and fiendish. It's compiler-dependent.
99 * ? Might need LDFLAGS -rdynamic to produce all function names
101 void rfapiDebugBacktrace(void)
103 #ifdef HAVE_GLIBC_BACKTRACE
104 #define RFAPI_DEBUG_BACKTRACE_NENTRIES 200
105 void *buf
[RFAPI_DEBUG_BACKTRACE_NENTRIES
];
110 size
= backtrace(buf
, RFAPI_DEBUG_BACKTRACE_NENTRIES
);
111 syms
= backtrace_symbols(buf
, size
);
113 for (i
= 0; i
< size
&& i
< RFAPI_DEBUG_BACKTRACE_NENTRIES
; ++i
) {
114 vnc_zlog_debug_verbose("backtrace[%2zu]: %s", i
, syms
[i
]);
124 * Count remote routes and compare with actively-maintained values.
125 * Abort if they disagree.
127 void rfapiCheckRouteCount()
129 struct bgp
*bgp
= bgp_get_default();
131 struct rfapi_import_table
*it
;
139 for (it
= h
->imports
; it
; it
= it
->next
) {
140 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
142 struct route_table
*rt
;
143 struct route_node
*rn
;
145 int holddown_count
= 0;
147 int imported_count
= 0;
148 int remote_count
= 0;
150 rt
= it
->imported_vpn
[afi
];
152 for (rn
= route_top(rt
); rn
; rn
= route_next(rn
)) {
154 struct bgp_info
*next
;
156 for (bi
= rn
->info
; bi
; bi
= next
) {
159 if (CHECK_FLAG(bi
->flags
,
164 if (RFAPI_LOCAL_BI(bi
)) {
167 if (RFAPI_DIRECT_IMPORT_BI(
178 if (it
->holddown_count
[afi
] != holddown_count
) {
179 vnc_zlog_debug_verbose(
180 "%s: it->holddown_count %d != holddown_count %d",
181 __func__
, it
->holddown_count
[afi
],
185 if (it
->remote_count
[afi
] != remote_count
) {
186 vnc_zlog_debug_verbose(
187 "%s: it->remote_count %d != remote_count %d",
188 __func__
, it
->remote_count
[afi
],
192 if (it
->imported_count
[afi
] != imported_count
) {
193 vnc_zlog_debug_verbose(
194 "%s: it->imported_count %d != imported_count %d",
195 __func__
, it
->imported_count
[afi
],
203 #if DEBUG_ROUTE_COUNTERS
204 #define VNC_ITRCCK do {rfapiCheckRouteCount();} while (0)
210 * Validate reference count for a node in an import table
212 * Normally lockoffset is 0 for nodes in quiescent state. However,
213 * route_unlock_node will delete the node if it is called when
214 * node->lock == 1, and we have to validate the refcount before
215 * the node is deleted. In this case, we specify lockoffset 1.
217 void rfapiCheckRefcount(struct route_node
*rn
, safi_t safi
, int lockoffset
)
219 unsigned int count_bi
= 0;
220 unsigned int count_monitor
= 0;
222 struct rfapi_monitor_encap
*hme
;
223 struct rfapi_monitor_vpn
*hmv
;
225 for (bi
= rn
->info
; bi
; bi
= bi
->next
)
230 ++count_monitor
; /* rfapi_it_extra */
237 for (hme
= RFAPI_MONITOR_ENCAP(rn
); hme
;
244 for (hmv
= RFAPI_MONITOR_VPN(rn
); hmv
; hmv
= hmv
->next
)
247 if (RFAPI_MONITOR_EXTERIOR(rn
)->source
) {
248 ++count_monitor
; /* sl */
250 for (rc
= skiplist_next(
251 RFAPI_MONITOR_EXTERIOR(rn
)->source
,
252 NULL
, NULL
, &cursor
);
255 RFAPI_MONITOR_EXTERIOR(rn
)->source
,
256 NULL
, NULL
, &cursor
)) {
258 ++count_monitor
; /* sl entry */
268 if (count_bi
+ count_monitor
+ lockoffset
!= rn
->lock
) {
269 vnc_zlog_debug_verbose(
270 "%s: count_bi=%d, count_monitor=%d, lockoffset=%d, rn->lock=%d",
271 __func__
, count_bi
, count_monitor
, lockoffset
,
278 * Perform deferred rfapi_close operations that were queued
281 static wq_item_status
rfapi_deferred_close_workfunc(struct work_queue
*q
,
284 struct rfapi_descriptor
*rfd
= data
;
285 struct rfapi
*h
= q
->spec
.data
;
287 assert(!(h
->flags
& RFAPI_INCALLBACK
));
289 vnc_zlog_debug_verbose("%s: completed deferred close on handle %p",
295 * Extract layer 2 option from Encap TLVS in BGP attrs
297 int rfapiGetL2o(struct attr
*attr
, struct rfapi_l2address_option
*l2o
)
301 struct bgp_attr_encap_subtlv
*pEncap
;
303 for (pEncap
= attr
->vnc_subtlvs
; pEncap
;
304 pEncap
= pEncap
->next
) {
306 if (pEncap
->type
== BGP_VNC_SUBTLV_TYPE_RFPOPTION
) {
308 == RFAPI_VN_OPTION_TYPE_L2ADDR
) {
310 if (pEncap
->value
[1] == 14) {
311 memcpy(l2o
->macaddr
.octet
,
328 l2o
->logical_net_id
=
331 + ((pEncap
->value
[14]
334 + ((pEncap
->value
[13]
349 * Extract the lifetime from the Tunnel Encap attribute of a route in
352 int rfapiGetVncLifetime(struct attr
*attr
, uint32_t *lifetime
)
354 struct bgp_attr_encap_subtlv
*pEncap
;
356 *lifetime
= RFAPI_INFINITE_LIFETIME
; /* default to infinite */
360 for (pEncap
= attr
->vnc_subtlvs
; pEncap
;
361 pEncap
= pEncap
->next
) {
364 == BGP_VNC_SUBTLV_TYPE_LIFETIME
) { /* lifetime */
365 if (pEncap
->length
== 4) {
366 memcpy(lifetime
, pEncap
->value
, 4);
367 *lifetime
= ntohl(*lifetime
);
378 * Extract the tunnel type from the extended community
380 int rfapiGetTunnelType(struct attr
*attr
, bgp_encap_types
*type
)
382 *type
= BGP_ENCAP_TYPE_MPLS
; /* default to MPLS */
383 if (attr
&& attr
->ecommunity
) {
384 struct ecommunity
*ecom
= attr
->ecommunity
;
387 for (i
= 0; i
< (ecom
->size
* ECOMMUNITY_SIZE
);
388 i
+= ECOMMUNITY_SIZE
) {
392 if (ep
[0] == ECOMMUNITY_ENCODE_OPAQUE
393 && ep
[1] == ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP
) {
394 *type
= (ep
[6] << 8) + ep
[7];
405 * Look for UN address in Encap attribute
407 int rfapiGetVncTunnelUnAddr(struct attr
*attr
, struct prefix
*p
)
409 struct bgp_attr_encap_subtlv
*pEncap
;
410 bgp_encap_types tun_type
;
412 rfapiGetTunnelType(attr
, &tun_type
);
413 if (tun_type
== BGP_ENCAP_TYPE_MPLS
) {
416 /* MPLS carries UN address in next hop */
417 rfapiNexthop2Prefix(attr
, p
);
424 for (pEncap
= attr
->encap_subtlvs
; pEncap
;
425 pEncap
= pEncap
->next
) {
428 == BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT
) { /* un
431 switch (pEncap
->length
) {
436 memcpy(p
->u
.val
, pEncap
->value
,
443 p
->family
= AF_INET6
;
445 memcpy(p
->u
.val
, pEncap
->value
,
458 * Get UN address wherever it might be
460 int rfapiGetUnAddrOfVpnBi(struct bgp_info
*bi
, struct prefix
*p
)
462 /* If it's in this route's VNC attribute, we're done */
463 if (!rfapiGetVncTunnelUnAddr(bi
->attr
, p
))
466 * Otherwise, see if it's cached from a corresponding ENCAP SAFI
470 switch (bi
->extra
->vnc
.import
.un_family
) {
473 p
->family
= bi
->extra
->vnc
.import
.un_family
;
474 p
->u
.prefix4
= bi
->extra
->vnc
.import
.un
.addr4
;
480 p
->family
= bi
->extra
->vnc
.import
.un_family
;
481 p
->u
.prefix6
= bi
->extra
->vnc
.import
.un
.addr6
;
488 #if DEBUG_ENCAP_MONITOR
489 vnc_zlog_debug_verbose(
490 "%s: bi->extra->vnc.import.un_family is 0, no UN addr",
502 * Make a new bgp_info from gathered parameters
504 static struct bgp_info
*rfapiBgpInfoCreate(struct attr
*attr
, struct peer
*peer
,
505 void *rfd
, struct prefix_rd
*prd
,
506 u_char type
, u_char sub_type
,
509 struct bgp_info
*new;
511 new = bgp_info_new();
516 new->attr
= bgp_attr_intern(attr
);
518 bgp_info_extra_get(new);
520 new->extra
->vnc
.import
.rd
= *prd
;
521 rfapi_time(&new->extra
->vnc
.import
.create_time
);
524 encode_label(*label
, &new->extra
->label
[0]);
526 new->sub_type
= sub_type
;
534 * Frees bgp_info as used in import tables (parts are not
535 * allocated exactly the way they are in the main RIBs)
537 static void rfapiBgpInfoFree(struct bgp_info
*goner
)
543 vnc_zlog_debug_verbose("%s: calling peer_unlock(%p), #%d",
544 __func__
, goner
->peer
,
546 peer_unlock(goner
->peer
);
550 bgp_attr_unintern(&goner
->attr
);
553 assert(!goner
->extra
->damp_info
); /* Not used in import tbls */
554 XFREE(MTYPE_BGP_ROUTE_EXTRA
, goner
->extra
);
557 XFREE(MTYPE_BGP_ROUTE
, goner
);
560 struct rfapi_import_table
*rfapiMacImportTableGetNoAlloc(struct bgp
*bgp
,
564 struct rfapi_import_table
*it
= NULL
;
565 uintptr_t lni_as_ptr
= lni
;
574 if (skiplist_search(h
->import_mac
, (void *)lni_as_ptr
, (void **)&it
))
580 struct rfapi_import_table
*rfapiMacImportTableGet(struct bgp
*bgp
, uint32_t lni
)
583 struct rfapi_import_table
*it
= NULL
;
584 uintptr_t lni_as_ptr
= lni
;
589 if (!h
->import_mac
) {
590 /* default cmp is good enough for LNI */
591 h
->import_mac
= skiplist_new(0, NULL
, NULL
);
594 if (skiplist_search(h
->import_mac
, (void *)lni_as_ptr
, (void **)&it
)) {
596 struct ecommunity
*enew
;
597 struct ecommunity_val eval
;
600 it
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
601 sizeof(struct rfapi_import_table
));
602 /* set RT list of new import table based on LNI */
603 memset((char *)&eval
, 0, sizeof(eval
));
604 eval
.val
[0] = 0; /* VNC L2VPN */
605 eval
.val
[1] = 2; /* VNC L2VPN */
606 eval
.val
[5] = (lni
>> 16) & 0xff;
607 eval
.val
[6] = (lni
>> 8) & 0xff;
608 eval
.val
[7] = (lni
>> 0) & 0xff;
610 enew
= ecommunity_new();
611 ecommunity_add_val(enew
, &eval
);
612 it
->rt_import_list
= enew
;
614 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
615 it
->imported_vpn
[afi
] = route_table_init();
616 it
->imported_encap
[afi
] = route_table_init();
619 it
->l2_logical_net_id
= lni
;
621 skiplist_insert(h
->import_mac
, (void *)lni_as_ptr
, it
);
629 * Implement MONITOR_MOVE_SHORTER(original_node) from
630 * RFAPI-Import-Event-Handling.txt
632 * Returns pointer to the list of moved monitors
634 static struct rfapi_monitor_vpn
*
635 rfapiMonitorMoveShorter(struct route_node
*original_vpn_node
, int lockoffset
)
638 struct route_node
*par
;
639 struct rfapi_monitor_vpn
*m
;
640 struct rfapi_monitor_vpn
*mlast
;
641 struct rfapi_monitor_vpn
*moved
;
643 int parent_already_refcounted
= 0;
645 RFAPI_CHECK_REFCOUNT(original_vpn_node
, SAFI_MPLS_VPN
, lockoffset
);
647 #if DEBUG_MONITOR_MOVE_SHORTER
649 char buf
[PREFIX_STRLEN
];
651 prefix2str(&original_vpn_node
->p
, buf
, sizeof(buf
));
652 vnc_zlog_debug_verbose("%s: called with node pfx=%s", __func__
,
658 * 1. If there is at least one bi (either regular route or
659 * route marked as withdrawn, with a pending timer) at
660 * original_node with a valid UN address, we're done. Return.
662 for (bi
= original_vpn_node
->info
; bi
; bi
= bi
->next
) {
665 if (!rfapiGetUnAddrOfVpnBi(bi
, &pfx
)) {
666 #if DEBUG_MONITOR_MOVE_SHORTER
667 vnc_zlog_debug_verbose(
668 "%s: have valid UN at original node, no change",
676 * 2. Travel up the tree (toward less-specific prefixes) from
677 * original_node to find the first node that has at least
678 * one route (even if it is only a withdrawn route) with a
679 * valid UN address. Call this node "Node P."
681 for (par
= original_vpn_node
->parent
; par
; par
= par
->parent
) {
682 for (bi
= par
->info
; bi
; bi
= bi
->next
) {
684 if (!rfapiGetUnAddrOfVpnBi(bi
, &pfx
)) {
693 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
, 0);
697 * If no less-specific routes, try to use the 0/0 node
700 /* this isn't necessarily 0/0 */
701 par
= route_top(original_vpn_node
->table
);
704 * If we got the top node but it wasn't 0/0,
707 if (par
&& par
->p
.prefixlen
) {
708 route_unlock_node(par
); /* maybe free */
713 ++parent_already_refcounted
;
718 * Create 0/0 node if it isn't there
721 struct prefix pfx_default
;
723 memset(&pfx_default
, 0, sizeof(pfx_default
));
724 pfx_default
.family
= original_vpn_node
->p
.family
;
726 /* creates default node if none exists */
727 par
= route_node_get(original_vpn_node
->table
, &pfx_default
);
728 ++parent_already_refcounted
;
732 * 3. Move each of the monitors found at original_node to Node P.
733 * These are "Moved Monitors."
738 * Attach at end so that the list pointer we return points
739 * only to the moved routes
741 for (m
= RFAPI_MONITOR_VPN(par
), mlast
= NULL
; m
;
742 mlast
= m
, m
= m
->next
)
746 moved
= mlast
->next
= RFAPI_MONITOR_VPN(original_vpn_node
);
748 moved
= RFAPI_MONITOR_VPN_W_ALLOC(par
) =
749 RFAPI_MONITOR_VPN(original_vpn_node
);
751 if (RFAPI_MONITOR_VPN(
752 original_vpn_node
)) /* check agg, so not allocated */
753 RFAPI_MONITOR_VPN_W_ALLOC(original_vpn_node
) = NULL
;
756 * update the node pointers on the monitors
758 for (m
= moved
; m
; m
= m
->next
) {
763 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
,
764 parent_already_refcounted
- movecount
);
765 while (movecount
> parent_already_refcounted
) {
766 route_lock_node(par
);
767 ++parent_already_refcounted
;
769 while (movecount
< parent_already_refcounted
) {
770 /* unlikely, but code defensively */
771 route_unlock_node(par
);
772 --parent_already_refcounted
;
774 RFAPI_CHECK_REFCOUNT(original_vpn_node
, SAFI_MPLS_VPN
,
775 movecount
+ lockoffset
);
776 while (movecount
--) {
777 route_unlock_node(original_vpn_node
);
780 #if DEBUG_MONITOR_MOVE_SHORTER
782 char buf
[PREFIX_STRLEN
];
784 prefix2str(&par
->p
, buf
, sizeof(buf
));
785 vnc_zlog_debug_verbose("%s: moved to node pfx=%s", __func__
,
795 * Implement MONITOR_MOVE_LONGER(new_node) from
796 * RFAPI-Import-Event-Handling.txt
798 static void rfapiMonitorMoveLonger(struct route_node
*new_vpn_node
)
800 struct rfapi_monitor_vpn
*monitor
;
801 struct rfapi_monitor_vpn
*mlast
;
803 struct route_node
*par
;
805 RFAPI_CHECK_REFCOUNT(new_vpn_node
, SAFI_MPLS_VPN
, 0);
808 * Make sure we have at least one valid route at the new node
810 for (bi
= new_vpn_node
->info
; bi
; bi
= bi
->next
) {
812 if (!rfapiGetUnAddrOfVpnBi(bi
, &pfx
))
817 vnc_zlog_debug_verbose(
818 "%s: no valid routes at node %p, so not attempting moves",
819 __func__
, new_vpn_node
);
824 * Find first parent node that has monitors
826 for (par
= new_vpn_node
->parent
; par
; par
= par
->parent
) {
827 if (RFAPI_MONITOR_VPN(par
))
832 vnc_zlog_debug_verbose(
833 "%s: no parent nodes with monitors, done", __func__
);
838 * Check each of these monitors to see of their longest-match
839 * is now the updated node. Move any such monitors to the more-
840 * specific updated node
842 for (mlast
= NULL
, monitor
= RFAPI_MONITOR_VPN(par
); monitor
;) {
845 * If new longest match for monitor prefix is the new
846 * route's prefix, move monitor to new route's prefix
848 if (prefix_match(&new_vpn_node
->p
, &monitor
->p
)) {
851 mlast
->next
= monitor
->next
;
853 RFAPI_MONITOR_VPN_W_ALLOC(par
) = monitor
->next
;
858 monitor
->next
= RFAPI_MONITOR_VPN(new_vpn_node
);
859 RFAPI_MONITOR_VPN_W_ALLOC(new_vpn_node
) = monitor
;
860 monitor
->node
= new_vpn_node
;
862 route_lock_node(new_vpn_node
); /* incr refcount */
864 monitor
= mlast
? mlast
->next
: RFAPI_MONITOR_VPN(par
);
866 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
, 1);
867 /* decr refcount after we're done with par as this might
869 route_unlock_node(par
);
874 monitor
= monitor
->next
;
877 RFAPI_CHECK_REFCOUNT(new_vpn_node
, SAFI_MPLS_VPN
, 0);
881 static void rfapiBgpInfoChainFree(struct bgp_info
*bi
)
883 struct bgp_info
*next
;
888 * If there is a timer waiting to delete this bi, cancel
889 * the timer and delete immediately
891 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
892 && bi
->extra
->vnc
.import
.timer
) {
895 (struct thread
*)bi
->extra
->vnc
.import
.timer
;
896 struct rfapi_withdraw
*wcb
= t
->arg
;
898 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
904 rfapiBgpInfoFree(bi
);
909 static void rfapiImportTableFlush(struct rfapi_import_table
*it
)
916 ecommunity_free(&it
->rt_import_list
);
917 it
->rt_import_list
= NULL
;
919 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
921 struct route_node
*rn
;
923 for (rn
= route_top(it
->imported_vpn
[afi
]); rn
;
924 rn
= route_next(rn
)) {
926 * Each route_node has:
927 * aggregate: points to rfapi_it_extra with monitor
929 * info: points to chain of bgp_info
931 /* free bgp_info and its children */
932 rfapiBgpInfoChainFree(rn
->info
);
935 rfapiMonitorExtraFlush(SAFI_MPLS_VPN
, rn
);
938 for (rn
= route_top(it
->imported_encap
[afi
]); rn
;
939 rn
= route_next(rn
)) {
940 /* free bgp_info and its children */
941 rfapiBgpInfoChainFree(rn
->info
);
944 rfapiMonitorExtraFlush(SAFI_ENCAP
, rn
);
947 route_table_finish(it
->imported_vpn
[afi
]);
948 route_table_finish(it
->imported_encap
[afi
]);
950 if (it
->monitor_exterior_orphans
) {
951 skiplist_free(it
->monitor_exterior_orphans
);
955 void rfapiImportTableRefDelByIt(struct bgp
*bgp
,
956 struct rfapi_import_table
*it_target
)
959 struct rfapi_import_table
*it
;
960 struct rfapi_import_table
*prev
= NULL
;
967 for (it
= h
->imports
; it
; prev
= it
, it
= it
->next
) {
973 assert(it
->refcount
);
979 prev
->next
= it
->next
;
981 h
->imports
= it
->next
;
983 rfapiImportTableFlush(it
);
984 XFREE(MTYPE_RFAPI_IMPORTTABLE
, it
);
988 #if RFAPI_REQUIRE_ENCAP_BEEC
990 * Look for magic BGP Encapsulation Extended Community value
991 * Format in RFC 5512 Sect. 4.5
993 static int rfapiEcommunitiesMatchBeec(struct ecommunity
*ecom
,
994 bgp_encap_types type
)
1001 for (i
= 0; i
< (ecom
->size
* ECOMMUNITY_SIZE
); i
+= ECOMMUNITY_SIZE
) {
1007 if (ep
[0] == ECOMMUNITY_ENCODE_OPAQUE
1008 && ep
[1] == ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP
1009 && ep
[6] == ((type
&& 0xff00) >> 8)
1010 && ep
[7] == (type
& 0xff)) {
1019 int rfapiEcommunitiesIntersect(struct ecommunity
*e1
, struct ecommunity
*e2
)
1028 s1
= ecommunity_ecom2str(e1
, ECOMMUNITY_FORMAT_DISPLAY
, 0);
1029 s2
= ecommunity_ecom2str(e2
, ECOMMUNITY_FORMAT_DISPLAY
, 0);
1030 vnc_zlog_debug_verbose("%s: e1[%s], e2[%s]", __func__
, s1
, s2
);
1031 XFREE(MTYPE_ECOMMUNITY_STR
, s1
);
1032 XFREE(MTYPE_ECOMMUNITY_STR
, s2
);
1035 for (i
= 0; i
< e1
->size
; ++i
) {
1036 for (j
= 0; j
< e2
->size
; ++j
) {
1037 if (!memcmp(e1
->val
+ (i
* ECOMMUNITY_SIZE
),
1038 e2
->val
+ (j
* ECOMMUNITY_SIZE
),
1048 int rfapiEcommunityGetLNI(struct ecommunity
*ecom
, uint32_t *lni
)
1052 for (i
= 0; i
< ecom
->size
; ++i
) {
1053 uint8_t *p
= ecom
->val
+ (i
* ECOMMUNITY_SIZE
);
1055 if ((*(p
+ 0) == 0x00) && (*(p
+ 1) == 0x02)) {
1057 *lni
= (*(p
+ 5) << 16) | (*(p
+ 6) << 8)
1066 int rfapiEcommunityGetEthernetTag(struct ecommunity
*ecom
, uint16_t *tag_id
)
1068 struct bgp
*bgp
= bgp_get_default();
1069 *tag_id
= 0; /* default to untagged */
1072 for (i
= 0; i
< ecom
->size
; ++i
) {
1075 uint8_t *p
= ecom
->val
+ (i
* ECOMMUNITY_SIZE
);
1077 /* High-order octet of type. */
1080 if (*p
++ == ECOMMUNITY_ROUTE_TARGET
) {
1081 if (encode
== ECOMMUNITY_ENCODE_AS4
) {
1082 p
= ptr_get_be32(p
, &as
);
1083 } else if (encode
== ECOMMUNITY_ENCODE_AS
) {
1086 p
+= 2; /* skip next two, tag/vid
1087 always in lowest bytes */
1089 if (as
== bgp
->as
) {
1090 *tag_id
= *p
++ << 8;
1100 static int rfapiVpnBiNhEqualsPt(struct bgp_info
*bi
, struct rfapi_ip_addr
*hpt
)
1107 family
= BGP_MP_NEXTHOP_FAMILY(bi
->attr
->mp_nexthop_len
);
1109 if (hpt
->addr_family
!= family
)
1114 if (bi
->attr
->mp_nexthop_global_in
.s_addr
1115 != hpt
->addr
.v4
.s_addr
)
1120 if (IPV6_ADDR_CMP(&bi
->attr
->mp_nexthop_global
, &hpt
->addr
.v6
))
1134 * Compare 2 VPN BIs. Return true if they have the same VN and UN addresses
1136 static int rfapiVpnBiSamePtUn(struct bgp_info
*bi1
, struct bgp_info
*bi2
)
1138 struct prefix pfx_un1
;
1139 struct prefix pfx_un2
;
1144 if (!bi1
->attr
|| !bi2
->attr
)
1148 * VN address comparisons
1151 if (BGP_MP_NEXTHOP_FAMILY(bi1
->attr
->mp_nexthop_len
)
1152 != BGP_MP_NEXTHOP_FAMILY(bi2
->attr
->mp_nexthop_len
)) {
1156 switch (BGP_MP_NEXTHOP_FAMILY(bi1
->attr
->mp_nexthop_len
)) {
1158 if (bi1
->attr
->mp_nexthop_global_in
.s_addr
1159 != bi2
->attr
->mp_nexthop_global_in
.s_addr
)
1164 if (IPV6_ADDR_CMP(&bi1
->attr
->mp_nexthop_global
,
1165 &bi2
->attr
->mp_nexthop_global
))
1175 * UN address comparisons
1177 if (rfapiGetVncTunnelUnAddr(bi1
->attr
, &pfx_un1
)) {
1179 pfx_un1
.family
= bi1
->extra
->vnc
.import
.un_family
;
1180 switch (bi1
->extra
->vnc
.import
.un_family
) {
1183 bi1
->extra
->vnc
.import
.un
.addr4
;
1187 bi1
->extra
->vnc
.import
.un
.addr6
;
1196 if (rfapiGetVncTunnelUnAddr(bi2
->attr
, &pfx_un2
)) {
1198 pfx_un2
.family
= bi2
->extra
->vnc
.import
.un_family
;
1199 switch (bi2
->extra
->vnc
.import
.un_family
) {
1202 bi2
->extra
->vnc
.import
.un
.addr4
;
1206 bi2
->extra
->vnc
.import
.un
.addr6
;
1215 if (!pfx_un1
.family
|| !pfx_un2
.family
)
1218 if (pfx_un1
.family
!= pfx_un2
.family
)
1221 switch (pfx_un1
.family
) {
1223 if (!IPV4_ADDR_SAME(&pfx_un1
.u
.prefix4
, &pfx_un2
.u
.prefix4
))
1227 if (!IPV6_ADDR_SAME(&pfx_un1
.u
.prefix6
, &pfx_un2
.u
.prefix6
))
1236 uint8_t rfapiRfpCost(struct attr
*attr
)
1238 if (attr
->flag
& ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF
)) {
1239 if (attr
->local_pref
> 255) {
1242 return 255 - attr
->local_pref
;
1248 /*------------------------------------------
1251 * Find Layer 2 options in an option chain
1257 * l2o layer 2 options extracted
1261 * 1 no options found
1263 --------------------------------------------*/
1264 int rfapi_extract_l2o(
1265 struct bgp_tea_options
*pHop
, /* chain of options */
1266 struct rfapi_l2address_option
*l2o
) /* return extracted value */
1268 struct bgp_tea_options
*p
;
1270 for (p
= pHop
; p
; p
= p
->next
) {
1271 if ((p
->type
== RFAPI_VN_OPTION_TYPE_L2ADDR
)
1272 && (p
->length
>= 8)) {
1276 memcpy(&l2o
->macaddr
, v
, 6);
1278 l2o
->label
= ((v
[6] << 12) & 0xff000)
1279 + ((v
[7] << 4) & 0xff0)
1280 + ((v
[8] >> 4) & 0xf);
1282 l2o
->local_nve_id
= (uint8_t)v
[10];
1284 l2o
->logical_net_id
=
1285 (v
[11] << 16) + (v
[12] << 8) + (v
[13] << 0);
1293 static struct rfapi_next_hop_entry
*
1294 rfapiRouteInfo2NextHopEntry(struct rfapi_ip_prefix
*rprefix
,
1295 struct bgp_info
*bi
, /* route to encode */
1296 uint32_t lifetime
, /* use this in nhe */
1297 struct route_node
*rn
) /* req for L2 eth addr */
1299 struct rfapi_next_hop_entry
*new;
1300 int have_vnc_tunnel_un
= 0;
1302 #if DEBUG_ENCAP_MONITOR
1303 vnc_zlog_debug_verbose("%s: entry, bi %p, rn %p", __func__
, bi
, rn
);
1306 new = XCALLOC(MTYPE_RFAPI_NEXTHOP
, sizeof(struct rfapi_next_hop_entry
));
1309 new->prefix
= *rprefix
;
1312 && decode_rd_type(bi
->extra
->vnc
.import
.rd
.val
)
1313 == RD_TYPE_VNC_ETH
) {
1316 struct rfapi_vn_option
*vo
;
1318 vo
= XCALLOC(MTYPE_RFAPI_VN_OPTION
,
1319 sizeof(struct rfapi_vn_option
));
1322 vo
->type
= RFAPI_VN_OPTION_TYPE_L2ADDR
;
1324 memcpy(&vo
->v
.l2addr
.macaddr
, &rn
->p
.u
.prefix_eth
.octet
,
1326 /* only low 3 bytes of this are significant */
1328 (void)rfapiEcommunityGetLNI(
1329 bi
->attr
->ecommunity
,
1330 &vo
->v
.l2addr
.logical_net_id
);
1331 (void)rfapiEcommunityGetEthernetTag(
1332 bi
->attr
->ecommunity
, &vo
->v
.l2addr
.tag_id
);
1335 /* local_nve_id comes from lower byte of RD type */
1336 vo
->v
.l2addr
.local_nve_id
= bi
->extra
->vnc
.import
.rd
.val
[1];
1338 /* label comes from MP_REACH_NLRI label */
1339 vo
->v
.l2addr
.label
= decode_label(&bi
->extra
->label
[0]);
1341 new->vn_options
= vo
;
1344 * If there is an auxiliary prefix (i.e., host IP address),
1345 * use it as the nexthop prefix instead of the query prefix
1347 if (bi
->extra
->vnc
.import
.aux_prefix
.family
) {
1348 rfapiQprefix2Rprefix(&bi
->extra
->vnc
.import
.aux_prefix
,
1354 bgp_encap_types tun_type
;
1355 new->prefix
.cost
= rfapiRfpCost(bi
->attr
);
1357 struct bgp_attr_encap_subtlv
*pEncap
;
1359 switch (BGP_MP_NEXTHOP_FAMILY(bi
->attr
->mp_nexthop_len
)) {
1361 new->vn_address
.addr_family
= AF_INET
;
1362 new->vn_address
.addr
.v4
=
1363 bi
->attr
->mp_nexthop_global_in
;
1367 new->vn_address
.addr_family
= AF_INET6
;
1368 new->vn_address
.addr
.v6
= bi
->attr
->mp_nexthop_global
;
1372 zlog_warn("%s: invalid vpn nexthop length: %d",
1373 __func__
, bi
->attr
->mp_nexthop_len
);
1374 rfapi_free_next_hop_list(new);
1378 for (pEncap
= bi
->attr
->vnc_subtlvs
; pEncap
;
1379 pEncap
= pEncap
->next
) {
1380 switch (pEncap
->type
) {
1381 case BGP_VNC_SUBTLV_TYPE_LIFETIME
:
1382 /* use configured lifetime, not attr lifetime */
1386 zlog_warn("%s: unknown VNC option type %d",
1387 __func__
, pEncap
->type
);
1394 rfapiGetTunnelType(bi
->attr
, &tun_type
);
1395 if (tun_type
== BGP_ENCAP_TYPE_MPLS
) {
1397 /* MPLS carries UN address in next hop */
1398 rfapiNexthop2Prefix(bi
->attr
, &p
);
1399 if (p
.family
!= 0) {
1400 rfapiQprefix2Raddr(&p
, &new->un_address
);
1401 have_vnc_tunnel_un
= 1;
1405 for (pEncap
= bi
->attr
->encap_subtlvs
; pEncap
;
1406 pEncap
= pEncap
->next
) {
1407 switch (pEncap
->type
) {
1408 case BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT
:
1410 * Overrides ENCAP UN address, if any
1412 switch (pEncap
->length
) {
1415 new->un_address
.addr_family
= AF_INET
;
1416 memcpy(&new->un_address
.addr
.v4
,
1418 have_vnc_tunnel_un
= 1;
1422 new->un_address
.addr_family
= AF_INET6
;
1423 memcpy(&new->un_address
.addr
.v6
,
1425 have_vnc_tunnel_un
= 1;
1430 "%s: invalid tunnel subtlv UN addr length (%d) for bi %p",
1431 __func__
, pEncap
->length
, bi
);
1437 "%s: unknown Encap Attribute option type %d",
1438 __func__
, pEncap
->type
);
1445 new->un_options
= rfapi_encap_tlv_to_un_option(bi
->attr
);
1447 #if DEBUG_ENCAP_MONITOR
1448 vnc_zlog_debug_verbose("%s: line %d: have_vnc_tunnel_un=%d",
1449 __func__
, __LINE__
, have_vnc_tunnel_un
);
1452 if (!have_vnc_tunnel_un
&& bi
->extra
) {
1454 * use cached UN address from ENCAP route
1456 new->un_address
.addr_family
=
1457 bi
->extra
->vnc
.import
.un_family
;
1458 switch (new->un_address
.addr_family
) {
1460 new->un_address
.addr
.v4
=
1461 bi
->extra
->vnc
.import
.un
.addr4
;
1464 new->un_address
.addr
.v6
=
1465 bi
->extra
->vnc
.import
.un
.addr6
;
1469 "%s: invalid UN addr family (%d) for bi %p",
1470 __func__
, new->un_address
.addr_family
,
1472 rfapi_free_next_hop_list(new);
1479 new->lifetime
= lifetime
;
1483 int rfapiHasNonRemovedRoutes(struct route_node
*rn
)
1485 struct bgp_info
*bi
;
1487 for (bi
= rn
->info
; bi
; bi
= bi
->next
) {
1490 if (!CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
1491 && (bi
->extra
&& !rfapiGetUnAddrOfVpnBi(bi
, &pfx
))) {
1503 void rfapiDumpNode(struct route_node
*rn
)
1505 struct bgp_info
*bi
;
1507 vnc_zlog_debug_verbose("%s: rn=%p", __func__
, rn
);
1508 for (bi
= rn
->info
; bi
; bi
= bi
->next
) {
1510 int ctrc
= rfapiGetUnAddrOfVpnBi(bi
, &pfx
);
1513 if (!CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
1514 && (bi
->extra
&& !ctrc
)) {
1521 vnc_zlog_debug_verbose(
1522 " bi=%p, nr=%d, flags=0x%x, extra=%p, ctrc=%d", bi
, nr
,
1523 bi
->flags
, bi
->extra
, ctrc
);
1528 static int rfapiNhlAddNodeRoutes(
1529 struct route_node
*rn
, /* in */
1530 struct rfapi_ip_prefix
*rprefix
, /* in */
1531 uint32_t lifetime
, /* in */
1532 int removed
, /* in */
1533 struct rfapi_next_hop_entry
**head
, /* in/out */
1534 struct rfapi_next_hop_entry
**tail
, /* in/out */
1535 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1536 struct route_node
*rfd_rib_node
, /* preload this NVE rib node */
1537 struct prefix
*pfx_target_original
) /* query target */
1539 struct bgp_info
*bi
;
1540 struct rfapi_next_hop_entry
*new;
1541 struct prefix pfx_un
;
1542 struct skiplist
*seen_nexthops
;
1544 int is_l2
= (rn
->p
.family
== AF_ETHERNET
);
1546 if (rfapiRibFTDFilterRecentPrefix(
1547 (struct rfapi_descriptor
*)(rfd_rib_node
->table
->info
), rn
,
1548 pfx_target_original
)) {
1553 skiplist_new(0, vnc_prefix_cmp
, (void (*)(void *))prefix_free
);
1555 for (bi
= rn
->info
; bi
; bi
= bi
->next
) {
1557 struct prefix pfx_vn
;
1558 struct prefix
*newpfx
;
1560 if (removed
&& !CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)) {
1561 #if DEBUG_RETURNED_NHL
1562 vnc_zlog_debug_verbose(
1563 "%s: want holddown, this route not holddown, skip",
1568 if (!removed
&& CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)) {
1577 * Check for excluded VN address
1579 if (rfapiVpnBiNhEqualsPt(bi
, exclude_vnaddr
))
1583 * Check for VN address (nexthop) copied already
1586 /* L2 routes: semantic nexthop in aux_prefix; VN addr
1588 pfx_vn
= bi
->extra
->vnc
.import
.aux_prefix
;
1590 rfapiNexthop2Prefix(bi
->attr
, &pfx_vn
);
1592 if (!skiplist_search(seen_nexthops
, &pfx_vn
, NULL
)) {
1593 #if DEBUG_RETURNED_NHL
1594 char buf
[PREFIX_STRLEN
];
1596 prefix2str(&pfx_vn
, buf
, sizeof(buf
));
1597 vnc_zlog_debug_verbose(
1598 "%s: already put VN/nexthop %s, skip", __func__
,
1604 if (rfapiGetUnAddrOfVpnBi(bi
, &pfx_un
)) {
1605 #if DEBUG_ENCAP_MONITOR
1606 vnc_zlog_debug_verbose(
1607 "%s: failed to get UN address of this VPN bi",
1613 newpfx
= prefix_new();
1615 skiplist_insert(seen_nexthops
, newpfx
, newpfx
);
1617 new = rfapiRouteInfo2NextHopEntry(rprefix
, bi
, lifetime
, rn
);
1619 if (rfapiRibPreloadBi(rfd_rib_node
, &pfx_vn
, &pfx_un
,
1621 /* duplicate filtered by RIB */
1622 rfapi_free_next_hop_list(new);
1629 (*tail
)->next
= new;
1638 skiplist_free(seen_nexthops
);
1647 * omit_node is meant for the situation where we are adding a subtree
1648 * of a parent of some original requested node. The response already
1649 * contains the original requested node, and we don't want to duplicate
1650 * its routes in the list, so we skip it if the right or left node
1651 * matches (of course, we still travel down its child subtrees).
/*
 * rfapiNhlAddSubtree
 *
 * Extends the next-hop list described by head and tail with entries for
 * the nodes located below rn in the import table.
 *
 * For each child link of rn (l_left, then l_right) that is non-NULL and is
 * not omit_node, a child carrying info has its prefix converted with
 * rfapiQprefix2Rprefix() and is handed to rfapiNhlAddNodeRoutes(), once
 * with 0 and once with 1 as the fourth argument (other callers in this
 * file annotate those two values as non-withdrawn and withdrawn routes).
 * When rfd_rib_table is non-NULL a node is obtained from it with
 * route_node_get() and passed along as rib_rn; route_unlock_node() is
 * called on rib_rn afterwards.  Finally the function calls itself on
 * rn->l_left and rn->l_right with the same omit_node.
 *
 * rn                   node whose children are examined
 * lifetime             forwarded to rfapiNhlAddNodeRoutes()
 * head, tail           list being extended (in/out)
 * omit_node            child that must not contribute its own routes
 * exclude_vnaddr       forwarded to rfapiNhlAddNodeRoutes()
 * rfd_rib_table        optional table used to look up rib_rn
 * pfx_target_original  forwarded to rfapiNhlAddNodeRoutes()
 *
 * NOTE(review): the int result is presumably the number of entries that
 * were added (count/rcount accumulate the callee results) - confirm.
 */
1653 static int rfapiNhlAddSubtree(
1654 struct route_node
*rn
, /* in */
1655 uint32_t lifetime
, /* in */
1656 struct rfapi_next_hop_entry
**head
, /* in/out */
1657 struct rfapi_next_hop_entry
**tail
, /* in/out */
1658 struct route_node
*omit_node
, /* in */
1659 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1660 struct route_table
*rfd_rib_table
, /* preload here */
1661 struct prefix
*pfx_target_original
) /* query target */
1663 struct rfapi_ip_prefix rprefix
;
1666 /* FIXME: need to find a better way here to work without sticking our
1667 * hands in node->link */
1668 if (rn
->l_left
&& rn
->l_left
!= omit_node
) {
1669 if (rn
->l_left
->info
) {
1671 struct route_node
*rib_rn
= NULL
;
1673 rfapiQprefix2Rprefix(&rn
->l_left
->p
, &rprefix
);
1674 if (rfd_rib_table
) {
1675 rib_rn
= route_node_get(rfd_rib_table
,
1679 count
= rfapiNhlAddNodeRoutes(
1680 rn
->l_left
, &rprefix
, lifetime
, 0, head
, tail
,
1681 exclude_vnaddr
, rib_rn
, pfx_target_original
);
1683 count
= rfapiNhlAddNodeRoutes(
1684 rn
->l_left
, &rprefix
, lifetime
, 1, head
,
1685 tail
, exclude_vnaddr
, rib_rn
,
1686 pfx_target_original
);
1690 route_unlock_node(rib_rn
);
1694 if (rn
->l_right
&& rn
->l_right
!= omit_node
) {
1695 if (rn
->l_right
->info
) {
1697 struct route_node
*rib_rn
= NULL
;
1699 rfapiQprefix2Rprefix(&rn
->l_right
->p
, &rprefix
);
1700 if (rfd_rib_table
) {
1701 rib_rn
= route_node_get(rfd_rib_table
,
1704 count
= rfapiNhlAddNodeRoutes(
1705 rn
->l_right
, &rprefix
, lifetime
, 0, head
, tail
,
1706 exclude_vnaddr
, rib_rn
, pfx_target_original
);
1708 count
= rfapiNhlAddNodeRoutes(
1709 rn
->l_right
, &rprefix
, lifetime
, 1,
1710 head
, tail
, exclude_vnaddr
, rib_rn
,
1711 pfx_target_original
);
1715 route_unlock_node(rib_rn
);
1720 rcount
+= rfapiNhlAddSubtree(
1721 rn
->l_left
, lifetime
, head
, tail
, omit_node
,
1722 exclude_vnaddr
, rfd_rib_table
, pfx_target_original
);
1725 rcount
+= rfapiNhlAddSubtree(
1726 rn
->l_right
, lifetime
, head
, tail
, omit_node
,
1727 exclude_vnaddr
, rfd_rib_table
, pfx_target_original
);
1734 * Implementation of ROUTE_LIST(node) from RFAPI-Import-Event-Handling.txt
1736 * Construct an rfapi nexthop list based on the routes attached to
1737 * the specified node.
1739 * If there are any routes that do NOT have BGP_INFO_REMOVED set,
1740 * return those only. If there are ONLY routes with BGP_INFO_REMOVED,
1741 * then return those, and also include all the non-removed routes from the
1742 * next less-specific node (i.e., this node's parent) at the end.
/*
 * rfapiRouteNode2NextHopList
 *
 * Builds an rfapi next-hop list (answer/last are the local head and tail)
 * from the routes attached to import-table node rn.
 *
 * Visible sequence:
 *  - rn->p is converted with rfapiQprefix2Rprefix(); rib_rn is looked up in
 *    rfd_rib_table with route_node_get() when that table is non-NULL,
 *    otherwise it is NULL.
 *  - rfapiNhlAddNodeRoutes() is called for rn with 0 as fourth argument,
 *    followed by rfapiNhlAddSubtree() on rn with no omitted node; rib_rn is
 *    released with route_unlock_node().
 *  - Another path calls rfapiNhlAddNodeRoutes() for rn with 1 as fourth
 *    argument and then walks rn->parent upwards until
 *    rfapiHasNonRemovedRoutes() accepts a parent.  The routes of that
 *    parent and its subtree are added, passing rn as the node to omit.
 *  - A further path adds the subtree of rn while omitting rn itself.
 *
 * rn                   import-table node to describe
 * lifetime             value placed into the next-hop entries
 * exclude_vnaddr       forwarded to the helper functions
 * rfd_rib_table        optional table used to look up rib_rn
 * pfx_target_original  forwarded to the helper functions
 *
 * NOTE(review): which path runs presumably depends on the count returned
 * by the first rfapiNhlAddNodeRoutes() call, and the result is presumably
 * answer - confirm against the complete function text.
 */
1744 struct rfapi_next_hop_entry
*rfapiRouteNode2NextHopList(
1745 struct route_node
*rn
, uint32_t lifetime
, /* put into nexthop entries */
1746 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1747 struct route_table
*rfd_rib_table
, /* preload here */
1748 struct prefix
*pfx_target_original
) /* query target */
1750 struct rfapi_ip_prefix rprefix
;
1751 struct rfapi_next_hop_entry
*answer
= NULL
;
1752 struct rfapi_next_hop_entry
*last
= NULL
;
1753 struct route_node
*parent
;
1755 struct route_node
*rib_rn
;
1757 #if DEBUG_RETURNED_NHL
1759 char buf
[PREFIX_STRLEN
];
1761 prefix2str(&rn
->p
, buf
, sizeof(buf
));
1762 vnc_zlog_debug_verbose("%s: called with node pfx=%s", __func__
,
1765 rfapiDebugBacktrace();
1768 rfapiQprefix2Rprefix(&rn
->p
, &rprefix
);
1770 rib_rn
= rfd_rib_table
? route_node_get(rfd_rib_table
, &rn
->p
) : NULL
;
1773 * Add non-withdrawn routes at this node
1775 count
= rfapiNhlAddNodeRoutes(rn
, &rprefix
, lifetime
, 0, &answer
, &last
,
1776 exclude_vnaddr
, rib_rn
,
1777 pfx_target_original
);
1780 * If the list has at least one entry, it's finished
1783 count
+= rfapiNhlAddSubtree(rn
, lifetime
, &answer
, &last
, NULL
,
1784 exclude_vnaddr
, rfd_rib_table
,
1785 pfx_target_original
);
1786 vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__
,
1788 #if DEBUG_RETURNED_NHL
1789 rfapiPrintNhl(NULL
, answer
);
1792 route_unlock_node(rib_rn
);
1797 * Add withdrawn routes at this node
1799 count
= rfapiNhlAddNodeRoutes(rn
, &rprefix
, lifetime
, 1, &answer
, &last
,
1800 exclude_vnaddr
, rib_rn
,
1801 pfx_target_original
);
1803 route_unlock_node(rib_rn
);
1805 // rfapiPrintNhl(NULL, answer);
1808 * walk up the tree until we find a node with non-deleted
1809 * routes, then add them
1811 for (parent
= rn
->parent
; parent
; parent
= parent
->parent
) {
1812 if (rfapiHasNonRemovedRoutes(parent
)) {
1818 * Add non-withdrawn routes from less-specific prefix
1821 rib_rn
= rfd_rib_table
1822 ? route_node_get(rfd_rib_table
, &parent
->p
)
1824 rfapiQprefix2Rprefix(&parent
->p
, &rprefix
);
1825 count
+= rfapiNhlAddNodeRoutes(parent
, &rprefix
, lifetime
, 0,
1826 &answer
, &last
, exclude_vnaddr
,
1827 rib_rn
, pfx_target_original
);
1828 count
+= rfapiNhlAddSubtree(parent
, lifetime
, &answer
, &last
,
1829 rn
, exclude_vnaddr
, rfd_rib_table
,
1830 pfx_target_original
);
1832 route_unlock_node(rib_rn
);
1835 * There is no parent with non-removed routes. Still need to
1836 * add subtree of original node if it contributed routes to the
1840 count
+= rfapiNhlAddSubtree(rn
, lifetime
, &answer
,
1841 &last
, rn
, exclude_vnaddr
,
1843 pfx_target_original
);
1846 vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__
, count
,
1848 #if DEBUG_RETURNED_NHL
1849 rfapiPrintNhl(NULL
, answer
);
1855 * Construct nexthop list of all routes in table
/*
 * rfapiRouteTable2NextHopList
 *
 * Walks every node of rt with route_top()/route_next() and asks
 * rfapiRouteNode2NextHopList() for the next-hop list of each node.  The
 * per-node lists are chained into one list: biglist is the overall head
 * and tail is advanced along ->next to the last entry.  The number of
 * routes is reported through vnc_zlog_debug_verbose().
 *
 * rt                   table to walk
 * lifetime             value placed into the next-hop entries
 * exclude_vnaddr       forwarded to rfapiRouteNode2NextHopList()
 * rfd_rib_table        described by its parameter comment as the NVE rib
 *                      table to preload
 * pfx_target_original  forwarded to rfapiRouteNode2NextHopList()
 *
 * NOTE(review): the result is presumably biglist - confirm.
 */
1857 struct rfapi_next_hop_entry
*rfapiRouteTable2NextHopList(
1858 struct route_table
*rt
,
1859 uint32_t lifetime
, /* put into nexthop entries */
1860 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1861 struct route_table
*rfd_rib_table
, /* preload this NVE rib table */
1862 struct prefix
*pfx_target_original
) /* query target */
1864 struct route_node
*rn
;
1865 struct rfapi_next_hop_entry
*biglist
= NULL
;
1866 struct rfapi_next_hop_entry
*nhl
;
1867 struct rfapi_next_hop_entry
*tail
= NULL
;
1870 for (rn
= route_top(rt
); rn
; rn
= route_next(rn
)) {
1872 nhl
= rfapiRouteNode2NextHopList(rn
, lifetime
, exclude_vnaddr
,
1874 pfx_target_original
);
1876 tail
= biglist
= nhl
;
1883 while (tail
->next
) {
1890 vnc_zlog_debug_verbose("%s: returning %d routes", __func__
, count
);
/*
 * rfapiEthRouteNode2NextHopList
 *
 * Builds a next-hop list (answer/last) for node rn using the caller
 * supplied rprefix.  rib_rn is obtained from rfd_rib_table with
 * route_node_get() when the table is non-NULL, otherwise it is NULL, and
 * it is released with route_unlock_node() further down.
 *
 * rfapiNhlAddNodeRoutes() is called with 0 as fourth argument first; the
 * debug text labels the resulting count as non-holddown routes.  A second
 * call with 1 as fourth argument is labelled holddown routes.
 *
 * NOTE(review): the first call passes NULL where the second call passes
 * exclude_vnaddr - confirm that not applying the exclusion to the
 * non-holddown routes is intended.
 * NOTE(review): the result is presumably answer - confirm.
 */
1894 struct rfapi_next_hop_entry
*rfapiEthRouteNode2NextHopList(
1895 struct route_node
*rn
, struct rfapi_ip_prefix
*rprefix
,
1896 uint32_t lifetime
, /* put into nexthop entries */
1897 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1898 struct route_table
*rfd_rib_table
, /* preload NVE rib table */
1899 struct prefix
*pfx_target_original
) /* query target */
1902 struct rfapi_next_hop_entry
*answer
= NULL
;
1903 struct rfapi_next_hop_entry
*last
= NULL
;
1904 struct route_node
*rib_rn
;
1906 rib_rn
= rfd_rib_table
? route_node_get(rfd_rib_table
, &rn
->p
) : NULL
;
1908 count
= rfapiNhlAddNodeRoutes(rn
, rprefix
, lifetime
, 0, &answer
, &last
,
1909 NULL
, rib_rn
, pfx_target_original
);
1911 #if DEBUG_ENCAP_MONITOR
1912 vnc_zlog_debug_verbose("%s: node %p: %d non-holddown routes", __func__
,
1917 count
= rfapiNhlAddNodeRoutes(rn
, rprefix
, lifetime
, 1, &answer
,
1918 &last
, exclude_vnaddr
, rib_rn
,
1919 pfx_target_original
);
1920 vnc_zlog_debug_verbose("%s: node %p: %d holddown routes",
1921 __func__
, rn
, count
);
1925 route_unlock_node(rib_rn
);
1927 #if DEBUG_RETURNED_NHL
1928 rfapiPrintNhl(NULL
, answer
);
1936 * Construct nexthop list of all routes in table
/*
 * rfapiEthRouteTable2NextHopList
 *
 * Looks up the MAC import table for logical_net_id with
 * rfapiMacImportTableGet() on the default BGP instance, takes its
 * imported_vpn[AFI_L2VPN] route table and walks it with
 * route_top()/route_next().  Each node is converted with
 * rfapiEthRouteNode2NextHopList() and the per-node lists are chained
 * together (biglist is the overall head, tail is advanced along ->next).
 * The number of routes is reported through vnc_zlog_debug_verbose().
 *
 * NOTE(review): no check of bgp or it against NULL is visible before they
 * are used - confirm one exists or that neither can be NULL here.
 * NOTE(review): the result is presumably biglist - confirm.
 */
1938 struct rfapi_next_hop_entry
*rfapiEthRouteTable2NextHopList(
1939 uint32_t logical_net_id
, struct rfapi_ip_prefix
*rprefix
,
1940 uint32_t lifetime
, /* put into nexthop entries */
1941 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1942 struct route_table
*rfd_rib_table
, /* preload NVE rib node */
1943 struct prefix
*pfx_target_original
) /* query target */
1945 struct rfapi_import_table
*it
;
1946 struct bgp
*bgp
= bgp_get_default();
1947 struct route_table
*rt
;
1948 struct route_node
*rn
;
1949 struct rfapi_next_hop_entry
*biglist
= NULL
;
1950 struct rfapi_next_hop_entry
*nhl
;
1951 struct rfapi_next_hop_entry
*tail
= NULL
;
1955 it
= rfapiMacImportTableGet(bgp
, logical_net_id
);
1956 rt
= it
->imported_vpn
[AFI_L2VPN
];
1958 for (rn
= route_top(rt
); rn
; rn
= route_next(rn
)) {
1960 nhl
= rfapiEthRouteNode2NextHopList(
1961 rn
, rprefix
, lifetime
, exclude_vnaddr
, rfd_rib_table
,
1962 pfx_target_original
);
1964 tail
= biglist
= nhl
;
1971 while (tail
->next
) {
1978 vnc_zlog_debug_verbose("%s: returning %d routes", __func__
, count
);
1983 * Insert a new bi to the imported route table node,
1984 * keeping the list of BIs sorted best route first
/*
 * rfapiBgpInfoAttachSorted
 *
 * Links info_new into the doubly-linked bgp_info list hanging off
 * rn->info.  The list is scanned from its head with a prev/next pair and
 * the scan condition is true for the first existing entry (next) where
 *   - there is no default BGP instance, or
 *   - info_new does not have BGP_INFO_REMOVED set while next does, or
 *   - bgp_info_cmp_compatible() yields -1 (annotated: first is better).
 * info_new is then wired between prev and next; prev->next or rn->info is
 * pointed at info_new, and next->prev is updated.  Finally
 * bgp_attr_intern() is called on info_new->attr.
 *
 * NOTE(review): the scan presumably stops (break) at that first match, and
 * the prev->next / rn->info / next->prev assignments are presumably
 * guarded by NULL checks on prev and next - confirm.
 */
1986 static void rfapiBgpInfoAttachSorted(struct route_node
*rn
,
1987 struct bgp_info
*info_new
, afi_t afi
,
1991 struct bgp_info
*prev
;
1992 struct bgp_info
*next
;
1993 char pfx_buf
[PREFIX2STR_BUFFER
];
1996 bgp
= bgp_get_default(); /* assume 1 instance for now */
1998 if (VNC_DEBUG(IMPORT_BI_ATTACH
)) {
1999 vnc_zlog_debug_verbose("%s: info_new->peer=%p", __func__
,
2001 vnc_zlog_debug_verbose("%s: info_new->peer->su_remote=%p",
2002 __func__
, info_new
->peer
->su_remote
);
2005 for (prev
= NULL
, next
= rn
->info
; next
;
2006 prev
= next
, next
= next
->next
) {
2007 if (!bgp
|| (!CHECK_FLAG(info_new
->flags
, BGP_INFO_REMOVED
)
2008 && CHECK_FLAG(next
->flags
, BGP_INFO_REMOVED
))
2009 || bgp_info_cmp_compatible(bgp
, info_new
, next
, pfx_buf
,
2011 == -1) { /* -1 if 1st is better */
2015 vnc_zlog_debug_verbose("%s: prev=%p, next=%p", __func__
, prev
, next
);
2017 prev
->next
= info_new
;
2019 rn
->info
= info_new
;
2021 info_new
->prev
= prev
;
2022 info_new
->next
= next
;
2024 next
->prev
= info_new
;
2025 bgp_attr_intern(info_new
->attr
);
/*
 * rfapiBgpInfoDetach
 *
 * Unlinks bi from the doubly-linked bgp_info list of rn: the successor's
 * prev pointer and the predecessor's next pointer are made to bypass bi,
 * and rn->info is set to bi->next for the head-of-list case.  bi itself is
 * not freed here and its attr is not uninterned (that call is commented
 * out).
 *
 * NOTE(review): the three assignments are presumably guarded by checks of
 * bi->next and bi->prev - confirm.
 */
2028 static void rfapiBgpInfoDetach(struct route_node
*rn
, struct bgp_info
*bi
)
2031 * Remove the route (doubly-linked)
2033 // bgp_attr_unintern (&bi->attr);
2035 bi
->next
->prev
= bi
->prev
;
2037 bi
->prev
->next
= bi
->next
;
2039 rn
->info
= bi
->next
;
2043 * For L3-indexed import tables
/*
 * rfapi_bi_peer_rd_cmp
 *
 * Ordering function over two struct bgp_info pointers passed as void *
 * (it is handed to skiplist_new() by rfapiItBiIndexAdd()).  The peer
 * pointers are compared first with < and >; the final result comes from
 * vnc_prefix_cmp() on the two extra->vnc.import.rd values.
 *
 * NOTE(review): the two peer comparisons presumably return a negative and
 * a positive value respectively, so the RD comparison only decides when
 * the peers are equal - confirm.
 */
2045 static int rfapi_bi_peer_rd_cmp(void *b1
, void *b2
)
2047 struct bgp_info
*bi1
= b1
;
2048 struct bgp_info
*bi2
= b2
;
2053 if (bi1
->peer
< bi2
->peer
)
2055 if (bi1
->peer
> bi2
->peer
)
2061 return vnc_prefix_cmp((struct prefix
*)&bi1
->extra
->vnc
.import
.rd
,
2062 (struct prefix
*)&bi2
->extra
->vnc
.import
.rd
);
2066 * For L2-indexed import tables
2067 * The BIs in these tables should ALWAYS have an aux_prefix set because
2068 * they arrive via IPv4 or IPv6 advertisements.
/*
 * rfapi_bi_peer_rd_aux_cmp
 *
 * Ordering function over two struct bgp_info pointers passed as void *
 * (it is handed to skiplist_new() by rfapiItBiIndexAdd() for nodes whose
 * prefix family is AF_ETHERNET).  Comparison keys, in order:
 *   1. the peer pointers (compared with < and >),
 *   2. extra->vnc.import.rd, through vnc_prefix_cmp() (result kept in rc),
 *   3. extra->vnc.import.aux_prefix, through vnc_prefix_cmp().
 * Before step 3, an aux_prefix with family AF_ETHERNET and prefixlen 1 on
 * either argument is recognised as the wildcard marker.
 *
 * NOTE(review): the wildcard branch presumably reports equality (0), and a
 * non-zero rc from step 2 is presumably returned immediately - confirm.
 */
2070 static int rfapi_bi_peer_rd_aux_cmp(void *b1
, void *b2
)
2072 struct bgp_info
*bi1
= b1
;
2073 struct bgp_info
*bi2
= b2
;
2079 if (bi1
->peer
< bi2
->peer
)
2081 if (bi1
->peer
> bi2
->peer
)
2087 rc
= vnc_prefix_cmp((struct prefix
*)&bi1
->extra
->vnc
.import
.rd
,
2088 (struct prefix
*)&bi2
->extra
->vnc
.import
.rd
);
2094 * L2 import tables can have multiple entries with the
2095 * same MAC address, same RD, but different L3 addresses.
2097 * Use presence of aux_prefix with AF=ethernet and prefixlen=1
2098 * as magic value to signify explicit wildcarding of the aux_prefix.
2099 * This magic value will not appear in bona fide bi entries in
2100 * the import table, but is allowed in the "fake" bi used to
2101 * probe the table when searching. (We have to test both b1 and b2
2102 * because there is no guarantee of the order the test key and
2103 * the real key will be passed)
2105 if ((bi1
->extra
->vnc
.import
.aux_prefix
.family
== AF_ETHERNET
2106 && (bi1
->extra
->vnc
.import
.aux_prefix
.prefixlen
== 1))
2107 || (bi2
->extra
->vnc
.import
.aux_prefix
.family
== AF_ETHERNET
2108 && (bi2
->extra
->vnc
.import
.aux_prefix
.prefixlen
== 1))) {
2111 * wildcard aux address specified
2116 return vnc_prefix_cmp(&bi1
->extra
->vnc
.import
.aux_prefix
,
2117 &bi2
->extra
->vnc
.import
.aux_prefix
);
2122 * Index on RD and Peer
/*
 * rfapiItBiIndexAdd
 *
 * Adds bi to the per-node index skiplist of import-table node rn.
 *
 * The skiplist is fetched with RFAPI_RDINDEX_W_ALLOC(rn).  A new skiplist
 * is created with rfapi_bi_peer_rd_aux_cmp when rn->p.family is
 * AF_ETHERNET and with rfapi_bi_peer_rd_cmp otherwise, stored in
 * RFAPI_IT_EXTRA_GET(rn)->u.vpn.idx_rd, and rn is locked once on behalf of
 * the skiplist.  bi is then inserted and rn is locked once more on behalf
 * of the new entry.
 *
 * NOTE(review): the skiplist creation is presumably done only when no
 * index exists yet - confirm.
 * NOTE(review): skiplist_insert() is evaluated inside assert(); confirm
 * that assert is never compiled out in this build, otherwise the
 * insertion would disappear with it.
 */
2124 static void rfapiItBiIndexAdd(struct route_node
*rn
, /* Import table VPN node */
2125 struct bgp_info
*bi
) /* new BI */
2127 struct skiplist
*sl
;
2134 char buf
[RD_ADDRSTRLEN
];
2136 vnc_zlog_debug_verbose("%s: bi %p, peer %p, rd %s", __func__
,
2138 prefix_rd2str(&bi
->extra
->vnc
.import
.rd
,
2142 sl
= RFAPI_RDINDEX_W_ALLOC(rn
);
2144 if (AF_ETHERNET
== rn
->p
.family
) {
2145 sl
= skiplist_new(0, rfapi_bi_peer_rd_aux_cmp
, NULL
);
2147 sl
= skiplist_new(0, rfapi_bi_peer_rd_cmp
, NULL
);
2149 RFAPI_IT_EXTRA_GET(rn
)->u
.vpn
.idx_rd
= sl
;
2150 route_lock_node(rn
); /* for skiplist */
/* bi is used as both the key and the value of the index entry */
2152 assert(!skiplist_insert(sl
, (void *)bi
, (void *)bi
));
2153 route_lock_node(rn
); /* for skiplist entry */
2155 /* NB: BIs in import tables are not refcounted */
/*
 * rfapiItBiIndexDump
 *
 * Debug helper: iterates over the index skiplist of rn (RFAPI_RDINDEX)
 * with skiplist_next() and a cursor, and logs one line per entry through
 * vnc_zlog_debug_verbose() containing the bi pointer, its peer, its RD as
 * text and its aux_prefix as text.  When the aux_prefix family is zero the
 * text "(none)" is shown instead.
 *
 * NOTE(review): a missing index (sl == NULL) is presumably handled before
 * the loop - confirm.
 */
2158 static void rfapiItBiIndexDump(struct route_node
*rn
)
2160 struct skiplist
*sl
;
2161 void *cursor
= NULL
;
2166 sl
= RFAPI_RDINDEX(rn
);
2170 for (rc
= skiplist_next(sl
, (void **)&k
, (void **)&v
, &cursor
); !rc
;
2171 rc
= skiplist_next(sl
, (void **)&k
, (void **)&v
, &cursor
)) {
2173 char buf
[RD_ADDRSTRLEN
];
2174 char buf_aux_pfx
[PREFIX_STRLEN
];
2176 prefix_rd2str(&k
->extra
->vnc
.import
.rd
, buf
, sizeof(buf
));
2177 if (k
->extra
->vnc
.import
.aux_prefix
.family
) {
2178 prefix2str(&k
->extra
->vnc
.import
.aux_prefix
,
2179 buf_aux_pfx
, sizeof(buf_aux_pfx
));
2181 strncpy(buf_aux_pfx
, "(none)", PREFIX_STRLEN
);
2183 vnc_zlog_debug_verbose("bi %p, peer %p, rd %s, aux_prefix %s",
2184 k
, k
->peer
, buf
, buf_aux_pfx
);
/*
 * rfapiItBiIndexSearch
 *
 * Finds the bgp_info attached to import-table node rn that matches the
 * given peer and route distinguisher and, optionally, aux_prefix.
 *
 * Two strategies are visible:
 *  - When the index skiplist holds fewer than 3 entries, the bgp_info list
 *    at rn->info is walked linearly; an entry qualifies when its peer is
 *    identical, prefix_cmp() on the RDs reports equality, and the
 *    aux_prefix test passes.
 *  - Otherwise a stack-allocated probe (bi_fake with bi_extra) is filled
 *    with peer, the RD and the aux_prefix, and skiplist_search() is run on
 *    the index.  The probe can carry family AF_ETHERNET with prefixlen 1,
 *    the wildcard marker understood by rfapi_bi_peer_rd_aux_cmp().
 *
 * rn          import table VPN node
 * prd         route distinguisher to match
 * peer        peer to match
 * aux_prefix  optional L3 address for L2 import tables
 *
 * NOTE(review): the wildcard marker is presumably used when aux_prefix is
 * NULL, and the result is presumably NULL when nothing matches - confirm.
 */
2188 static struct bgp_info
*rfapiItBiIndexSearch(
2189 struct route_node
*rn
, /* Import table VPN node */
2190 struct prefix_rd
*prd
, struct peer
*peer
,
2191 struct prefix
*aux_prefix
) /* optional L3 addr for L2 ITs */
2193 struct skiplist
*sl
;
2195 struct bgp_info bi_fake
;
2196 struct bgp_info_extra bi_extra
;
2197 struct bgp_info
*bi_result
;
2199 sl
= RFAPI_RDINDEX(rn
);
2205 char buf
[RD_ADDRSTRLEN
];
2206 char buf_aux_pfx
[PREFIX_STRLEN
];
2209 prefix2str(aux_prefix
, buf_aux_pfx
,
2210 sizeof(buf_aux_pfx
));
2212 strncpy(buf_aux_pfx
, "(nil)", sizeof(buf_aux_pfx
));
2214 vnc_zlog_debug_verbose("%s want prd=%s, peer=%p, aux_prefix=%s",
2216 prefix_rd2str(prd
, buf
, sizeof(buf
)),
2218 rfapiItBiIndexDump(rn
);
2222 /* threshold is a WAG */
2223 if (sl
->count
< 3) {
2225 vnc_zlog_debug_verbose("%s: short list algorithm", __func__
);
2227 /* if short list, linear search might be faster */
2228 for (bi_result
= rn
->info
; bi_result
;
2229 bi_result
= bi_result
->next
) {
2232 char buf
[RD_ADDRSTRLEN
];
2234 vnc_zlog_debug_verbose(
2235 "%s: bi has prd=%s, peer=%p", __func__
,
2236 prefix_rd2str(&bi_result
->extra
->vnc
2242 if (peer
== bi_result
->peer
2243 && !prefix_cmp((struct prefix
*)&bi_result
->extra
2245 (struct prefix
*)prd
)) {
2248 vnc_zlog_debug_verbose(
2249 "%s: peer and RD same, doing aux_prefix check",
2253 || !prefix_cmp(aux_prefix
,
2254 &bi_result
->extra
->vnc
.import
2258 vnc_zlog_debug_verbose("%s: match",
2268 bi_fake
.peer
= peer
;
2269 bi_fake
.extra
= &bi_extra
;
2270 bi_fake
.extra
->vnc
.import
.rd
= *(struct prefix_rd
*)prd
;
2272 bi_fake
.extra
->vnc
.import
.aux_prefix
= *aux_prefix
;
2275 bi_fake
.extra
->vnc
.import
.aux_prefix
.family
= AF_ETHERNET
;
2276 bi_fake
.extra
->vnc
.import
.aux_prefix
.prefixlen
= 1;
2279 rc
= skiplist_search(sl
, (void *)&bi_fake
, (void *)&bi_result
);
2283 vnc_zlog_debug_verbose("%s: no match", __func__
);
2289 vnc_zlog_debug_verbose("%s: matched bi=%p", __func__
, bi_result
);
/*
 * rfapiItBiIndexDel
 *
 * Removes bi from the index skiplist of import-table node rn
 * (RFAPI_RDINDEX) with skiplist_delete(), using bi as both key and value,
 * and drops the node lock that was taken for the skiplist entry with
 * route_unlock_node().  rfapiItBiIndexDump() is also called from this
 * function.
 *
 * NOTE(review): the dump is presumably only produced when the deletion
 * fails (rc non-zero) - confirm.
 */
2295 static void rfapiItBiIndexDel(struct route_node
*rn
, /* Import table VPN node */
2296 struct bgp_info
*bi
) /* old BI */
2298 struct skiplist
*sl
;
2302 char buf
[RD_ADDRSTRLEN
];
2304 vnc_zlog_debug_verbose("%s: bi %p, peer %p, rd %s", __func__
,
2306 prefix_rd2str(&bi
->extra
->vnc
.import
.rd
,
2310 sl
= RFAPI_RDINDEX(rn
);
2313 rc
= skiplist_delete(sl
, (void *)(bi
), (void *)bi
);
2315 rfapiItBiIndexDump(rn
);
2319 route_unlock_node(rn
); /* for skiplist entry */
2321 /* NB: BIs in import tables are not refcounted */
2325 * Add a backreference at the ENCAP node to the VPN route that
/*
 * rfapiMonitorEncapAdd
 *
 * Creates an encap monitor that links a VPN route to the ENCAP node of its
 * VN address.
 *
 * The ENCAP node is obtained with route_node_get() from
 * import_table->imported_encap[afi], where afi is derived from p->family.
 * A struct rfapi_monitor_encap is allocated with XCALLOC, placed at the
 * front of the node's monitor list (its next pointer takes the previous
 * list head, then RFAPI_MONITOR_ENCAP_W_ALLOC(rn) is set to the new
 * monitor) and remembered in vpn_bi->extra->vnc.import.hme so it can be
 * found when the VPN route is deleted.  bgp_attr_intern() is called on
 * vpn_bi->attr at the end.
 *
 * import_table  import table that owns the ENCAP tables
 * p             VN address
 * vpn_rn        VPN node
 * vpn_bi        VPN bi/route
 */
2328 static void rfapiMonitorEncapAdd(struct rfapi_import_table
*import_table
,
2329 struct prefix
*p
, /* VN address */
2330 struct route_node
*vpn_rn
, /* VPN node */
2331 struct bgp_info
*vpn_bi
) /* VPN bi/route */
2333 afi_t afi
= family2afi(p
->family
);
2334 struct route_node
*rn
;
2335 struct rfapi_monitor_encap
*m
;
2338 rn
= route_node_get(import_table
->imported_encap
[afi
],
2342 m
= XCALLOC(MTYPE_RFAPI_MONITOR_ENCAP
,
2343 sizeof(struct rfapi_monitor_encap
));
2350 /* insert to encap node's list */
2351 m
->next
= RFAPI_MONITOR_ENCAP(rn
);
2354 RFAPI_MONITOR_ENCAP_W_ALLOC(rn
) = m
;
2356 /* for easy lookup when deleting vpn route */
2357 vpn_bi
->extra
->vnc
.import
.hme
= m
;
2359 vnc_zlog_debug_verbose(
2360 "%s: it=%p, vpn_bi=%p, afi=%d, encap rn=%p, setting vpn_bi->extra->vnc.import.hme=%p",
2361 __func__
, import_table
, vpn_bi
, afi
, rn
, m
);
2363 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, 0);
2364 bgp_attr_intern(vpn_bi
->attr
);
/*
 * rfapiMonitorEncapDelete
 *
 * Removes the encap monitor referenced by vpn_bi->extra->vnc.import.hme.
 * Nothing is done with the monitor when vpn_bi->extra is NULL.
 *
 * The monitor is unlinked from its doubly-linked list (neighbour pointers
 * are patched, or the list head of hme->rn is rewritten through
 * RFAPI_MONITOR_ENCAP_W_ALLOC), rfapiMonitorExtraPrune() is called for the
 * ENCAP node, the node reference is dropped with route_unlock_node(), the
 * monitor is released with XFREE and the hme field is reset to NULL.
 *
 * NOTE(review): the unlink and free steps are presumably skipped when hme
 * is NULL - confirm.
 */
2367 static void rfapiMonitorEncapDelete(struct bgp_info
*vpn_bi
)
2370 * Remove encap monitor
2372 vnc_zlog_debug_verbose("%s: vpn_bi=%p", __func__
, vpn_bi
);
2373 if (vpn_bi
->extra
) {
2374 struct rfapi_monitor_encap
*hme
= vpn_bi
->extra
->vnc
.import
.hme
;
2378 vnc_zlog_debug_verbose("%s: hme=%p", __func__
, hme
);
2380 /* Refcount checking takes too long here */
2381 // RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 0);
2383 hme
->next
->prev
= hme
->prev
;
2385 hme
->prev
->next
= hme
->next
;
2387 RFAPI_MONITOR_ENCAP_W_ALLOC(hme
->rn
) =
2389 /* Refcount checking takes too long here */
2390 // RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 1);
2392 /* see if the struct rfapi_it_extra is empty and can be
2394 rfapiMonitorExtraPrune(SAFI_ENCAP
, hme
->rn
);
2396 route_unlock_node(hme
->rn
); /* decr ref count */
2397 XFREE(MTYPE_RFAPI_MONITOR_ENCAP
, hme
);
2398 vpn_bi
->extra
->vnc
.import
.hme
= NULL
;
2404 * quagga lib/thread.h says this must return int even though
2405 * it doesn't do anything with the return value
/*
 * rfapiWithdrawTimerVPN
 *
 * Thread callback that finally removes a withdrawn VPN route.  t->arg is
 * the struct rfapi_withdraw (wcb) naming the import table, node and bi.
 *
 * Visible steps:
 *  - a valid interior-type bi decrements the valid_interior_count kept in
 *    RFAPI_MONITOR_EXTERIOR(wcb->node);
 *  - the holddown_count of the import table for the node's afi is reduced
 *    by one;
 *  - the bi leaves the index (rfapiItBiIndexDel) and the route list
 *    (rfapiBgpInfoDetach), and
 *    vnc_import_bgp_exterior_del_route_interior() is called;
 *  - unless BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE is set, the remaining
 *    routes are scanned with rfapiVpnBiSamePtUn(); with no duplicate
 *    rfapiRibPendingDeleteRoute() is called;
 *  - the encap monitor of the bi is removed (rfapiMonitorEncapDelete);
 *  - VPN monitors are handled through rfapiMonitorMoveShorter() and
 *    rfapiMonitorMovedUp();
 *  - the bi is released with rfapiBgpInfoFree();
 *  - when the node has no routes left, rfapiMonitorExtraPrune(),
 *    vnc_direct_bgp_del_prefix() and vnc_zebra_del_prefix() run;
 *    vnc_direct_bgp_add_prefix() is used for the nexthop-change case;
 *  - the node reference is dropped and wcb is freed.
 *
 * NOTE(review): bgp comes from bgp_get_default() and bgp->rfapi_cfg is
 * dereferenced; confirm bgp cannot be NULL when this timer fires.
 * NOTE(review): the returned int is presumably always 0 - confirm.
 */
2407 static int rfapiWithdrawTimerVPN(struct thread
*t
)
2409 struct rfapi_withdraw
*wcb
= t
->arg
;
2410 struct bgp_info
*bi
= wcb
->info
;
2411 struct bgp
*bgp
= bgp_get_default();
2413 struct rfapi_monitor_vpn
*moved
;
2418 assert(wcb
->import_table
);
2421 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_MPLS_VPN
, wcb
->lockoffset
);
2426 vnc_zlog_debug_verbose(
2427 "%s: removing bi %p at prefix %s/%d", __func__
, bi
,
2428 rfapi_ntop(wcb
->node
->p
.family
, &wcb
->node
->p
.u
.prefix
,
2430 wcb
->node
->p
.prefixlen
);
2434 * Remove the route (doubly-linked)
2436 if (CHECK_FLAG(bi
->flags
, BGP_INFO_VALID
)
2437 && VALID_INTERIOR_TYPE(bi
->type
))
2438 RFAPI_MONITOR_EXTERIOR(wcb
->node
)->valid_interior_count
--;
2440 afi
= family2afi(wcb
->node
->p
.family
);
2441 wcb
->import_table
->holddown_count
[afi
] -= 1; /* keep count consistent */
2442 rfapiItBiIndexDel(wcb
->node
, bi
);
2443 rfapiBgpInfoDetach(wcb
->node
, bi
); /* with removed bi */
2445 vnc_import_bgp_exterior_del_route_interior(bgp
, wcb
->import_table
,
2450 * If VNC is configured to send response remove messages, AND
2451 * if the removed route had a UN address, do response removal
2454 if (!(bgp
->rfapi_cfg
->flags
2455 & BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE
)) {
2457 int has_valid_duplicate
= 0;
2458 struct bgp_info
*bii
;
2461 * First check if there are any OTHER routes at this node
2462 * that have the same nexthop and a valid UN address. If
2463 * there are (e.g., from other peers), then the route isn't
2464 * really gone, so skip sending a response removal message.
2466 for (bii
= wcb
->node
->info
; bii
; bii
= bii
->next
) {
2467 if (rfapiVpnBiSamePtUn(bi
, bii
)) {
2468 has_valid_duplicate
= 1;
2473 vnc_zlog_debug_verbose("%s: has_valid_duplicate=%d", __func__
,
2474 has_valid_duplicate
);
2476 if (!has_valid_duplicate
) {
2477 rfapiRibPendingDeleteRoute(bgp
, wcb
->import_table
, afi
,
2482 rfapiMonitorEncapDelete(bi
);
2485 * If there are no VPN monitors at this VPN Node A,
2488 if (!RFAPI_MONITOR_VPN(wcb
->node
)) {
2489 vnc_zlog_debug_verbose("%s: no VPN monitors at this node",
2495 * rfapiMonitorMoveShorter only moves monitors if there are
2496 * no remaining valid routes at the current node
2498 moved
= rfapiMonitorMoveShorter(wcb
->node
, 1);
2501 rfapiMonitorMovedUp(wcb
->import_table
, wcb
->node
, moved
->node
,
2509 rfapiBgpInfoFree(bi
);
2513 * If route count at this node has gone to 0, withdraw exported prefix
2515 if (!wcb
->node
->info
) {
2516 /* see if the struct rfapi_it_extra is empty and can be freed */
2517 rfapiMonitorExtraPrune(SAFI_MPLS_VPN
, wcb
->node
);
2518 vnc_direct_bgp_del_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2519 vnc_zebra_del_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2522 * nexthop change event
2523 * vnc_direct_bgp_add_prefix() will recompute the VN addr
2526 vnc_direct_bgp_add_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2529 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_MPLS_VPN
, 1 + wcb
->lockoffset
);
2530 route_unlock_node(wcb
->node
); /* decr ref count */
2531 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
2536 * This works for multiprotocol extension, but not for plain ol'
2537 * unicast IPv4 because that nexthop is stored in attr->nexthop
/*
 * rfapiNexthop2Prefix
 *
 * Fills *p from the multiprotocol next hop stored in attr.  p is zeroed
 * first; p->family is then set from
 * BGP_MP_NEXTHOP_FAMILY(attr->mp_nexthop_len) and the address is copied
 * from attr->mp_nexthop_global_in into p->u.prefix4 or from
 * attr->mp_nexthop_global into p->u.prefix6.  A family matching neither
 * case is reported through vnc_zlog_debug_verbose().
 *
 * attr  attribute set holding the next hop
 * p     prefix to fill in (out)
 *
 * NOTE(review): p->prefixlen is presumably also set in the two address
 * cases - confirm.
 */
2539 void rfapiNexthop2Prefix(struct attr
*attr
, struct prefix
*p
)
2544 memset(p
, 0, sizeof(struct prefix
));
2546 switch (p
->family
= BGP_MP_NEXTHOP_FAMILY(attr
->mp_nexthop_len
)) {
2548 p
->u
.prefix4
= attr
->mp_nexthop_global_in
;
2553 p
->u
.prefix6
= attr
->mp_nexthop_global
;
2558 vnc_zlog_debug_verbose("%s: Family is unknown = %d", __func__
,
/*
 * rfapiUnicastNexthop2Prefix
 *
 * Unicast variant of rfapiNexthop2Prefix().  For AFI_IP the prefix is
 * built directly from attr->nexthop with family AF_INET; the call to
 * rfapiNexthop2Prefix() covers the other address families.
 *
 * afi   address family identifier of the route
 * attr  attribute set holding the next hop
 * p     prefix to fill in (out)
 *
 * NOTE(review): the rfapiNexthop2Prefix() call is presumably the else
 * branch of the AFI_IP test - confirm.
 */
2563 void rfapiUnicastNexthop2Prefix(afi_t afi
, struct attr
*attr
, struct prefix
*p
)
2565 if (afi
== AFI_IP
) {
2566 p
->family
= AF_INET
;
2568 p
->u
.prefix4
= attr
->nexthop
;
2570 rfapiNexthop2Prefix(attr
, p
);
/*
 * rfapiAttrNexthopAddrDifferent
 *
 * Compares the addresses held in two prefixes.  The families are compared
 * first; within one family the addresses are compared with IPV4_ADDR_SAME
 * on u.prefix4 or IPV6_ADDR_SAME on u.prefix6.  A NULL p1 or p2 is
 * reported through vnc_zlog_debug_verbose().
 *
 * NOTE(review): judging by the name the result is presumably non-zero when
 * the addresses differ and 0 when they are the same - confirm, including
 * the value produced for NULL arguments and for other families.
 */
2574 static int rfapiAttrNexthopAddrDifferent(struct prefix
*p1
, struct prefix
*p2
)
2577 vnc_zlog_debug_verbose("%s: p1 or p2 is NULL", __func__
);
2582 * Are address families the same?
2584 if (p1
->family
!= p2
->family
) {
2588 switch (p1
->family
) {
2590 if (IPV4_ADDR_SAME(&p1
->u
.prefix4
, &p2
->u
.prefix4
))
2595 if (IPV6_ADDR_SAME(&p1
->u
.prefix6
, &p2
->u
.prefix6
))
/*
 * rfapiCopyUnEncap2VPN
 *
 * Copies the UN address carried by an ENCAP route into the cached UN
 * fields of a VPN route (vpn_bi->extra->vnc.import.un_family and .un).
 *
 * A warning is logged when encap_bi->attr is NULL, and when vpn_bi or
 * vpn_bi->extra is NULL.  The address family is taken from
 * BGP_MP_NEXTHOP_FAMILY(encap_bi->attr->mp_nexthop_len): un_family becomes
 * AF_INET with un.addr4 copied from mp_nexthop_global_in, or AF_INET6 with
 * un.addr6 copied from mp_nexthop_global.  For any other next-hop length a
 * warning is logged and un_family is set to 0.
 *
 * NOTE(review): both warning cases presumably return without copying -
 * confirm.
 */
2606 static void rfapiCopyUnEncap2VPN(struct bgp_info
*encap_bi
,
2607 struct bgp_info
*vpn_bi
)
2609 if (!encap_bi
->attr
) {
2610 zlog_warn("%s: no encap bi attr/extra, can't copy UN address",
2615 if (!vpn_bi
|| !vpn_bi
->extra
) {
2616 zlog_warn("%s: no vpn bi attr/extra, can't copy UN address",
2621 switch (BGP_MP_NEXTHOP_FAMILY(encap_bi
->attr
->mp_nexthop_len
)) {
2625 * instrumentation to debug segfault of 091127
2627 vnc_zlog_debug_verbose("%s: vpn_bi=%p", __func__
, vpn_bi
);
2629 vnc_zlog_debug_verbose("%s: vpn_bi->extra=%p", __func__
,
2633 vpn_bi
->extra
->vnc
.import
.un_family
= AF_INET
;
2634 vpn_bi
->extra
->vnc
.import
.un
.addr4
=
2635 encap_bi
->attr
->mp_nexthop_global_in
;
2639 vpn_bi
->extra
->vnc
.import
.un_family
= AF_INET6
;
2640 vpn_bi
->extra
->vnc
.import
.un
.addr6
=
2641 encap_bi
->attr
->mp_nexthop_global
;
2645 zlog_warn("%s: invalid encap nexthop length: %d", __func__
,
2646 encap_bi
->attr
->mp_nexthop_len
);
2647 vpn_bi
->extra
->vnc
.import
.un_family
= 0;
2653 * returns 0 on success, nonzero on error
/*
 * rfapiWithdrawEncapUpdateCachedUn
 *
 * Refreshes the UN address cached in vpn_bi after the ENCAP routes of its
 * VN address changed.  Two paths are visible:
 *
 *  - Clearing: un_family is set to 0 and the cached un is zeroed.  If the
 *    bi is flagged BGP_INFO_VALID and rfapiGetVncTunnelUnAddr() on its attr
 *    returns non-zero, the flag is cleared, valid_interior_count of vpn_rn
 *    is decremented for interior route types, and
 *    vnc_import_bgp_exterior_del_route_interior() is called.
 *
 *  - Updating: rfapiCopyUnEncap2VPN() copies the address from encap_bi.  If
 *    the bi was not flagged BGP_INFO_VALID the flag is set,
 *    valid_interior_count is incremented for interior route types, and
 *    vnc_import_bgp_exterior_add_route_interior() is called.
 *
 * A warning is logged when vpn_bi or vpn_bi->extra is missing.
 *
 * NOTE(review): the clearing path is presumably selected when encap_bi is
 * NULL, and the missing-bi case presumably yields a non-zero result -
 * confirm.
 */
2655 static int rfapiWithdrawEncapUpdateCachedUn(
2656 struct rfapi_import_table
*import_table
, struct bgp_info
*encap_bi
,
2657 struct route_node
*vpn_rn
, struct bgp_info
*vpn_bi
)
2662 * clear cached UN address
2664 if (!vpn_bi
|| !vpn_bi
->extra
) {
2666 "%s: missing VPN bi/extra, can't clear UN addr",
2670 vpn_bi
->extra
->vnc
.import
.un_family
= 0;
2671 memset(&vpn_bi
->extra
->vnc
.import
.un
, 0,
2672 sizeof(vpn_bi
->extra
->vnc
.import
.un
));
2673 if (CHECK_FLAG(vpn_bi
->flags
, BGP_INFO_VALID
)) {
2674 if (rfapiGetVncTunnelUnAddr(vpn_bi
->attr
, NULL
)) {
2675 UNSET_FLAG(vpn_bi
->flags
, BGP_INFO_VALID
);
2676 if (VALID_INTERIOR_TYPE(vpn_bi
->type
))
2677 RFAPI_MONITOR_EXTERIOR(vpn_rn
)
2678 ->valid_interior_count
--;
2679 /* signal interior route withdrawal to
2680 * import-exterior */
2681 vnc_import_bgp_exterior_del_route_interior(
2682 bgp_get_default(), import_table
, vpn_rn
,
2689 zlog_warn("%s: missing VPN bi, can't clear UN addr",
2693 rfapiCopyUnEncap2VPN(encap_bi
, vpn_bi
);
2694 if (!CHECK_FLAG(vpn_bi
->flags
, BGP_INFO_VALID
)) {
2695 SET_FLAG(vpn_bi
->flags
, BGP_INFO_VALID
);
2696 if (VALID_INTERIOR_TYPE(vpn_bi
->type
))
2697 RFAPI_MONITOR_EXTERIOR(vpn_rn
)
2698 ->valid_interior_count
++;
2699 /* signal interior route withdrawal to import-exterior
2701 vnc_import_bgp_exterior_add_route_interior(
2702 bgp_get_default(), import_table
, vpn_rn
,
/*
 * rfapiWithdrawTimerEncap
 *
 * Thread callback that finally removes a withdrawn ENCAP route.  t->arg is
 * the struct rfapi_withdraw (wcb) naming the import table, node and bi.
 *
 * Visible steps:
 *  - was_first_route records whether bi was the head of the node's list;
 *  - the bi is detached (rfapiBgpInfoDetach) and freed (rfapiBgpInfoFree);
 *  - every encap monitor of the node is visited: the monitored VPN bi has
 *    its cached UN address refreshed from the new list head through
 *    rfapiWithdrawEncapUpdateCachedUn(), and the monitored VPN node is
 *    collected in a temporary skiplist (vpn_node_sl);
 *  - the collected VPN nodes are consumed one by one.  When the ENCAP node
 *    has no routes left rfapiMonitorMoveShorter() is tried and a move is
 *    announced with rfapiMonitorMovedUp(); rfapiMonitorItNodeChanged() is
 *    also called from this loop;
 *  - the node reference is dropped, wcb is freed and the temporary
 *    skiplist is released.
 *
 * NOTE(review): when bi was not the first route the monitor processing is
 * presumably skipped - confirm.
 * NOTE(review): the returned int is presumably always 0 - confirm.
 */
2709 static int rfapiWithdrawTimerEncap(struct thread
*t
)
2711 struct rfapi_withdraw
*wcb
= t
->arg
;
2712 struct bgp_info
*bi
= wcb
->info
;
2713 int was_first_route
= 0;
2714 struct rfapi_monitor_encap
*em
;
2715 struct skiplist
*vpn_node_sl
= skiplist_new(0, NULL
, NULL
);
2719 assert(wcb
->import_table
);
2721 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_ENCAP
, 0);
2723 if (wcb
->node
->info
== bi
)
2724 was_first_route
= 1;
2727 * Remove the route/bi and free it
2729 rfapiBgpInfoDetach(wcb
->node
, bi
);
2730 rfapiBgpInfoFree(bi
);
2732 if (!was_first_route
)
2735 for (em
= RFAPI_MONITOR_ENCAP(wcb
->node
); em
; em
= em
->next
) {
2738 * Update monitoring VPN BIs with new encap info at the
2739 * head of the encap bi chain (which could be NULL after
2740 * removing the expiring bi above)
2742 if (rfapiWithdrawEncapUpdateCachedUn(wcb
->import_table
,
2743 wcb
->node
->info
, em
->node
,
2748 * Build a list of unique VPN nodes referenced by these
2750 * Use a skiplist for speed.
2752 skiplist_insert(vpn_node_sl
, em
->node
, em
->node
);
2757 * for each VPN node referenced in the ENCAP monitors:
2759 struct route_node
*rn
;
2760 while (!skiplist_first(vpn_node_sl
, (void **)&rn
, NULL
)) {
2761 if (!wcb
->node
->info
) {
2762 struct rfapi_monitor_vpn
*moved
;
2764 moved
= rfapiMonitorMoveShorter(rn
, 0);
2766 // rfapiDoRouteCallback(wcb->import_table,
2767 // moved->node, moved);
2768 rfapiMonitorMovedUp(wcb
->import_table
, rn
,
2769 moved
->node
, moved
);
2772 // rfapiDoRouteCallback(wcb->import_table, rn, NULL);
2773 rfapiMonitorItNodeChanged(wcb
->import_table
, rn
, NULL
);
2775 skiplist_delete_first(vpn_node_sl
);
2779 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_ENCAP
, 1);
2780 route_unlock_node(wcb
->node
); /* decr ref count */
2781 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
2782 skiplist_free(vpn_node_sl
);
2788 * Works for both VPN and ENCAP routes; timer_service_func is different
/*
 * rfapiBiStartWithdrawTimer
 *
 * Puts bi into holddown: the route is flagged BGP_INFO_REMOVED and a timer
 * is armed whose service routine (timer_service_func) removes it later.
 *
 * Visible steps:
 *  - a bi that already carries BGP_INFO_REMOVED is reported as already
 *    being withdrawn;
 *  - the VNC lifetime is read with rfapiGetVncLifetime() and converted to
 *    a holddown time with rfapiGetHolddownFromLifetime();
 *  - a struct rfapi_withdraw (wcb) is allocated and handed to the timer as
 *    its argument; the service routine is expected to free it;
 *  - bgp_attr_intern() is called on bi->attr;
 *  - the timer handle is stored in bi->extra->vnc.import.timer;
 *  - when the node holds more than one route, bi is detached and
 *    re-inserted with rfapiBgpInfoAttachSorted() so that removed routes
 *    sort last.
 *
 * import_table        import table that owns rn
 * rn                  node the bi is attached to
 * bi                  route being withdrawn
 * afi                 forwarded to rfapiBgpInfoAttachSorted()
 * timer_service_func  callback run when the holddown timer expires
 *
 * NOTE(review): the already-withdrawn case presumably returns without
 * arming a second timer - confirm.
 */
2792 rfapiBiStartWithdrawTimer(struct rfapi_import_table
*import_table
,
2793 struct route_node
*rn
, struct bgp_info
*bi
, afi_t afi
,
2795 int (*timer_service_func
)(struct thread
*))
2798 struct rfapi_withdraw
*wcb
;
2801 CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
2804 * Already on the path to being withdrawn,
2805 * should already have a timer set up to
2808 vnc_zlog_debug_verbose(
2809 "%s: already being withdrawn, do nothing",
2814 rfapiGetVncLifetime(bi
->attr
, &lifetime
);
2815 vnc_zlog_debug_verbose("%s: VNC lifetime is %u", __func__
, lifetime
);
2818 * withdrawn routes get to hang around for a while
2820 SET_FLAG(bi
->flags
, BGP_INFO_REMOVED
);
2822 /* set timer to remove the route later */
2823 lifetime
= rfapiGetHolddownFromLifetime(lifetime
);
2824 vnc_zlog_debug_verbose("%s: using timeout %u", __func__
, lifetime
);
2827 * Stash import_table, node, and info for use by timer
2828 * service routine, which is supposed to free the wcb.
2830 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
2834 wcb
->import_table
= import_table
;
2835 bgp_attr_intern(bi
->attr
);
2837 if (VNC_DEBUG(VERBOSE
)) {
2838 vnc_zlog_debug_verbose(
2839 "%s: wcb values: node=%p, info=%p, import_table=%p (bi follows)",
2840 __func__
, wcb
->node
, wcb
->info
, wcb
->import_table
);
2841 rfapiPrintBi(NULL
, bi
);
/* large lifetimes use the seconds-based timer; the other branch computes lifetime * 1000 + jitter in a uint32_t */
2846 if (lifetime
> UINT32_MAX
/ 1001) {
2847 /* sub-optimal case, but will probably never happen */
2848 bi
->extra
->vnc
.import
.timer
= NULL
;
2849 thread_add_timer(bm
->master
, timer_service_func
, wcb
, lifetime
,
2850 &bi
->extra
->vnc
.import
.timer
);
2852 static uint32_t jitter
;
2853 uint32_t lifetime_msec
;
2856 * the goal here is to spread out the timers so they are
2857 * sortable in the skip list
2859 if (++jitter
>= 1000)
2862 lifetime_msec
= (lifetime
* 1000) + jitter
;
2864 bi
->extra
->vnc
.import
.timer
= NULL
;
2865 thread_add_timer_msec(bm
->master
, timer_service_func
, wcb
,
2867 &bi
->extra
->vnc
.import
.timer
);
2870 /* re-sort route list (BGP_INFO_REMOVED routes are last) */
2871 if (((struct bgp_info
*)rn
->info
)->next
) {
2872 rfapiBgpInfoDetach(rn
, bi
);
2873 rfapiBgpInfoAttachSorted(rn
, bi
, afi
, safi
);
/*
 * rfapi_bi_filtered_import_f
 *
 * Function type shared by the filtered-import handlers.  Its parameter
 * list matches the one of rfapiBgpInfoFilteredImportEncap() in this file:
 * import table, action code, peer, opaque rfd pointer, two prefixes, afi,
 * route distinguisher, attributes, route type, route sub-type and label
 * pointer.
 */
2878 typedef void(rfapi_bi_filtered_import_f
)(struct rfapi_import_table
*, int,
2879 struct peer
*, void *, struct prefix
*,
2880 struct prefix
*, afi_t
,
2881 struct prefix_rd
*, struct attr
*,
2882 u_char
, u_char
, uint32_t *);
/*
 * rfapiExpireEncapNow
 *
 * Removes an ENCAP route immediately by imitating an expired holddown
 * timer: a struct rfapi_withdraw is allocated and filled in, a zeroed
 * struct thread (t) is prepared on the stack, and
 * rfapiWithdrawTimerEncap() is invoked directly with it.  That routine
 * frees the allocated wcb.
 *
 * it  import table that owns rn
 * rn  ENCAP node the bi is attached to
 * bi  route to remove
 *
 * NOTE(review): wcb presumably receives rn and bi as node and info, and
 * t.arg presumably points at wcb before the call - confirm.
 */
2885 static void rfapiExpireEncapNow(struct rfapi_import_table
*it
,
2886 struct route_node
*rn
, struct bgp_info
*bi
)
2888 struct rfapi_withdraw
*wcb
;
2892 * pretend we're an expiring timer
2894 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
2897 wcb
->import_table
= it
;
2898 memset(&t
, 0, sizeof(t
));
2900 rfapiWithdrawTimerEncap(&t
); /* frees wcb */
/*
 * rfapiGetNexthop
 *
 * Extracts the multiprotocol next hop of attr as a host prefix.  Depending
 * on BGP_MP_NEXTHOP_FAMILY(attr->mp_nexthop_len) the prefix becomes
 * AF_INET with prefixlen 32 and the address from mp_nexthop_global_in, or
 * AF_INET6 with prefixlen 128 and the address from mp_nexthop_global.  An
 * unrecognised next-hop length is reported through
 * vnc_zlog_debug_verbose().
 *
 * attr    attribute set holding the next hop
 * prefix  prefix to fill in (out)
 *
 * NOTE(review): the caller in this file treats a non-zero result as a
 * missing next hop, so the result is presumably 0 on success and non-zero
 * for an unknown length - confirm.
 */
2903 static int rfapiGetNexthop(struct attr
*attr
, struct prefix
*prefix
)
2905 switch (BGP_MP_NEXTHOP_FAMILY(attr
->mp_nexthop_len
)) {
2907 prefix
->family
= AF_INET
;
2908 prefix
->prefixlen
= 32;
2909 prefix
->u
.prefix4
= attr
->mp_nexthop_global_in
;
2912 prefix
->family
= AF_INET6
;
2913 prefix
->prefixlen
= 128;
2914 prefix
->u
.prefix6
= attr
->mp_nexthop_global
;
2917 vnc_zlog_debug_verbose("%s: unknown attr->mp_nexthop_len %d",
2918 __func__
, attr
->mp_nexthop_len
);
2925 * import a bgp_info if its route target list intersects with the
2926 * import table's route target list
2928 static void rfapiBgpInfoFilteredImportEncap(
2929 struct rfapi_import_table
*import_table
, int action
, struct peer
*peer
,
2930 void *rfd
, /* set for looped back routes */
2932 struct prefix
*aux_prefix
, /* Unused for encap routes */
2933 afi_t afi
, struct prefix_rd
*prd
,
2934 struct attr
*attr
, /* part of bgp_info */
2935 u_char type
, /* part of bgp_info */
2936 u_char sub_type
, /* part of bgp_info */
2937 uint32_t *label
) /* part of bgp_info */
2939 struct route_table
*rt
= NULL
;
2940 struct route_node
*rn
;
2941 struct bgp_info
*info_new
;
2942 struct bgp_info
*bi
;
2943 struct bgp_info
*next
;
2946 struct prefix p_firstbi_old
;
2947 struct prefix p_firstbi_new
;
2949 const char *action_str
= NULL
;
2950 struct prefix un_prefix
;
2953 bgp
= bgp_get_default(); /* assume 1 instance for now */
2956 case FIF_ACTION_UPDATE
:
2957 action_str
= "update";
2959 case FIF_ACTION_WITHDRAW
:
2960 action_str
= "withdraw";
2962 case FIF_ACTION_KILL
:
2963 action_str
= "kill";
2970 vnc_zlog_debug_verbose(
2971 "%s: entry: %s: prefix %s/%d", __func__
, action_str
,
2972 inet_ntop(p
->family
, &p
->u
.prefix
, buf
, BUFSIZ
), p
->prefixlen
);
2974 memset(&p_firstbi_old
, 0, sizeof(p_firstbi_old
));
2975 memset(&p_firstbi_new
, 0, sizeof(p_firstbi_new
));
2977 if (action
== FIF_ACTION_UPDATE
) {
2979 * Compare rt lists. If no intersection, don't import this route
2980 * On a withdraw, peer and RD are sufficient to determine if
2983 if (!attr
|| !attr
->ecommunity
) {
2985 vnc_zlog_debug_verbose(
2986 "%s: attr, extra, or ecommunity missing, not importing",
2990 #if RFAPI_REQUIRE_ENCAP_BEEC
2991 if (!rfapiEcommunitiesMatchBeec(attr
->ecommunity
)) {
2992 vnc_zlog_debug_verbose(
2993 "%s: it=%p: no match for BGP Encapsulation ecommunity",
2994 __func__
, import_table
);
2998 if (!rfapiEcommunitiesIntersect(import_table
->rt_import_list
,
2999 attr
->ecommunity
)) {
3001 vnc_zlog_debug_verbose(
3002 "%s: it=%p: no ecommunity intersection",
3003 __func__
, import_table
);
3008 * Updates must also have a nexthop address
3010 memset(&un_prefix
, 0,
3011 sizeof(un_prefix
)); /* keep valgrind happy */
3012 if (rfapiGetNexthop(attr
, &un_prefix
)) {
3013 vnc_zlog_debug_verbose("%s: missing nexthop address",
3020 * Figure out which radix tree the route would go into
3025 rt
= import_table
->imported_encap
[afi
];
3029 zlog_err("%s: bad afi %d", __func__
, afi
);
3034 * route_node_lookup returns a node only if there is at least
3035 * one route attached.
3037 rn
= route_node_lookup(rt
, p
);
3039 #if DEBUG_ENCAP_MONITOR
3040 vnc_zlog_debug_verbose("%s: initial encap lookup(it=%p) rn=%p",
3041 __func__
, import_table
, rn
);
3046 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, 1);
3047 route_unlock_node(rn
); /* undo lock in route_node_lookup */
3051 * capture nexthop of first bi
3054 rfapiNexthop2Prefix(
3055 ((struct bgp_info
*)(rn
->info
))->attr
,
3059 for (bi
= rn
->info
; bi
; bi
= bi
->next
) {
3062 * Does this bgp_info refer to the same route
3063 * as we are trying to add?
3065 vnc_zlog_debug_verbose("%s: comparing BI %p", __func__
,
3072 * RD of import table bi is in bi->extra->vnc.import.rd
3073 * RD of info_orig is in prd
3076 vnc_zlog_debug_verbose("%s: no bi->extra",
3081 (struct prefix
*)&bi
->extra
->vnc
.import
.rd
,
3082 (struct prefix
*)prd
)) {
3084 vnc_zlog_debug_verbose("%s: prd does not match",
3092 if (bi
->peer
!= peer
) {
3093 vnc_zlog_debug_verbose(
3094 "%s: peer does not match", __func__
);
3098 vnc_zlog_debug_verbose("%s: found matching bi",
3101 /* Same route. Delete this bi, replace with new one */
3103 if (action
== FIF_ACTION_WITHDRAW
) {
3105 vnc_zlog_debug_verbose(
3106 "%s: withdrawing at prefix %s/%d",
3108 inet_ntop(rn
->p
.family
, &rn
->p
.u
.prefix
,
3112 rfapiBiStartWithdrawTimer(
3113 import_table
, rn
, bi
, afi
, SAFI_ENCAP
,
3114 rfapiWithdrawTimerEncap
);
3117 vnc_zlog_debug_verbose(
3118 "%s: %s at prefix %s/%d", __func__
,
3119 ((action
== FIF_ACTION_KILL
)
3122 inet_ntop(rn
->p
.family
, &rn
->p
.u
.prefix
,
3127 * If this route is waiting to be deleted
3129 * a previous withdraw, we must cancel its
3132 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
3133 && bi
->extra
->vnc
.import
.timer
) {
3136 (struct thread
*)bi
->extra
->vnc
3138 struct rfapi_withdraw
*wcb
= t
->arg
;
3140 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3144 if (action
== FIF_ACTION_UPDATE
) {
3145 rfapiBgpInfoDetach(rn
, bi
);
3146 rfapiBgpInfoFree(bi
);
3150 * Kill: do export stuff when removing
3153 struct rfapi_withdraw
*wcb
;
3157 * pretend we're an expiring timer
3160 MTYPE_RFAPI_WITHDRAW
,
3161 sizeof(struct rfapi_withdraw
));
3164 wcb
->import_table
= import_table
;
3165 memset(&t
, 0, sizeof(t
));
3167 rfapiWithdrawTimerEncap(
3168 &t
); /* frees wcb */
3177 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, replacing
? 1 : 0);
3179 if (action
== FIF_ACTION_WITHDRAW
|| action
== FIF_ACTION_KILL
)
3183 rfapiBgpInfoCreate(attr
, peer
, rfd
, prd
, type
, sub_type
, NULL
);
3187 route_lock_node(rn
); /* incr ref count for new BI */
3189 rn
= route_node_get(rt
, p
);
3192 vnc_zlog_debug_verbose(
3193 "%s: (afi=%d, rn=%p) inserting at prefix %s/%d", __func__
, afi
,
3194 rn
, inet_ntop(rn
->p
.family
, &rn
->p
.u
.prefix
, buf
, BUFSIZ
),
3197 rfapiBgpInfoAttachSorted(rn
, info_new
, afi
, SAFI_ENCAP
);
3200 * Delete holddown routes from same NVE. See details in
3201 * rfapiBgpInfoFilteredImportVPN()
3203 for (bi
= info_new
->next
; bi
; bi
= next
) {
3205 struct prefix pfx_un
;
3209 if (!CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
))
3213 * We already match the VN address (it is the prefix
3214 * of the route node)
3217 if (!rfapiGetNexthop(bi
->attr
, &pfx_un
)
3218 && prefix_same(&pfx_un
, &un_prefix
)) {
3226 vnc_zlog_debug_verbose(
3227 "%s: removing holddown bi matching NVE of new route",
3229 if (bi
->extra
->vnc
.import
.timer
) {
3231 (struct thread
*)bi
->extra
->vnc
.import
.timer
;
3232 struct rfapi_withdraw
*wcb
= t
->arg
;
3234 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3237 rfapiExpireEncapNow(import_table
, rn
, bi
);
3240 rfapiNexthop2Prefix(((struct bgp_info
*)(rn
->info
))->attr
,
3244 * If the nexthop address of the selected Encap route (i.e.,
3245 * the UN address) has changed, then we must update the VPN
3246 * routes that refer to this Encap route and possibly force
3249 if (rfapiAttrNexthopAddrDifferent(&p_firstbi_old
, &p_firstbi_new
)) {
3251 struct rfapi_monitor_encap
*m
;
3252 struct rfapi_monitor_encap
*mnext
;
3254 struct route_node
*referenced_vpn_prefix
;
3257 * Optimized approach: build radix tree on the fly to
3258 * hold list of VPN nodes referenced by the ENCAP monitors
3260 * The nodes in this table correspond to prefixes of VPN routes.
3261 * The "info" pointer of the node points to a chain of
3262 * struct rfapi_monitor_encap, each of which refers to a
3263 * specific VPN node.
3265 struct route_table
*referenced_vpn_table
;
3267 referenced_vpn_table
= route_table_init();
3268 assert(referenced_vpn_table
);
3271 * iterate over the set of monitors at this ENCAP node.
3273 #if DEBUG_ENCAP_MONITOR
3274 vnc_zlog_debug_verbose("%s: examining monitors at rn=%p",
3277 for (m
= RFAPI_MONITOR_ENCAP(rn
); m
; m
= m
->next
) {
3280 * For each referenced bi/route, copy the ENCAP route's
3281 * nexthop to the VPN route's cached UN address field
3283 * the address family of the cached UN address field.
3285 rfapiCopyUnEncap2VPN(info_new
, m
->bi
);
3286 if (!CHECK_FLAG(m
->bi
->flags
, BGP_INFO_VALID
)) {
3287 SET_FLAG(m
->bi
->flags
, BGP_INFO_VALID
);
3288 if (VALID_INTERIOR_TYPE(m
->bi
->type
))
3289 RFAPI_MONITOR_EXTERIOR(m
->node
)
3290 ->valid_interior_count
++;
3291 vnc_import_bgp_exterior_add_route_interior(
3292 bgp
, import_table
, m
->node
, m
->bi
);
3296 * Build a list of unique VPN nodes referenced by these
3299 * There could be more than one VPN node here with a
3301 * prefix. Those are currently in an unsorted linear
3306 referenced_vpn_prefix
= route_node_get(
3307 referenced_vpn_table
, &m
->node
->p
);
3308 assert(referenced_vpn_prefix
);
3309 for (mnext
= referenced_vpn_prefix
->info
; mnext
;
3310 mnext
= mnext
->next
) {
3312 if (mnext
->node
== m
->node
)
3318 * already have an entry for this VPN node
3320 route_unlock_node(referenced_vpn_prefix
);
3323 MTYPE_RFAPI_MONITOR_ENCAP
,
3324 sizeof(struct rfapi_monitor_encap
));
3326 mnext
->node
= m
->node
;
3327 mnext
->next
= referenced_vpn_prefix
->info
;
3328 referenced_vpn_prefix
->info
= mnext
;
3333 * for each VPN node referenced in the ENCAP monitors:
3335 for (referenced_vpn_prefix
= route_top(referenced_vpn_table
);
3336 referenced_vpn_prefix
; referenced_vpn_prefix
= route_next(
3337 referenced_vpn_prefix
)) {
3339 while ((m
= referenced_vpn_prefix
->info
)) {
3341 struct route_node
*n
;
3343 rfapiMonitorMoveLonger(m
->node
);
3344 for (n
= m
->node
; n
; n
= n
->parent
) {
3345 // rfapiDoRouteCallback(import_table, n,
3348 rfapiMonitorItNodeChanged(import_table
, m
->node
,
3351 referenced_vpn_prefix
->info
= m
->next
;
3352 route_unlock_node(referenced_vpn_prefix
);
3353 XFREE(MTYPE_RFAPI_MONITOR_ENCAP
, m
);
3356 route_table_finish(referenced_vpn_table
);
3359 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, 0);
3362 static void rfapiExpireVpnNow(struct rfapi_import_table
*it
,
3363 struct route_node
*rn
, struct bgp_info
*bi
,
3366 struct rfapi_withdraw
*wcb
;
3370 * pretend we're an expiring timer
3372 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
3375 wcb
->import_table
= it
;
3376 wcb
->lockoffset
= lockoffset
;
3377 memset(&t
, 0, sizeof(t
));
3379 rfapiWithdrawTimerVPN(&t
); /* frees wcb */
3384 * import a bgp_info if its route target list intersects with the
3385 * import table's route target list
3387 void rfapiBgpInfoFilteredImportVPN(
3388 struct rfapi_import_table
*import_table
, int action
, struct peer
*peer
,
3389 void *rfd
, /* set for looped back routes */
3391 struct prefix
*aux_prefix
, /* AFI_L2VPN: optional IP */
3392 afi_t afi
, struct prefix_rd
*prd
,
3393 struct attr
*attr
, /* part of bgp_info */
3394 u_char type
, /* part of bgp_info */
3395 u_char sub_type
, /* part of bgp_info */
3396 uint32_t *label
) /* part of bgp_info */
3398 struct route_table
*rt
= NULL
;
3399 struct route_node
*rn
;
3400 struct route_node
*n
;
3401 struct bgp_info
*info_new
;
3402 struct bgp_info
*bi
;
3403 struct bgp_info
*next
;
3405 struct prefix vn_prefix
;
3406 struct prefix un_prefix
;
3407 int un_prefix_valid
= 0;
3408 struct route_node
*ern
;
3410 int original_had_routes
= 0;
3411 struct prefix original_nexthop
;
3412 const char *action_str
= NULL
;
3416 bgp
= bgp_get_default(); /* assume 1 instance for now */
3419 case FIF_ACTION_UPDATE
:
3420 action_str
= "update";
3422 case FIF_ACTION_WITHDRAW
:
3423 action_str
= "withdraw";
3425 case FIF_ACTION_KILL
:
3426 action_str
= "kill";
3433 if (import_table
== bgp
->rfapi
->it_ce
)
3436 vnc_zlog_debug_verbose("%s: entry: %s%s: prefix %s/%d: it %p, afi %s",
3437 __func__
, (is_it_ce
? "CE-IT " : ""), action_str
,
3438 rfapi_ntop(p
->family
, &p
->u
.prefix
, buf
, BUFSIZ
),
3439 p
->prefixlen
, import_table
, afi2str(afi
));
3444 * Compare rt lists. If no intersection, don't import this route
3445 * On a withdraw, peer and RD are sufficient to determine if
3448 if (action
== FIF_ACTION_UPDATE
) {
3449 if (!attr
|| !attr
->ecommunity
) {
3451 vnc_zlog_debug_verbose(
3452 "%s: attr, extra, or ecommunity missing, not importing",
3456 if ((import_table
!= bgp
->rfapi
->it_ce
)
3457 && !rfapiEcommunitiesIntersect(import_table
->rt_import_list
,
3458 attr
->ecommunity
)) {
3460 vnc_zlog_debug_verbose(
3461 "%s: it=%p: no ecommunity intersection",
3462 __func__
, import_table
);
3466 memset(&vn_prefix
, 0,
3467 sizeof(vn_prefix
)); /* keep valgrind happy */
3468 if (rfapiGetNexthop(attr
, &vn_prefix
)) {
3469 /* missing nexthop address would be a bad, bad thing */
3470 vnc_zlog_debug_verbose("%s: missing nexthop", __func__
);
3476 * Figure out which radix tree the route would go into
3482 rt
= import_table
->imported_vpn
[afi
];
3486 zlog_err("%s: bad afi %d", __func__
, afi
);
3491 memset(&original_nexthop
, 0, sizeof(original_nexthop
));
3494 * route_node_lookup returns a node only if there is at least
3495 * one route attached.
3497 rn
= route_node_lookup(rt
, p
);
3499 vnc_zlog_debug_verbose("%s: rn=%p", __func__
, rn
);
3503 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, 1);
3504 route_unlock_node(rn
); /* undo lock in route_node_lookup */
3507 original_had_routes
= 1;
3509 if (VNC_DEBUG(VERBOSE
)) {
3510 vnc_zlog_debug_verbose("%s: showing IT node on entry",
3512 rfapiShowItNode(NULL
, rn
); /* debug */
3516 * Look for same route (will have same RD and peer)
3518 bi
= rfapiItBiIndexSearch(rn
, prd
, peer
, aux_prefix
);
3523 * This was an old test when we iterated over the
3524 * BIs linearly. Since we're now looking up with
3525 * RD and peer, comparing types should not be
3526 * needed. Changed to assertion.
3528 * Compare types. Doing so prevents a RFP-originated
3529 * route from matching an imported route, for example.
3531 if (VNC_DEBUG(VERBOSE
) && bi
->type
!= type
)
3532 /* should be handled by RDs, but warn for now */
3533 zlog_warn("%s: type mismatch! (bi=%d, arg=%d)",
3534 __func__
, bi
->type
, type
);
3536 vnc_zlog_debug_verbose("%s: found matching bi",
3540 * In the special CE table, withdrawals occur without
3543 if (import_table
== bgp
->rfapi
->it_ce
) {
3544 vnc_direct_bgp_del_route_ce(bgp
, rn
, bi
);
3545 if (action
== FIF_ACTION_WITHDRAW
)
3546 action
= FIF_ACTION_KILL
;
3549 if (action
== FIF_ACTION_WITHDRAW
) {
3552 CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
);
3554 vnc_zlog_debug_verbose(
3555 "%s: withdrawing at prefix %s/%d%s",
3556 __func__
, rfapi_ntop(rn
->p
.family
,
3561 ? " (already being withdrawn)"
3566 rfapiBiStartWithdrawTimer(
3567 import_table
, rn
, bi
, afi
,
3569 rfapiWithdrawTimerVPN
);
3571 RFAPI_UPDATE_ITABLE_COUNT(
3572 bi
, import_table
, afi
, -1);
3573 import_table
->holddown_count
[afi
] += 1;
3577 vnc_zlog_debug_verbose(
3578 "%s: %s at prefix %s/%d", __func__
,
3579 ((action
== FIF_ACTION_KILL
)
3582 rfapi_ntop(rn
->p
.family
,
3583 &rn
->p
.u
.prefix
, buf
,
3588 * If this route is waiting to be deleted
3590 * a previous withdraw, we must cancel its
3593 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)
3594 && bi
->extra
->vnc
.import
.timer
) {
3597 (struct thread
*)bi
->extra
->vnc
3599 struct rfapi_withdraw
*wcb
= t
->arg
;
3601 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3604 import_table
->holddown_count
[afi
] -= 1;
3605 RFAPI_UPDATE_ITABLE_COUNT(
3606 bi
, import_table
, afi
, 1);
3609 * decrement remote count (if route is remote)
3611 * we are going to remove it below
3613 RFAPI_UPDATE_ITABLE_COUNT(bi
, import_table
, afi
,
3615 if (action
== FIF_ACTION_UPDATE
) {
3619 * make copy of original nexthop so we
3620 * can see if it changed
3622 rfapiGetNexthop(bi
->attr
,
3626 * remove bi without doing any export
3629 if (CHECK_FLAG(bi
->flags
,
3631 && VALID_INTERIOR_TYPE(bi
->type
))
3632 RFAPI_MONITOR_EXTERIOR(rn
)
3633 ->valid_interior_count
--;
3634 rfapiItBiIndexDel(rn
, bi
);
3635 rfapiBgpInfoDetach(rn
, bi
);
3636 rfapiMonitorEncapDelete(bi
);
3637 vnc_import_bgp_exterior_del_route_interior(
3638 bgp
, import_table
, rn
, bi
);
3639 rfapiBgpInfoFree(bi
);
3643 * remove bi and do export processing
3645 import_table
->holddown_count
[afi
] += 1;
3646 rfapiExpireVpnNow(import_table
, rn
, bi
,
3654 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, replacing
? 1 : 0);
3656 if (action
== FIF_ACTION_WITHDRAW
|| action
== FIF_ACTION_KILL
) {
3662 rfapiBgpInfoCreate(attr
, peer
, rfd
, prd
, type
, sub_type
, label
);
3665 * lookup un address in encap table
3667 ern
= route_node_match(import_table
->imported_encap
[afi
], &vn_prefix
);
3669 rfapiCopyUnEncap2VPN(ern
->info
, info_new
);
3670 route_unlock_node(ern
); /* undo lock in route_note_match */
3672 char buf
[PREFIX_STRLEN
];
3674 prefix2str(&vn_prefix
, buf
, sizeof(buf
));
3675 /* Not a big deal, just means VPN route got here first */
3676 vnc_zlog_debug_verbose("%s: no encap route for vn addr %s",
3678 info_new
->extra
->vnc
.import
.un_family
= 0;
3683 route_lock_node(rn
);
3686 * No need to increment reference count, so only "get"
3687 * if the node is not there already
3689 rn
= route_node_get(rt
, p
);
3693 * For ethernet routes, if there is an accompanying IP address,
3696 if ((AFI_L2VPN
== afi
) && aux_prefix
) {
3698 vnc_zlog_debug_verbose("%s: setting BI's aux_prefix", __func__
);
3699 info_new
->extra
->vnc
.import
.aux_prefix
= *aux_prefix
;
3702 vnc_zlog_debug_verbose(
3703 "%s: inserting bi %p at prefix %s/%d #%d", __func__
, info_new
,
3704 rfapi_ntop(rn
->p
.family
, &rn
->p
.u
.prefix
, buf
, BUFSIZ
),
3705 rn
->p
.prefixlen
, rn
->lock
);
3707 rfapiBgpInfoAttachSorted(rn
, info_new
, afi
, SAFI_MPLS_VPN
);
3708 rfapiItBiIndexAdd(rn
, info_new
);
3709 if (!rfapiGetUnAddrOfVpnBi(info_new
, NULL
)) {
3710 if (VALID_INTERIOR_TYPE(info_new
->type
))
3711 RFAPI_MONITOR_EXTERIOR(rn
)->valid_interior_count
++;
3712 SET_FLAG(info_new
->flags
, BGP_INFO_VALID
);
3714 RFAPI_UPDATE_ITABLE_COUNT(info_new
, import_table
, afi
, 1);
3715 vnc_import_bgp_exterior_add_route_interior(bgp
, import_table
, rn
,
3718 if (import_table
== bgp
->rfapi
->it_ce
)
3719 vnc_direct_bgp_add_route_ce(bgp
, rn
, info_new
);
3721 if (VNC_DEBUG(VERBOSE
)) {
3722 vnc_zlog_debug_verbose("%s: showing IT node", __func__
);
3723 rfapiShowItNode(NULL
, rn
); /* debug */
3726 rfapiMonitorEncapAdd(import_table
, &vn_prefix
, rn
, info_new
);
3728 if (!rfapiGetUnAddrOfVpnBi(info_new
, &un_prefix
)) {
3731 * if we have a valid UN address (either via Encap route
3732 * or via tunnel attribute), then we should attempt
3733 * to move any monitors at less-specific nodes to this node
3735 rfapiMonitorMoveLonger(rn
);
3737 un_prefix_valid
= 1;
3741 * 101129 Enhancement: if we add a route (implication: it is not
3742 * in holddown), delete all other routes from this nve at this
3743 * node that are in holddown, regardless of peer.
3745 * Reasons it's OK to do that:
3747 * - if the holddown route being deleted originally came from BGP VPN,
3748 * it is already gone from BGP (implication of holddown), so there
3749 * won't be any added inconsistency with the BGP RIB.
3751 * - once a fresh route is added at a prefix, any routes in holddown
3752 * at that prefix will not show up in RFP responses, so deleting
3753 * the holddown routes won't affect the contents of responses.
3755 * - lifetimes are supposed to be consistent, so there should not
3756 * be a case where the fresh route has a shorter lifetime than
3757 * the holddown route, so we don't expect the fresh route to
3758 * disappear and complete its holddown time before the existing
3759 * holddown routes time out. Therefore, we won't have a situation
3760 * where we expect the existing holddown routes to be hidden and
3761 * then to reappear sometime later (as holddown routes) in a
3764 * Among other things, this would enable us to skirt the problem
3765 * of local holddown routes that refer to NVE descriptors that
3766 * have already been closed (if the same NVE triggers a subsequent
3767 * rfapi_open(), the new peer is different and doesn't match the
3768 * peer of the holddown route, so the stale holddown route still
3769 * hangs around until it times out instead of just being replaced
3770 * by the fresh route).
3773 * We know that the new bi will have been inserted before any routes
3774 * in holddown, so we can skip any that came before it
3776 for (bi
= info_new
->next
; bi
; bi
= next
) {
3778 struct prefix pfx_vn
;
3779 struct prefix pfx_un
;
3781 int remote_peer_match
= 0;
3788 if (!CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
))
3792 * Must match VN address (nexthop of VPN route)
3794 if (rfapiGetNexthop(bi
->attr
, &pfx_vn
))
3796 if (!prefix_same(&pfx_vn
, &vn_prefix
))
3799 if (un_prefix_valid
&& /* new route UN addr */
3800 !rfapiGetUnAddrOfVpnBi(bi
, &pfx_un
)
3801 && /* old route UN addr */
3802 prefix_same(&pfx_un
, &un_prefix
)) { /* compare */
3805 if (!RFAPI_LOCAL_BI(bi
) && !RFAPI_LOCAL_BI(info_new
)
3806 && sockunion_same(&bi
->peer
->su
, &info_new
->peer
->su
)) {
3807 /* old & new are both remote, same peer */
3808 remote_peer_match
= 1;
3811 if (!un_match
& !remote_peer_match
)
3814 vnc_zlog_debug_verbose(
3815 "%s: removing holddown bi matching NVE of new route",
3817 if (bi
->extra
->vnc
.import
.timer
) {
3819 (struct thread
*)bi
->extra
->vnc
.import
.timer
;
3820 struct rfapi_withdraw
*wcb
= t
->arg
;
3822 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3825 rfapiExpireVpnNow(import_table
, rn
, bi
, 0);
3828 if (!original_had_routes
) {
3830 * We went from 0 usable routes to 1 usable route. Perform the
3831 * "Adding a Route" export process.
3833 vnc_direct_bgp_add_prefix(bgp
, import_table
, rn
);
3834 vnc_zebra_add_prefix(bgp
, import_table
, rn
);
3837 * Check for nexthop change event
3838 * Note: the prefix_same() test below detects two situations:
3839 * 1. route is replaced, new route has different nexthop
3840 * 2. new route is added (original_nexthop is 0)
3842 struct prefix new_nexthop
;
3844 rfapiGetNexthop(attr
, &new_nexthop
);
3845 if (!prefix_same(&original_nexthop
, &new_nexthop
)) {
3847 * nexthop change event
3848 * vnc_direct_bgp_add_prefix() will recompute VN addr
3851 vnc_direct_bgp_add_prefix(bgp
, import_table
, rn
);
3855 if (!(bgp
->rfapi_cfg
->flags
& BGP_VNC_CONFIG_CALLBACK_DISABLE
)) {
3856 for (n
= rn
; n
; n
= n
->parent
) {
3857 // rfapiDoRouteCallback(import_table, n, NULL);
3859 rfapiMonitorItNodeChanged(import_table
, rn
, NULL
);
3861 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, 0);
3865 static void rfapiBgpInfoFilteredImportBadSafi(
3866 struct rfapi_import_table
*import_table
, int action
, struct peer
*peer
,
3867 void *rfd
, /* set for looped back routes */
3869 struct prefix
*aux_prefix
, /* AFI_L2VPN: optional IP */
3870 afi_t afi
, struct prefix_rd
*prd
,
3871 struct attr
*attr
, /* part of bgp_info */
3872 u_char type
, /* part of bgp_info */
3873 u_char sub_type
, /* part of bgp_info */
3874 uint32_t *label
) /* part of bgp_info */
3876 vnc_zlog_debug_verbose("%s: Error, bad safi", __func__
);
3879 static rfapi_bi_filtered_import_f
*
3880 rfapiBgpInfoFilteredImportFunction(safi_t safi
)
3884 return rfapiBgpInfoFilteredImportVPN
;
3887 return rfapiBgpInfoFilteredImportEncap
;
3891 zlog_err("%s: bad safi %d", __func__
, safi
);
3892 return rfapiBgpInfoFilteredImportBadSafi
;
3896 void rfapiProcessUpdate(struct peer
*peer
,
3897 void *rfd
, /* set when looped from RFP/RFAPI */
3898 struct prefix
*p
, struct prefix_rd
*prd
,
3899 struct attr
*attr
, afi_t afi
, safi_t safi
, u_char type
,
3900 u_char sub_type
, uint32_t *label
)
3904 struct rfapi_import_table
*it
;
3905 int has_ip_route
= 1;
3908 bgp
= bgp_get_default(); /* assume 1 instance for now */
3915 * look at high-order byte of RD. FF means MAC
3916 * address is present (VNC L2VPN)
3918 if ((safi
== SAFI_MPLS_VPN
)
3919 && (decode_rd_type(prd
->val
) == RD_TYPE_VNC_ETH
)) {
3920 struct prefix pfx_mac_buf
;
3921 struct prefix pfx_nexthop_buf
;
3925 * Set flag if prefix and nexthop are the same - don't
3926 * add the route to normal IP-based import tables
3928 if (!rfapiGetNexthop(attr
, &pfx_nexthop_buf
)) {
3929 if (!prefix_cmp(&pfx_nexthop_buf
, p
)) {
3934 memset(&pfx_mac_buf
, 0, sizeof(pfx_mac_buf
));
3935 pfx_mac_buf
.family
= AF_ETHERNET
;
3936 pfx_mac_buf
.prefixlen
= 48;
3937 memcpy(&pfx_mac_buf
.u
.prefix_eth
.octet
, prd
->val
+ 2, 6);
3940 * Find rt containing LNI (Logical Network ID), which
3941 * _should_ always be present when mac address is present
3943 rc
= rfapiEcommunityGetLNI(attr
->ecommunity
, &lni
);
3945 vnc_zlog_debug_verbose(
3946 "%s: rfapiEcommunityGetLNI returned %d, lni=%d, attr=%p",
3947 __func__
, rc
, lni
, attr
);
3949 it
= rfapiMacImportTableGet(bgp
, lni
);
3951 rfapiBgpInfoFilteredImportVPN(
3952 it
, FIF_ACTION_UPDATE
, peer
, rfd
,
3953 &pfx_mac_buf
, /* prefix */
3954 p
, /* aux prefix: IP addr */
3955 AFI_L2VPN
, prd
, attr
, type
, sub_type
, label
);
3963 * Iterate over all import tables; do a filtered import
3964 * for the afi/safi combination
3966 for (it
= h
->imports
; it
; it
= it
->next
) {
3967 (*rfapiBgpInfoFilteredImportFunction(safi
))(
3968 it
, FIF_ACTION_UPDATE
, peer
, rfd
, p
, /* prefix */
3969 NULL
, afi
, prd
, attr
, type
, sub_type
, label
);
3972 if (safi
== SAFI_MPLS_VPN
) {
3973 vnc_direct_bgp_rh_add_route(bgp
, afi
, p
, peer
, attr
);
3974 rfapiBgpInfoFilteredImportVPN(
3975 bgp
->rfapi
->it_ce
, FIF_ACTION_UPDATE
, peer
, rfd
,
3977 NULL
, afi
, prd
, attr
, type
, sub_type
, label
);
3982 void rfapiProcessWithdraw(struct peer
*peer
, void *rfd
, struct prefix
*p
,
3983 struct prefix_rd
*prd
, struct attr
*attr
, afi_t afi
,
3984 safi_t safi
, u_char type
, int kill
)
3988 struct rfapi_import_table
*it
;
3990 bgp
= bgp_get_default(); /* assume 1 instance for now */
3997 * look at high-order byte of RD. FF means MAC
3998 * address is present (VNC L2VPN)
4000 if (h
->import_mac
!= NULL
&& safi
== SAFI_MPLS_VPN
4001 && decode_rd_type(prd
->val
) == RD_TYPE_VNC_ETH
) {
4002 struct prefix pfx_mac_buf
;
4003 void *cursor
= NULL
;
4006 memset(&pfx_mac_buf
, 0, sizeof(pfx_mac_buf
));
4007 pfx_mac_buf
.family
= AF_ETHERNET
;
4008 pfx_mac_buf
.prefixlen
= 48;
4009 memcpy(&pfx_mac_buf
.u
.prefix_eth
, prd
->val
+ 2, 6);
4012 * withdraw does not contain attrs, so we don't have
4013 * access to the route's LNI, which would ordinarily
4014 * select the specific mac-based import table. Instead,
4015 * we must iterate over all mac-based tables and rely
4016 * on the RD to match.
4018 * If this approach is too slow, add an index where
4019 * key is {RD, peer} and value is the import table
4021 for (rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4023 rc
== 0; rc
= skiplist_next(h
->import_mac
, NULL
,
4024 (void **)&it
, &cursor
)) {
4027 vnc_zlog_debug_verbose(
4028 "%s: calling rfapiBgpInfoFilteredImportVPN(it=%p, afi=AFI_L2VPN)",
4032 rfapiBgpInfoFilteredImportVPN(
4034 (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
),
4035 peer
, rfd
, &pfx_mac_buf
, /* prefix */
4036 p
, /* aux_prefix: IP */
4037 AFI_L2VPN
, prd
, attr
, type
, 0,
4038 NULL
); /* sub_type & label unused for withdraw
4044 * XXX For the case where the withdraw involves an L2
4045 * route with no IP information, we rely on the lack
4046 * of RT-list intersection to filter out the withdraw
4047 * from the IP-based import tables below
4051 * Iterate over all import tables; do a filtered import
4052 * for the afi/safi combination
4055 for (it
= h
->imports
; it
; it
= it
->next
) {
4056 (*rfapiBgpInfoFilteredImportFunction(safi
))(
4057 it
, (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
),
4058 peer
, rfd
, p
, /* prefix */
4059 NULL
, afi
, prd
, attr
, type
, 0,
4060 NULL
); /* sub_type & label unused for withdraw */
4063 /* TBD the deletion should happen after the lifetime expires */
4064 if (safi
== SAFI_MPLS_VPN
)
4065 vnc_direct_bgp_rh_del_route(bgp
, afi
, p
, peer
);
4067 if (safi
== SAFI_MPLS_VPN
) {
4068 rfapiBgpInfoFilteredImportVPN(
4070 (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
), peer
,
4071 rfd
, p
, /* prefix */
4072 NULL
, afi
, prd
, attr
, type
, 0,
4073 NULL
); /* sub_type & label unused for withdraw */
/*
 * TBD optimized withdraw timer algorithm for case of many
 * routes expiring at the same time due to peer drop.
 */
/*
 * 1. Visit all BIs in all ENCAP import tables.
 *
 *    a. If a bi's peer is the failed peer, remove the bi.
 *    b. If the removed ENCAP bi was first in the list of
 *       BIs at this ENCAP node, loop over all monitors
 *       at this node:
 *
 *       (1) for each ENCAP monitor, loop over all its
 *           VPN node monitors and set their RFAPI_MON_FLAG_NEEDCALLBACK
 *           flags.
 *
 * 2. Visit all BIs in all VPN import tables.
 *    a. If a bi's peer is the failed peer, remove the bi.
 *    b. loop over all the VPN node monitors and set their
 *       RFAPI_MON_FLAG_NEEDCALLBACK flags
 *    c. If there are no BIs left at this VPN node,
 *
 */
4102 /* surprise, this gets called from peer_delete(), from rfapi_close() */
4103 static void rfapiProcessPeerDownRt(struct peer
*peer
,
4104 struct rfapi_import_table
*import_table
,
4105 afi_t afi
, safi_t safi
)
4107 struct route_node
*rn
;
4108 struct bgp_info
*bi
;
4109 struct route_table
*rt
;
4110 int (*timer_service_func
)(struct thread
*);
4112 assert(afi
== AFI_IP
|| afi
== AFI_IP6
);
4118 rt
= import_table
->imported_vpn
[afi
];
4119 timer_service_func
= rfapiWithdrawTimerVPN
;
4122 rt
= import_table
->imported_encap
[afi
];
4123 timer_service_func
= rfapiWithdrawTimerEncap
;
4130 for (rn
= route_top(rt
); rn
; rn
= route_next(rn
)) {
4131 for (bi
= rn
->info
; bi
; bi
= bi
->next
) {
4132 if (bi
->peer
== peer
) {
4134 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)) {
4135 /* already in holddown, skip */
4139 if (safi
== SAFI_MPLS_VPN
) {
4140 RFAPI_UPDATE_ITABLE_COUNT(
4141 bi
, import_table
, afi
, -1);
4142 import_table
->holddown_count
[afi
] += 1;
4144 rfapiBiStartWithdrawTimer(import_table
, rn
, bi
,
4146 timer_service_func
);
4154 * This gets called when a peer connection drops. We have to remove
4155 * all the routes from this peer.
4157 * Current approach is crude. TBD Optimize by setting fewer timers and
4158 * grouping withdrawn routes so we can generate callbacks more
4161 void rfapiProcessPeerDown(struct peer
*peer
)
4165 struct rfapi_import_table
*it
;
4168 * If this peer is a "dummy" peer structure atached to a RFAPI
4169 * nve_descriptor, we don't need to walk the import tables
4170 * because the routes are already withdrawn by rfapi_close()
4172 if (CHECK_FLAG(peer
->flags
, PEER_FLAG_IS_RFAPI_HD
))
4176 * 1. Visit all BIs in all ENCAP import tables.
4177 * Start withdraw timer on the BIs that match peer.
4179 * 2. Visit All BIs in all VPN import tables.
4180 * Start withdraw timer on the BIs that match peer.
4183 bgp
= bgp_get_default(); /* assume 1 instance for now */
4190 for (it
= h
->imports
; it
; it
= it
->next
) {
4191 rfapiProcessPeerDownRt(peer
, it
, AFI_IP
, SAFI_ENCAP
);
4192 rfapiProcessPeerDownRt(peer
, it
, AFI_IP6
, SAFI_ENCAP
);
4193 rfapiProcessPeerDownRt(peer
, it
, AFI_IP
, SAFI_MPLS_VPN
);
4194 rfapiProcessPeerDownRt(peer
, it
, AFI_IP6
, SAFI_MPLS_VPN
);
4198 rfapiProcessPeerDownRt(peer
, h
->it_ce
, AFI_IP
, SAFI_MPLS_VPN
);
4199 rfapiProcessPeerDownRt(peer
, h
->it_ce
, AFI_IP6
, SAFI_MPLS_VPN
);
4204 * Import an entire RIB (for an afi/safi) to an import table RIB,
4205 * filtered according to the import table's RT list
4207 * TBD: does this function need additions to match rfapiProcessUpdate()
4208 * for, e.g., L2 handling?
4210 static void rfapiBgpTableFilteredImport(struct bgp
*bgp
,
4211 struct rfapi_import_table
*it
,
4212 afi_t afi
, safi_t safi
)
4214 struct bgp_node
*rn1
;
4215 struct bgp_node
*rn2
;
4217 /* Only these SAFIs have 2-level RIBS */
4218 assert(safi
== SAFI_MPLS_VPN
|| safi
== SAFI_ENCAP
);
4221 * Now visit all the rd nodes and the nodes of all the
4222 * route tables attached to them, and import the routes
4223 * if they have matching route targets
4225 for (rn1
= bgp_table_top(bgp
->rib
[afi
][safi
]); rn1
;
4226 rn1
= bgp_route_next(rn1
)) {
4229 for (rn2
= bgp_table_top(rn1
->info
); rn2
;
4230 rn2
= bgp_route_next(rn2
)) {
4232 struct bgp_info
*bi
;
4234 for (bi
= rn2
->info
; bi
; bi
= bi
->next
) {
4235 u_int32_t label
= 0;
4237 if (CHECK_FLAG(bi
->flags
,
4242 label
= decode_label(
4243 &bi
->extra
->label
[0]);
4244 (*rfapiBgpInfoFilteredImportFunction(
4246 it
, /* which import table */
4247 FIF_ACTION_UPDATE
, bi
->peer
,
4248 NULL
, &rn2
->p
, /* prefix */
4250 (struct prefix_rd
*)&rn1
->p
,
4252 bi
->sub_type
, &label
);
4260 /* per-bgp-instance rfapi data */
4261 struct rfapi
*bgp_rfapi_new(struct bgp
*bgp
)
4265 struct rfapi_rfp_cfg
*cfg
= NULL
;
4266 struct rfapi_rfp_cb_methods
*cbm
= NULL
;
4268 assert(bgp
->rfapi_cfg
== NULL
);
4270 h
= (struct rfapi
*)XCALLOC(MTYPE_RFAPI
, sizeof(struct rfapi
));
4272 for (afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
4273 h
->un
[afi
] = route_table_init();
4277 * initialize the ce import table
4279 h
->it_ce
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
4280 sizeof(struct rfapi_import_table
));
4281 h
->it_ce
->imported_vpn
[AFI_IP
] = route_table_init();
4282 h
->it_ce
->imported_vpn
[AFI_IP6
] = route_table_init();
4283 h
->it_ce
->imported_encap
[AFI_IP
] = route_table_init();
4284 h
->it_ce
->imported_encap
[AFI_IP6
] = route_table_init();
4285 rfapiBgpTableFilteredImport(bgp
, h
->it_ce
, AFI_IP
, SAFI_MPLS_VPN
);
4286 rfapiBgpTableFilteredImport(bgp
, h
->it_ce
, AFI_IP6
, SAFI_MPLS_VPN
);
4289 * Set up work queue for deferred rfapi_close operations
4291 h
->deferred_close_q
=
4292 work_queue_new(bm
->master
, "rfapi deferred close");
4293 h
->deferred_close_q
->spec
.workfunc
= rfapi_deferred_close_workfunc
;
4294 h
->deferred_close_q
->spec
.data
= h
;
4296 h
->rfp
= rfp_start(bm
->master
, &cfg
, &cbm
);
4297 bgp
->rfapi_cfg
= bgp_rfapi_cfg_new(cfg
);
4299 h
->rfp_methods
= *cbm
;
4304 void bgp_rfapi_destroy(struct bgp
*bgp
, struct rfapi
*h
)
4308 if (bgp
== NULL
|| h
== NULL
)
4311 if (h
->resolve_nve_nexthop
) {
4312 skiplist_free(h
->resolve_nve_nexthop
);
4313 h
->resolve_nve_nexthop
= NULL
;
4316 route_table_finish(h
->it_ce
->imported_vpn
[AFI_IP
]);
4317 route_table_finish(h
->it_ce
->imported_vpn
[AFI_IP6
]);
4318 route_table_finish(h
->it_ce
->imported_encap
[AFI_IP
]);
4319 route_table_finish(h
->it_ce
->imported_encap
[AFI_IP6
]);
4321 if (h
->import_mac
) {
4322 struct rfapi_import_table
*it
;
4327 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4329 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4332 rfapiImportTableFlush(it
);
4333 XFREE(MTYPE_RFAPI_IMPORTTABLE
, it
);
4335 skiplist_free(h
->import_mac
);
4336 h
->import_mac
= NULL
;
4339 work_queue_free(h
->deferred_close_q
);
4344 for (afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
4345 route_table_finish(h
->un
[afi
]);
4348 XFREE(MTYPE_RFAPI_IMPORTTABLE
, h
->it_ce
);
4349 XFREE(MTYPE_RFAPI
, h
);
/*
 * rfapiImportTableRefAdd
 *
 * Find or create the import table associated with an RT import list.
 *
 * The visible code walks h->imports comparing each table's rt_import_list
 * against the caller's list with ecommunity_cmp() and logs a match.
 * Otherwise it allocates a zeroed table (XCALLOC), links it in front of
 * h->imports, duplicates the RT list into it, creates the
 * monitor_exterior_orphans skiplist, and for every AFI from AFI_IP up to
 * (not including) AFI_MAX initializes the VPN and ENCAP route tables and
 * fills them via rfapiBgpTableFilteredImport().
 *
 * bgp             BGP instance handed to the import helpers
 * rt_import_list  route-target list identifying the table; duplicated,
 *                 not adopted
 * rfg             NVE group config; its use is not visible in this span
 *
 * NOTE(review): presumably returns the matched or newly created table;
 * the return statements are not visible here -- confirm.
 */
4352 struct rfapi_import_table
*
4353 rfapiImportTableRefAdd(struct bgp
*bgp
, struct ecommunity
*rt_import_list
,
4354 struct rfapi_nve_group_cfg
*rfg
)
4357 struct rfapi_import_table
*it
;
/* Look for an existing table with this RT import list. */
4363 for (it
= h
->imports
; it
; it
= it
->next
) {
4364 if (ecommunity_cmp(it
->rt_import_list
, rt_import_list
))
4368 vnc_zlog_debug_verbose("%s: matched it=%p", __func__
, it
);
/* No match: build a new, zero-initialized table. */
4371 it
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
4372 sizeof(struct rfapi_import_table
));
/* New table is chained ahead of the current list head. */
4374 it
->next
= h
->imports
;
/* Keep a private copy of the caller's RT list. */
4377 it
->rt_import_list
= ecommunity_dup(rt_import_list
);
/* Skiplist elements are released with prefix_free. */
4379 it
->monitor_exterior_orphans
=
4380 skiplist_new(0, NULL
, (void (*)(void *))prefix_free
);
4383 * fill import route tables from RIBs
4385 * Potential area for optimization. If this occurs when
4386 * tables are large (e.g., the operator adds a nve group
4387 * with a new RT list to a running system), it could take
/* Per-AFI: create the tables, then populate them from BGP. */
4391 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4393 it
->imported_vpn
[afi
] = route_table_init();
4394 it
->imported_encap
[afi
] = route_table_init();
4396 rfapiBgpTableFilteredImport(bgp
, it
, afi
,
4398 rfapiBgpTableFilteredImport(bgp
, it
, afi
, SAFI_ENCAP
);
4400 vnc_import_bgp_exterior_redist_enable_it(bgp
, afi
, it
);
4410 * skiplist element free function
/*
 * delete_rem_pfx_na_free
 *
 * Element destructor with the void (*)(void *) shape used for skiplist
 * free callbacks.  The element is treated as a struct rfapi_nve_addr
 * whose info member holds a uint32_t counter pointer; the element itself
 * is released with XFREE(MTYPE_RFAPI_NVE_ADDR).
 *
 * NOTE(review): presumably the counter is bumped before the free so that
 * destroying the list tallies its elements; that statement is not
 * visible here -- confirm.
 */
4412 static void delete_rem_pfx_na_free(void *na
)
/* info carries a pointer to the caller's counter, not a count. */
4414 uint32_t *pCounter
= ((struct rfapi_nve_addr
*)na
)->info
;
4417 XFREE(MTYPE_RFAPI_NVE_ADDR
, na
);
4421 * Common deleter for IP and MAC import tables
/*
 * rfapiDeleteRemotePrefixesIt
 *
 * Delete matching remote routes from one import table.
 *
 * For every AFI from AFI_IP up to (not including) AFI_MAX the function
 * scans it->imported_vpn[afi].  When p is set, AFIs that differ from
 * family2afi(p->family) and nodes for which prefix_cmp(p, &rn->p) is
 * non-zero are filtered.  For each bgp_info on a node the VN address is
 * obtained with rfapiGetNexthop() and tested against vn with
 * prefix_match(); the UN address is obtained with
 * rfapiGetUnAddrOfVpnBi() and tested against un the same way.
 *
 * Routes already flagged BGP_INFO_REMOVED are only processed when
 * delete_holddown is set; their pending withdraw state (timer,
 * struct rfapi_withdraw, holddown_count) is unwound in that branch.
 * Selected routes go through vnc_direct_bgp_rh_del_route(),
 * RFAPI_UPDATE_ITABLE_COUNT(bi, it, afi, -1), a holddown_count increment
 * and rfapiExpireVpnNow().
 *
 * When both UN and VN addresses are known, the pair is looked up in
 * uniq_active_nves or uniq_holddown_nves and, on a miss, a new
 * MTYPE_RFAPI_NVE_ADDR element is set up whose info is chosen by
 * is_active.
 *
 * bgp                  BGP instance passed to the export hook
 * it                   import table to scan
 * un, vn, p            optional filters (tested with "x &&" where visible)
 * delete_active        logged; its filtering is not visible here
 * delete_holddown      permit deletion of routes already in holddown
 * pARcount..pHHcount   counter pointers
 *                      NOTE(review): their updates are only partly
 *                      visible in this span -- confirm against callers.
 * uniq_active_nves,
 * uniq_holddown_nves   skiplists of unique NVE addresses
 */
4423 static void rfapiDeleteRemotePrefixesIt(
4424 struct bgp
*bgp
, struct rfapi_import_table
*it
, struct prefix
*un
,
4425 struct prefix
*vn
, struct prefix
*p
, int delete_active
,
4426 int delete_holddown
, uint32_t *pARcount
, uint32_t *pAHcount
,
4427 uint32_t *pHRcount
, uint32_t *pHHcount
,
4428 struct skiplist
*uniq_active_nves
, struct skiplist
*uniq_holddown_nves
)
4434 char buf_pfx
[PREFIX_STRLEN
];
/*
 * NOTE(review): p is tested for NULL further down ("p && ..."), so it
 * may be NULL here; confirm this prefix2str() call is guarded.
 */
4437 prefix2str(p
, buf_pfx
, sizeof(buf_pfx
));
4443 vnc_zlog_debug_verbose(
4444 "%s: entry, p=%s, delete_active=%d, delete_holddown=%d",
4445 __func__
, buf_pfx
, delete_active
, delete_holddown
);
4449 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4451 struct route_table
*rt
;
4452 struct route_node
*rn
;
/* A prefix filter restricts the scan to its own address family. */
4454 if (p
&& (family2afi(p
->family
) != afi
)) {
4458 rt
= it
->imported_vpn
[afi
];
4462 vnc_zlog_debug_verbose("%s: scanning rt for afi=%d", __func__
,
4465 for (rn
= route_top(rt
); rn
; rn
= route_next(rn
)) {
4466 struct bgp_info
*bi
;
4467 struct bgp_info
*next
;
/* NOTE(review): same possible-NULL p concern as at function entry. */
4469 if (VNC_DEBUG(IMPORT_DEL_REMOTE
)) {
4470 char p1line
[PREFIX_STRLEN
];
4471 char p2line
[PREFIX_STRLEN
];
4473 prefix2str(p
, p1line
, sizeof(p1line
));
4474 prefix2str(&rn
->p
, p2line
, sizeof(p2line
));
4475 vnc_zlog_debug_any("%s: want %s, have %s",
4476 __func__
, p1line
, p2line
);
/* prefix_cmp() non-zero means this node is not the requested prefix. */
4479 if (p
&& prefix_cmp(p
, &rn
->p
))
/* This buf_pfx shadows the function-scope buffer of the same name. */
4483 char buf_pfx
[PREFIX_STRLEN
];
4485 prefix2str(&rn
->p
, buf_pfx
, sizeof(buf_pfx
));
4486 vnc_zlog_debug_verbose("%s: rn pfx=%s",
4490 /* TBD is this valid for afi == AFI_L2VPN? */
4491 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, 1);
/* Advances via a saved "next" pointer rather than through bi. */
4493 for (bi
= rn
->info
; bi
; bi
= next
) {
4502 vnc_zlog_debug_verbose("%s: examining bi %p",
/* VN filter: nexthop taken from the route's attributes. */
4506 if (!rfapiGetNexthop(bi
->attr
, &qpt
))
4511 || !prefix_match(vn
, &qpt
)) {
4513 vnc_zlog_debug_verbose(
4514 "%s: continue at vn && !qpt_valid || !prefix_match(vn, &qpt)",
/* UN filter: tunnel endpoint address of the VPN route. */
4521 if (!rfapiGetUnAddrOfVpnBi(bi
, &qct
))
4526 || !prefix_match(un
, &qct
)) {
4528 vnc_zlog_debug_verbose(
4529 "%s: continue at un && !qct_valid || !prefix_match(un, &qct)",
4541 * If this route is waiting to be deleted
4543 * a previous withdraw, we must cancel its
4546 if (CHECK_FLAG(bi
->flags
, BGP_INFO_REMOVED
)) {
4547 if (!delete_holddown
)
4549 if (bi
->extra
->vnc
.import
.timer
) {
4555 struct rfapi_withdraw
*wcb
=
4559 ->holddown_count
[afi
] -=
4561 RFAPI_UPDATE_ITABLE_COUNT(
4562 bi
, wcb
->import_table
,
4564 XFREE(MTYPE_RFAPI_WITHDRAW
,
4574 vnc_zlog_debug_verbose(
4575 "%s: deleting bi %p (qct_valid=%d, qpt_valid=%d, delete_holddown=%d, delete_active=%d)",
4576 __func__
, bi
, qct_valid
, qpt_valid
,
4577 delete_holddown
, delete_active
);
/* Unique-NVE bookkeeping needs both the UN and the VN address. */
4583 if (qct_valid
&& qpt_valid
) {
4585 struct rfapi_nve_addr na
;
4586 struct rfapi_nve_addr
*nap
;
4588 memset(&na
, 0, sizeof(na
));
/*
 * NOTE(review): these conversions run inside assert(); if assert can be
 * compiled out in this build the calls disappear and na stays zeroed --
 * confirm.
 */
4589 assert(!rfapiQprefix2Raddr(&qct
,
4591 assert(!rfapiQprefix2Raddr(&qpt
,
/* Search is keyed by na; a non-zero result is handled as "not found". */
4594 if (skiplist_search(
4597 : uniq_holddown_nves
),
4598 &na
, (void **)&nap
)) {
4602 MTYPE_RFAPI_NVE_ADDR
,
4607 nap
->info
= is_active
4613 : uniq_holddown_nves
),
4616 rfapiNveAddr2Str(nap
, line
,
4621 vnc_direct_bgp_rh_del_route(bgp
, afi
, &rn
->p
,
/*
 * The table count is decremented and holddown_count incremented before
 * the expire call; statement order is kept as found.
 */
4624 RFAPI_UPDATE_ITABLE_COUNT(bi
, it
, afi
, -1);
4625 it
->holddown_count
[afi
] += 1;
4626 rfapiExpireVpnNow(it
, rn
, bi
, 1);
4628 vnc_zlog_debug_verbose(
4629 "%s: incrementing count (is_active=%d)",
4630 __func__
, is_active
);
4643 * For use by the "clear vnc prefixes" command
4645 /*------------------------------------------
4646 * rfapiDeleteRemotePrefixes
4648 * UI helper: For use by the "clear vnc prefixes" command
4651 * un if set, tunnel must match this prefix
4652 * vn if set, nexthop prefix must match this prefix
4653 * p if set, prefix must match this prefix
4654 * it if set, only look in this import table
4657 * pARcount number of active routes deleted
4658 * pAHcount number of active nves deleted
4659 * pHRcount number of holddown routes deleted
4660 * pHHcount number of holddown nves deleted
4664 --------------------------------------------*/
/*
 * rfapiDeleteRemotePrefixes
 *
 * Driver that runs rfapiDeleteRemotePrefixesIt() over import tables and
 * reports totals.
 *
 * Uses the default BGP instance (bgp_get_default()).  Two skiplists
 * ordered by rfapi_nve_addr_cmp, with delete_rem_pfx_na_free as element
 * destructor, collect the unique NVE addresses seen for active and
 * holddown routes.  The deleter is invoked for the IP import table(s)
 * and, when h->import_mac exists and p is either unset or of family
 * AF_ETHERNET, for every table returned by skiplist_next() on
 * h->import_mac.  Both skiplists are then freed and the four totals are
 * stored through the output pointers.
 *
 * un, vn, p          optional filters forwarded to the per-table deleter
 * arg_it             import table restriction; its handling is not
 *                    visible in this span
 * delete_active,
 * delete_holddown    forwarded to the per-table deleter
 * pARcount           out: active routes deleted
 * pAHcount           out: active NVEs deleted
 * pHRcount           out: holddown routes deleted
 * pHHcount           out: holddown NVEs deleted
 *
 * NOTE(review): whether the output stores are NULL-guarded is not
 * visible here -- confirm before passing NULL.
 */
4665 void rfapiDeleteRemotePrefixes(struct prefix
*un
, struct prefix
*vn
,
4667 struct rfapi_import_table
*arg_it
,
4668 int delete_active
, int delete_holddown
,
4669 uint32_t *pARcount
, uint32_t *pAHcount
,
4670 uint32_t *pHRcount
, uint32_t *pHHcount
)
4674 struct rfapi_import_table
*it
;
4675 uint32_t deleted_holddown_route_count
= 0;
4676 uint32_t deleted_active_route_count
= 0;
4677 uint32_t deleted_holddown_nve_count
= 0;
4678 uint32_t deleted_active_nve_count
= 0;
4679 struct skiplist
*uniq_holddown_nves
;
4680 struct skiplist
*uniq_active_nves
;
4684 bgp
= bgp_get_default(); /* assume 1 instance for now */
4685 /* If no bgp instantiated yet, no vnc prefixes exist */
/* Both lists use the same comparator and the same element destructor. */
4692 uniq_holddown_nves
=
4693 skiplist_new(0, rfapi_nve_addr_cmp
, delete_rem_pfx_na_free
);
4695 skiplist_new(0, rfapi_nve_addr_cmp
, delete_rem_pfx_na_free
);
/*
 * NOTE(review): the "filtered import" wording below does not match the
 * visible calls, which delete prefixes; it looks copied from elsewhere.
 */
4698 * Iterate over all import tables; do a filtered import
4699 * for the afi/safi combination
4708 vnc_zlog_debug_verbose(
4709 "%s: calling rfapiDeleteRemotePrefixesIt() on (IP) import %p",
4712 rfapiDeleteRemotePrefixesIt(
4713 bgp
, it
, un
, vn
, p
, delete_active
, delete_holddown
,
4714 &deleted_active_route_count
, &deleted_active_nve_count
,
4715 &deleted_holddown_route_count
,
4716 &deleted_holddown_nve_count
, uniq_active_nves
,
4717 uniq_holddown_nves
);
4726 * Now iterate over L2 import tables
/* L2 tables are skipped when a non-Ethernet prefix filter is given. */
4728 if (h
->import_mac
&& !(p
&& (p
->family
!= AF_ETHERNET
))) {
4730 void *cursor
= NULL
;
4734 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4736 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4739 vnc_zlog_debug_verbose(
4740 "%s: calling rfapiDeleteRemotePrefixesIt() on import_mac %p",
4743 rfapiDeleteRemotePrefixesIt(
4744 bgp
, it
, un
, vn
, p
, delete_active
,
4745 delete_holddown
, &deleted_active_route_count
,
4746 &deleted_active_nve_count
,
4747 &deleted_holddown_route_count
,
4748 &deleted_holddown_nve_count
, uniq_active_nves
,
4749 uniq_holddown_nves
);
4754 * our custom element freeing function above counts as it deletes
/* Freeing the lists runs delete_rem_pfx_na_free on each element. */
4756 skiplist_free(uniq_holddown_nves
);
4757 skiplist_free(uniq_active_nves
);
/* Publish the totals to the caller. */
4760 *pARcount
= deleted_active_route_count
;
4762 *pAHcount
= deleted_active_nve_count
;
4764 *pHRcount
= deleted_holddown_route_count
;
4766 *pHHcount
= deleted_holddown_nve_count
;
4771 /*------------------------------------------
4772 * rfapiCountAllItRoutes
4774 * UI helper: count VRF routes from BGP side
4779 * pALRcount count of active local routes
4780 * pARRcount count of active remote routes
4781 * pHRcount count of holddown routes
4782 * pIRcount count of direct imported routes
4786 --------------------------------------------*/
/*
 * rfapiCountAllItRoutes
 *
 * Sum the per-table route counters of every import table.
 *
 * Uses the default BGP instance (bgp_get_default()).  For each table on
 * h->imports the local_count, remote_count, holddown_count and
 * imported_count entries are added up for every AFI from AFI_IP up to
 * (not including) AFI_MAX.  When h->import_mac exists, the same four
 * counters at index AFI_L2VPN are added for every table returned by
 * skiplist_next() on that list.  The totals are stored through the four
 * output pointers.
 *
 * pALRcount  out: active local routes
 * pARRcount  out: active remote routes
 * pHRcount   out: holddown routes
 * pIRcount   out: imported routes
 *
 * NOTE(review): whether the output stores are NULL-guarded is not
 * visible here -- confirm before passing NULL.
 */
4787 void rfapiCountAllItRoutes(int *pALRcount
, /* active local routes */
4788 int *pARRcount
, /* active remote routes */
4789 int *pHRcount
, /* holddown routes */
4790 int *pIRcount
) /* imported routes */
4794 struct rfapi_import_table
*it
;
4797 int total_active_local
= 0;
4798 int total_active_remote
= 0;
4799 int total_holddown
= 0;
4800 int total_imported
= 0;
4802 bgp
= bgp_get_default(); /* assume 1 instance for now */
/*
 * NOTE(review): the "filtered import" wording below does not match the
 * visible code, which only reads counters; it looks copied from elsewhere.
 */
4809 * Iterate over all import tables; do a filtered import
4810 * for the afi/safi combination
4813 for (it
= h
->imports
; it
; it
= it
->next
) {
4815 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4817 total_active_local
+= it
->local_count
[afi
];
4818 total_active_remote
+= it
->remote_count
[afi
];
4819 total_holddown
+= it
->holddown_count
[afi
];
4820 total_imported
+= it
->imported_count
[afi
];
/* L2 (MAC) tables live in a skiplist and contribute the AFI_L2VPN slot. */
4827 if (h
->import_mac
) {
4829 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4831 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4834 total_active_local
+= it
->local_count
[AFI_L2VPN
];
4835 total_active_remote
+= it
->remote_count
[AFI_L2VPN
];
4836 total_holddown
+= it
->holddown_count
[AFI_L2VPN
];
4837 total_imported
+= it
->imported_count
[AFI_L2VPN
];
/* Publish the totals to the caller. */
4843 *pALRcount
= total_active_local
;
4846 *pARRcount
= total_active_remote
;
4849 *pHRcount
= total_holddown
;
4852 *pIRcount
= total_imported
;
4856 /*------------------------------------------
4857 * rfapiGetHolddownFromLifetime
4859 * calculate holddown value based on lifetime
4865 * Holddown value based on lifetime, holddown_factor,
4866 * and RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
4868 --------------------------------------------*/
4869 /* hold down time maxes out at RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY */
4870 uint32_t rfapiGetHolddownFromLifetime(uint32_t lifetime
)
4875 bgp
= bgp_get_default();
4876 if (bgp
&& bgp
->rfapi_cfg
)
4877 factor
= bgp
->rfapi_cfg
->rfp_cfg
.holddown_factor
;
4879 factor
= RFAPI_RFP_CFG_DEFAULT_HOLDDOWN_FACTOR
;
4881 if (factor
< 100 || lifetime
< RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
4882 lifetime
= lifetime
* factor
/ 100;
4883 if (lifetime
< RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
4886 return RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
;