3 * Copyright 2009-2016, LabN Consulting, L.L.C.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 * File: rfapi_import.c
23 * Purpose: Handle import of routes from BGP to RFAPI
26 #include "lib/zebra.h"
27 #include "lib/prefix.h"
28 #include "lib/agg_table.h"
30 #include "lib/memory.h"
32 #include "lib/skiplist.h"
33 #include "lib/thread.h"
34 #include "lib/stream.h"
35 #include "lib/lib_errors.h"
37 #include "bgpd/bgpd.h"
38 #include "bgpd/bgp_ecommunity.h"
39 #include "bgpd/bgp_attr.h"
40 #include "bgpd/bgp_route.h"
41 #include "bgpd/bgp_mplsvpn.h" /* prefix_rd2str() */
42 #include "bgpd/bgp_vnc_types.h"
43 #include "bgpd/bgp_rd.h"
45 #include "bgpd/rfapi/rfapi.h"
46 #include "bgpd/rfapi/bgp_rfapi_cfg.h"
47 #include "bgpd/rfapi/rfapi_backend.h"
48 #include "bgpd/rfapi/rfapi_import.h"
49 #include "bgpd/rfapi/rfapi_private.h"
50 #include "bgpd/rfapi/rfapi_monitor.h"
51 #include "bgpd/rfapi/rfapi_nve_addr.h"
52 #include "bgpd/rfapi/rfapi_vty.h"
53 #include "bgpd/rfapi/vnc_export_bgp.h"
54 #include "bgpd/rfapi/vnc_export_bgp_p.h"
55 #include "bgpd/rfapi/vnc_zebra.h"
56 #include "bgpd/rfapi/vnc_import_bgp.h"
57 #include "bgpd/rfapi/vnc_import_bgp_p.h"
58 #include "bgpd/rfapi/rfapi_rib.h"
59 #include "bgpd/rfapi/rfapi_encap_tlv.h"
60 #include "bgpd/rfapi/vnc_debug.h"
62 #ifdef HAVE_GLIBC_BACKTRACE
63 /* for backtrace and friends */
65 #endif /* HAVE_GLIBC_BACKTRACE */
67 #undef DEBUG_MONITOR_MOVE_SHORTER
68 #undef DEBUG_RETURNED_NHL
69 #undef DEBUG_ROUTE_COUNTERS
70 #undef DEBUG_ENCAP_MONITOR
73 #undef DEBUG_BI_SEARCH
76 * Allocated for each withdraw timer instance; freed when the timer
77 * expires or is canceled
79 struct rfapi_withdraw
{
80 struct rfapi_import_table
*import_table
;
81 struct agg_node
*node
;
82 struct bgp_path_info
*info
;
83 safi_t safi
; /* used only for bulk operations */
85 * For import table node reference count checking (i.e., debugging).
86 * Normally when a timer expires, lockoffset should be 0. However, if
87 * the timer expiration function is called directly (e.g.,
88 * rfapiExpireVpnNow), the node could be locked by a preceding
89 * agg_route_top() or agg_route_next() in a loop, so we need to pass
97 * It's evil and fiendish. It's compiler-dependent.
98 * ? Might need LDFLAGS -rdynamic to produce all function names
100 void rfapiDebugBacktrace(void)
102 #ifdef HAVE_GLIBC_BACKTRACE
103 #define RFAPI_DEBUG_BACKTRACE_NENTRIES 200
104 void *buf
[RFAPI_DEBUG_BACKTRACE_NENTRIES
];
109 size
= backtrace(buf
, RFAPI_DEBUG_BACKTRACE_NENTRIES
);
110 syms
= backtrace_symbols(buf
, size
);
112 for (i
= 0; i
< size
&& i
< RFAPI_DEBUG_BACKTRACE_NENTRIES
; ++i
) {
113 vnc_zlog_debug_verbose("backtrace[%2zu]: %s", i
, syms
[i
]);
123 * Count remote routes and compare with actively-maintained values.
124 * Abort if they disagree.
126 void rfapiCheckRouteCount(void)
128 struct bgp
*bgp
= bgp_get_default();
130 struct rfapi_import_table
*it
;
138 for (it
= h
->imports
; it
; it
= it
->next
) {
139 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
141 struct agg_table
*rt
;
144 int holddown_count
= 0;
146 int imported_count
= 0;
147 int remote_count
= 0;
149 rt
= it
->imported_vpn
[afi
];
151 for (rn
= agg_route_top(rt
); rn
;
152 rn
= agg_route_next(rn
)) {
153 struct bgp_path_info
*bpi
;
154 struct bgp_path_info
*next
;
156 for (bpi
= rn
->info
; bpi
; bpi
= next
) {
159 if (CHECK_FLAG(bpi
->flags
,
164 if (RFAPI_LOCAL_BI(bpi
)) {
167 if (RFAPI_DIRECT_IMPORT_BI(
178 if (it
->holddown_count
[afi
] != holddown_count
) {
179 vnc_zlog_debug_verbose(
180 "%s: it->holddown_count %d != holddown_count %d",
181 __func__
, it
->holddown_count
[afi
],
185 if (it
->remote_count
[afi
] != remote_count
) {
186 vnc_zlog_debug_verbose(
187 "%s: it->remote_count %d != remote_count %d",
188 __func__
, it
->remote_count
[afi
],
192 if (it
->imported_count
[afi
] != imported_count
) {
193 vnc_zlog_debug_verbose(
194 "%s: it->imported_count %d != imported_count %d",
195 __func__
, it
->imported_count
[afi
],
203 #ifdef DEBUG_ROUTE_COUNTERS
204 #define VNC_ITRCCK do {rfapiCheckRouteCount();} while (0)
210 * Validate reference count for a node in an import table
212 * Normally lockoffset is 0 for nodes in quiescent state. However,
213 * agg_unlock_node will delete the node if it is called when
214 * node->lock == 1, and we have to validate the refcount before
215 * the node is deleted. In this case, we specify lockoffset 1.
217 void rfapiCheckRefcount(struct agg_node
*rn
, safi_t safi
, int lockoffset
)
219 unsigned int count_bpi
= 0;
220 unsigned int count_monitor
= 0;
221 struct bgp_path_info
*bpi
;
222 struct rfapi_monitor_encap
*hme
;
223 struct rfapi_monitor_vpn
*hmv
;
225 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
)
230 ++count_monitor
; /* rfapi_it_extra */
237 for (hme
= RFAPI_MONITOR_ENCAP(rn
); hme
;
244 for (hmv
= RFAPI_MONITOR_VPN(rn
); hmv
; hmv
= hmv
->next
)
247 if (RFAPI_MONITOR_EXTERIOR(rn
)->source
) {
248 ++count_monitor
; /* sl */
250 for (rc
= skiplist_next(
251 RFAPI_MONITOR_EXTERIOR(rn
)->source
,
252 NULL
, NULL
, &cursor
);
255 RFAPI_MONITOR_EXTERIOR(rn
)->source
,
256 NULL
, NULL
, &cursor
)) {
258 ++count_monitor
; /* sl entry */
268 if (count_bpi
+ count_monitor
+ lockoffset
269 != agg_node_get_lock_count(rn
)) {
270 vnc_zlog_debug_verbose(
271 "%s: count_bpi=%d, count_monitor=%d, lockoffset=%d, rn->lock=%d",
272 __func__
, count_bpi
, count_monitor
, lockoffset
,
273 agg_node_get_lock_count(rn
));
279 * Perform deferred rfapi_close operations that were queued
282 static wq_item_status
rfapi_deferred_close_workfunc(struct work_queue
*q
,
285 struct rfapi_descriptor
*rfd
= data
;
286 struct rfapi
*h
= q
->spec
.data
;
288 assert(!(h
->flags
& RFAPI_INCALLBACK
));
290 vnc_zlog_debug_verbose("%s: completed deferred close on handle %p",
296 * Extract layer 2 option from Encap TLVS in BGP attrs
298 int rfapiGetL2o(struct attr
*attr
, struct rfapi_l2address_option
*l2o
)
301 struct bgp_attr_encap_subtlv
*pEncap
;
303 for (pEncap
= bgp_attr_get_vnc_subtlvs(attr
); pEncap
;
304 pEncap
= pEncap
->next
) {
306 if (pEncap
->type
== BGP_VNC_SUBTLV_TYPE_RFPOPTION
) {
308 == RFAPI_VN_OPTION_TYPE_L2ADDR
) {
310 if (pEncap
->value
[1] == 14) {
311 memcpy(l2o
->macaddr
.octet
,
328 l2o
->logical_net_id
=
331 + ((pEncap
->value
[14]
334 + ((pEncap
->value
[13]
349 * Extract the lifetime from the Tunnel Encap attribute of a route in
352 int rfapiGetVncLifetime(struct attr
*attr
, uint32_t *lifetime
)
354 struct bgp_attr_encap_subtlv
*pEncap
;
356 *lifetime
= RFAPI_INFINITE_LIFETIME
; /* default to infinite */
360 for (pEncap
= bgp_attr_get_vnc_subtlvs(attr
); pEncap
;
361 pEncap
= pEncap
->next
) {
364 == BGP_VNC_SUBTLV_TYPE_LIFETIME
) { /* lifetime */
365 if (pEncap
->length
== 4) {
366 memcpy(lifetime
, pEncap
->value
, 4);
367 *lifetime
= ntohl(*lifetime
);
378 * Look for UN address in Encap attribute
380 int rfapiGetVncTunnelUnAddr(struct attr
*attr
, struct prefix
*p
)
382 struct bgp_attr_encap_subtlv
*pEncap
;
383 bgp_encap_types tun_type
= BGP_ENCAP_TYPE_MPLS
;/*Default tunnel type*/
385 bgp_attr_extcom_tunnel_type(attr
, &tun_type
);
386 if (tun_type
== BGP_ENCAP_TYPE_MPLS
) {
389 /* MPLS carries UN address in next hop */
390 rfapiNexthop2Prefix(attr
, p
);
391 if (p
->family
!= AF_UNSPEC
)
397 for (pEncap
= attr
->encap_subtlvs
; pEncap
;
398 pEncap
= pEncap
->next
) {
401 == BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT
) { /* un
404 switch (pEncap
->length
) {
408 p
->prefixlen
= IPV4_MAX_BITLEN
;
409 memcpy(p
->u
.val
, pEncap
->value
,
416 p
->family
= AF_INET6
;
417 p
->prefixlen
= IPV6_MAX_BITLEN
;
418 memcpy(p
->u
.val
, pEncap
->value
,
431 * Get UN address wherever it might be
433 int rfapiGetUnAddrOfVpnBi(struct bgp_path_info
*bpi
, struct prefix
*p
)
435 /* If it's in this route's VNC attribute, we're done */
436 if (!rfapiGetVncTunnelUnAddr(bpi
->attr
, p
))
439 * Otherwise, see if it's cached from a corresponding ENCAP SAFI
443 switch (bpi
->extra
->vnc
.import
.un_family
) {
446 p
->family
= bpi
->extra
->vnc
.import
.un_family
;
447 p
->u
.prefix4
= bpi
->extra
->vnc
.import
.un
.addr4
;
448 p
->prefixlen
= IPV4_MAX_BITLEN
;
453 p
->family
= bpi
->extra
->vnc
.import
.un_family
;
454 p
->u
.prefix6
= bpi
->extra
->vnc
.import
.un
.addr6
;
455 p
->prefixlen
= IPV6_MAX_BITLEN
;
460 p
->family
= AF_UNSPEC
;
461 #ifdef DEBUG_ENCAP_MONITOR
462 vnc_zlog_debug_verbose(
463 "%s: bpi->extra->vnc.import.un_family is 0, no UN addr",
475 * Make a new bgp_path_info from gathered parameters
477 static struct bgp_path_info
*rfapiBgpInfoCreate(struct attr
*attr
,
478 struct peer
*peer
, void *rfd
,
479 struct prefix_rd
*prd
,
480 uint8_t type
, uint8_t sub_type
,
483 struct bgp_path_info
*new;
485 new = info_make(type
, sub_type
, 0, peer
, attr
, NULL
);
487 new->attr
= bgp_attr_intern(attr
);
489 bgp_path_info_extra_get(new);
491 new->extra
->vnc
.import
.rd
= *prd
;
492 rfapi_time(&new->extra
->vnc
.import
.create_time
);
495 encode_label(*label
, &new->extra
->label
[0]);
503 * Frees bgp_path_info as used in import tables (parts are not
504 * allocated exactly the way they are in the main RIBs)
506 static void rfapiBgpInfoFree(struct bgp_path_info
*goner
)
512 vnc_zlog_debug_verbose("%s: calling peer_unlock(%p), #%d",
513 __func__
, goner
->peer
,
515 peer_unlock(goner
->peer
);
518 bgp_attr_unintern(&goner
->attr
);
521 bgp_path_info_extra_free(&goner
->extra
);
522 XFREE(MTYPE_BGP_ROUTE
, goner
);
525 struct rfapi_import_table
*rfapiMacImportTableGetNoAlloc(struct bgp
*bgp
,
529 struct rfapi_import_table
*it
= NULL
;
530 uintptr_t lni_as_ptr
= lni
;
539 if (skiplist_search(h
->import_mac
, (void *)lni_as_ptr
, (void **)&it
))
545 struct rfapi_import_table
*rfapiMacImportTableGet(struct bgp
*bgp
, uint32_t lni
)
548 struct rfapi_import_table
*it
= NULL
;
549 uintptr_t lni_as_ptr
= lni
;
554 if (!h
->import_mac
) {
555 /* default cmp is good enough for LNI */
556 h
->import_mac
= skiplist_new(0, NULL
, NULL
);
559 if (skiplist_search(h
->import_mac
, (void *)lni_as_ptr
, (void **)&it
)) {
561 struct ecommunity
*enew
;
562 struct ecommunity_val eval
;
565 it
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
566 sizeof(struct rfapi_import_table
));
567 /* set RT list of new import table based on LNI */
568 memset((char *)&eval
, 0, sizeof(eval
));
569 eval
.val
[0] = 0; /* VNC L2VPN */
570 eval
.val
[1] = 2; /* VNC L2VPN */
571 eval
.val
[5] = (lni
>> 16) & 0xff;
572 eval
.val
[6] = (lni
>> 8) & 0xff;
573 eval
.val
[7] = (lni
>> 0) & 0xff;
575 enew
= ecommunity_new();
576 ecommunity_add_val(enew
, &eval
, false, false);
577 it
->rt_import_list
= enew
;
579 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
580 it
->imported_vpn
[afi
] = agg_table_init();
581 it
->imported_encap
[afi
] = agg_table_init();
584 it
->l2_logical_net_id
= lni
;
586 skiplist_insert(h
->import_mac
, (void *)lni_as_ptr
, it
);
594 * Implement MONITOR_MOVE_SHORTER(original_node) from
595 * RFAPI-Import-Event-Handling.txt
597 * Returns pointer to the list of moved monitors
599 static struct rfapi_monitor_vpn
*
600 rfapiMonitorMoveShorter(struct agg_node
*original_vpn_node
, int lockoffset
)
602 struct bgp_path_info
*bpi
;
603 struct agg_node
*par
;
604 struct rfapi_monitor_vpn
*m
;
605 struct rfapi_monitor_vpn
*mlast
;
606 struct rfapi_monitor_vpn
*moved
;
608 int parent_already_refcounted
= 0;
610 RFAPI_CHECK_REFCOUNT(original_vpn_node
, SAFI_MPLS_VPN
, lockoffset
);
612 #ifdef DEBUG_MONITOR_MOVE_SHORTER
614 vnc_zlog_debug_verbose("%s: called with node pfx=%pFX",
615 __func__
, &original_vpn_node
->p
);
620 * 1. If there is at least one bpi (either regular route or
621 * route marked as withdrawn, with a pending timer) at
622 * original_node with a valid UN address, we're done. Return.
624 for (bpi
= original_vpn_node
->info
; bpi
; bpi
= bpi
->next
) {
627 if (!rfapiGetUnAddrOfVpnBi(bpi
, &pfx
)) {
628 #ifdef DEBUG_MONITOR_MOVE_SHORTER
629 vnc_zlog_debug_verbose(
630 "%s: have valid UN at original node, no change",
638 * 2. Travel up the tree (toward less-specific prefixes) from
639 * original_node to find the first node that has at least
640 * one route (even if it is only a withdrawn route) with a
641 * valid UN address. Call this node "Node P."
643 for (par
= agg_node_parent(original_vpn_node
); par
;
644 par
= agg_node_parent(par
)) {
645 for (bpi
= par
->info
; bpi
; bpi
= bpi
->next
) {
647 if (!rfapiGetUnAddrOfVpnBi(bpi
, &pfx
)) {
656 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
, 0);
660 * If no less-specific routes, try to use the 0/0 node
663 const struct prefix
*p
;
664 /* this isn't necessarily 0/0 */
665 par
= agg_route_table_top(original_vpn_node
);
668 p
= agg_node_get_prefix(par
);
670 * If we got the top node but it wasn't 0/0,
673 if (par
&& p
->prefixlen
) {
674 agg_unlock_node(par
); /* maybe free */
679 ++parent_already_refcounted
;
684 * Create 0/0 node if it isn't there
687 struct prefix pfx_default
;
688 const struct prefix
*p
= agg_node_get_prefix(original_vpn_node
);
690 memset(&pfx_default
, 0, sizeof(pfx_default
));
691 pfx_default
.family
= p
->family
;
693 /* creates default node if none exists */
694 par
= agg_node_get(agg_get_table(original_vpn_node
),
696 ++parent_already_refcounted
;
700 * 3. Move each of the monitors found at original_node to Node P.
701 * These are "Moved Monitors."
706 * Attach at end so that the list pointer we return points
707 * only to the moved routes
709 for (m
= RFAPI_MONITOR_VPN(par
), mlast
= NULL
; m
;
710 mlast
= m
, m
= m
->next
)
714 moved
= mlast
->next
= RFAPI_MONITOR_VPN(original_vpn_node
);
716 moved
= RFAPI_MONITOR_VPN_W_ALLOC(par
) =
717 RFAPI_MONITOR_VPN(original_vpn_node
);
719 if (RFAPI_MONITOR_VPN(
720 original_vpn_node
)) /* check agg, so not allocated */
721 RFAPI_MONITOR_VPN_W_ALLOC(original_vpn_node
) = NULL
;
724 * update the node pointers on the monitors
726 for (m
= moved
; m
; m
= m
->next
) {
731 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
,
732 parent_already_refcounted
- movecount
);
733 while (movecount
> parent_already_refcounted
) {
735 ++parent_already_refcounted
;
737 while (movecount
< parent_already_refcounted
) {
738 /* unlikely, but code defensively */
739 agg_unlock_node(par
);
740 --parent_already_refcounted
;
742 RFAPI_CHECK_REFCOUNT(original_vpn_node
, SAFI_MPLS_VPN
,
743 movecount
+ lockoffset
);
744 while (movecount
--) {
745 agg_unlock_node(original_vpn_node
);
748 #ifdef DEBUG_MONITOR_MOVE_SHORTER
750 vnc_zlog_debug_verbose("%s: moved to node pfx=%pFX", __func__
,
760 * Implement MONITOR_MOVE_LONGER(new_node) from
761 * RFAPI-Import-Event-Handling.txt
763 static void rfapiMonitorMoveLonger(struct agg_node
*new_vpn_node
)
765 struct rfapi_monitor_vpn
*monitor
;
766 struct rfapi_monitor_vpn
*mlast
;
767 struct bgp_path_info
*bpi
;
768 struct agg_node
*par
;
769 const struct prefix
*new_vpn_node_p
= agg_node_get_prefix(new_vpn_node
);
771 RFAPI_CHECK_REFCOUNT(new_vpn_node
, SAFI_MPLS_VPN
, 0);
774 * Make sure we have at least one valid route at the new node
776 for (bpi
= new_vpn_node
->info
; bpi
; bpi
= bpi
->next
) {
778 if (!rfapiGetUnAddrOfVpnBi(bpi
, &pfx
))
783 vnc_zlog_debug_verbose(
784 "%s: no valid routes at node %p, so not attempting moves",
785 __func__
, new_vpn_node
);
790 * Find first parent node that has monitors
792 for (par
= agg_node_parent(new_vpn_node
); par
;
793 par
= agg_node_parent(par
)) {
794 if (RFAPI_MONITOR_VPN(par
))
799 vnc_zlog_debug_verbose(
800 "%s: no parent nodes with monitors, done", __func__
);
805 * Check each of these monitors to see of their longest-match
806 * is now the updated node. Move any such monitors to the more-
807 * specific updated node
809 for (mlast
= NULL
, monitor
= RFAPI_MONITOR_VPN(par
); monitor
;) {
811 * If new longest match for monitor prefix is the new
812 * route's prefix, move monitor to new route's prefix
814 if (prefix_match(new_vpn_node_p
, &monitor
->p
)) {
817 mlast
->next
= monitor
->next
;
819 RFAPI_MONITOR_VPN_W_ALLOC(par
) = monitor
->next
;
824 monitor
->next
= RFAPI_MONITOR_VPN(new_vpn_node
);
825 RFAPI_MONITOR_VPN_W_ALLOC(new_vpn_node
) = monitor
;
826 monitor
->node
= new_vpn_node
;
828 agg_lock_node(new_vpn_node
); /* incr refcount */
830 monitor
= mlast
? mlast
->next
: RFAPI_MONITOR_VPN(par
);
832 RFAPI_CHECK_REFCOUNT(par
, SAFI_MPLS_VPN
, 1);
833 /* decr refcount after we're done with par as this might
835 agg_unlock_node(par
);
840 monitor
= monitor
->next
;
843 RFAPI_CHECK_REFCOUNT(new_vpn_node
, SAFI_MPLS_VPN
, 0);
847 static void rfapiBgpInfoChainFree(struct bgp_path_info
*bpi
)
849 struct bgp_path_info
*next
;
854 * If there is a timer waiting to delete this bpi, cancel
855 * the timer and delete immediately
857 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)
858 && bpi
->extra
->vnc
.import
.timer
) {
861 &(bpi
->extra
->vnc
.import
.timer
);
862 struct rfapi_withdraw
*wcb
= (*t
)->arg
;
864 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
870 rfapiBgpInfoFree(bpi
);
875 static void rfapiImportTableFlush(struct rfapi_import_table
*it
)
882 ecommunity_free(&it
->rt_import_list
);
883 it
->rt_import_list
= NULL
;
885 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
889 for (rn
= agg_route_top(it
->imported_vpn
[afi
]); rn
;
890 rn
= agg_route_next(rn
)) {
892 * Each route_node has:
893 * aggregate: points to rfapi_it_extra with monitor
895 * info: points to chain of bgp_path_info
897 /* free bgp_path_info and its children */
898 rfapiBgpInfoChainFree(rn
->info
);
901 rfapiMonitorExtraFlush(SAFI_MPLS_VPN
, rn
);
904 for (rn
= agg_route_top(it
->imported_encap
[afi
]); rn
;
905 rn
= agg_route_next(rn
)) {
906 /* free bgp_path_info and its children */
907 rfapiBgpInfoChainFree(rn
->info
);
910 rfapiMonitorExtraFlush(SAFI_ENCAP
, rn
);
913 agg_table_finish(it
->imported_vpn
[afi
]);
914 agg_table_finish(it
->imported_encap
[afi
]);
916 if (it
->monitor_exterior_orphans
) {
917 skiplist_free(it
->monitor_exterior_orphans
);
921 void rfapiImportTableRefDelByIt(struct bgp
*bgp
,
922 struct rfapi_import_table
*it_target
)
925 struct rfapi_import_table
*it
;
926 struct rfapi_import_table
*prev
= NULL
;
933 for (it
= h
->imports
; it
; prev
= it
, it
= it
->next
) {
939 assert(it
->refcount
);
945 prev
->next
= it
->next
;
947 h
->imports
= it
->next
;
949 rfapiImportTableFlush(it
);
950 XFREE(MTYPE_RFAPI_IMPORTTABLE
, it
);
954 #ifdef RFAPI_REQUIRE_ENCAP_BEEC
956 * Look for magic BGP Encapsulation Extended Community value
957 * Format in RFC 5512 Sect. 4.5
959 static int rfapiEcommunitiesMatchBeec(struct ecommunity
*ecom
,
960 bgp_encap_types type
)
967 for (i
= 0; i
< (ecom
->size
* ECOMMUNITY_SIZE
); i
+= ECOMMUNITY_SIZE
) {
973 if (ep
[0] == ECOMMUNITY_ENCODE_OPAQUE
974 && ep
[1] == ECOMMUNITY_OPAQUE_SUBTYPE_ENCAP
975 && ep
[6] == ((type
&& 0xff00) >> 8)
976 && ep
[7] == (type
& 0xff)) {
985 int rfapiEcommunitiesIntersect(struct ecommunity
*e1
, struct ecommunity
*e2
)
994 s1
= ecommunity_ecom2str(e1
, ECOMMUNITY_FORMAT_DISPLAY
, 0);
995 s2
= ecommunity_ecom2str(e2
, ECOMMUNITY_FORMAT_DISPLAY
, 0);
996 vnc_zlog_debug_verbose("%s: e1[%s], e2[%s]", __func__
, s1
, s2
);
997 XFREE(MTYPE_ECOMMUNITY_STR
, s1
);
998 XFREE(MTYPE_ECOMMUNITY_STR
, s2
);
1001 for (i
= 0; i
< e1
->size
; ++i
) {
1002 for (j
= 0; j
< e2
->size
; ++j
) {
1003 if (!memcmp(e1
->val
+ (i
* ECOMMUNITY_SIZE
),
1004 e2
->val
+ (j
* ECOMMUNITY_SIZE
),
1014 int rfapiEcommunityGetLNI(struct ecommunity
*ecom
, uint32_t *lni
)
1019 for (i
= 0; i
< ecom
->size
; ++i
) {
1020 uint8_t *p
= ecom
->val
+ (i
* ECOMMUNITY_SIZE
);
1022 if ((*(p
+ 0) == 0x00) && (*(p
+ 1) == 0x02)) {
1024 *lni
= (*(p
+ 5) << 16) | (*(p
+ 6) << 8)
1033 int rfapiEcommunityGetEthernetTag(struct ecommunity
*ecom
, uint16_t *tag_id
)
1035 struct bgp
*bgp
= bgp_get_default();
1036 *tag_id
= 0; /* default to untagged */
1040 for (i
= 0; i
< ecom
->size
; ++i
) {
1043 const uint8_t *p
= ecom
->val
+ (i
* ECOMMUNITY_SIZE
);
1045 /* High-order octet of type. */
1048 if (*p
++ == ECOMMUNITY_ROUTE_TARGET
) {
1049 if (encode
== ECOMMUNITY_ENCODE_AS4
) {
1050 p
= ptr_get_be32(p
, &as
);
1051 } else if (encode
== ECOMMUNITY_ENCODE_AS
) {
1054 p
+= 2; /* skip next two, tag/vid
1055 always in lowest bytes */
1057 if (as
== bgp
->as
) {
1058 *tag_id
= *p
++ << 8;
1068 static int rfapiVpnBiNhEqualsPt(struct bgp_path_info
*bpi
,
1069 struct rfapi_ip_addr
*hpt
)
1076 family
= BGP_MP_NEXTHOP_FAMILY(bpi
->attr
->mp_nexthop_len
);
1078 if (hpt
->addr_family
!= family
)
1083 if (bpi
->attr
->mp_nexthop_global_in
.s_addr
1084 != hpt
->addr
.v4
.s_addr
)
1089 if (IPV6_ADDR_CMP(&bpi
->attr
->mp_nexthop_global
, &hpt
->addr
.v6
))
1102 * Compare 2 VPN BIs. Return true if they have the same VN and UN addresses
1104 static int rfapiVpnBiSamePtUn(struct bgp_path_info
*bpi1
,
1105 struct bgp_path_info
*bpi2
)
1107 struct prefix pfx_un1
;
1108 struct prefix pfx_un2
;
1114 * VN address comparisons
1117 if (BGP_MP_NEXTHOP_FAMILY(bpi1
->attr
->mp_nexthop_len
)
1118 != BGP_MP_NEXTHOP_FAMILY(bpi2
->attr
->mp_nexthop_len
)) {
1122 switch (BGP_MP_NEXTHOP_FAMILY(bpi1
->attr
->mp_nexthop_len
)) {
1124 if (bpi1
->attr
->mp_nexthop_global_in
.s_addr
1125 != bpi2
->attr
->mp_nexthop_global_in
.s_addr
)
1130 if (IPV6_ADDR_CMP(&bpi1
->attr
->mp_nexthop_global
,
1131 &bpi2
->attr
->mp_nexthop_global
))
1139 memset(&pfx_un1
, 0, sizeof(pfx_un1
));
1140 memset(&pfx_un2
, 0, sizeof(pfx_un2
));
1143 * UN address comparisons
1145 if (rfapiGetVncTunnelUnAddr(bpi1
->attr
, &pfx_un1
)) {
1147 pfx_un1
.family
= bpi1
->extra
->vnc
.import
.un_family
;
1148 switch (bpi1
->extra
->vnc
.import
.un_family
) {
1151 bpi1
->extra
->vnc
.import
.un
.addr4
;
1155 bpi1
->extra
->vnc
.import
.un
.addr6
;
1158 pfx_un1
.family
= AF_UNSPEC
;
1164 if (rfapiGetVncTunnelUnAddr(bpi2
->attr
, &pfx_un2
)) {
1166 pfx_un2
.family
= bpi2
->extra
->vnc
.import
.un_family
;
1167 switch (bpi2
->extra
->vnc
.import
.un_family
) {
1170 bpi2
->extra
->vnc
.import
.un
.addr4
;
1174 bpi2
->extra
->vnc
.import
.un
.addr6
;
1177 pfx_un2
.family
= AF_UNSPEC
;
1183 if (pfx_un1
.family
== AF_UNSPEC
|| pfx_un2
.family
== AF_UNSPEC
)
1186 if (pfx_un1
.family
!= pfx_un2
.family
)
1189 switch (pfx_un1
.family
) {
1191 if (!IPV4_ADDR_SAME(&pfx_un1
.u
.prefix4
, &pfx_un2
.u
.prefix4
))
1195 if (!IPV6_ADDR_SAME(&pfx_un1
.u
.prefix6
, &pfx_un2
.u
.prefix6
))
1204 uint8_t rfapiRfpCost(struct attr
*attr
)
1206 if (attr
->flag
& ATTR_FLAG_BIT(BGP_ATTR_LOCAL_PREF
)) {
1207 if (attr
->local_pref
> 255) {
1210 return 255 - attr
->local_pref
;
1216 /*------------------------------------------
1219 * Find Layer 2 options in an option chain
1225 * l2o layer 2 options extracted
1229 * 1 no options found
1231 --------------------------------------------*/
1232 int rfapi_extract_l2o(
1233 struct bgp_tea_options
*pHop
, /* chain of options */
1234 struct rfapi_l2address_option
*l2o
) /* return extracted value */
1236 struct bgp_tea_options
*p
;
1238 for (p
= pHop
; p
; p
= p
->next
) {
1239 if ((p
->type
== RFAPI_VN_OPTION_TYPE_L2ADDR
)
1240 && (p
->length
>= 8)) {
1244 memcpy(&l2o
->macaddr
, v
, 6);
1246 l2o
->label
= ((v
[6] << 12) & 0xff000)
1247 + ((v
[7] << 4) & 0xff0)
1248 + ((v
[8] >> 4) & 0xf);
1250 l2o
->local_nve_id
= (uint8_t)v
[10];
1252 l2o
->logical_net_id
=
1253 (v
[11] << 16) + (v
[12] << 8) + (v
[13] << 0);
1261 static struct rfapi_next_hop_entry
*
1262 rfapiRouteInfo2NextHopEntry(struct rfapi_ip_prefix
*rprefix
,
1263 struct bgp_path_info
*bpi
, /* route to encode */
1264 uint32_t lifetime
, /* use this in nhe */
1265 struct agg_node
*rn
) /* req for L2 eth addr */
1267 struct rfapi_next_hop_entry
*new;
1268 int have_vnc_tunnel_un
= 0;
1269 const struct prefix
*p
= agg_node_get_prefix(rn
);
1271 #ifdef DEBUG_ENCAP_MONITOR
1272 vnc_zlog_debug_verbose("%s: entry, bpi %p, rn %p", __func__
, bpi
, rn
);
1275 new = XCALLOC(MTYPE_RFAPI_NEXTHOP
, sizeof(struct rfapi_next_hop_entry
));
1278 new->prefix
= *rprefix
;
1281 && decode_rd_type(bpi
->extra
->vnc
.import
.rd
.val
)
1282 == RD_TYPE_VNC_ETH
) {
1285 struct rfapi_vn_option
*vo
;
1287 vo
= XCALLOC(MTYPE_RFAPI_VN_OPTION
,
1288 sizeof(struct rfapi_vn_option
));
1291 vo
->type
= RFAPI_VN_OPTION_TYPE_L2ADDR
;
1293 memcpy(&vo
->v
.l2addr
.macaddr
, &p
->u
.prefix_eth
.octet
, ETH_ALEN
);
1294 /* only low 3 bytes of this are significant */
1295 (void)rfapiEcommunityGetLNI(bgp_attr_get_ecommunity(bpi
->attr
),
1296 &vo
->v
.l2addr
.logical_net_id
);
1297 (void)rfapiEcommunityGetEthernetTag(
1298 bgp_attr_get_ecommunity(bpi
->attr
),
1299 &vo
->v
.l2addr
.tag_id
);
1301 /* local_nve_id comes from lower byte of RD type */
1302 vo
->v
.l2addr
.local_nve_id
= bpi
->extra
->vnc
.import
.rd
.val
[1];
1304 /* label comes from MP_REACH_NLRI label */
1305 vo
->v
.l2addr
.label
= decode_label(&bpi
->extra
->label
[0]);
1307 new->vn_options
= vo
;
1310 * If there is an auxiliary prefix (i.e., host IP address),
1311 * use it as the nexthop prefix instead of the query prefix
1313 if (bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
1314 rfapiQprefix2Rprefix(&bpi
->extra
->vnc
.import
.aux_prefix
,
1319 bgp_encap_types tun_type
= BGP_ENCAP_TYPE_MPLS
; /*Default*/
1320 new->prefix
.cost
= rfapiRfpCost(bpi
->attr
);
1322 struct bgp_attr_encap_subtlv
*pEncap
;
1324 switch (BGP_MP_NEXTHOP_FAMILY(bpi
->attr
->mp_nexthop_len
)) {
1326 new->vn_address
.addr_family
= AF_INET
;
1327 new->vn_address
.addr
.v4
= bpi
->attr
->mp_nexthop_global_in
;
1331 new->vn_address
.addr_family
= AF_INET6
;
1332 new->vn_address
.addr
.v6
= bpi
->attr
->mp_nexthop_global
;
1336 zlog_warn("%s: invalid vpn nexthop length: %d", __func__
,
1337 bpi
->attr
->mp_nexthop_len
);
1338 rfapi_free_next_hop_list(new);
1342 for (pEncap
= bgp_attr_get_vnc_subtlvs(bpi
->attr
); pEncap
;
1343 pEncap
= pEncap
->next
) {
1344 switch (pEncap
->type
) {
1345 case BGP_VNC_SUBTLV_TYPE_LIFETIME
:
1346 /* use configured lifetime, not attr lifetime */
1350 zlog_warn("%s: unknown VNC option type %d", __func__
,
1357 bgp_attr_extcom_tunnel_type(bpi
->attr
, &tun_type
);
1358 if (tun_type
== BGP_ENCAP_TYPE_MPLS
) {
1360 /* MPLS carries UN address in next hop */
1361 rfapiNexthop2Prefix(bpi
->attr
, &p
);
1362 if (p
.family
!= AF_UNSPEC
) {
1363 rfapiQprefix2Raddr(&p
, &new->un_address
);
1364 have_vnc_tunnel_un
= 1;
1368 for (pEncap
= bpi
->attr
->encap_subtlvs
; pEncap
; pEncap
= pEncap
->next
) {
1369 switch (pEncap
->type
) {
1370 case BGP_ENCAP_SUBTLV_TYPE_REMOTE_ENDPOINT
:
1372 * Overrides ENCAP UN address, if any
1374 switch (pEncap
->length
) {
1377 new->un_address
.addr_family
= AF_INET
;
1378 memcpy(&new->un_address
.addr
.v4
, pEncap
->value
,
1380 have_vnc_tunnel_un
= 1;
1384 new->un_address
.addr_family
= AF_INET6
;
1385 memcpy(&new->un_address
.addr
.v6
, pEncap
->value
,
1387 have_vnc_tunnel_un
= 1;
1392 "%s: invalid tunnel subtlv UN addr length (%d) for bpi %p",
1393 __func__
, pEncap
->length
, bpi
);
1398 zlog_warn("%s: unknown Encap Attribute option type %d",
1399 __func__
, pEncap
->type
);
1404 new->un_options
= rfapi_encap_tlv_to_un_option(bpi
->attr
);
1406 #ifdef DEBUG_ENCAP_MONITOR
1407 vnc_zlog_debug_verbose("%s: line %d: have_vnc_tunnel_un=%d", __func__
,
1408 __LINE__
, have_vnc_tunnel_un
);
1411 if (!have_vnc_tunnel_un
&& bpi
->extra
) {
1413 * use cached UN address from ENCAP route
1415 new->un_address
.addr_family
= bpi
->extra
->vnc
.import
.un_family
;
1416 switch (new->un_address
.addr_family
) {
1418 new->un_address
.addr
.v4
=
1419 bpi
->extra
->vnc
.import
.un
.addr4
;
1422 new->un_address
.addr
.v6
=
1423 bpi
->extra
->vnc
.import
.un
.addr6
;
1426 zlog_warn("%s: invalid UN addr family (%d) for bpi %p",
1427 __func__
, new->un_address
.addr_family
, bpi
);
1428 rfapi_free_next_hop_list(new);
1433 new->lifetime
= lifetime
;
1437 int rfapiHasNonRemovedRoutes(struct agg_node
*rn
)
1439 struct bgp_path_info
*bpi
;
1441 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
) {
1444 if (!CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)
1445 && (bpi
->extra
&& !rfapiGetUnAddrOfVpnBi(bpi
, &pfx
))) {
1453 #ifdef DEBUG_IT_NODES
1457 void rfapiDumpNode(struct agg_node
*rn
)
1459 struct bgp_path_info
*bpi
;
1461 vnc_zlog_debug_verbose("%s: rn=%p", __func__
, rn
);
1462 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
) {
1464 int ctrc
= rfapiGetUnAddrOfVpnBi(bpi
, &pfx
);
1467 if (!CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)
1468 && (bpi
->extra
&& !ctrc
)) {
1475 vnc_zlog_debug_verbose(
1476 " bpi=%p, nr=%d, flags=0x%x, extra=%p, ctrc=%d", bpi
,
1477 nr
, bpi
->flags
, bpi
->extra
, ctrc
);
1482 static int rfapiNhlAddNodeRoutes(
1483 struct agg_node
*rn
, /* in */
1484 struct rfapi_ip_prefix
*rprefix
, /* in */
1485 uint32_t lifetime
, /* in */
1486 int removed
, /* in */
1487 struct rfapi_next_hop_entry
**head
, /* in/out */
1488 struct rfapi_next_hop_entry
**tail
, /* in/out */
1489 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1490 struct agg_node
*rfd_rib_node
, /* preload this NVE rib node */
1491 struct prefix
*pfx_target_original
) /* query target */
1493 struct bgp_path_info
*bpi
;
1494 struct rfapi_next_hop_entry
*new;
1495 struct prefix pfx_un
;
1496 struct skiplist
*seen_nexthops
;
1498 const struct prefix
*p
= agg_node_get_prefix(rn
);
1499 int is_l2
= (p
->family
== AF_ETHERNET
);
1502 struct agg_table
*atable
= agg_get_table(rfd_rib_node
);
1503 struct rfapi_descriptor
*rfd
;
1506 rfd
= agg_get_table_info(atable
);
1508 if (rfapiRibFTDFilterRecentPrefix(rfd
, rn
,
1509 pfx_target_original
))
1515 skiplist_new(0, vnc_prefix_cmp
, prefix_free_lists
);
1517 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
) {
1519 struct prefix pfx_vn
;
1520 struct prefix
*newpfx
;
1522 if (removed
&& !CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)) {
1523 #ifdef DEBUG_RETURNED_NHL
1524 vnc_zlog_debug_verbose(
1525 "%s: want holddown, this route not holddown, skip",
1530 if (!removed
&& CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)) {
1539 * Check for excluded VN address
1541 if (rfapiVpnBiNhEqualsPt(bpi
, exclude_vnaddr
))
1545 * Check for VN address (nexthop) copied already
1548 /* L2 routes: semantic nexthop in aux_prefix; VN addr
1550 pfx_vn
= bpi
->extra
->vnc
.import
.aux_prefix
;
1552 rfapiNexthop2Prefix(bpi
->attr
, &pfx_vn
);
1554 if (!skiplist_search(seen_nexthops
, &pfx_vn
, NULL
)) {
1555 #ifdef DEBUG_RETURNED_NHL
1556 vnc_zlog_debug_verbose(
1557 "%s: already put VN/nexthop %pFX, skip",
1563 if (rfapiGetUnAddrOfVpnBi(bpi
, &pfx_un
)) {
1564 #ifdef DEBUG_ENCAP_MONITOR
1565 vnc_zlog_debug_verbose(
1566 "%s: failed to get UN address of this VPN bpi",
1572 newpfx
= prefix_new();
1574 skiplist_insert(seen_nexthops
, newpfx
, newpfx
);
1576 new = rfapiRouteInfo2NextHopEntry(rprefix
, bpi
, lifetime
, rn
);
1578 if (rfapiRibPreloadBi(rfd_rib_node
, &pfx_vn
, &pfx_un
,
1580 /* duplicate filtered by RIB */
1581 rfapi_free_next_hop_list(new);
1588 (*tail
)->next
= new;
1597 skiplist_free(seen_nexthops
);
1606 * omit_node is meant for the situation where we are adding a subtree
1607 * of a parent of some original requested node. The response already
1608 * contains the original requested node, and we don't want to duplicate
1609 * its routes in the list, so we skip it if the right or left node
1610 * matches (of course, we still travel down its child subtrees).
1612 static int rfapiNhlAddSubtree(
1613 struct agg_node
*rn
, /* in */
1614 uint32_t lifetime
, /* in */
1615 struct rfapi_next_hop_entry
**head
, /* in/out */
1616 struct rfapi_next_hop_entry
**tail
, /* in/out */
1617 struct agg_node
*omit_node
, /* in */
1618 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1619 struct agg_table
*rfd_rib_table
, /* preload here */
1620 struct prefix
*pfx_target_original
) /* query target */
1622 struct rfapi_ip_prefix rprefix
;
1625 /* FIXME: need to find a better way here to work without sticking our
1626 * hands in node->link */
1627 if (agg_node_left(rn
) && agg_node_left(rn
) != omit_node
) {
1628 if (agg_node_left(rn
)->info
) {
1629 const struct prefix
*p
=
1630 agg_node_get_prefix(agg_node_left(rn
));
1632 struct agg_node
*rib_rn
= NULL
;
1634 rfapiQprefix2Rprefix(p
, &rprefix
);
1636 rib_rn
= agg_node_get(rfd_rib_table
, p
);
1638 count
= rfapiNhlAddNodeRoutes(
1639 agg_node_left(rn
), &rprefix
, lifetime
, 0, head
,
1640 tail
, exclude_vnaddr
, rib_rn
,
1641 pfx_target_original
);
1643 count
= rfapiNhlAddNodeRoutes(
1644 agg_node_left(rn
), &rprefix
, lifetime
,
1645 1, head
, tail
, exclude_vnaddr
, rib_rn
,
1646 pfx_target_original
);
1650 agg_unlock_node(rib_rn
);
1654 if (agg_node_right(rn
) && agg_node_right(rn
) != omit_node
) {
1655 if (agg_node_right(rn
)->info
) {
1656 const struct prefix
*p
=
1657 agg_node_get_prefix(agg_node_right(rn
));
1659 struct agg_node
*rib_rn
= NULL
;
1661 rfapiQprefix2Rprefix(p
, &rprefix
);
1663 rib_rn
= agg_node_get(rfd_rib_table
, p
);
1665 count
= rfapiNhlAddNodeRoutes(
1666 agg_node_right(rn
), &rprefix
, lifetime
, 0, head
,
1667 tail
, exclude_vnaddr
, rib_rn
,
1668 pfx_target_original
);
1670 count
= rfapiNhlAddNodeRoutes(
1671 agg_node_right(rn
), &rprefix
, lifetime
,
1672 1, head
, tail
, exclude_vnaddr
, rib_rn
,
1673 pfx_target_original
);
1677 agg_unlock_node(rib_rn
);
1681 if (agg_node_left(rn
)) {
1682 rcount
+= rfapiNhlAddSubtree(
1683 agg_node_left(rn
), lifetime
, head
, tail
, omit_node
,
1684 exclude_vnaddr
, rfd_rib_table
, pfx_target_original
);
1686 if (agg_node_right(rn
)) {
1687 rcount
+= rfapiNhlAddSubtree(
1688 agg_node_right(rn
), lifetime
, head
, tail
, omit_node
,
1689 exclude_vnaddr
, rfd_rib_table
, pfx_target_original
);
1696 * Implementation of ROUTE_LIST(node) from RFAPI-Import-Event-Handling.txt
1698 * Construct an rfapi nexthop list based on the routes attached to
1699 * the specified node.
1701 * If there are any routes that do NOT have BGP_PATH_REMOVED set,
1702 * return those only. If there are ONLY routes with BGP_PATH_REMOVED,
1703 * then return those, and also include all the non-removed routes from the
1704 * next less-specific node (i.e., this node's parent) at the end.
1706 struct rfapi_next_hop_entry
*rfapiRouteNode2NextHopList(
1707 struct agg_node
*rn
, uint32_t lifetime
, /* put into nexthop entries */
1708 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1709 struct agg_table
*rfd_rib_table
, /* preload here */
1710 struct prefix
*pfx_target_original
) /* query target */
1712 struct rfapi_ip_prefix rprefix
;
1713 struct rfapi_next_hop_entry
*answer
= NULL
;
1714 struct rfapi_next_hop_entry
*last
= NULL
;
1715 struct agg_node
*parent
;
1716 const struct prefix
*p
= agg_node_get_prefix(rn
);
1718 struct agg_node
*rib_rn
;
1720 #ifdef DEBUG_RETURNED_NHL
1721 vnc_zlog_debug_verbose("%s: called with node pfx=%rRN", __func__
, rn
);
1722 rfapiDebugBacktrace();
1725 rfapiQprefix2Rprefix(p
, &rprefix
);
1727 rib_rn
= rfd_rib_table
? agg_node_get(rfd_rib_table
, p
) : NULL
;
1730 * Add non-withdrawn routes at this node
1732 count
= rfapiNhlAddNodeRoutes(rn
, &rprefix
, lifetime
, 0, &answer
, &last
,
1733 exclude_vnaddr
, rib_rn
,
1734 pfx_target_original
);
1737 * If the list has at least one entry, it's finished
1740 count
+= rfapiNhlAddSubtree(rn
, lifetime
, &answer
, &last
, NULL
,
1741 exclude_vnaddr
, rfd_rib_table
,
1742 pfx_target_original
);
1743 vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__
,
1745 #ifdef DEBUG_RETURNED_NHL
1746 rfapiPrintNhl(NULL
, answer
);
1749 agg_unlock_node(rib_rn
);
1754 * Add withdrawn routes at this node
1756 count
= rfapiNhlAddNodeRoutes(rn
, &rprefix
, lifetime
, 1, &answer
, &last
,
1757 exclude_vnaddr
, rib_rn
,
1758 pfx_target_original
);
1760 agg_unlock_node(rib_rn
);
1762 // rfapiPrintNhl(NULL, answer);
1765 * walk up the tree until we find a node with non-deleted
1766 * routes, then add them
1768 for (parent
= agg_node_parent(rn
); parent
;
1769 parent
= agg_node_parent(parent
)) {
1770 if (rfapiHasNonRemovedRoutes(parent
)) {
1776 * Add non-withdrawn routes from less-specific prefix
1779 const struct prefix
*p
= agg_node_get_prefix(parent
);
1781 rib_rn
= rfd_rib_table
? agg_node_get(rfd_rib_table
, p
) : NULL
;
1782 rfapiQprefix2Rprefix(p
, &rprefix
);
1783 count
+= rfapiNhlAddNodeRoutes(parent
, &rprefix
, lifetime
, 0,
1784 &answer
, &last
, exclude_vnaddr
,
1785 rib_rn
, pfx_target_original
);
1786 count
+= rfapiNhlAddSubtree(parent
, lifetime
, &answer
, &last
,
1787 rn
, exclude_vnaddr
, rfd_rib_table
,
1788 pfx_target_original
);
1790 agg_unlock_node(rib_rn
);
1793 * There is no parent with non-removed routes. Still need to
1794 * add subtree of original node if it contributed routes to the
1798 count
+= rfapiNhlAddSubtree(rn
, lifetime
, &answer
,
1799 &last
, rn
, exclude_vnaddr
,
1801 pfx_target_original
);
1804 vnc_zlog_debug_verbose("%s: %d nexthops, answer=%p", __func__
, count
,
1806 #ifdef DEBUG_RETURNED_NHL
1807 rfapiPrintNhl(NULL
, answer
);
1813 * Construct nexthop list of all routes in table
1815 struct rfapi_next_hop_entry
*rfapiRouteTable2NextHopList(
1816 struct agg_table
*rt
, uint32_t lifetime
, /* put into nexthop entries */
1817 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1818 struct agg_table
*rfd_rib_table
, /* preload this NVE rib table */
1819 struct prefix
*pfx_target_original
) /* query target */
1821 struct agg_node
*rn
;
1822 struct rfapi_next_hop_entry
*biglist
= NULL
;
1823 struct rfapi_next_hop_entry
*nhl
;
1824 struct rfapi_next_hop_entry
*tail
= NULL
;
1827 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
1829 nhl
= rfapiRouteNode2NextHopList(rn
, lifetime
, exclude_vnaddr
,
1831 pfx_target_original
);
1833 tail
= biglist
= nhl
;
1840 while (tail
->next
) {
1847 vnc_zlog_debug_verbose("%s: returning %d routes", __func__
, count
);
1851 struct rfapi_next_hop_entry
*rfapiEthRouteNode2NextHopList(
1852 struct agg_node
*rn
, struct rfapi_ip_prefix
*rprefix
,
1853 uint32_t lifetime
, /* put into nexthop entries */
1854 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1855 struct agg_table
*rfd_rib_table
, /* preload NVE rib table */
1856 struct prefix
*pfx_target_original
) /* query target */
1859 struct rfapi_next_hop_entry
*answer
= NULL
;
1860 struct rfapi_next_hop_entry
*last
= NULL
;
1861 struct agg_node
*rib_rn
;
1863 rib_rn
= rfd_rib_table
1864 ? agg_node_get(rfd_rib_table
, agg_node_get_prefix(rn
))
1867 count
= rfapiNhlAddNodeRoutes(rn
, rprefix
, lifetime
, 0, &answer
, &last
,
1868 NULL
, rib_rn
, pfx_target_original
);
1870 #ifdef DEBUG_ENCAP_MONITOR
1871 vnc_zlog_debug_verbose("%s: node %p: %d non-holddown routes", __func__
,
1876 count
= rfapiNhlAddNodeRoutes(rn
, rprefix
, lifetime
, 1, &answer
,
1877 &last
, exclude_vnaddr
, rib_rn
,
1878 pfx_target_original
);
1879 vnc_zlog_debug_verbose("%s: node %p: %d holddown routes",
1880 __func__
, rn
, count
);
1884 agg_unlock_node(rib_rn
);
1886 #ifdef DEBUG_RETURNED_NHL
1887 rfapiPrintNhl(NULL
, answer
);
1895 * Construct nexthop list of all routes in table
1897 struct rfapi_next_hop_entry
*rfapiEthRouteTable2NextHopList(
1898 uint32_t logical_net_id
, struct rfapi_ip_prefix
*rprefix
,
1899 uint32_t lifetime
, /* put into nexthop entries */
1900 struct rfapi_ip_addr
*exclude_vnaddr
, /* omit routes to same NVE */
1901 struct agg_table
*rfd_rib_table
, /* preload NVE rib node */
1902 struct prefix
*pfx_target_original
) /* query target */
1904 struct rfapi_import_table
*it
;
1905 struct bgp
*bgp
= bgp_get_default();
1906 struct agg_table
*rt
;
1907 struct agg_node
*rn
;
1908 struct rfapi_next_hop_entry
*biglist
= NULL
;
1909 struct rfapi_next_hop_entry
*nhl
;
1910 struct rfapi_next_hop_entry
*tail
= NULL
;
1914 it
= rfapiMacImportTableGet(bgp
, logical_net_id
);
1915 rt
= it
->imported_vpn
[AFI_L2VPN
];
1917 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
1919 nhl
= rfapiEthRouteNode2NextHopList(
1920 rn
, rprefix
, lifetime
, exclude_vnaddr
, rfd_rib_table
,
1921 pfx_target_original
);
1923 tail
= biglist
= nhl
;
1930 while (tail
->next
) {
1937 vnc_zlog_debug_verbose("%s: returning %d routes", __func__
, count
);
1942 * Insert a new bpi to the imported route table node,
1943 * keeping the list of BPIs sorted best route first
1945 static void rfapiBgpInfoAttachSorted(struct agg_node
*rn
,
1946 struct bgp_path_info
*info_new
, afi_t afi
,
1950 struct bgp_path_info
*prev
;
1951 struct bgp_path_info
*next
;
1952 char pfx_buf
[PREFIX2STR_BUFFER
];
1955 bgp
= bgp_get_default(); /* assume 1 instance for now */
1957 if (VNC_DEBUG(IMPORT_BI_ATTACH
)) {
1958 vnc_zlog_debug_verbose("%s: info_new->peer=%p", __func__
,
1960 vnc_zlog_debug_verbose("%s: info_new->peer->su_remote=%p",
1961 __func__
, info_new
->peer
->su_remote
);
1964 for (prev
= NULL
, next
= rn
->info
; next
;
1965 prev
= next
, next
= next
->next
) {
1966 enum bgp_path_selection_reason reason
;
1969 || (!CHECK_FLAG(info_new
->flags
, BGP_PATH_REMOVED
)
1970 && CHECK_FLAG(next
->flags
, BGP_PATH_REMOVED
))
1971 || bgp_path_info_cmp_compatible(bgp
, info_new
, next
,
1974 == -1) { /* -1 if 1st is better */
1978 vnc_zlog_debug_verbose("%s: prev=%p, next=%p", __func__
, prev
, next
);
1980 prev
->next
= info_new
;
1982 rn
->info
= info_new
;
1984 info_new
->prev
= prev
;
1985 info_new
->next
= next
;
1987 next
->prev
= info_new
;
1988 bgp_attr_intern(info_new
->attr
);
1991 static void rfapiBgpInfoDetach(struct agg_node
*rn
, struct bgp_path_info
*bpi
)
1994 * Remove the route (doubly-linked)
1996 // bgp_attr_unintern (&bpi->attr);
1998 bpi
->next
->prev
= bpi
->prev
;
2000 bpi
->prev
->next
= bpi
->next
;
2002 rn
->info
= bpi
->next
;
2006 * For L3-indexed import tables
2008 static int rfapi_bi_peer_rd_cmp(const void *b1
, const void *b2
)
2010 const struct bgp_path_info
*bpi1
= b1
;
2011 const struct bgp_path_info
*bpi2
= b2
;
2016 if (bpi1
->peer
< bpi2
->peer
)
2018 if (bpi1
->peer
> bpi2
->peer
)
2024 return vnc_prefix_cmp(
2025 (const struct prefix
*)&bpi1
->extra
->vnc
.import
.rd
,
2026 (const struct prefix
*)&bpi2
->extra
->vnc
.import
.rd
);
2030 * For L2-indexed import tables
2031 * The BPIs in these tables should ALWAYS have an aux_prefix set because
2032 * they arrive via IPv4 or IPv6 advertisements.
2034 static int rfapi_bi_peer_rd_aux_cmp(const void *b1
, const void *b2
)
2036 const struct bgp_path_info
*bpi1
= b1
;
2037 const struct bgp_path_info
*bpi2
= b2
;
2043 if (bpi1
->peer
< bpi2
->peer
)
2045 if (bpi1
->peer
> bpi2
->peer
)
2051 rc
= vnc_prefix_cmp((struct prefix
*)&bpi1
->extra
->vnc
.import
.rd
,
2052 (struct prefix
*)&bpi2
->extra
->vnc
.import
.rd
);
2058 * L2 import tables can have multiple entries with the
2059 * same MAC address, same RD, but different L3 addresses.
2061 * Use presence of aux_prefix with AF=ethernet and prefixlen=1
2062 * as magic value to signify explicit wildcarding of the aux_prefix.
2063 * This magic value will not appear in bona fide bpi entries in
2064 * the import table, but is allowed in the "fake" bpi used to
2065 * probe the table when searching. (We have to test both b1 and b2
2066 * because there is no guarantee of the order the test key and
2067 * the real key will be passed)
2069 if ((bpi1
->extra
->vnc
.import
.aux_prefix
.family
== AF_ETHERNET
2070 && (bpi1
->extra
->vnc
.import
.aux_prefix
.prefixlen
== 1))
2071 || (bpi2
->extra
->vnc
.import
.aux_prefix
.family
== AF_ETHERNET
2072 && (bpi2
->extra
->vnc
.import
.aux_prefix
.prefixlen
== 1))) {
2075 * wildcard aux address specified
2080 return vnc_prefix_cmp(&bpi1
->extra
->vnc
.import
.aux_prefix
,
2081 &bpi2
->extra
->vnc
.import
.aux_prefix
);
2086 * Index on RD and Peer
2088 static void rfapiItBiIndexAdd(struct agg_node
*rn
, /* Import table VPN node */
2089 struct bgp_path_info
*bpi
) /* new BPI */
2091 struct skiplist
*sl
;
2092 const struct prefix
*p
;
2099 char buf
[RD_ADDRSTRLEN
];
2101 vnc_zlog_debug_verbose("%s: bpi %p, peer %p, rd %s", __func__
,
2103 prefix_rd2str(&bpi
->extra
->vnc
.import
.rd
,
2107 sl
= RFAPI_RDINDEX_W_ALLOC(rn
);
2109 p
= agg_node_get_prefix(rn
);
2110 if (AF_ETHERNET
== p
->family
) {
2111 sl
= skiplist_new(0, rfapi_bi_peer_rd_aux_cmp
, NULL
);
2113 sl
= skiplist_new(0, rfapi_bi_peer_rd_cmp
, NULL
);
2115 RFAPI_IT_EXTRA_GET(rn
)->u
.vpn
.idx_rd
= sl
;
2116 agg_lock_node(rn
); /* for skiplist */
2118 assert(!skiplist_insert(sl
, (void *)bpi
, (void *)bpi
));
2119 agg_lock_node(rn
); /* for skiplist entry */
2121 /* NB: BPIs in import tables are not refcounted */
2124 static void rfapiItBiIndexDump(struct agg_node
*rn
)
2126 struct skiplist
*sl
;
2127 void *cursor
= NULL
;
2128 struct bgp_path_info
*k
;
2129 struct bgp_path_info
*v
;
2132 sl
= RFAPI_RDINDEX(rn
);
2136 for (rc
= skiplist_next(sl
, (void **)&k
, (void **)&v
, &cursor
); !rc
;
2137 rc
= skiplist_next(sl
, (void **)&k
, (void **)&v
, &cursor
)) {
2139 char buf
[RD_ADDRSTRLEN
];
2140 char buf_aux_pfx
[PREFIX_STRLEN
];
2142 prefix_rd2str(&k
->extra
->vnc
.import
.rd
, buf
, sizeof(buf
));
2143 if (k
->extra
->vnc
.import
.aux_prefix
.family
) {
2144 prefix2str(&k
->extra
->vnc
.import
.aux_prefix
,
2145 buf_aux_pfx
, sizeof(buf_aux_pfx
));
2147 strlcpy(buf_aux_pfx
, "(none)", sizeof(buf_aux_pfx
));
2149 vnc_zlog_debug_verbose("bpi %p, peer %p, rd %s, aux_prefix %s",
2150 k
, k
->peer
, buf
, buf_aux_pfx
);
2154 static struct bgp_path_info
*rfapiItBiIndexSearch(
2155 struct agg_node
*rn
, /* Import table VPN node */
2156 struct prefix_rd
*prd
, struct peer
*peer
,
2157 const struct prefix
*aux_prefix
) /* optional L3 addr for L2 ITs */
2159 struct skiplist
*sl
;
2161 struct bgp_path_info bpi_fake
= {0};
2162 struct bgp_path_info_extra bpi_extra
= {0};
2163 struct bgp_path_info
*bpi_result
;
2165 sl
= RFAPI_RDINDEX(rn
);
2169 #ifdef DEBUG_BI_SEARCH
2171 char buf
[RD_ADDRSTRLEN
];
2172 char buf_aux_pfx
[PREFIX_STRLEN
];
2175 prefix2str(aux_prefix
, buf_aux_pfx
,
2176 sizeof(buf_aux_pfx
));
2178 strlcpy(buf_aux_pfx
, "(nil)", sizeof(buf_aux_pfx
));
2180 vnc_zlog_debug_verbose("%s want prd=%s, peer=%p, aux_prefix=%s",
2182 prefix_rd2str(prd
, buf
, sizeof(buf
)),
2184 rfapiItBiIndexDump(rn
);
2188 /* threshold is a WAG */
2189 if (sl
->count
< 3) {
2190 #ifdef DEBUG_BI_SEARCH
2191 vnc_zlog_debug_verbose("%s: short list algorithm", __func__
);
2193 /* if short list, linear search might be faster */
2194 for (bpi_result
= rn
->info
; bpi_result
;
2195 bpi_result
= bpi_result
->next
) {
2196 #ifdef DEBUG_BI_SEARCH
2198 char buf
[RD_ADDRSTRLEN
];
2200 vnc_zlog_debug_verbose(
2201 "%s: bpi has prd=%s, peer=%p", __func__
,
2202 prefix_rd2str(&bpi_result
->extra
->vnc
2208 if (peer
== bpi_result
->peer
2209 && !prefix_cmp((struct prefix
*)&bpi_result
->extra
2211 (struct prefix
*)prd
)) {
2213 #ifdef DEBUG_BI_SEARCH
2214 vnc_zlog_debug_verbose(
2215 "%s: peer and RD same, doing aux_prefix check",
2221 &bpi_result
->extra
->vnc
.import
2224 #ifdef DEBUG_BI_SEARCH
2225 vnc_zlog_debug_verbose("%s: match",
2235 bpi_fake
.peer
= peer
;
2236 bpi_fake
.extra
= &bpi_extra
;
2237 bpi_fake
.extra
->vnc
.import
.rd
= *prd
;
2239 bpi_fake
.extra
->vnc
.import
.aux_prefix
= *aux_prefix
;
2242 bpi_fake
.extra
->vnc
.import
.aux_prefix
.family
= AF_ETHERNET
;
2243 bpi_fake
.extra
->vnc
.import
.aux_prefix
.prefixlen
= 1;
2246 rc
= skiplist_search(sl
, (void *)&bpi_fake
, (void *)&bpi_result
);
2249 #ifdef DEBUG_BI_SEARCH
2250 vnc_zlog_debug_verbose("%s: no match", __func__
);
2255 #ifdef DEBUG_BI_SEARCH
2256 vnc_zlog_debug_verbose("%s: matched bpi=%p", __func__
, bpi_result
);
2262 static void rfapiItBiIndexDel(struct agg_node
*rn
, /* Import table VPN node */
2263 struct bgp_path_info
*bpi
) /* old BPI */
2265 struct skiplist
*sl
;
2269 char buf
[RD_ADDRSTRLEN
];
2271 vnc_zlog_debug_verbose("%s: bpi %p, peer %p, rd %s", __func__
,
2273 prefix_rd2str(&bpi
->extra
->vnc
.import
.rd
,
2277 sl
= RFAPI_RDINDEX(rn
);
2280 rc
= skiplist_delete(sl
, (void *)(bpi
), (void *)bpi
);
2282 rfapiItBiIndexDump(rn
);
2286 agg_unlock_node(rn
); /* for skiplist entry */
2288 /* NB: BPIs in import tables are not refcounted */
2292 * Add a backreference at the ENCAP node to the VPN route that
2296 rfapiMonitorEncapAdd(struct rfapi_import_table
*import_table
,
2297 struct prefix
*p
, /* VN address */
2298 struct agg_node
*vpn_rn
, /* VPN node */
2299 struct bgp_path_info
*vpn_bpi
) /* VPN bpi/route */
2301 afi_t afi
= family2afi(p
->family
);
2302 struct agg_node
*rn
;
2303 struct rfapi_monitor_encap
*m
;
2306 rn
= agg_node_get(import_table
->imported_encap
[afi
], p
); /* locks rn */
2309 m
= XCALLOC(MTYPE_RFAPI_MONITOR_ENCAP
,
2310 sizeof(struct rfapi_monitor_encap
));
2317 /* insert to encap node's list */
2318 m
->next
= RFAPI_MONITOR_ENCAP(rn
);
2321 RFAPI_MONITOR_ENCAP_W_ALLOC(rn
) = m
;
2323 /* for easy lookup when deleting vpn route */
2324 vpn_bpi
->extra
->vnc
.import
.hme
= m
;
2326 vnc_zlog_debug_verbose(
2327 "%s: it=%p, vpn_bpi=%p, afi=%d, encap rn=%p, setting vpn_bpi->extra->vnc.import.hme=%p",
2328 __func__
, import_table
, vpn_bpi
, afi
, rn
, m
);
2330 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, 0);
2331 bgp_attr_intern(vpn_bpi
->attr
);
2334 static void rfapiMonitorEncapDelete(struct bgp_path_info
*vpn_bpi
)
2337 * Remove encap monitor
2339 vnc_zlog_debug_verbose("%s: vpn_bpi=%p", __func__
, vpn_bpi
);
2340 if (vpn_bpi
->extra
) {
2341 struct rfapi_monitor_encap
*hme
=
2342 vpn_bpi
->extra
->vnc
.import
.hme
;
2346 vnc_zlog_debug_verbose("%s: hme=%p", __func__
, hme
);
2348 /* Refcount checking takes too long here */
2349 // RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 0);
2351 hme
->next
->prev
= hme
->prev
;
2353 hme
->prev
->next
= hme
->next
;
2355 RFAPI_MONITOR_ENCAP_W_ALLOC(hme
->rn
) =
2357 /* Refcount checking takes too long here */
2358 // RFAPI_CHECK_REFCOUNT(hme->rn, SAFI_ENCAP, 1);
2360 /* see if the struct rfapi_it_extra is empty and can be
2362 rfapiMonitorExtraPrune(SAFI_ENCAP
, hme
->rn
);
2364 agg_unlock_node(hme
->rn
); /* decr ref count */
2365 XFREE(MTYPE_RFAPI_MONITOR_ENCAP
, hme
);
2366 vpn_bpi
->extra
->vnc
.import
.hme
= NULL
;
2372 * quagga lib/thread.h says this must return int even though
2373 * it doesn't do anything with the return value
2375 static void rfapiWithdrawTimerVPN(struct thread
*t
)
2377 struct rfapi_withdraw
*wcb
= t
->arg
;
2378 struct bgp_path_info
*bpi
= wcb
->info
;
2379 struct bgp
*bgp
= bgp_get_default();
2380 const struct prefix
*p
;
2381 struct rfapi_monitor_vpn
*moved
;
2385 vnc_zlog_debug_verbose(
2386 "%s: NULL BGP pointer, assume shutdown race condition!!!",
2390 if (CHECK_FLAG(bgp
->flags
, BGP_FLAG_DELETE_IN_PROGRESS
)) {
2391 vnc_zlog_debug_verbose(
2392 "%s: BGP delete in progress, assume shutdown race condition!!!",
2398 assert(wcb
->import_table
);
2401 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_MPLS_VPN
, wcb
->lockoffset
);
2403 vnc_zlog_debug_verbose("%s: removing bpi %p at prefix %pRN", __func__
,
2407 * Remove the route (doubly-linked)
2409 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_VALID
)
2410 && VALID_INTERIOR_TYPE(bpi
->type
))
2411 RFAPI_MONITOR_EXTERIOR(wcb
->node
)->valid_interior_count
--;
2413 p
= agg_node_get_prefix(wcb
->node
);
2414 afi
= family2afi(p
->family
);
2415 wcb
->import_table
->holddown_count
[afi
] -= 1; /* keep count consistent */
2416 rfapiItBiIndexDel(wcb
->node
, bpi
);
2417 rfapiBgpInfoDetach(wcb
->node
, bpi
); /* with removed bpi */
2419 vnc_import_bgp_exterior_del_route_interior(bgp
, wcb
->import_table
,
2424 * If VNC is configured to send response remove messages, AND
2425 * if the removed route had a UN address, do response removal
2428 if (!(bgp
->rfapi_cfg
->flags
2429 & BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE
)) {
2431 int has_valid_duplicate
= 0;
2432 struct bgp_path_info
*bpii
;
2435 * First check if there are any OTHER routes at this node
2436 * that have the same nexthop and a valid UN address. If
2437 * there are (e.g., from other peers), then the route isn't
2438 * really gone, so skip sending a response removal message.
2440 for (bpii
= wcb
->node
->info
; bpii
; bpii
= bpii
->next
) {
2441 if (rfapiVpnBiSamePtUn(bpi
, bpii
)) {
2442 has_valid_duplicate
= 1;
2447 vnc_zlog_debug_verbose("%s: has_valid_duplicate=%d", __func__
,
2448 has_valid_duplicate
);
2450 if (!has_valid_duplicate
) {
2451 rfapiRibPendingDeleteRoute(bgp
, wcb
->import_table
, afi
,
2456 rfapiMonitorEncapDelete(bpi
);
2459 * If there are no VPN monitors at this VPN Node A,
2462 if (!RFAPI_MONITOR_VPN(wcb
->node
)) {
2463 vnc_zlog_debug_verbose("%s: no VPN monitors at this node",
2469 * rfapiMonitorMoveShorter only moves monitors if there are
2470 * no remaining valid routes at the current node
2472 moved
= rfapiMonitorMoveShorter(wcb
->node
, 1);
2475 rfapiMonitorMovedUp(wcb
->import_table
, wcb
->node
, moved
->node
,
2483 rfapiBgpInfoFree(bpi
);
2487 * If route count at this node has gone to 0, withdraw exported prefix
2489 if (!wcb
->node
->info
) {
2490 /* see if the struct rfapi_it_extra is empty and can be freed */
2491 rfapiMonitorExtraPrune(SAFI_MPLS_VPN
, wcb
->node
);
2492 vnc_direct_bgp_del_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2493 vnc_zebra_del_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2496 * nexthop change event
2497 * vnc_direct_bgp_add_prefix() will recompute the VN addr
2500 vnc_direct_bgp_add_prefix(bgp
, wcb
->import_table
, wcb
->node
);
2503 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_MPLS_VPN
, 1 + wcb
->lockoffset
);
2504 agg_unlock_node(wcb
->node
); /* decr ref count */
2505 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
2509 * This works for multiprotocol extension, but not for plain ol'
2510 * unicast IPv4 because that nexthop is stored in attr->nexthop
2512 void rfapiNexthop2Prefix(struct attr
*attr
, struct prefix
*p
)
2517 memset(p
, 0, sizeof(struct prefix
));
2519 switch (p
->family
= BGP_MP_NEXTHOP_FAMILY(attr
->mp_nexthop_len
)) {
2521 p
->u
.prefix4
= attr
->mp_nexthop_global_in
;
2522 p
->prefixlen
= IPV4_MAX_BITLEN
;
2526 p
->u
.prefix6
= attr
->mp_nexthop_global
;
2527 p
->prefixlen
= IPV6_MAX_BITLEN
;
2531 vnc_zlog_debug_verbose("%s: Family is unknown = %d", __func__
,
2536 void rfapiUnicastNexthop2Prefix(afi_t afi
, struct attr
*attr
, struct prefix
*p
)
2538 if (afi
== AFI_IP
) {
2539 p
->family
= AF_INET
;
2540 p
->prefixlen
= IPV4_MAX_BITLEN
;
2541 p
->u
.prefix4
= attr
->nexthop
;
2543 rfapiNexthop2Prefix(attr
, p
);
2547 static int rfapiAttrNexthopAddrDifferent(struct prefix
*p1
, struct prefix
*p2
)
2550 vnc_zlog_debug_verbose("%s: p1 or p2 is NULL", __func__
);
2555 * Are address families the same?
2557 if (p1
->family
!= p2
->family
) {
2561 switch (p1
->family
) {
2563 if (IPV4_ADDR_SAME(&p1
->u
.prefix4
, &p2
->u
.prefix4
))
2568 if (IPV6_ADDR_SAME(&p1
->u
.prefix6
, &p2
->u
.prefix6
))
2579 static void rfapiCopyUnEncap2VPN(struct bgp_path_info
*encap_bpi
,
2580 struct bgp_path_info
*vpn_bpi
)
2582 if (!vpn_bpi
|| !vpn_bpi
->extra
) {
2583 zlog_warn("%s: no vpn bpi attr/extra, can't copy UN address",
2588 switch (BGP_MP_NEXTHOP_FAMILY(encap_bpi
->attr
->mp_nexthop_len
)) {
2592 * instrumentation to debug segfault of 091127
2594 vnc_zlog_debug_verbose("%s: vpn_bpi=%p", __func__
, vpn_bpi
);
2595 vnc_zlog_debug_verbose("%s: vpn_bpi->extra=%p", __func__
,
2598 vpn_bpi
->extra
->vnc
.import
.un_family
= AF_INET
;
2599 vpn_bpi
->extra
->vnc
.import
.un
.addr4
=
2600 encap_bpi
->attr
->mp_nexthop_global_in
;
2604 vpn_bpi
->extra
->vnc
.import
.un_family
= AF_INET6
;
2605 vpn_bpi
->extra
->vnc
.import
.un
.addr6
=
2606 encap_bpi
->attr
->mp_nexthop_global
;
2610 zlog_warn("%s: invalid encap nexthop length: %d", __func__
,
2611 encap_bpi
->attr
->mp_nexthop_len
);
2612 vpn_bpi
->extra
->vnc
.import
.un_family
= AF_UNSPEC
;
2618 * returns 0 on success, nonzero on error
2621 rfapiWithdrawEncapUpdateCachedUn(struct rfapi_import_table
*import_table
,
2622 struct bgp_path_info
*encap_bpi
,
2623 struct agg_node
*vpn_rn
,
2624 struct bgp_path_info
*vpn_bpi
)
2629 * clear cached UN address
2631 if (!vpn_bpi
|| !vpn_bpi
->extra
) {
2633 "%s: missing VPN bpi/extra, can't clear UN addr",
2637 vpn_bpi
->extra
->vnc
.import
.un_family
= AF_UNSPEC
;
2638 memset(&vpn_bpi
->extra
->vnc
.import
.un
, 0,
2639 sizeof(vpn_bpi
->extra
->vnc
.import
.un
));
2640 if (CHECK_FLAG(vpn_bpi
->flags
, BGP_PATH_VALID
)) {
2641 if (rfapiGetVncTunnelUnAddr(vpn_bpi
->attr
, NULL
)) {
2642 UNSET_FLAG(vpn_bpi
->flags
, BGP_PATH_VALID
);
2643 if (VALID_INTERIOR_TYPE(vpn_bpi
->type
))
2644 RFAPI_MONITOR_EXTERIOR(vpn_rn
)
2645 ->valid_interior_count
--;
2646 /* signal interior route withdrawal to
2647 * import-exterior */
2648 vnc_import_bgp_exterior_del_route_interior(
2649 bgp_get_default(), import_table
, vpn_rn
,
2656 zlog_warn("%s: missing VPN bpi, can't clear UN addr",
2660 rfapiCopyUnEncap2VPN(encap_bpi
, vpn_bpi
);
2661 if (!CHECK_FLAG(vpn_bpi
->flags
, BGP_PATH_VALID
)) {
2662 SET_FLAG(vpn_bpi
->flags
, BGP_PATH_VALID
);
2663 if (VALID_INTERIOR_TYPE(vpn_bpi
->type
))
2664 RFAPI_MONITOR_EXTERIOR(vpn_rn
)
2665 ->valid_interior_count
++;
2666 /* signal interior route withdrawal to import-exterior
2668 vnc_import_bgp_exterior_add_route_interior(
2669 bgp_get_default(), import_table
, vpn_rn
,
2676 static void rfapiWithdrawTimerEncap(struct thread
*t
)
2678 struct rfapi_withdraw
*wcb
= t
->arg
;
2679 struct bgp_path_info
*bpi
= wcb
->info
;
2680 int was_first_route
= 0;
2681 struct rfapi_monitor_encap
*em
;
2682 struct skiplist
*vpn_node_sl
= skiplist_new(0, NULL
, NULL
);
2686 assert(wcb
->import_table
);
2688 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_ENCAP
, 0);
2690 if (wcb
->node
->info
== bpi
)
2691 was_first_route
= 1;
2694 * Remove the route/bpi and free it
2696 rfapiBgpInfoDetach(wcb
->node
, bpi
);
2697 rfapiBgpInfoFree(bpi
);
2699 if (!was_first_route
)
2702 for (em
= RFAPI_MONITOR_ENCAP(wcb
->node
); em
; em
= em
->next
) {
2705 * Update monitoring VPN BPIs with new encap info at the
2706 * head of the encap bpi chain (which could be NULL after
2707 * removing the expiring bpi above)
2709 if (rfapiWithdrawEncapUpdateCachedUn(wcb
->import_table
,
2710 wcb
->node
->info
, em
->node
,
2715 * Build a list of unique VPN nodes referenced by these
2717 * Use a skiplist for speed.
2719 skiplist_insert(vpn_node_sl
, em
->node
, em
->node
);
2724 * for each VPN node referenced in the ENCAP monitors:
2726 struct agg_node
*rn
;
2727 while (!skiplist_first(vpn_node_sl
, (void **)&rn
, NULL
)) {
2728 if (!wcb
->node
->info
) {
2729 struct rfapi_monitor_vpn
*moved
;
2731 moved
= rfapiMonitorMoveShorter(rn
, 0);
2733 // rfapiDoRouteCallback(wcb->import_table,
2734 // moved->node, moved);
2735 rfapiMonitorMovedUp(wcb
->import_table
, rn
,
2736 moved
->node
, moved
);
2739 // rfapiDoRouteCallback(wcb->import_table, rn, NULL);
2740 rfapiMonitorItNodeChanged(wcb
->import_table
, rn
, NULL
);
2742 skiplist_delete_first(vpn_node_sl
);
2746 RFAPI_CHECK_REFCOUNT(wcb
->node
, SAFI_ENCAP
, 1);
2747 agg_unlock_node(wcb
->node
); /* decr ref count */
2748 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
2749 skiplist_free(vpn_node_sl
);
2754 * Works for both VPN and ENCAP routes; timer_service_func is different
2758 rfapiBiStartWithdrawTimer(struct rfapi_import_table
*import_table
,
2759 struct agg_node
*rn
, struct bgp_path_info
*bpi
,
2760 afi_t afi
, safi_t safi
,
2761 void (*timer_service_func
)(struct thread
*))
2764 struct rfapi_withdraw
*wcb
;
2766 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)) {
2768 * Already on the path to being withdrawn,
2769 * should already have a timer set up to
2772 vnc_zlog_debug_verbose(
2773 "%s: already being withdrawn, do nothing", __func__
);
2777 rfapiGetVncLifetime(bpi
->attr
, &lifetime
);
2778 vnc_zlog_debug_verbose("%s: VNC lifetime is %u", __func__
, lifetime
);
2781 * withdrawn routes get to hang around for a while
2783 SET_FLAG(bpi
->flags
, BGP_PATH_REMOVED
);
2785 /* set timer to remove the route later */
2786 lifetime
= rfapiGetHolddownFromLifetime(lifetime
);
2787 vnc_zlog_debug_verbose("%s: using timeout %u", __func__
, lifetime
);
2790 * Stash import_table, node, and info for use by timer
2791 * service routine, which is supposed to free the wcb.
2793 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
2797 wcb
->import_table
= import_table
;
2798 bgp_attr_intern(bpi
->attr
);
2800 if (VNC_DEBUG(VERBOSE
)) {
2801 vnc_zlog_debug_verbose(
2802 "%s: wcb values: node=%p, info=%p, import_table=%p (bpi follows)",
2803 __func__
, wcb
->node
, wcb
->info
, wcb
->import_table
);
2804 rfapiPrintBi(NULL
, bpi
);
2809 if (lifetime
> UINT32_MAX
/ 1001) {
2810 /* sub-optimal case, but will probably never happen */
2811 bpi
->extra
->vnc
.import
.timer
= NULL
;
2812 thread_add_timer(bm
->master
, timer_service_func
, wcb
, lifetime
,
2813 &bpi
->extra
->vnc
.import
.timer
);
2815 static uint32_t jitter
;
2816 uint32_t lifetime_msec
;
2819 * the goal here is to spread out the timers so they are
2820 * sortable in the skip list
2822 if (++jitter
>= 1000)
2825 lifetime_msec
= (lifetime
* 1000) + jitter
;
2827 bpi
->extra
->vnc
.import
.timer
= NULL
;
2828 thread_add_timer_msec(bm
->master
, timer_service_func
, wcb
,
2830 &bpi
->extra
->vnc
.import
.timer
);
2833 /* re-sort route list (BGP_PATH_REMOVED routes are last) */
2834 if (((struct bgp_path_info
*)rn
->info
)->next
) {
2835 rfapiBgpInfoDetach(rn
, bpi
);
2836 rfapiBgpInfoAttachSorted(rn
, bpi
, afi
, safi
);
2841 typedef void(rfapi_bi_filtered_import_f
)(struct rfapi_import_table
*table
,
2842 int action
, struct peer
*peer
,
2843 void *rfd
, const struct prefix
*prefix
,
2844 const struct prefix
*aux_prefix
,
2845 afi_t afi
, struct prefix_rd
*prd
,
2846 struct attr
*attr
, uint8_t type
,
2847 uint8_t sub_type
, uint32_t *label
);
2850 static void rfapiExpireEncapNow(struct rfapi_import_table
*it
,
2851 struct agg_node
*rn
, struct bgp_path_info
*bpi
)
2853 struct rfapi_withdraw
*wcb
;
2857 * pretend we're an expiring timer
2859 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
2862 wcb
->import_table
= it
;
2863 memset(&t
, 0, sizeof(t
));
2865 rfapiWithdrawTimerEncap(&t
); /* frees wcb */
2868 static int rfapiGetNexthop(struct attr
*attr
, struct prefix
*prefix
)
2870 switch (BGP_MP_NEXTHOP_FAMILY(attr
->mp_nexthop_len
)) {
2872 prefix
->family
= AF_INET
;
2873 prefix
->prefixlen
= IPV4_MAX_BITLEN
;
2874 prefix
->u
.prefix4
= attr
->mp_nexthop_global_in
;
2877 prefix
->family
= AF_INET6
;
2878 prefix
->prefixlen
= IPV6_MAX_BITLEN
;
2879 prefix
->u
.prefix6
= attr
->mp_nexthop_global
;
2882 vnc_zlog_debug_verbose("%s: unknown attr->mp_nexthop_len %d",
2883 __func__
, attr
->mp_nexthop_len
);
2890 * import a bgp_path_info if its route target list intersects with the
2891 * import table's route target list
2893 static void rfapiBgpInfoFilteredImportEncap(
2894 struct rfapi_import_table
*import_table
, int action
, struct peer
*peer
,
2895 void *rfd
, /* set for looped back routes */
2896 const struct prefix
*p
,
2897 const struct prefix
*aux_prefix
, /* Unused for encap routes */
2898 afi_t afi
, struct prefix_rd
*prd
,
2899 struct attr
*attr
, /* part of bgp_path_info */
2900 uint8_t type
, /* part of bgp_path_info */
2901 uint8_t sub_type
, /* part of bgp_path_info */
2902 uint32_t *label
) /* part of bgp_path_info */
2904 struct agg_table
*rt
= NULL
;
2905 struct agg_node
*rn
;
2906 struct bgp_path_info
*info_new
;
2907 struct bgp_path_info
*bpi
;
2908 struct bgp_path_info
*next
;
2911 struct prefix p_firstbpi_old
;
2912 struct prefix p_firstbpi_new
;
2914 const char *action_str
= NULL
;
2915 struct prefix un_prefix
;
2918 bgp
= bgp_get_default(); /* assume 1 instance for now */
2921 case FIF_ACTION_UPDATE
:
2922 action_str
= "update";
2924 case FIF_ACTION_WITHDRAW
:
2925 action_str
= "withdraw";
2927 case FIF_ACTION_KILL
:
2928 action_str
= "kill";
2935 vnc_zlog_debug_verbose(
2936 "%s: entry: %s: prefix %s/%d", __func__
, action_str
,
2937 inet_ntop(p
->family
, &p
->u
.prefix
, buf
, BUFSIZ
), p
->prefixlen
);
2939 memset(&p_firstbpi_old
, 0, sizeof(p_firstbpi_old
));
2940 memset(&p_firstbpi_new
, 0, sizeof(p_firstbpi_new
));
2942 if (action
== FIF_ACTION_UPDATE
) {
2944 * Compare rt lists. If no intersection, don't import this route
2945 * On a withdraw, peer and RD are sufficient to determine if
2948 if (!attr
|| !bgp_attr_get_ecommunity(attr
)) {
2950 vnc_zlog_debug_verbose(
2951 "%s: attr, extra, or ecommunity missing, not importing",
2955 #ifdef RFAPI_REQUIRE_ENCAP_BEEC
2956 if (!rfapiEcommunitiesMatchBeec(
2957 bgp_attr_get_ecommunity(attr
))) {
2958 vnc_zlog_debug_verbose(
2959 "%s: it=%p: no match for BGP Encapsulation ecommunity",
2960 __func__
, import_table
);
2964 if (!rfapiEcommunitiesIntersect(
2965 import_table
->rt_import_list
,
2966 bgp_attr_get_ecommunity(attr
))) {
2968 vnc_zlog_debug_verbose(
2969 "%s: it=%p: no ecommunity intersection",
2970 __func__
, import_table
);
2975 * Updates must also have a nexthop address
2977 memset(&un_prefix
, 0,
2978 sizeof(un_prefix
)); /* keep valgrind happy */
2979 if (rfapiGetNexthop(attr
, &un_prefix
)) {
2980 vnc_zlog_debug_verbose("%s: missing nexthop address",
2987 * Figure out which radix tree the route would go into
2992 rt
= import_table
->imported_encap
[afi
];
2996 flog_err(EC_LIB_DEVELOPMENT
, "%s: bad afi %d", __func__
, afi
);
3001 * agg_node_lookup returns a node only if there is at least
3002 * one route attached.
3004 rn
= agg_node_lookup(rt
, p
);
3006 #ifdef DEBUG_ENCAP_MONITOR
3007 vnc_zlog_debug_verbose("%s: initial encap lookup(it=%p) rn=%p",
3008 __func__
, import_table
, rn
);
3013 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, 1);
3014 agg_unlock_node(rn
); /* undo lock in agg_node_lookup */
3018 * capture nexthop of first bpi
3021 rfapiNexthop2Prefix(
3022 ((struct bgp_path_info
*)(rn
->info
))->attr
,
3026 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
) {
3029 * Does this bgp_path_info refer to the same route
3030 * as we are trying to add?
3032 vnc_zlog_debug_verbose("%s: comparing BPI %p", __func__
,
3039 * RD of import table bpi is in
3040 * bpi->extra->vnc.import.rd RD of info_orig is in prd
3043 vnc_zlog_debug_verbose("%s: no bpi->extra",
3048 (struct prefix
*)&bpi
->extra
->vnc
.import
.rd
,
3049 (struct prefix
*)prd
)) {
3051 vnc_zlog_debug_verbose("%s: prd does not match",
3059 if (bpi
->peer
!= peer
) {
3060 vnc_zlog_debug_verbose(
3061 "%s: peer does not match", __func__
);
3065 vnc_zlog_debug_verbose("%s: found matching bpi",
3068 /* Same route. Delete this bpi, replace with new one */
3070 if (action
== FIF_ACTION_WITHDRAW
) {
3072 vnc_zlog_debug_verbose(
3073 "%s: withdrawing at prefix %pRN",
3076 rfapiBiStartWithdrawTimer(
3077 import_table
, rn
, bpi
, afi
, SAFI_ENCAP
,
3078 rfapiWithdrawTimerEncap
);
3081 vnc_zlog_debug_verbose(
3082 "%s: %s at prefix %pRN", __func__
,
3083 ((action
== FIF_ACTION_KILL
)
3089 * If this route is waiting to be deleted
3091 * a previous withdraw, we must cancel its
3094 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)
3095 && bpi
->extra
->vnc
.import
.timer
) {
3098 &(bpi
->extra
->vnc
.import
.timer
);
3099 struct rfapi_withdraw
*wcb
= (*t
)->arg
;
3101 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3105 if (action
== FIF_ACTION_UPDATE
) {
3106 rfapiBgpInfoDetach(rn
, bpi
);
3107 rfapiBgpInfoFree(bpi
);
3111 * Kill: do export stuff when removing
3114 struct rfapi_withdraw
*wcb
;
3118 * pretend we're an expiring timer
3121 MTYPE_RFAPI_WITHDRAW
,
3122 sizeof(struct rfapi_withdraw
));
3125 wcb
->import_table
= import_table
;
3126 memset(&t
, 0, sizeof(t
));
3128 rfapiWithdrawTimerEncap(
3129 &t
); /* frees wcb */
3138 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, replacing
? 1 : 0);
3140 if (action
== FIF_ACTION_WITHDRAW
|| action
== FIF_ACTION_KILL
)
3144 rfapiBgpInfoCreate(attr
, peer
, rfd
, prd
, type
, sub_type
, NULL
);
3148 agg_lock_node(rn
); /* incr ref count for new BPI */
3150 rn
= agg_node_get(rt
, p
);
3153 vnc_zlog_debug_verbose("%s: (afi=%d, rn=%p) inserting at prefix %pRN",
3154 __func__
, afi
, rn
, rn
);
3156 rfapiBgpInfoAttachSorted(rn
, info_new
, afi
, SAFI_ENCAP
);
3159 * Delete holddown routes from same NVE. See details in
3160 * rfapiBgpInfoFilteredImportVPN()
3162 for (bpi
= info_new
->next
; bpi
; bpi
= next
) {
3164 struct prefix pfx_un
;
3168 if (!CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
))
3172 * We already match the VN address (it is the prefix
3173 * of the route node)
3176 if (!rfapiGetNexthop(bpi
->attr
, &pfx_un
)
3177 && prefix_same(&pfx_un
, &un_prefix
)) {
3185 vnc_zlog_debug_verbose(
3186 "%s: removing holddown bpi matching NVE of new route",
3188 if (bpi
->extra
->vnc
.import
.timer
) {
3190 &(bpi
->extra
->vnc
.import
.timer
);
3191 struct rfapi_withdraw
*wcb
= (*t
)->arg
;
3193 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3196 rfapiExpireEncapNow(import_table
, rn
, bpi
);
3199 rfapiNexthop2Prefix(((struct bgp_path_info
*)(rn
->info
))->attr
,
3203 * If the nexthop address of the selected Encap route (i.e.,
3204 * the UN address) has changed, then we must update the VPN
3205 * routes that refer to this Encap route and possibly force
3208 if (rfapiAttrNexthopAddrDifferent(&p_firstbpi_old
, &p_firstbpi_new
)) {
3210 struct rfapi_monitor_encap
*m
;
3211 struct rfapi_monitor_encap
*mnext
;
3213 struct agg_node
*referenced_vpn_prefix
;
3216 * Optimized approach: build radix tree on the fly to
3217 * hold list of VPN nodes referenced by the ENCAP monitors
3219 * The nodes in this table correspond to prefixes of VPN routes.
3220 * The "info" pointer of the node points to a chain of
3221 * struct rfapi_monitor_encap, each of which refers to a
3222 * specific VPN node.
3224 struct agg_table
*referenced_vpn_table
;
3226 referenced_vpn_table
= agg_table_init();
3227 assert(referenced_vpn_table
);
3230 * iterate over the set of monitors at this ENCAP node.
3232 #ifdef DEBUG_ENCAP_MONITOR
3233 vnc_zlog_debug_verbose("%s: examining monitors at rn=%p",
3236 for (m
= RFAPI_MONITOR_ENCAP(rn
); m
; m
= m
->next
) {
3237 const struct prefix
*p
;
3240 * For each referenced bpi/route, copy the ENCAP route's
3241 * nexthop to the VPN route's cached UN address field
3243 * the address family of the cached UN address field.
3245 rfapiCopyUnEncap2VPN(info_new
, m
->bpi
);
3246 if (!CHECK_FLAG(m
->bpi
->flags
, BGP_PATH_VALID
)) {
3247 SET_FLAG(m
->bpi
->flags
, BGP_PATH_VALID
);
3248 if (VALID_INTERIOR_TYPE(m
->bpi
->type
))
3249 RFAPI_MONITOR_EXTERIOR(m
->node
)
3250 ->valid_interior_count
++;
3251 vnc_import_bgp_exterior_add_route_interior(
3252 bgp
, import_table
, m
->node
, m
->bpi
);
3256 * Build a list of unique VPN nodes referenced by these
3259 * There could be more than one VPN node here with a
3261 * prefix. Those are currently in an unsorted linear
3265 p
= agg_node_get_prefix(m
->node
);
3266 referenced_vpn_prefix
=
3267 agg_node_get(referenced_vpn_table
, p
);
3268 assert(referenced_vpn_prefix
);
3269 for (mnext
= referenced_vpn_prefix
->info
; mnext
;
3270 mnext
= mnext
->next
) {
3272 if (mnext
->node
== m
->node
)
3278 * already have an entry for this VPN node
3280 agg_unlock_node(referenced_vpn_prefix
);
3283 MTYPE_RFAPI_MONITOR_ENCAP
,
3284 sizeof(struct rfapi_monitor_encap
));
3286 mnext
->node
= m
->node
;
3287 mnext
->next
= referenced_vpn_prefix
->info
;
3288 referenced_vpn_prefix
->info
= mnext
;
3293 * for each VPN node referenced in the ENCAP monitors:
3295 for (referenced_vpn_prefix
=
3296 agg_route_top(referenced_vpn_table
);
3297 referenced_vpn_prefix
;
3298 referenced_vpn_prefix
=
3299 agg_route_next(referenced_vpn_prefix
)) {
3301 while ((m
= referenced_vpn_prefix
->info
)) {
3305 rfapiMonitorMoveLonger(m
->node
);
3306 for (n
= m
->node
; n
; n
= agg_node_parent(n
)) {
3307 // rfapiDoRouteCallback(import_table, n,
3310 rfapiMonitorItNodeChanged(import_table
, m
->node
,
3313 referenced_vpn_prefix
->info
= m
->next
;
3314 agg_unlock_node(referenced_vpn_prefix
);
3315 XFREE(MTYPE_RFAPI_MONITOR_ENCAP
, m
);
3318 agg_table_finish(referenced_vpn_table
);
3321 RFAPI_CHECK_REFCOUNT(rn
, SAFI_ENCAP
, 0);
3324 static void rfapiExpireVpnNow(struct rfapi_import_table
*it
,
3325 struct agg_node
*rn
, struct bgp_path_info
*bpi
,
3328 struct rfapi_withdraw
*wcb
;
3332 * pretend we're an expiring timer
3334 wcb
= XCALLOC(MTYPE_RFAPI_WITHDRAW
, sizeof(struct rfapi_withdraw
));
3337 wcb
->import_table
= it
;
3338 wcb
->lockoffset
= lockoffset
;
3339 memset(&t
, 0, sizeof(t
));
3341 rfapiWithdrawTimerVPN(&t
); /* frees wcb */
3346 * import a bgp_path_info if its route target list intersects with the
3347 * import table's route target list
3349 void rfapiBgpInfoFilteredImportVPN(
3350 struct rfapi_import_table
*import_table
, int action
, struct peer
*peer
,
3351 void *rfd
, /* set for looped back routes */
3352 const struct prefix
*p
,
3353 const struct prefix
*aux_prefix
, /* AFI_L2VPN: optional IP */
3354 afi_t afi
, struct prefix_rd
*prd
,
3355 struct attr
*attr
, /* part of bgp_path_info */
3356 uint8_t type
, /* part of bgp_path_info */
3357 uint8_t sub_type
, /* part of bgp_path_info */
3358 uint32_t *label
) /* part of bgp_path_info */
3360 struct agg_table
*rt
= NULL
;
3361 struct agg_node
*rn
;
3363 struct bgp_path_info
*info_new
;
3364 struct bgp_path_info
*bpi
;
3365 struct bgp_path_info
*next
;
3367 struct prefix vn_prefix
;
3368 struct prefix un_prefix
;
3369 int un_prefix_valid
= 0;
3370 struct agg_node
*ern
;
3372 int original_had_routes
= 0;
3373 struct prefix original_nexthop
;
3374 const char *action_str
= NULL
;
3378 bgp
= bgp_get_default(); /* assume 1 instance for now */
3381 case FIF_ACTION_UPDATE
:
3382 action_str
= "update";
3384 case FIF_ACTION_WITHDRAW
:
3385 action_str
= "withdraw";
3387 case FIF_ACTION_KILL
:
3388 action_str
= "kill";
3395 if (import_table
== bgp
->rfapi
->it_ce
)
3398 vnc_zlog_debug_verbose("%s: entry: %s%s: prefix %s/%d: it %p, afi %s",
3399 __func__
, (is_it_ce
? "CE-IT " : ""), action_str
,
3400 rfapi_ntop(p
->family
, &p
->u
.prefix
, buf
, BUFSIZ
),
3401 p
->prefixlen
, import_table
, afi2str(afi
));
3406 * Compare rt lists. If no intersection, don't import this route
3407 * On a withdraw, peer and RD are sufficient to determine if
3410 if (action
== FIF_ACTION_UPDATE
) {
3411 if (!attr
|| !bgp_attr_get_ecommunity(attr
)) {
3413 vnc_zlog_debug_verbose(
3414 "%s: attr, extra, or ecommunity missing, not importing",
3418 if ((import_table
!= bgp
->rfapi
->it_ce
) &&
3419 !rfapiEcommunitiesIntersect(
3420 import_table
->rt_import_list
,
3421 bgp_attr_get_ecommunity(attr
))) {
3423 vnc_zlog_debug_verbose(
3424 "%s: it=%p: no ecommunity intersection",
3425 __func__
, import_table
);
3429 memset(&vn_prefix
, 0,
3430 sizeof(vn_prefix
)); /* keep valgrind happy */
3431 if (rfapiGetNexthop(attr
, &vn_prefix
)) {
3432 /* missing nexthop address would be a bad, bad thing */
3433 vnc_zlog_debug_verbose("%s: missing nexthop", __func__
);
3439 * Figure out which radix tree the route would go into
3445 rt
= import_table
->imported_vpn
[afi
];
3449 flog_err(EC_LIB_DEVELOPMENT
, "%s: bad afi %d", __func__
, afi
);
3454 memset(&original_nexthop
, 0, sizeof(original_nexthop
));
3457 * agg_node_lookup returns a node only if there is at least
3458 * one route attached.
3460 rn
= agg_node_lookup(rt
, p
);
3462 vnc_zlog_debug_verbose("%s: rn=%p", __func__
, rn
);
3466 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, 1);
3467 agg_unlock_node(rn
); /* undo lock in agg_node_lookup */
3470 original_had_routes
= 1;
3472 if (VNC_DEBUG(VERBOSE
)) {
3473 vnc_zlog_debug_verbose("%s: showing IT node on entry",
3475 rfapiShowItNode(NULL
, rn
); /* debug */
3479 * Look for same route (will have same RD and peer)
3481 bpi
= rfapiItBiIndexSearch(rn
, prd
, peer
, aux_prefix
);
3486 * This was an old test when we iterated over the
3487 * BPIs linearly. Since we're now looking up with
3488 * RD and peer, comparing types should not be
3489 * needed. Changed to assertion.
3491 * Compare types. Doing so prevents a RFP-originated
3492 * route from matching an imported route, for example.
3494 if (VNC_DEBUG(VERBOSE
) && bpi
->type
!= type
)
3495 /* should be handled by RDs, but warn for now */
3496 zlog_warn("%s: type mismatch! (bpi=%d, arg=%d)",
3497 __func__
, bpi
->type
, type
);
3499 vnc_zlog_debug_verbose("%s: found matching bpi",
3503 * In the special CE table, withdrawals occur without
3506 if (import_table
== bgp
->rfapi
->it_ce
) {
3507 vnc_direct_bgp_del_route_ce(bgp
, rn
, bpi
);
3508 if (action
== FIF_ACTION_WITHDRAW
)
3509 action
= FIF_ACTION_KILL
;
3512 if (action
== FIF_ACTION_WITHDRAW
) {
3514 int washolddown
= CHECK_FLAG(bpi
->flags
,
3517 vnc_zlog_debug_verbose(
3518 "%s: withdrawing at prefix %pRN%s",
3521 ? " (already being withdrawn)"
3526 rfapiBiStartWithdrawTimer(
3527 import_table
, rn
, bpi
, afi
,
3529 rfapiWithdrawTimerVPN
);
3531 RFAPI_UPDATE_ITABLE_COUNT(
3532 bpi
, import_table
, afi
, -1);
3533 import_table
->holddown_count
[afi
] += 1;
3537 vnc_zlog_debug_verbose(
3538 "%s: %s at prefix %pRN", __func__
,
3539 ((action
== FIF_ACTION_KILL
)
3545 * If this route is waiting to be deleted
3547 * a previous withdraw, we must cancel its
3550 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)
3551 && bpi
->extra
->vnc
.import
.timer
) {
3554 &(bpi
->extra
->vnc
.import
.timer
);
3555 struct rfapi_withdraw
*wcb
= (*t
)->arg
;
3557 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3560 import_table
->holddown_count
[afi
] -= 1;
3561 RFAPI_UPDATE_ITABLE_COUNT(
3562 bpi
, import_table
, afi
, 1);
3565 * decrement remote count (if route is remote)
3567 * we are going to remove it below
3569 RFAPI_UPDATE_ITABLE_COUNT(bpi
, import_table
,
3571 if (action
== FIF_ACTION_UPDATE
) {
3575 * make copy of original nexthop so we
3576 * can see if it changed
3578 rfapiGetNexthop(bpi
->attr
,
3582 * remove bpi without doing any export
3585 if (CHECK_FLAG(bpi
->flags
,
3587 && VALID_INTERIOR_TYPE(bpi
->type
))
3588 RFAPI_MONITOR_EXTERIOR(rn
)
3589 ->valid_interior_count
--;
3590 rfapiItBiIndexDel(rn
, bpi
);
3591 rfapiBgpInfoDetach(rn
, bpi
);
3592 rfapiMonitorEncapDelete(bpi
);
3593 vnc_import_bgp_exterior_del_route_interior(
3594 bgp
, import_table
, rn
, bpi
);
3595 rfapiBgpInfoFree(bpi
);
3599 * remove bpi and do export processing
3601 import_table
->holddown_count
[afi
] += 1;
3602 rfapiExpireVpnNow(import_table
, rn
, bpi
,
3610 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, replacing
? 1 : 0);
3612 if (action
== FIF_ACTION_WITHDRAW
|| action
== FIF_ACTION_KILL
) {
3618 rfapiBgpInfoCreate(attr
, peer
, rfd
, prd
, type
, sub_type
, label
);
3621 * lookup un address in encap table
3623 ern
= agg_node_match(import_table
->imported_encap
[afi
], &vn_prefix
);
3625 rfapiCopyUnEncap2VPN(ern
->info
, info_new
);
3626 agg_unlock_node(ern
); /* undo lock in route_note_match */
3628 /* Not a big deal, just means VPN route got here first */
3629 vnc_zlog_debug_verbose("%s: no encap route for vn addr %pFX",
3630 __func__
, &vn_prefix
);
3631 info_new
->extra
->vnc
.import
.un_family
= AF_UNSPEC
;
3639 * No need to increment reference count, so only "get"
3640 * if the node is not there already
3642 rn
= agg_node_get(rt
, p
);
3646 * For ethernet routes, if there is an accompanying IP address,
3647 * save it in the bpi
3649 if ((AFI_L2VPN
== afi
) && aux_prefix
) {
3651 vnc_zlog_debug_verbose("%s: setting BPI's aux_prefix",
3653 info_new
->extra
->vnc
.import
.aux_prefix
= *aux_prefix
;
3656 vnc_zlog_debug_verbose("%s: inserting bpi %p at prefix %pRN #%d",
3657 __func__
, info_new
, rn
,
3658 agg_node_get_lock_count(rn
));
3660 rfapiBgpInfoAttachSorted(rn
, info_new
, afi
, SAFI_MPLS_VPN
);
3661 rfapiItBiIndexAdd(rn
, info_new
);
3662 if (!rfapiGetUnAddrOfVpnBi(info_new
, NULL
)) {
3663 if (VALID_INTERIOR_TYPE(info_new
->type
))
3664 RFAPI_MONITOR_EXTERIOR(rn
)->valid_interior_count
++;
3665 SET_FLAG(info_new
->flags
, BGP_PATH_VALID
);
3667 RFAPI_UPDATE_ITABLE_COUNT(info_new
, import_table
, afi
, 1);
3668 vnc_import_bgp_exterior_add_route_interior(bgp
, import_table
, rn
,
3671 if (import_table
== bgp
->rfapi
->it_ce
)
3672 vnc_direct_bgp_add_route_ce(bgp
, rn
, info_new
);
3674 if (VNC_DEBUG(VERBOSE
)) {
3675 vnc_zlog_debug_verbose("%s: showing IT node", __func__
);
3676 rfapiShowItNode(NULL
, rn
); /* debug */
3679 rfapiMonitorEncapAdd(import_table
, &vn_prefix
, rn
, info_new
);
3681 if (!rfapiGetUnAddrOfVpnBi(info_new
, &un_prefix
)) {
3684 * if we have a valid UN address (either via Encap route
3685 * or via tunnel attribute), then we should attempt
3686 * to move any monitors at less-specific nodes to this node
3688 rfapiMonitorMoveLonger(rn
);
3690 un_prefix_valid
= 1;
3694 * 101129 Enhancement: if we add a route (implication: it is not
3695 * in holddown), delete all other routes from this nve at this
3696 * node that are in holddown, regardless of peer.
3698 * Reasons it's OK to do that:
3700 * - if the holddown route being deleted originally came from BGP VPN,
3701 * it is already gone from BGP (implication of holddown), so there
3702 * won't be any added inconsistency with the BGP RIB.
3704 * - once a fresh route is added at a prefix, any routes in holddown
3705 * at that prefix will not show up in RFP responses, so deleting
3706 * the holddown routes won't affect the contents of responses.
3708 * - lifetimes are supposed to be consistent, so there should not
3709 * be a case where the fresh route has a shorter lifetime than
3710 * the holddown route, so we don't expect the fresh route to
3711 * disappear and complete its holddown time before the existing
3712 * holddown routes time out. Therefore, we won't have a situation
3713 * where we expect the existing holddown routes to be hidden and
3714 * then to reappear sometime later (as holddown routes) in a
3717 * Among other things, this would enable us to skirt the problem
3718 * of local holddown routes that refer to NVE descriptors that
3719 * have already been closed (if the same NVE triggers a subsequent
3720 * rfapi_open(), the new peer is different and doesn't match the
3721 * peer of the holddown route, so the stale holddown route still
3722 * hangs around until it times out instead of just being replaced
3723 * by the fresh route).
3726 * We know that the new bpi will have been inserted before any routes
3727 * in holddown, so we can skip any that came before it
3729 for (bpi
= info_new
->next
; bpi
; bpi
= next
) {
3731 struct prefix pfx_vn
;
3732 struct prefix pfx_un
;
3734 int remote_peer_match
= 0;
3741 if (!CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
))
3745 * Must match VN address (nexthop of VPN route)
3747 if (rfapiGetNexthop(bpi
->attr
, &pfx_vn
))
3749 if (!prefix_same(&pfx_vn
, &vn_prefix
))
3752 if (un_prefix_valid
&& /* new route UN addr */
3753 !rfapiGetUnAddrOfVpnBi(bpi
, &pfx_un
)
3754 && /* old route UN addr */
3755 prefix_same(&pfx_un
, &un_prefix
)) { /* compare */
3758 if (!RFAPI_LOCAL_BI(bpi
) && !RFAPI_LOCAL_BI(info_new
)
3759 && sockunion_same(&bpi
->peer
->su
, &info_new
->peer
->su
)) {
3760 /* old & new are both remote, same peer */
3761 remote_peer_match
= 1;
3764 if (!un_match
& !remote_peer_match
)
3767 vnc_zlog_debug_verbose(
3768 "%s: removing holddown bpi matching NVE of new route",
3770 if (bpi
->extra
->vnc
.import
.timer
) {
3772 &(bpi
->extra
->vnc
.import
.timer
);
3773 struct rfapi_withdraw
*wcb
= (*t
)->arg
;
3775 XFREE(MTYPE_RFAPI_WITHDRAW
, wcb
);
3778 rfapiExpireVpnNow(import_table
, rn
, bpi
, 0);
3781 if (!original_had_routes
) {
3783 * We went from 0 usable routes to 1 usable route. Perform the
3784 * "Adding a Route" export process.
3786 vnc_direct_bgp_add_prefix(bgp
, import_table
, rn
);
3787 vnc_zebra_add_prefix(bgp
, import_table
, rn
);
3790 * Check for nexthop change event
3791 * Note: the prefix_same() test below detects two situations:
3792 * 1. route is replaced, new route has different nexthop
3793 * 2. new route is added (original_nexthop is 0)
3795 struct prefix new_nexthop
;
3797 rfapiGetNexthop(attr
, &new_nexthop
);
3798 if (!prefix_same(&original_nexthop
, &new_nexthop
)) {
3800 * nexthop change event
3801 * vnc_direct_bgp_add_prefix() will recompute VN addr
3804 vnc_direct_bgp_add_prefix(bgp
, import_table
, rn
);
3808 if (!(bgp
->rfapi_cfg
->flags
& BGP_VNC_CONFIG_CALLBACK_DISABLE
)) {
3809 for (n
= rn
; n
; n
= agg_node_parent(n
)) {
3810 // rfapiDoRouteCallback(import_table, n, NULL);
3812 rfapiMonitorItNodeChanged(import_table
, rn
, NULL
);
3814 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, 0);
3818 static void rfapiBgpInfoFilteredImportBadSafi(
3819 struct rfapi_import_table
*import_table
, int action
, struct peer
*peer
,
3820 void *rfd
, /* set for looped back routes */
3821 const struct prefix
*p
,
3822 const struct prefix
*aux_prefix
, /* AFI_L2VPN: optional IP */
3823 afi_t afi
, struct prefix_rd
*prd
,
3824 struct attr
*attr
, /* part of bgp_path_info */
3825 uint8_t type
, /* part of bgp_path_info */
3826 uint8_t sub_type
, /* part of bgp_path_info */
3827 uint32_t *label
) /* part of bgp_path_info */
3829 vnc_zlog_debug_verbose("%s: Error, bad safi", __func__
);
3832 static rfapi_bi_filtered_import_f
*
3833 rfapiBgpInfoFilteredImportFunction(safi_t safi
)
3837 return rfapiBgpInfoFilteredImportVPN
;
3840 return rfapiBgpInfoFilteredImportEncap
;
3844 flog_err(EC_LIB_DEVELOPMENT
, "%s: bad safi %d", __func__
, safi
);
3845 return rfapiBgpInfoFilteredImportBadSafi
;
3849 void rfapiProcessUpdate(struct peer
*peer
,
3850 void *rfd
, /* set when looped from RFP/RFAPI */
3851 const struct prefix
*p
, struct prefix_rd
*prd
,
3852 struct attr
*attr
, afi_t afi
, safi_t safi
, uint8_t type
,
3853 uint8_t sub_type
, uint32_t *label
)
3857 struct rfapi_import_table
*it
;
3858 int has_ip_route
= 1;
3861 bgp
= bgp_get_default(); /* assume 1 instance for now */
3868 * look at high-order byte of RD. FF means MAC
3869 * address is present (VNC L2VPN)
3871 if ((safi
== SAFI_MPLS_VPN
)
3872 && (decode_rd_type(prd
->val
) == RD_TYPE_VNC_ETH
)) {
3873 struct prefix pfx_mac_buf
;
3874 struct prefix pfx_nexthop_buf
;
3878 * Set flag if prefix and nexthop are the same - don't
3879 * add the route to normal IP-based import tables
3881 if (!rfapiGetNexthop(attr
, &pfx_nexthop_buf
)) {
3882 if (!prefix_cmp(&pfx_nexthop_buf
, p
)) {
3887 memset(&pfx_mac_buf
, 0, sizeof(pfx_mac_buf
));
3888 pfx_mac_buf
.family
= AF_ETHERNET
;
3889 pfx_mac_buf
.prefixlen
= 48;
3890 memcpy(&pfx_mac_buf
.u
.prefix_eth
.octet
, prd
->val
+ 2, 6);
3893 * Find rt containing LNI (Logical Network ID), which
3894 * _should_ always be present when mac address is present
3896 rc
= rfapiEcommunityGetLNI(bgp_attr_get_ecommunity(attr
), &lni
);
3898 vnc_zlog_debug_verbose(
3899 "%s: rfapiEcommunityGetLNI returned %d, lni=%d, attr=%p",
3900 __func__
, rc
, lni
, attr
);
3902 it
= rfapiMacImportTableGet(bgp
, lni
);
3904 rfapiBgpInfoFilteredImportVPN(
3905 it
, FIF_ACTION_UPDATE
, peer
, rfd
,
3906 &pfx_mac_buf
, /* prefix */
3907 p
, /* aux prefix: IP addr */
3908 AFI_L2VPN
, prd
, attr
, type
, sub_type
, label
);
3916 * Iterate over all import tables; do a filtered import
3917 * for the afi/safi combination
3919 for (it
= h
->imports
; it
; it
= it
->next
) {
3920 (*rfapiBgpInfoFilteredImportFunction(safi
))(
3921 it
, FIF_ACTION_UPDATE
, peer
, rfd
, p
, /* prefix */
3922 NULL
, afi
, prd
, attr
, type
, sub_type
, label
);
3925 if (safi
== SAFI_MPLS_VPN
) {
3926 vnc_direct_bgp_rh_add_route(bgp
, afi
, p
, peer
, attr
);
3927 rfapiBgpInfoFilteredImportVPN(
3928 bgp
->rfapi
->it_ce
, FIF_ACTION_UPDATE
, peer
, rfd
,
3930 NULL
, afi
, prd
, attr
, type
, sub_type
, label
);
3935 void rfapiProcessWithdraw(struct peer
*peer
, void *rfd
, const struct prefix
*p
,
3936 struct prefix_rd
*prd
, struct attr
*attr
, afi_t afi
,
3937 safi_t safi
, uint8_t type
, int kill
)
3941 struct rfapi_import_table
*it
;
3943 bgp
= bgp_get_default(); /* assume 1 instance for now */
3950 * look at high-order byte of RD. FF means MAC
3951 * address is present (VNC L2VPN)
3953 if (h
->import_mac
!= NULL
&& safi
== SAFI_MPLS_VPN
3954 && decode_rd_type(prd
->val
) == RD_TYPE_VNC_ETH
) {
3955 struct prefix pfx_mac_buf
;
3956 void *cursor
= NULL
;
3959 memset(&pfx_mac_buf
, 0, sizeof(pfx_mac_buf
));
3960 pfx_mac_buf
.family
= AF_ETHERNET
;
3961 pfx_mac_buf
.prefixlen
= 48;
3962 memcpy(&pfx_mac_buf
.u
.prefix_eth
, prd
->val
+ 2, 6);
3965 * withdraw does not contain attrs, so we don't have
3966 * access to the route's LNI, which would ordinarily
3967 * select the specific mac-based import table. Instead,
3968 * we must iterate over all mac-based tables and rely
3969 * on the RD to match.
3971 * If this approach is too slow, add an index where
3972 * key is {RD, peer} and value is the import table
3974 for (rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
3976 rc
== 0; rc
= skiplist_next(h
->import_mac
, NULL
,
3977 (void **)&it
, &cursor
)) {
3979 #ifdef DEBUG_L2_EXTRA
3980 vnc_zlog_debug_verbose(
3981 "%s: calling rfapiBgpInfoFilteredImportVPN(it=%p, afi=AFI_L2VPN)",
3985 rfapiBgpInfoFilteredImportVPN(
3987 (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
),
3988 peer
, rfd
, &pfx_mac_buf
, /* prefix */
3989 p
, /* aux_prefix: IP */
3990 AFI_L2VPN
, prd
, attr
, type
, 0,
3991 NULL
); /* sub_type & label unused for withdraw
3997 * XXX For the case where the withdraw involves an L2
3998 * route with no IP information, we rely on the lack
3999 * of RT-list intersection to filter out the withdraw
4000 * from the IP-based import tables below
4004 * Iterate over all import tables; do a filtered import
4005 * for the afi/safi combination
4008 for (it
= h
->imports
; it
; it
= it
->next
) {
4009 (*rfapiBgpInfoFilteredImportFunction(safi
))(
4010 it
, (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
),
4011 peer
, rfd
, p
, /* prefix */
4012 NULL
, afi
, prd
, attr
, type
, 0,
4013 NULL
); /* sub_type & label unused for withdraw */
4016 /* TBD the deletion should happen after the lifetime expires */
4017 if (safi
== SAFI_MPLS_VPN
)
4018 vnc_direct_bgp_rh_del_route(bgp
, afi
, p
, peer
);
4020 if (safi
== SAFI_MPLS_VPN
) {
4021 rfapiBgpInfoFilteredImportVPN(
4023 (kill
? FIF_ACTION_KILL
: FIF_ACTION_WITHDRAW
), peer
,
4024 rfd
, p
, /* prefix */
4025 NULL
, afi
, prd
, attr
, type
, 0,
4026 NULL
); /* sub_type & label unused for withdraw */
/*
 * TBD optimized withdraw timer algorithm for case of many
 * routes expiring at the same time due to peer drop.
 */
/*
 * 1. Visit all BPIs in all ENCAP import tables.
 *
 *    a. If a bpi's peer is the failed peer, remove the bpi.
 *    b. If the removed ENCAP bpi was first in the list of
 *       BPIs at this ENCAP node, loop over all monitors
 *       at this node:
 *
 *       (1) for each ENCAP monitor, loop over all its
 *           VPN node monitors and set their RFAPI_MON_FLAG_NEEDCALLBACK
 *           flags.
 *
 * 2. Visit all BPIs in all VPN import tables.
 *    a. If a bpi's peer is the failed peer, remove the bpi.
 *    b. loop over all the VPN node monitors and set their
 *       RFAPI_MON_FLAG_NEEDCALLBACK flags
 *    c. If there are no BPIs left at this VPN node,
 *
 */
4055 /* surprise, this gets called from peer_delete(), from rfapi_close() */
4056 static void rfapiProcessPeerDownRt(struct peer
*peer
,
4057 struct rfapi_import_table
*import_table
,
4058 afi_t afi
, safi_t safi
)
4060 struct agg_node
*rn
;
4061 struct bgp_path_info
*bpi
;
4062 struct agg_table
*rt
;
4063 void (*timer_service_func
)(struct thread
*);
4065 assert(afi
== AFI_IP
|| afi
== AFI_IP6
);
4071 rt
= import_table
->imported_vpn
[afi
];
4072 timer_service_func
= rfapiWithdrawTimerVPN
;
4075 rt
= import_table
->imported_encap
[afi
];
4076 timer_service_func
= rfapiWithdrawTimerEncap
;
4079 /* Suppress uninitialized variable warning */
4081 timer_service_func
= NULL
;
4086 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
4087 for (bpi
= rn
->info
; bpi
; bpi
= bpi
->next
) {
4088 if (bpi
->peer
== peer
) {
4090 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)) {
4091 /* already in holddown, skip */
4095 if (safi
== SAFI_MPLS_VPN
) {
4096 RFAPI_UPDATE_ITABLE_COUNT(
4097 bpi
, import_table
, afi
, -1);
4098 import_table
->holddown_count
[afi
] += 1;
4100 rfapiBiStartWithdrawTimer(import_table
, rn
, bpi
,
4102 timer_service_func
);
4110 * This gets called when a peer connection drops. We have to remove
4111 * all the routes from this peer.
4113 * Current approach is crude. TBD Optimize by setting fewer timers and
4114 * grouping withdrawn routes so we can generate callbacks more
4117 void rfapiProcessPeerDown(struct peer
*peer
)
4121 struct rfapi_import_table
*it
;
4124 * If this peer is a "dummy" peer structure atached to a RFAPI
4125 * nve_descriptor, we don't need to walk the import tables
4126 * because the routes are already withdrawn by rfapi_close()
4128 if (CHECK_FLAG(peer
->flags
, PEER_FLAG_IS_RFAPI_HD
))
4132 * 1. Visit all BPIs in all ENCAP import tables.
4133 * Start withdraw timer on the BPIs that match peer.
4135 * 2. Visit All BPIs in all VPN import tables.
4136 * Start withdraw timer on the BPIs that match peer.
4139 bgp
= bgp_get_default(); /* assume 1 instance for now */
4146 for (it
= h
->imports
; it
; it
= it
->next
) {
4147 rfapiProcessPeerDownRt(peer
, it
, AFI_IP
, SAFI_ENCAP
);
4148 rfapiProcessPeerDownRt(peer
, it
, AFI_IP6
, SAFI_ENCAP
);
4149 rfapiProcessPeerDownRt(peer
, it
, AFI_IP
, SAFI_MPLS_VPN
);
4150 rfapiProcessPeerDownRt(peer
, it
, AFI_IP6
, SAFI_MPLS_VPN
);
4154 rfapiProcessPeerDownRt(peer
, h
->it_ce
, AFI_IP
, SAFI_MPLS_VPN
);
4155 rfapiProcessPeerDownRt(peer
, h
->it_ce
, AFI_IP6
, SAFI_MPLS_VPN
);
4160 * Import an entire RIB (for an afi/safi) to an import table RIB,
4161 * filtered according to the import table's RT list
4163 * TBD: does this function need additions to match rfapiProcessUpdate()
4164 * for, e.g., L2 handling?
4166 static void rfapiBgpTableFilteredImport(struct bgp
*bgp
,
4167 struct rfapi_import_table
*it
,
4168 afi_t afi
, safi_t safi
)
4170 struct bgp_dest
*dest1
;
4171 struct bgp_dest
*dest2
;
4173 /* Only these SAFIs have 2-level RIBS */
4174 assert(safi
== SAFI_MPLS_VPN
|| safi
== SAFI_ENCAP
);
4177 * Now visit all the rd nodes and the nodes of all the
4178 * route tables attached to them, and import the routes
4179 * if they have matching route targets
4181 for (dest1
= bgp_table_top(bgp
->rib
[afi
][safi
]); dest1
;
4182 dest1
= bgp_route_next(dest1
)) {
4184 if (bgp_dest_has_bgp_path_info_data(dest1
)) {
4186 for (dest2
= bgp_table_top(
4187 bgp_dest_get_bgp_table_info(dest1
));
4188 dest2
; dest2
= bgp_route_next(dest2
)) {
4190 struct bgp_path_info
*bpi
;
4192 for (bpi
= bgp_dest_get_bgp_path_info(dest2
);
4193 bpi
; bpi
= bpi
->next
) {
4196 if (CHECK_FLAG(bpi
->flags
,
4201 label
= decode_label(
4202 &bpi
->extra
->label
[0]);
4203 (*rfapiBgpInfoFilteredImportFunction(
4205 it
, /* which import table */
4206 FIF_ACTION_UPDATE
, bpi
->peer
,
4208 bgp_dest_get_prefix(dest2
),
4210 (struct prefix_rd
*)
4211 bgp_dest_get_prefix(
4213 bpi
->attr
, bpi
->type
,
4214 bpi
->sub_type
, &label
);
4222 /* per-bgp-instance rfapi data */
/*
 * bgp_rfapi_new
 *
 * Allocate and initialize the struct rfapi handle for one BGP instance.
 *
 * Steps visible in this excerpt:
 *   - asserts that bgp->rfapi_cfg is still NULL on entry
 *   - zero-allocates the handle (MTYPE_RFAPI)
 *   - creates one agg table per AFI in h->un[]
 *   - allocates the "ce" import table (h->it_ce), creates its VPN and ENCAP
 *     tables for AFI_IP and AFI_IP6, and fills it from the BGP MPLS-VPN RIBs
 *   - creates the work queue used for deferred rfapi_close operations
 *   - calls rfp_start(), then stores the returned configuration and
 *     callback methods
 *
 * input:
 *	bgp	BGP instance that will own the new rfapi state
 *
 * NOTE(review): the declarations of h and afi and the return statement are
 * not visible in this excerpt; presumably h is returned -- confirm.
 */
4223 struct rfapi
*bgp_rfapi_new(struct bgp
*bgp
)
/* both are passed by address to rfp_start() below */
4227 struct rfapi_rfp_cfg
*cfg
= NULL
;
4228 struct rfapi_rfp_cb_methods
*cbm
= NULL
;
/* this function sets bgp->rfapi_cfg itself, so it must not already be set */
4230 assert(bgp
->rfapi_cfg
== NULL
);
4232 h
= XCALLOC(MTYPE_RFAPI
, sizeof(struct rfapi
));
/* one h->un[] table per address family */
4234 for (afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
4235 h
->un
[afi
] = agg_table_init();
4239 * initialize the ce import table
4241 h
->it_ce
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
4242 sizeof(struct rfapi_import_table
));
4243 h
->it_ce
->imported_vpn
[AFI_IP
] = agg_table_init();
4244 h
->it_ce
->imported_vpn
[AFI_IP6
] = agg_table_init();
4245 h
->it_ce
->imported_encap
[AFI_IP
] = agg_table_init();
4246 h
->it_ce
->imported_encap
[AFI_IP6
] = agg_table_init();
/* populate the ce import table from the existing MPLS-VPN RIB contents */
4247 rfapiBgpTableFilteredImport(bgp
, h
->it_ce
, AFI_IP
, SAFI_MPLS_VPN
);
4248 rfapiBgpTableFilteredImport(bgp
, h
->it_ce
, AFI_IP6
, SAFI_MPLS_VPN
);
4251 * Set up work queue for deferred rfapi_close operations
4253 h
->deferred_close_q
=
4254 work_queue_new(bm
->master
, "rfapi deferred close");
4255 h
->deferred_close_q
->spec
.workfunc
= rfapi_deferred_close_workfunc
;
/* the work function gets the rfapi handle itself as its queue-wide data */
4256 h
->deferred_close_q
->spec
.data
= h
;
/* rfp_start() receives the addresses of cfg and cbm */
4258 h
->rfp
= rfp_start(bm
->master
, &cfg
, &cbm
);
4259 bgp
->rfapi_cfg
= bgp_rfapi_cfg_new(cfg
);
/*
 * The callback method table is copied by value into the handle.
 * NOTE(review): cbm is dereferenced here; any NULL guard around this
 * statement is not visible in this excerpt -- confirm.
 */
4261 h
->rfp_methods
= *cbm
;
/*
 * bgp_rfapi_destroy
 *
 * Release the rfapi state attached to a BGP instance.
 *
 * Steps visible in this excerpt, in order:
 *   - frees the resolve_nve_nexthop skiplist if present
 *   - finishes the ce import table's VPN/ENCAP agg tables (AFI_IP, AFI_IP6)
 *   - for every import table stored in h->import_mac: flushes it and frees it,
 *     then frees the skiplist itself
 *   - frees the deferred-close work queue
 *   - finishes every h->un[] table
 *   - frees the ce import table and finally the handle
 *
 * input:
 *	bgp	owning BGP instance
 *	h	rfapi handle to destroy
 *
 * NOTE(review): the statement executed when bgp or h is NULL is not visible
 * in this excerpt; presumably an early return -- confirm.
 */
4266 void bgp_rfapi_destroy(struct bgp
*bgp
, struct rfapi
*h
)
4270 if (bgp
== NULL
|| h
== NULL
)
4273 if (h
->resolve_nve_nexthop
) {
4274 skiplist_free(h
->resolve_nve_nexthop
);
4275 h
->resolve_nve_nexthop
= NULL
;
/* tear down the four tables of the ce import table */
4278 agg_table_finish(h
->it_ce
->imported_vpn
[AFI_IP
]);
4279 agg_table_finish(h
->it_ce
->imported_vpn
[AFI_IP6
]);
4280 agg_table_finish(h
->it_ce
->imported_encap
[AFI_IP
]);
4281 agg_table_finish(h
->it_ce
->imported_encap
[AFI_IP6
]);
4283 if (h
->import_mac
) {
4284 struct rfapi_import_table
*it
;
/*
 * Walk the import_mac skiplist; each value is an import table.
 * The loop runs while skiplist_next() returns 0.
 */
4289 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4291 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4294 rfapiImportTableFlush(it
);
4295 XFREE(MTYPE_RFAPI_IMPORTTABLE
, it
);
4297 skiplist_free(h
->import_mac
);
4298 h
->import_mac
= NULL
;
4301 work_queue_free_and_null(&h
->deferred_close_q
);
4306 for (afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
4307 agg_table_finish(h
->un
[afi
]);
/* the ce import table is freed only after its tables were finished above */
4310 XFREE(MTYPE_RFAPI_IMPORTTABLE
, h
->it_ce
);
4311 XFREE(MTYPE_RFAPI
, h
);
/*
 * rfapiImportTableRefAdd
 *
 * Look up the import table whose route-target import list matches
 * rt_import_list; when none is found, create one, link it at the head of
 * h->imports and populate it from the BGP RIBs.
 *
 * input:
 *	bgp		BGP instance whose RIBs are imported from
 *	rt_import_list	route-target list identifying the import table
 *	rfg		NVE group configuration (its use is not visible in
 *			this excerpt)
 *
 * NOTE(review): how h is obtained, what happens after a match is logged,
 * any reference counting implied by the name, and the return statement are
 * not visible in this excerpt -- confirm against the full source.
 */
4314 struct rfapi_import_table
*
4315 rfapiImportTableRefAdd(struct bgp
*bgp
, struct ecommunity
*rt_import_list
,
4316 struct rfapi_nve_group_cfg
*rfg
)
4319 struct rfapi_import_table
*it
;
/* search the existing import tables for one with the same RT import list */
4325 for (it
= h
->imports
; it
; it
= it
->next
) {
4326 if (ecommunity_cmp(it
->rt_import_list
, rt_import_list
))
4330 vnc_zlog_debug_verbose("%s: matched it=%p", __func__
, it
);
/* allocate a new, zeroed import table */
4333 it
= XCALLOC(MTYPE_RFAPI_IMPORTTABLE
,
4334 sizeof(struct rfapi_import_table
));
/* link in front of the current list head */
4336 it
->next
= h
->imports
;
/* the table keeps its own copy of the caller's RT list */
4339 it
->rt_import_list
= ecommunity_dup(rt_import_list
);
4341 it
->monitor_exterior_orphans
=
4342 skiplist_new(0, NULL
, prefix_free_lists
);
4345 * fill import route tables from RIBs
4347 * Potential area for optimization. If this occurs when
4348 * tables are large (e.g., the operator adds a nve group
4349 * with a new RT list to a running system), it could take
/*
 * Per AFI: create the VPN and ENCAP tables, import matching routes from
 * the BGP RIBs (two import calls; the second one uses SAFI_ENCAP), then
 * enable exterior redistribution for this import table.
 */
4353 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4355 it
->imported_vpn
[afi
] = agg_table_init();
4356 it
->imported_encap
[afi
] = agg_table_init();
4358 rfapiBgpTableFilteredImport(bgp
, it
, afi
,
4360 rfapiBgpTableFilteredImport(bgp
, it
, afi
, SAFI_ENCAP
);
4362 vnc_import_bgp_exterior_redist_enable_it(bgp
, afi
, it
);
4372 * skiplist element free function
/*
 * delete_rem_pfx_na_free
 *
 * Element free callback; rfapiDeleteRemotePrefixes() passes it to
 * skiplist_new() for its unique-NVE skiplists.
 *
 * input:
 *	na	element to free; treated as a struct rfapi_nve_addr whose
 *		info field holds a pointer to a uint32_t counter
 *
 * NOTE(review): the statement that uses pCounter is not visible in this
 * excerpt; presumably the counter is incremented before the element is
 * freed -- confirm.
 */
4374 static void delete_rem_pfx_na_free(void *na
)
/* fetch the counter pointer stashed in the element's info field */
4376 uint32_t *pCounter
= ((struct rfapi_nve_addr
*)na
)->info
;
4379 XFREE(MTYPE_RFAPI_NVE_ADDR
, na
);
4383 * Common deleter for IP and MAC import tables
/*
 * rfapiDeleteRemotePrefixesIt
 *
 * Scan the imported_vpn tables of one import table and expire the routes
 * that pass the caller's filters.
 *
 * input:
 *	bgp			BGP instance (passed to
 *				vnc_direct_bgp_rh_del_route())
 *	it			import table to scan
 *	un			if set, the route's UN address (obtained via
 *				rfapiGetUnAddrOfVpnBi()) is tested against
 *				this prefix with prefix_match()
 *	vn			if set, the route's nexthop (obtained via
 *				rfapiGetNexthop()) is tested against this
 *				prefix with prefix_match()
 *	p			if set, only AFIs matching p->family are
 *				scanned and node prefixes are compared with
 *				prefix_cmp()
 *	delete_active		logged and reported in debug output here
 *	delete_holddown		consulted for routes flagged BGP_PATH_REMOVED
 *	pARcount, pAHcount,
 *	pHRcount, pHHcount	counter locations supplied by the caller
 *	uniq_active_nves,
 *	uniq_holddown_nves	skiplists keyed by NVE address
 *
 * NOTE(review): many statements of this function (continue statements, the
 * counter increments, several declarations) are not visible in this excerpt;
 * the comments below describe only what the visible lines show.
 */
4385 static void rfapiDeleteRemotePrefixesIt(
4386 struct bgp
*bgp
, struct rfapi_import_table
*it
, struct prefix
*un
,
4387 struct prefix
*vn
, struct prefix
*p
, int delete_active
,
4388 int delete_holddown
, uint32_t *pARcount
, uint32_t *pAHcount
,
4389 uint32_t *pHRcount
, uint32_t *pHHcount
,
4390 struct skiplist
*uniq_active_nves
, struct skiplist
*uniq_holddown_nves
)
4394 #ifdef DEBUG_L2_EXTRA
4396 char buf_pfx
[PREFIX_STRLEN
];
4399 prefix2str(p
, buf_pfx
, sizeof(buf_pfx
));
4405 vnc_zlog_debug_verbose(
4406 "%s: entry, p=%s, delete_active=%d, delete_holddown=%d",
4407 __func__
, buf_pfx
, delete_active
, delete_holddown
);
/* examine every address family's imported VPN table */
4411 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4413 struct agg_table
*rt
;
4414 struct agg_node
*rn
;
/* a prefix filter restricts the scan to the AFI of that prefix's family */
4416 if (p
&& (family2afi(p
->family
) != afi
)) {
4420 rt
= it
->imported_vpn
[afi
];
4424 vnc_zlog_debug_verbose("%s: scanning rt for afi=%d", __func__
,
4427 for (rn
= agg_route_top(rt
); rn
; rn
= agg_route_next(rn
)) {
4428 struct bgp_path_info
*bpi
;
4429 struct bgp_path_info
*next
;
4430 const struct prefix
*rn_p
= agg_node_get_prefix(rn
);
4432 if (p
&& VNC_DEBUG(IMPORT_DEL_REMOTE
))
4433 vnc_zlog_debug_any("%s: want %pFX, have %pRN",
/* prefix_cmp() is nonzero when the node's prefix differs from the filter */
4436 if (p
&& prefix_cmp(p
, rn_p
))
4439 vnc_zlog_debug_verbose("%s: rn pfx=%pRN", __func__
, rn
);
4441 /* TBD is this valid for afi == AFI_L2VPN? */
4442 RFAPI_CHECK_REFCOUNT(rn
, SAFI_MPLS_VPN
, 1);
/*
 * The loop advances through a separately held "next" pointer rather than
 * bpi->next. NOTE(review): the assignment of next is not visible here.
 */
4444 for (bpi
= rn
->info
; bpi
; bpi
= next
) {
4453 vnc_zlog_debug_verbose("%s: examining bpi %p",
/* VN side: derive the nexthop prefix (qpt) from the route's attributes */
4456 if (!rfapiGetNexthop(bpi
->attr
, &qpt
))
4461 || !prefix_match(vn
, &qpt
)) {
4462 #ifdef DEBUG_L2_EXTRA
4463 vnc_zlog_debug_verbose(
4464 "%s: continue at vn && !qpt_valid || !prefix_match(vn, &qpt)",
/* UN side: derive the UN address prefix (qct) of this VPN path */
4471 if (!rfapiGetUnAddrOfVpnBi(bpi
, &qct
))
4476 || !prefix_match(un
, &qct
)) {
4477 #ifdef DEBUG_L2_EXTRA
4478 vnc_zlog_debug_verbose(
4479 "%s: continue at un && !qct_valid || !prefix_match(un, &qct)",
4491 * If this route is waiting to be deleted
4493 * a previous withdraw, we must cancel its
/* routes flagged BGP_PATH_REMOVED take this path; it consults delete_holddown */
4496 if (CHECK_FLAG(bpi
->flags
, BGP_PATH_REMOVED
)) {
4497 if (!delete_holddown
)
4499 if (bpi
->extra
->vnc
.import
.timer
) {
/* a timer is pending: its withdraw control block (wcb) is handled here */
4504 struct rfapi_withdraw
*wcb
=
4508 ->holddown_count
[afi
] -=
4510 RFAPI_UPDATE_ITABLE_COUNT(
4511 bpi
, wcb
->import_table
,
4513 XFREE(MTYPE_RFAPI_WITHDRAW
,
4523 vnc_zlog_debug_verbose(
4524 "%s: deleting bpi %p (qct_valid=%d, qpt_valid=%d, delete_holddown=%d, delete_active=%d)",
4525 __func__
, bpi
, qct_valid
, qpt_valid
,
4526 delete_holddown
, delete_active
);
/*
 * When both addresses are known, build an NVE address key and look it up
 * in one of the two unique-NVE skiplists (selected by is_active).
 */
4532 if (qct_valid
&& qpt_valid
) {
4534 struct rfapi_nve_addr na
;
4535 struct rfapi_nve_addr
*nap
;
4537 memset(&na
, 0, sizeof(na
));
4538 assert(!rfapiQprefix2Raddr(&qct
,
4540 assert(!rfapiQprefix2Raddr(&qpt
,
4543 if (skiplist_search(
4546 : uniq_holddown_nves
),
4547 &na
, (void **)&nap
)) {
4551 MTYPE_RFAPI_NVE_ADDR
,
4556 nap
->info
= is_active
4562 : uniq_holddown_nves
),
4565 rfapiNveAddr2Str(nap
, line
,
4570 vnc_direct_bgp_rh_del_route(bgp
, afi
, rn_p
,
/*
 * Adjust the import table's per-AFI counters, bump its holddown count,
 * then expire the route immediately via rfapiExpireVpnNow().
 */
4573 RFAPI_UPDATE_ITABLE_COUNT(bpi
, it
, afi
, -1);
4574 it
->holddown_count
[afi
] += 1;
4575 rfapiExpireVpnNow(it
, rn
, bpi
, 1);
4577 vnc_zlog_debug_verbose(
4578 "%s: incrementing count (is_active=%d)",
4579 __func__
, is_active
);
4592 * For use by the "clear vnc prefixes" command
4594 /*------------------------------------------
4595 * rfapiDeleteRemotePrefixes
4597 * UI helper: For use by the "clear vnc prefixes" command
4600 * un if set, tunnel must match this prefix
4601 * vn if set, nexthop prefix must match this prefix
4602 * p if set, prefix must match this prefix
4603 * it if set, only look in this import table
4606 * pARcount number of active routes deleted
4607 * pAHcount number of active nves deleted
4608 * pHRcount number of holddown routes deleted
4609 * pHHcount number of holddown nves deleted
4613 --------------------------------------------*/
/*
 * Entry point that applies rfapiDeleteRemotePrefixesIt() to the IP import
 * tables and then to the L2 import tables kept in h->import_mac, and reports
 * the accumulated route/NVE counts through the four output pointers.
 *
 * Unique NVEs are tracked in two temporary skiplists created with
 * rfapi_nve_addr_cmp as comparator and delete_rem_pfx_na_free as element
 * free function; both skiplists are freed before the counts are stored.
 *
 * NOTE(review): the declaration of parameter p, how h is obtained, the loop
 * header for the IP import tables, and any NULL checks on the output
 * pointers are not visible in this excerpt.
 */
4614 void rfapiDeleteRemotePrefixes(struct prefix
*un
, struct prefix
*vn
,
4616 struct rfapi_import_table
*arg_it
,
4617 int delete_active
, int delete_holddown
,
4618 uint32_t *pARcount
, uint32_t *pAHcount
,
4619 uint32_t *pHRcount
, uint32_t *pHHcount
)
4623 struct rfapi_import_table
*it
;
/* running totals; addresses of these are handed to the per-table deleter */
4624 uint32_t deleted_holddown_route_count
= 0;
4625 uint32_t deleted_active_route_count
= 0;
4626 uint32_t deleted_holddown_nve_count
= 0;
4627 uint32_t deleted_active_nve_count
= 0;
4628 struct skiplist
*uniq_holddown_nves
;
4629 struct skiplist
*uniq_active_nves
;
4633 bgp
= bgp_get_default(); /* assume 1 instance for now */
4634 /* If no bgp instantiated yet, no vnc prefixes exist */
/* one skiplist per state (holddown / active), keyed by NVE address */
4641 uniq_holddown_nves
=
4642 skiplist_new(0, rfapi_nve_addr_cmp
, delete_rem_pfx_na_free
);
4644 skiplist_new(0, rfapi_nve_addr_cmp
, delete_rem_pfx_na_free
);
4647 * Iterate over all import tables; do a filtered import
4648 * for the afi/safi combination
4657 vnc_zlog_debug_verbose(
4658 "%s: calling rfapiDeleteRemotePrefixesIt() on (IP) import %p",
4661 rfapiDeleteRemotePrefixesIt(
4662 bgp
, it
, un
, vn
, p
, delete_active
, delete_holddown
,
4663 &deleted_active_route_count
, &deleted_active_nve_count
,
4664 &deleted_holddown_route_count
,
4665 &deleted_holddown_nve_count
, uniq_active_nves
,
4666 uniq_holddown_nves
);
4675 * Now iterate over L2 import tables
/*
 * The L2 tables are scanned only when they exist and the prefix filter,
 * if given, is of family AF_ETHERNET.
 */
4677 if (h
->import_mac
&& !(p
&& (p
->family
!= AF_ETHERNET
))) {
4679 void *cursor
= NULL
;
/* walk h->import_mac while skiplist_next() returns 0 */
4683 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4685 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4688 vnc_zlog_debug_verbose(
4689 "%s: calling rfapiDeleteRemotePrefixesIt() on import_mac %p",
4692 rfapiDeleteRemotePrefixesIt(
4693 bgp
, it
, un
, vn
, p
, delete_active
,
4694 delete_holddown
, &deleted_active_route_count
,
4695 &deleted_active_nve_count
,
4696 &deleted_holddown_route_count
,
4697 &deleted_holddown_nve_count
, uniq_active_nves
,
4698 uniq_holddown_nves
);
4703 * our custom element freeing function above counts as it deletes
/* freeing the skiplists runs delete_rem_pfx_na_free() on every element */
4705 skiplist_free(uniq_holddown_nves
);
4706 skiplist_free(uniq_active_nves
);
/* publish the totals to the caller */
4709 *pARcount
= deleted_active_route_count
;
4711 *pAHcount
= deleted_active_nve_count
;
4713 *pHRcount
= deleted_holddown_route_count
;
4715 *pHHcount
= deleted_holddown_nve_count
;
4720 /*------------------------------------------
4721 * rfapiCountAllItRoutes
4723 * UI helper: count VRF routes from BGP side
4728 * pALRcount count of active local routes
4729 * pARRcount count of active remote routes
4730 * pHRcount count of holddown routes
4731 * pIRcount count of direct imported routes
4735 --------------------------------------------*/
/*
 * Sum the per-AFI route counters (local, remote, holddown, imported) of
 * every import table on h->imports, add the AFI_L2VPN counters of every
 * import table in h->import_mac, and store the four totals through the
 * output pointers.
 *
 * NOTE(review): how h is obtained, the handling of a missing BGP instance,
 * and any NULL checks on the output pointers are not visible in this
 * excerpt.
 */
4736 void rfapiCountAllItRoutes(int *pALRcount
, /* active local routes */
4737 int *pARRcount
, /* active remote routes */
4738 int *pHRcount
, /* holddown routes */
4739 int *pIRcount
) /* imported routes */
4743 struct rfapi_import_table
*it
;
4746 int total_active_local
= 0;
4747 int total_active_remote
= 0;
4748 int total_holddown
= 0;
4749 int total_imported
= 0;
4751 bgp
= bgp_get_default(); /* assume 1 instance for now */
4758 * Iterate over all import tables; do a filtered import
4759 * for the afi/safi combination
/* IP import tables: accumulate the counters of every AFI */
4762 for (it
= h
->imports
; it
; it
= it
->next
) {
4764 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
4766 total_active_local
+= it
->local_count
[afi
];
4767 total_active_remote
+= it
->remote_count
[afi
];
4768 total_holddown
+= it
->holddown_count
[afi
];
4769 total_imported
+= it
->imported_count
[afi
];
/* L2 import tables: only the AFI_L2VPN slot of each counter array is added */
4776 if (h
->import_mac
) {
4778 rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4780 !rc
; rc
= skiplist_next(h
->import_mac
, NULL
, (void **)&it
,
4783 total_active_local
+= it
->local_count
[AFI_L2VPN
];
4784 total_active_remote
+= it
->remote_count
[AFI_L2VPN
];
4785 total_holddown
+= it
->holddown_count
[AFI_L2VPN
];
4786 total_imported
+= it
->imported_count
[AFI_L2VPN
];
/* publish the totals to the caller */
4792 *pALRcount
= total_active_local
;
4795 *pARRcount
= total_active_remote
;
4798 *pHRcount
= total_holddown
;
4801 *pIRcount
= total_imported
;
4805 /*------------------------------------------
4806 * rfapiGetHolddownFromLifetime
4808 * calculate holddown value based on lifetime
4814 * Holddown value based on lifetime, holddown_factor,
4815 * and RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
4817 --------------------------------------------*/
4818 /* hold down time maxes out at RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY */
4819 uint32_t rfapiGetHolddownFromLifetime(uint32_t lifetime
)
4824 bgp
= bgp_get_default();
4825 if (bgp
&& bgp
->rfapi_cfg
)
4826 factor
= bgp
->rfapi_cfg
->rfp_cfg
.holddown_factor
;
4828 factor
= RFAPI_RFP_CFG_DEFAULT_HOLDDOWN_FACTOR
;
4830 if (factor
< 100 || lifetime
< RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
4831 lifetime
= lifetime
* factor
/ 100;
4832 if (lifetime
< RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
4835 return RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
;