3 * Copyright 2009-2016, LabN Consulting, L.L.C.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 * Purpose: maintain per-nve ribs and generate change lists
26 #include "lib/zebra.h"
27 #include "lib/prefix.h"
28 #include "lib/agg_table.h"
30 #include "lib/memory.h"
32 #include "lib/skiplist.h"
33 #include "lib/workqueue.h"
35 #include "bgpd/bgpd.h"
36 #include "bgpd/bgp_route.h"
37 #include "bgpd/bgp_ecommunity.h"
38 #include "bgpd/bgp_mplsvpn.h"
39 #include "bgpd/bgp_vnc_types.h"
41 #include "bgpd/rfapi/rfapi.h"
42 #include "bgpd/rfapi/bgp_rfapi_cfg.h"
43 #include "bgpd/rfapi/rfapi_import.h"
44 #include "bgpd/rfapi/rfapi_private.h"
45 #include "bgpd/rfapi/rfapi_vty.h"
46 #include "bgpd/rfapi/vnc_import_bgp.h"
47 #include "bgpd/rfapi/rfapi_rib.h"
48 #include "bgpd/rfapi/rfapi_monitor.h"
49 #include "bgpd/rfapi/rfapi_encap_tlv.h"
50 #include "bgpd/rfapi/vnc_debug.h"
52 #define DEBUG_PROCESS_PENDING_NODE 0
53 #define DEBUG_PENDING_DELETE_ROUTE 0
55 #define DEBUG_RIB_SL_RD 0
59 static void rfapiRibShowRibSl(void *stream
, struct prefix
*pfx
,
/*
 * RIB
 *	Model of the set of routes currently in the NVE's RIB.
 *
 *	node->info		ptr to "struct skiplist".
 *				MUST be NULL if there are no routes.
 *				key = ptr to struct prefix {vn}
 *				val = ptr to struct rfapi_info
 *				skiplist.cmp = vnc_prefix_cmp
 *
 *	node->aggregate		ptr to "struct skiplist".
 *				key = ptr to struct prefix {vn}
 *				val = ptr to struct rfapi_info
 *				skiplist.del = rfapi_info_free
 *				skiplist.cmp = vnc_prefix_cmp
 *
 *				This skiplist at "aggregate"
 *				contains the routes recently
 *				deleted.
 */
/*
 * RIB_PENDING
 *	Sparse list of prefixes that need to be updated. Each node
 *	will have the complete set of routes for the prefix.
 *
 *	node->info		ptr to "struct list" (lib/linklist.h)
 *
 *				List of routes sorted lowest cost first.
 *				This list is how the new complete set
 *				of routes should look.
 *				Set if there are updates to the prefix;
 *				MUST be NULL if there are no updates.
 *
 *				.data = ptr to struct rfapi_info
 *				list.cmp = NULL (sorted manually)
 *				list.del = rfapi_info_free
 *
 *				Special case: if node->info is 1, it means
 *				"delete all routes at this prefix".
 *
 *	node->aggregate		ptr to struct skiplist
 *				key = ptr to struct prefix {vn} (part of ri)
 *				val = struct rfapi_info
 *				skiplist.cmp = vnc_prefix_cmp
 *				skiplist.del = NULL
 *
 *				ptlist is rewritten anew each time
 *				rfapiRibUpdatePendingNode() is called
 *
 *				THE ptlist VALUES ARE REFERENCES TO THE
 *				rfapi_info STRUCTS IN THE node->info LIST.
 */
120 * iterate over RIB to count responses, compare with running counters
/*
 * rfapiRibCheckCounts: consistency check of the per-NVE RIB bookkeeping.
 *
 * For every descriptor on bgp->rfapi->descriptors and every AFI it walks
 * rfd->rib[afi] and rfd->rib_pending[afi]:
 *   - RIB nodes: skiplist_count() of node->info is added to the "active"
 *     total and skiplist_count() of node->aggregate to the "deleted" total.
 *   - pending nodes: the list at node->info (unless it is NULL or the
 *     magic value 1) contributes l->count to the "pending" total, and the
 *     function asserts that this equals skiplist_count() of
 *     node->aggregate.
 * A counted prefix total that differs from rfd->rib_prefix_count or from
 * bgp->rfapi->rib_prefix_count_total is logged with
 * vnc_zlog_debug_verbose(). Finally it asserts that
 * active + deleted + pending + offset equals
 * mtype_stats_alloc(MTYPE_RFAPI_INFO).
 *
 * checkstats  validate rfd & global counts
 * offset      number of ri's held separately (allocated but not attached
 *             to any of the walked structures)
 *
 * NOTE(review): this copy of the function is missing lines (braces, the
 * NULL guards around the skiplist_count() calls, the pfx_active /
 * t_pfx_active increments and the declarations of afi and rn). Restore
 * from upstream before changing the logic.
 */
122 void rfapiRibCheckCounts(
123 int checkstats
, /* validate rfd & global counts */
124 unsigned int offset
) /* number of ri's held separately */
126 struct rfapi_descriptor
*rfd
;
127 struct listnode
*node
;
129 struct bgp
*bgp
= bgp_get_default();
131 uint32_t t_pfx_active
= 0;
132 uint32_t t_pfx_deleted
= 0;
134 uint32_t t_ri_active
= 0;
135 uint32_t t_ri_deleted
= 0;
136 uint32_t t_ri_pend
= 0;
138 unsigned int alloc_count
;
143 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
, rfd
)) {
146 uint32_t pfx_active
= 0;
147 uint32_t pfx_deleted
= 0;
149 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
153 for (rn
= agg_route_top(rfd
->rib
[afi
]); rn
;
154 rn
= agg_route_next(rn
)) {
156 struct skiplist
*sl
= rn
->info
;
157 struct skiplist
*dsl
= rn
->aggregate
;
158 uint32_t ri_active
= 0;
159 uint32_t ri_deleted
= 0;
162 ri_active
= skiplist_count(sl
);
164 t_ri_active
+= ri_active
;
170 ri_deleted
= skiplist_count(dsl
);
171 t_ri_deleted
+= ri_deleted
;
176 for (rn
= agg_route_top(rfd
->rib_pending
[afi
]); rn
;
177 rn
= agg_route_next(rn
)) {
179 struct list
*l
= rn
->info
; /* sorted by cost */
180 struct skiplist
*sl
= rn
->aggregate
;
181 uint32_t ri_pend_cost
= 0;
182 uint32_t ri_pend_uniq
= 0;
185 ri_pend_uniq
= skiplist_count(sl
);
188 if (l
&& (l
!= (void *)1)) {
189 ri_pend_cost
= l
->count
;
190 t_ri_pend
+= l
->count
;
193 assert(ri_pend_uniq
== ri_pend_cost
);
198 if (pfx_active
!= rfd
->rib_prefix_count
) {
199 vnc_zlog_debug_verbose(
200 "%s: rfd %p actual pfx count %u != running %u",
201 __func__
, rfd
, pfx_active
,
202 rfd
->rib_prefix_count
);
208 if (checkstats
&& bgp
->rfapi
) {
209 if (t_pfx_active
!= bgp
->rfapi
->rib_prefix_count_total
) {
210 vnc_zlog_debug_verbose(
211 "%s: actual total pfx count %u != running %u",
212 __func__
, t_pfx_active
,
213 bgp
->rfapi
->rib_prefix_count_total
);
219 * Check against memory allocation count
221 alloc_count
= mtype_stats_alloc(MTYPE_RFAPI_INFO
);
222 assert(t_ri_active
+ t_ri_deleted
+ t_ri_pend
+ offset
== alloc_count
);
225 static struct rfapi_info
*rfapi_info_new(void)
227 return XCALLOC(MTYPE_RFAPI_INFO
, sizeof(struct rfapi_info
));
230 void rfapiFreeRfapiUnOptionChain(struct rfapi_un_option
*p
)
233 struct rfapi_un_option
*next
;
236 XFREE(MTYPE_RFAPI_UN_OPTION
, p
);
241 void rfapiFreeRfapiVnOptionChain(struct rfapi_vn_option
*p
)
244 struct rfapi_vn_option
*next
;
247 XFREE(MTYPE_RFAPI_VN_OPTION
, p
);
/*
 * rfapi_info_free: release an rfapi_info and everything it owns.
 *
 * In order, the visible code:
 *   - frees goner->tea_options with rfapiFreeBgpTeaOptionChain(),
 *   - frees goner->un_options with rfapiFreeRfapiUnOptionChain(),
 *   - frees goner->vn_options with rfapiFreeRfapiVnOptionChain(),
 *     clearing each pointer afterwards,
 *   - fetches the timer control block from the thread stored in
 *     goner->timer, cancels that thread and frees the control block
 *     (MTYPE_RFAPI_RECENT_DELETE),
 *   - frees goner itself (MTYPE_RFAPI_INFO).
 *
 * This function is also installed as a list delete callback (it is cast
 * to void (*)(void *) for delete_list->del in process_pending_node()).
 *
 * NOTE(review): lines are missing from this copy; presumably the timer
 * block is guarded by a test of goner->timer and the whole body by a
 * test of goner. Confirm against upstream.
 */
253 static void rfapi_info_free(struct rfapi_info
*goner
)
256 if (goner
->tea_options
) {
257 rfapiFreeBgpTeaOptionChain(goner
->tea_options
);
258 goner
->tea_options
= NULL
;
260 if (goner
->un_options
) {
261 rfapiFreeRfapiUnOptionChain(goner
->un_options
);
262 goner
->un_options
= NULL
;
264 if (goner
->vn_options
) {
265 rfapiFreeRfapiVnOptionChain(goner
->vn_options
);
266 goner
->vn_options
= NULL
;
269 struct rfapi_rib_tcb
*tcb
;
271 tcb
= ((struct thread
*)goner
->timer
)->arg
;
272 thread_cancel((struct thread
*)goner
->timer
);
273 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
276 XFREE(MTYPE_RFAPI_INFO
, goner
);
281 * Timer control block for recently-deleted and expired routes
/*
 * Timer control block handed to rfapiRibExpireTimer() as the thread
 * argument. It ties one rfapi_info (ri) to the descriptor (rfd) that owns
 * the RIB the route lives in.
 *
 * RFAPI_RIB_TCB_FLAG_DELETED is set by rfapiRibStartTimer() when the
 * route sits on the node's "aggregate" skiplist and cleared when it sits
 * on the "info" skiplist; rfapiRibExpireTimer() tests it to decide which
 * node pointer to clear.
 *
 * NOTE(review): other code in this file dereferences tcb->sl, tcb->rn and
 * tcb->flags, so member declarations are missing from this copy of the
 * struct, as is its closing brace.
 */
283 struct rfapi_rib_tcb
{
284 struct rfapi_descriptor
*rfd
;
286 struct rfapi_info
*ri
;
289 #define RFAPI_RIB_TCB_FLAG_DELETED 0x00000001
293 * remove route from rib
/*
 * rfapiRibExpireTimer: thread callback that removes one route from an
 * NVE's RIB when its timer fires.
 *
 * t  the expiring thread; t->arg is the struct rfapi_rib_tcb allocated by
 *    rfapiRibStartTimer().
 *
 * Visible steps:
 *   - clear tcb->ri->timer so rfapi_info_free() does not touch the
 *     thread again,
 *   - delete the route (keyed by ri->rk) from tcb->sl, asserting that the
 *     delete succeeds,
 *   - call rfapi_info_free() on the route for the case where the skiplist
 *     has no delete function,
 *   - if the skiplist is now empty: clear rn->aggregate when
 *     RFAPI_RIB_TCB_FLAG_DELETED is set, otherwise clear rn->info and
 *     decrement the prefix counters; then free the skiplist and unlock
 *     the node,
 *   - free the control block.
 * RFAPI_RIB_CHECK_COUNTS(1, 0) runs on entry and exit.
 *
 * NOTE(review): the condition guarding the explicit rfapi_info_free()
 * call, the else keyword and the return statement are not present in
 * this copy. Note also that the skiplist_delete() call lives inside
 * assert(); it must keep executing in builds where assert is active.
 */
295 static int rfapiRibExpireTimer(struct thread
*t
)
297 struct rfapi_rib_tcb
*tcb
= t
->arg
;
299 RFAPI_RIB_CHECK_COUNTS(1, 0);
302 * Forget reference to thread. Otherwise rfapi_info_free() will
303 * attempt to free thread pointer as an option chain
305 tcb
->ri
->timer
= NULL
;
307 /* "deleted" skiplist frees ri, "active" doesn't */
308 assert(!skiplist_delete(tcb
->sl
, &tcb
->ri
->rk
, NULL
));
311 * XXX in this case, skiplist has no delete function: we must
312 * therefore delete rfapi_info explicitly.
314 rfapi_info_free(tcb
->ri
);
317 if (skiplist_empty(tcb
->sl
)) {
318 if (CHECK_FLAG(tcb
->flags
, RFAPI_RIB_TCB_FLAG_DELETED
))
319 tcb
->rn
->aggregate
= NULL
;
321 struct bgp
*bgp
= bgp_get_default();
322 tcb
->rn
->info
= NULL
;
323 RFAPI_RIB_PREFIX_COUNT_DECR(tcb
->rfd
, bgp
->rfapi
);
325 skiplist_free(tcb
->sl
);
326 agg_unlock_node(tcb
->rn
);
329 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
331 RFAPI_RIB_CHECK_COUNTS(1, 0);
/*
 * rfapiRibStartTimer: arm (or re-arm) the expiry timer of one RIB route.
 *
 * rfd  descriptor owning the RIB
 * ri   route whose ri->timer / ri->lifetime are used
 * rn   route node attached to
 * A further parameter is cut off in this copy; callers pass a fourth
 * argument of 0 or 1, and rfapiRibStartTimer(rfd, ri, rn, 1) is used for
 * routes on the recently-deleted list.
 *
 * Visible behaviour: a struct rfapi_rib_tcb is allocated with XCALLOC
 * (MTYPE_RFAPI_RECENT_DELETE); tcb->sl is pointed at rn->aggregate with
 * RFAPI_RIB_TCB_FLAG_DELETED set, or at rn->info with the flag cleared;
 * then rfapiRibExpireTimer is scheduled on bm->master after ri->lifetime
 * via thread_add_timer().
 *
 * NOTE(review): the branch conditions, the handling of an already-running
 * timer (t = ri->timer) and the remaining tcb field assignments are
 * missing here; restore from upstream before editing.
 */
336 static void rfapiRibStartTimer(struct rfapi_descriptor
*rfd
,
337 struct rfapi_info
*ri
,
338 struct agg_node
*rn
, /* route node attached to */
341 struct thread
*t
= ri
->timer
;
342 struct rfapi_rib_tcb
*tcb
= NULL
;
349 tcb
= XCALLOC(MTYPE_RFAPI_RECENT_DELETE
,
350 sizeof(struct rfapi_rib_tcb
));
356 tcb
->sl
= (struct skiplist
*)rn
->aggregate
;
357 SET_FLAG(tcb
->flags
, RFAPI_RIB_TCB_FLAG_DELETED
);
359 tcb
->sl
= (struct skiplist
*)rn
->info
;
360 UNSET_FLAG(tcb
->flags
, RFAPI_RIB_TCB_FLAG_DELETED
);
363 vnc_zlog_debug_verbose("%s: rfd %p pfx %pRN life %u", __func__
, rfd
, rn
,
366 thread_add_timer(bm
->master
, rfapiRibExpireTimer
, tcb
, ri
->lifetime
,
371 extern void rfapi_rib_key_init(struct prefix
*prefix
, /* may be NULL */
372 struct prefix_rd
*rd
, /* may be NULL */
373 struct prefix
*aux
, /* may be NULL */
374 struct rfapi_rib_key
*rk
)
377 memset((void *)rk
, 0, sizeof(struct rfapi_rib_key
));
383 rk
->aux_prefix
= *aux
;
387 * Compares two <struct rfapi_rib_key>s
389 int rfapi_rib_key_cmp(const void *k1
, const void *k2
)
391 const struct rfapi_rib_key
*a
= (struct rfapi_rib_key
*)k1
;
392 const struct rfapi_rib_key
*b
= (struct rfapi_rib_key
*)k2
;
398 ret
= vnc_prefix_cmp(&a
->vn
, &b
->vn
);
402 ret
= vnc_prefix_cmp(&a
->rd
, &b
->rd
);
406 ret
= vnc_prefix_cmp(&a
->aux_prefix
, &b
->aux_prefix
);
413 * Note: this function will claim that two option chains are
414 * different unless their option items are in identical order.
415 * The consequence is that RFP updated responses can be sent
416 * unnecessarily, or that they might contain nexthop items
417 * that are not strictly needed.
419 * This function could be modified to compare option chains more
420 * thoroughly, but it's not clear that the extra compuation would
423 static int bgp_tea_options_cmp(struct bgp_tea_options
*a
,
424 struct bgp_tea_options
*b
)
432 if (a
->type
!= b
->type
)
433 return (a
->type
- b
->type
);
434 if (a
->length
!= b
->length
)
435 return (a
->length
= b
->length
);
436 if ((rc
= memcmp(a
->value
, b
->value
, a
->length
)))
438 if (!a
->next
!= !b
->next
) { /* logical xor */
439 return (a
->next
- b
->next
);
442 return bgp_tea_options_cmp(a
->next
, b
->next
);
446 static int rfapi_info_cmp(struct rfapi_info
*a
, struct rfapi_info
*b
)
453 if ((rc
= rfapi_rib_key_cmp(&a
->rk
, &b
->rk
)))
456 if ((rc
= vnc_prefix_cmp(&a
->un
, &b
->un
)))
459 if (a
->cost
!= b
->cost
)
460 return (a
->cost
- b
->cost
);
462 if (a
->lifetime
!= b
->lifetime
)
463 return (a
->lifetime
- b
->lifetime
);
465 if ((rc
= bgp_tea_options_cmp(a
->tea_options
, b
->tea_options
)))
/*
 * rfapiRibClear: remove every route from an NVE's RIB structures while
 * leaving the route tables themselves allocated.
 *
 * rfd  descriptor whose rib_pending[] and rib[] tables are emptied.
 *
 * For each AFI the visible code:
 *   - walks rfd->rib_pending[afi] (when the table exists): drops the
 *     reference skiplist at pn->aggregate first, then releases the
 *     rfapi_info list at pn->info unless it is the magic value 1;
 *   - walks rfd->rib[afi]: pops every entry off the skiplist at rn->info
 *     and decrements the prefix counters, then pops every entry off the
 *     skiplist at rn->aggregate (whose delete callback takes care of the
 *     rfapi_info) and clears rn->aggregate.
 * Finally rfd->updated_responses_queue is freed and nulled if present.
 *
 * NOTE(review): many statements (declarations, the skiplist/list free
 * calls, the node unlocks) are missing from this copy. Treat this
 * summary as a reading aid and restore the function from upstream before
 * modifying it.
 */
471 void rfapiRibClear(struct rfapi_descriptor
*rfd
)
479 bgp
= bgp_get_default();
480 #ifdef DEBUG_L2_EXTRA
481 vnc_zlog_debug_verbose("%s: rfd=%p", __func__
, rfd
);
484 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
488 if (rfd
->rib_pending
[afi
]) {
489 for (pn
= agg_route_top(rfd
->rib_pending
[afi
]); pn
;
490 pn
= agg_route_next(pn
)) {
493 * free references into the rfapi_info
495 * freeing the structures themselves
500 pn
->aggregate
= NULL
;
502 pn
); /* skiplist deleted */
505 * free the rfapi_info structures
508 if (pn
->info
!= (void *)1) {
514 /* linklist or 1 deleted */
520 for (rn
= agg_route_top(rfd
->rib
[afi
]); rn
;
521 rn
= agg_route_next(rn
)) {
524 struct rfapi_info
*ri
;
526 while (0 == skiplist_first(
533 skiplist_delete_first(
538 (struct skiplist
*)rn
->info
);
541 RFAPI_RIB_PREFIX_COUNT_DECR(rfd
,
546 struct rfapi_info
*ri_del
;
548 /* delete skiplist & contents */
549 while (!skiplist_first(
552 NULL
, (void **)&ri_del
)) {
554 /* sl->del takes care of ri_del
556 skiplist_delete_first((
564 rn
->aggregate
= NULL
;
570 if (rfd
->updated_responses_queue
)
571 work_queue_free_and_null(&rfd
->updated_responses_queue
);
575 * Release all dynamically-allocated memory that is part of an HD's RIB
577 void rfapiRibFree(struct rfapi_descriptor
*rfd
)
583 * NB rfd is typically detached from master list, so is not included
584 * in the count performed by RFAPI_RIB_CHECK_COUNTS
588 * Free routes attached to radix trees
592 /* Now the uncounted rfapi_info's are freed, so the check should succeed
594 RFAPI_RIB_CHECK_COUNTS(1, 0);
599 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
600 if (rfd
->rib_pending
[afi
])
601 agg_table_finish(rfd
->rib_pending
[afi
]);
602 rfd
->rib_pending
[afi
] = NULL
;
605 agg_table_finish(rfd
->rib
[afi
]);
606 rfd
->rib
[afi
] = NULL
;
608 /* NB agg_table_finish frees only prefix nodes, not chained
610 if (rfd
->rsp_times
[afi
])
611 agg_table_finish(rfd
->rsp_times
[afi
]);
612 rfd
->rib
[afi
] = NULL
;
617 * Copies struct bgp_path_info to struct rfapi_info, except for rk fields and un
/*
 * rfapiRibBi2Ri: fill an rfapi_info from a BGP path.
 *
 * bpi  source path (attributes, and bpi->extra->vnc.import when present)
 * ri   destination; its rk.vn / rk.rd and un fields are not written here
 * A lifetime parameter is used by the body but its declaration is cut
 * off in this copy of the signature.
 *
 * Visible behaviour:
 *   - ri->cost comes from rfapiRfpCost(bpi->attr); ri->lifetime from the
 *     lifetime argument (the LIFETIME sub-TLV is deliberately ignored:
 *     "use configured lifetime, not attr lifetime").
 *   - every BGP_VNC_SUBTLV_TYPE_RFPOPTION sub-TLV becomes a newly
 *     allocated bgp_tea_options: type = value[0], length = value[1],
 *     payload copied from value + 2. If the option's own length exceeds
 *     pEncap->length - 2 a mismatch is reported and the length is
 *     clamped. The new item is pushed at the head of ri->tea_options.
 *   - ri->un_options is freed and replaced by
 *     rfapi_encap_tlv_to_un_option(bpi->attr).
 *   - when the import RD decodes as RD_TYPE_VNC_ETH, an L2ADDR vn option
 *     is built: MAC from rd.val + 2, LNI and Ethernet tag from the
 *     extended communities, local_nve_id from rd.val[1], label from
 *     bpi->extra->label[0]; the old ri->vn_options is freed.
 *   - a non-empty aux_prefix in bpi->extra is copied to ri->rk.aux_prefix.
 *
 * NOTE(review): "pEncap->length - 2" is used without a visible check
 * that pEncap->length is at least 2 - confirm whether the caller or a
 * missing line guarantees this. Several statements (XCALLOC size
 * argument, memcpy length, break statements, the vn_options assignment)
 * are missing from this copy.
 */
619 static void rfapiRibBi2Ri(struct bgp_path_info
*bpi
, struct rfapi_info
*ri
,
622 struct bgp_attr_encap_subtlv
*pEncap
;
624 ri
->cost
= rfapiRfpCost(bpi
->attr
);
625 ri
->lifetime
= lifetime
;
627 /* This loop based on rfapiRouteInfo2NextHopEntry() */
628 for (pEncap
= bpi
->attr
->vnc_subtlvs
; pEncap
; pEncap
= pEncap
->next
) {
629 struct bgp_tea_options
*hop
;
631 switch (pEncap
->type
) {
632 case BGP_VNC_SUBTLV_TYPE_LIFETIME
:
633 /* use configured lifetime, not attr lifetime */
636 case BGP_VNC_SUBTLV_TYPE_RFPOPTION
:
637 hop
= XCALLOC(MTYPE_BGP_TEA_OPTIONS
,
638 sizeof(struct bgp_tea_options
));
640 hop
->type
= pEncap
->value
[0];
641 hop
->length
= pEncap
->value
[1];
642 hop
->value
= XCALLOC(MTYPE_BGP_TEA_OPTIONS_VALUE
,
645 memcpy(hop
->value
, pEncap
->value
+ 2,
647 if (hop
->length
> pEncap
->length
- 2) {
649 "%s: VNC subtlv length mismatch: "
650 "RFP option says %d, attr says %d "
652 __func__
, hop
->length
,
654 hop
->length
= pEncap
->length
- 2;
656 hop
->next
= ri
->tea_options
;
657 ri
->tea_options
= hop
;
665 rfapi_un_options_free(ri
->un_options
); /* maybe free old version */
666 ri
->un_options
= rfapi_encap_tlv_to_un_option(bpi
->attr
);
672 && decode_rd_type(bpi
->extra
->vnc
.import
.rd
.val
)
673 == RD_TYPE_VNC_ETH
) {
676 struct rfapi_vn_option
*vo
;
678 vo
= XCALLOC(MTYPE_RFAPI_VN_OPTION
,
679 sizeof(struct rfapi_vn_option
));
682 vo
->type
= RFAPI_VN_OPTION_TYPE_L2ADDR
;
684 /* copy from RD already stored in bpi, so we don't need it_node
686 memcpy(&vo
->v
.l2addr
.macaddr
, bpi
->extra
->vnc
.import
.rd
.val
+ 2,
689 (void)rfapiEcommunityGetLNI(bpi
->attr
->ecommunity
,
690 &vo
->v
.l2addr
.logical_net_id
);
691 (void)rfapiEcommunityGetEthernetTag(bpi
->attr
->ecommunity
,
692 &vo
->v
.l2addr
.tag_id
);
694 /* local_nve_id comes from RD */
695 vo
->v
.l2addr
.local_nve_id
= bpi
->extra
->vnc
.import
.rd
.val
[1];
697 /* label comes from MP_REACH_NLRI label */
698 vo
->v
.l2addr
.label
= decode_label(&bpi
->extra
->label
[0]);
700 rfapi_vn_options_free(
701 ri
->vn_options
); /* maybe free old version */
706 * If there is an auxiliary IP address (L2 can have it), copy it
708 if (bpi
->extra
&& bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
709 ri
->rk
.aux_prefix
= bpi
->extra
->vnc
.import
.aux_prefix
;
716 * Install route into NVE RIB model so as to be consistent with
717 * caller's response to rfapi_query().
719 * Also: return indication to caller whether this specific route
720 * should be included in the response to the NVE according to
721 * the following tests:
723 * 1. If there were prior duplicates of this route in this same
724 * query response, don't include the route.
728 * 0 OK to include route in response
729 * !0 do not include route in response
/*
 * rfapiRibPreloadBi: record a route in the NVE's RIB model while a query
 * response is being built, and tell the caller whether to include it.
 *
 * rfd_rib_node  node in the descriptor's RIB table for the prefix
 *               (NULL = don't preload or filter)
 * pfx_vn        VN address of the route
 * pfx_un        UN address of the route
 * lifetime      lifetime to store in the route
 * bpi           BGP path supplying RD, aux prefix and attributes
 *
 * Returns 0 when the route may be included in the response; -1 (see the
 * visible "duplicate in this response" return) when an entry with the
 * same key was already sent in this same response, i.e. its rsp_counter
 * and last_sent_time match the descriptor's rsp_counter and rsp_time.
 *
 * Visible behaviour: a key is built from bpi->extra->vnc.import (rd and,
 * if set, aux_prefix) and looked up in the skiplist at
 * rfd_rib_node->info. A hit is refreshed via rfapiRibBi2Ri(); a miss
 * allocates a new rfapi_info, creating the skiplist (and locking the
 * node, incrementing the prefix counters) when the node had none, and
 * inserts it. The entry's last_sent_time is stamped, its expiry timer
 * is started with rfapiRibStartTimer(rfd, ori, rfd_rib_node, 0), and the
 * per-prefix response time in rfd->rsp_times[afi] is set to bgp_clock().
 *
 * NOTE(review): bpi->extra is dereferenced without a visible NULL check,
 * and the handling of a NULL rfd_rib_node is not visible either - lines
 * are missing from this copy; confirm against upstream.
 */
731 int rfapiRibPreloadBi(
732 struct agg_node
*rfd_rib_node
, /* NULL = don't preload or filter */
733 struct prefix
*pfx_vn
, struct prefix
*pfx_un
, uint32_t lifetime
,
734 struct bgp_path_info
*bpi
)
736 struct rfapi_descriptor
*rfd
;
737 struct skiplist
*slRibPt
= NULL
;
738 struct rfapi_info
*ori
= NULL
;
739 struct rfapi_rib_key rk
;
740 struct agg_node
*trn
;
742 const struct prefix
*p
= agg_node_get_prefix(rfd_rib_node
);
747 afi
= family2afi(p
->family
);
749 rfd
= agg_get_table_info(agg_get_table(rfd_rib_node
));
751 memset((void *)&rk
, 0, sizeof(rk
));
753 rk
.rd
= bpi
->extra
->vnc
.import
.rd
;
756 * If there is an auxiliary IP address (L2 can have it), copy it
758 if (bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
759 rk
.aux_prefix
= bpi
->extra
->vnc
.import
.aux_prefix
;
763 * is this route already in NVE's RIB?
765 slRibPt
= (struct skiplist
*)rfd_rib_node
->info
;
767 if (slRibPt
&& !skiplist_search(slRibPt
, &rk
, (void **)&ori
)) {
769 if ((ori
->rsp_counter
== rfd
->rsp_counter
)
770 && (ori
->last_sent_time
== rfd
->rsp_time
)) {
771 return -1; /* duplicate in this response */
774 /* found: update contents of existing route in RIB */
776 rfapiRibBi2Ri(bpi
, ori
, lifetime
);
778 /* not found: add new route to RIB */
779 ori
= rfapi_info_new();
782 rfapiRibBi2Ri(bpi
, ori
, lifetime
);
785 slRibPt
= skiplist_new(0, rfapi_rib_key_cmp
, NULL
);
786 rfd_rib_node
->info
= slRibPt
;
787 agg_lock_node(rfd_rib_node
);
788 RFAPI_RIB_PREFIX_COUNT_INCR(rfd
, rfd
->bgp
->rfapi
);
790 skiplist_insert(slRibPt
, &ori
->rk
, ori
);
793 ori
->last_sent_time
= rfapi_time(NULL
);
798 RFAPI_RIB_CHECK_COUNTS(0, 0);
799 rfapiRibStartTimer(rfd
, ori
, rfd_rib_node
, 0);
800 RFAPI_RIB_CHECK_COUNTS(0, 0);
803 * Update last sent time for prefix
805 trn
= agg_node_get(rfd
->rsp_times
[afi
], p
); /* locks trn */
806 trn
->info
= (void *)(uintptr_t)bgp_clock();
808 agg_unlock_node(trn
);
814 * Frees rfapi_info items at node
816 * Adjust 'rib' and 'rib_pending' as follows:
818 * If rib_pending node->info is 1 (magic value):
819 * callback: NHL = RIB NHL with lifetime = withdraw_lifetime_value
820 * RIB = remove all routes at the node
823 * For each item at rib node:
824 * if not present in pending node, move RIB item to "delete list"
826 * For each item at pending rib node:
827 * if present (same vn/un) in rib node with same lifetime & options, drop
828 * matching item from pending node
830 * For each remaining item at pending rib node, add or replace item
833 * Construct NHL as concatenation of pending list + delete list
/*
 * process_pending_node: reconcile one node of an NVE's pending RIB with
 * the matching node of its RIB and append the resulting next-hop entries
 * to the response list.
 *
 * bgp         BGP instance (rfapi counters and rfapi_cfg flags are used)
 * rfd         descriptor that owns rib[] / rib_pending[] / rsp_times[]
 * pn          pending node
 * head, tail  response list being built; new entries are linked through
 *             (*tail)->next
 * The body also uses an "afi" value and the caller passes six arguments,
 * so a parameter line is missing from this copy of the signature.
 *
 * Visible flow:
 *  1. rn = agg_node_get(rfd->rib[afi], p) (locks rn). slRibPt = rn->info,
 *     slPendPt = pn->aggregate, lPendCost = pn->info.
 *  2. lPendCost == 1 ("delete all"): every route on slRibPt is moved to
 *     delete_list, its TEA options are freed, its timer is cancelled and
 *     the control block freed; the RIB skiplist is freed and rn->info
 *     cleared.
 *  3. Otherwise the RIB skiplist is walked: routes absent from slPendPt
 *     go onto delete_list (TEA options freed, timer cancelled); routes
 *     present and equal according to rfapi_info_cmp() are removed from
 *     slPendPt and lPendCost because nothing changed. Routes on
 *     delete_list are then deleted from slRibPt; an emptied skiplist is
 *     freed and rn unlocked.
 *  4. Each remaining lPendCost route either rewrites the matching RIB
 *     route (cost, lifetime, option chains, last_sent_time) or is added
 *     as a new rfapi_info; its timer is (re)started with
 *     rfapiRibStartTimer(rfd, ori, rn, 0).
 *  5. A rfapi_next_hop_entry is allocated for every lPendCost route
 *     (lifetime = ri->lifetime; vn/un option chains are handed over to
 *     the entry), counters are bumped and rfd->rsp_times[afi] is stamped
 *     with bgp_clock().
 *  6. Unless BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE is set, a
 *     rfapi_next_hop_entry with lifetime RFAPI_REMOVE_RESPONSE_LIFETIME
 *     is allocated for every delete_list route, and the route is either
 *     inserted into the recently-deleted skiplist at rn->aggregate
 *     (created on demand) or, if already there, the existing entry's
 *     timer is restarted with rfapiRibStartTimer(rfd, ri_del, rn, 1).
 *     What is left on delete_list is freed through rfapi_info_free.
 *  7. The pending skiplist and list are freed, pn is unlocked, prefix
 *     counters are adjusted according to rib_node_started_nonempty, the
 *     monitor timers are restarted when routes were sent, and rn is
 *     unlocked.
 * RFAPI_RIB_CHECK_COUNTS() is invoked between steps with the current
 * delete_list length as the "held separately" offset.
 *
 * NOTE(review): in the branch for a route already on the recently-deleted
 * list the timer is restarted on ri_del but last_sent_time is stamped on
 * ri, which stays on delete_list - confirm whether ri_del was intended.
 * NOTE(review): skiplist_delete() is called inside assert(); it must keep
 * executing in builds where assert is active.
 * NOTE(review): a large number of lines (conditions, else branches,
 * closing braces, buffer declarations) are missing from this copy; use
 * this outline as a reading aid and restore the function from upstream
 * before editing.
 */
837 static void process_pending_node(struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
839 struct agg_node
*pn
, /* pending node */
840 struct rfapi_next_hop_entry
**head
,
841 struct rfapi_next_hop_entry
**tail
)
843 struct listnode
*node
= NULL
;
844 struct listnode
*nnode
= NULL
;
845 struct rfapi_info
*ri
= NULL
; /* happy valgrind */
846 struct rfapi_ip_prefix hp
= {0}; /* pfx to put in NHE */
847 struct agg_node
*rn
= NULL
;
848 struct skiplist
*slRibPt
= NULL
; /* rib list */
849 struct skiplist
*slPendPt
= NULL
;
850 struct list
*lPendCost
= NULL
;
851 struct list
*delete_list
= NULL
;
852 int printedprefix
= 0;
853 int rib_node_started_nonempty
= 0;
854 int sendingsomeroutes
= 0;
855 const struct prefix
*p
;
856 #if DEBUG_PROCESS_PENDING_NODE
857 unsigned int count_rib_initial
= 0;
858 unsigned int count_pend_vn_initial
= 0;
859 unsigned int count_pend_cost_initial
= 0;
863 p
= agg_node_get_prefix(pn
);
864 vnc_zlog_debug_verbose("%s: afi=%d, %pRN pn->info=%p", __func__
, afi
,
867 if (AFI_L2VPN
!= afi
) {
868 rfapiQprefix2Rprefix(p
, &hp
);
871 RFAPI_RIB_CHECK_COUNTS(1, 0);
874 * Find corresponding RIB node
876 rn
= agg_node_get(rfd
->rib
[afi
], p
); /* locks rn */
879 * RIB skiplist has key=rfapi_addr={vn,un}, val = rfapi_info,
880 * skiplist.del = NULL
882 slRibPt
= (struct skiplist
*)rn
->info
;
884 rib_node_started_nonempty
= 1;
886 slPendPt
= (struct skiplist
*)(pn
->aggregate
);
887 lPendCost
= (struct list
*)(pn
->info
);
889 #if DEBUG_PROCESS_PENDING_NODE
892 count_rib_initial
= skiplist_count(slRibPt
);
895 count_pend_vn_initial
= skiplist_count(slPendPt
);
897 if (lPendCost
&& lPendCost
!= (struct list
*)1)
898 count_pend_cost_initial
= lPendCost
->count
;
903 * Handle special case: delete all routes at prefix
905 if (lPendCost
== (struct list
*)1) {
906 vnc_zlog_debug_verbose("%s: lPendCost=1 => delete all",
908 if (slRibPt
&& !skiplist_empty(slRibPt
)) {
909 delete_list
= list_new();
911 == skiplist_first(slRibPt
, NULL
, (void **)&ri
)) {
913 char buf
[PREFIX_STRLEN
];
914 char buf2
[PREFIX_STRLEN
];
916 listnode_add(delete_list
, ri
);
917 vnc_zlog_debug_verbose(
918 "%s: after listnode_add, delete_list->count=%d",
919 __func__
, delete_list
->count
);
920 rfapiFreeBgpTeaOptionChain(ri
->tea_options
);
921 ri
->tea_options
= NULL
;
924 struct rfapi_rib_tcb
*tcb
;
926 tcb
= ((struct thread
*)ri
->timer
)->arg
;
927 thread_cancel(ri
->timer
);
928 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
932 prefix2str(&ri
->rk
.vn
, buf
, sizeof(buf
));
933 prefix2str(&ri
->un
, buf2
, sizeof(buf2
));
934 vnc_zlog_debug_verbose(
935 "%s: put dl pfx=%pRN vn=%s un=%s cost=%d life=%d vn_options=%p",
936 __func__
, pn
, buf
, buf2
, ri
->cost
,
937 ri
->lifetime
, ri
->vn_options
);
939 skiplist_delete_first(slRibPt
);
942 assert(skiplist_empty(slRibPt
));
944 skiplist_free(slRibPt
);
945 rn
->info
= slRibPt
= NULL
;
948 lPendCost
= pn
->info
= NULL
;
954 skiplist_free(slRibPt
);
960 if (slPendPt
) { /* TBD I think we can toss this block */
961 skiplist_free(slPendPt
);
962 pn
->aggregate
= NULL
;
969 agg_unlock_node(rn
); /* agg_node_get() */
971 if (rib_node_started_nonempty
) {
972 RFAPI_RIB_PREFIX_COUNT_DECR(rfd
, bgp
->rfapi
);
975 RFAPI_RIB_CHECK_COUNTS(1, 0);
980 vnc_zlog_debug_verbose("%s: lPendCost->count=%d, slRibPt->count=%d",
982 (lPendCost
? (int)lPendCost
->count
: -1),
983 (slRibPt
? (int)slRibPt
->count
: -1));
986 * Iterate over routes at RIB Node.
987 * If not found at Pending Node, delete from RIB Node and add to
989 * If found at Pending Node
990 * If identical rfapi_info, delete from Pending Node
994 struct rfapi_info
*ori
;
997 * Iterate over RIB List
1000 while (!skiplist_next(slRibPt
, NULL
, (void **)&ori
, &cursor
)) {
1002 if (skiplist_search(slPendPt
, &ori
->rk
, (void **)&ri
)) {
1004 * Not in Pending list, so it should be deleted
1007 delete_list
= list_new();
1008 listnode_add(delete_list
, ori
);
1009 rfapiFreeBgpTeaOptionChain(ori
->tea_options
);
1010 ori
->tea_options
= NULL
;
1012 struct rfapi_rib_tcb
*tcb
;
1014 tcb
= ((struct thread
*)ori
->timer
)
1016 thread_cancel(ori
->timer
);
1017 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
1021 #if DEBUG_PROCESS_PENDING_NODE
1022 /* deleted from slRibPt below, after we're done
1024 vnc_zlog_debug_verbose(
1025 "%s: slRibPt ri %p not matched in pending list, delete",
1031 * Found in pending list. If same lifetime,
1033 * then remove from pending list because the
1037 if (!rfapi_info_cmp(ori
, ri
)) {
1038 skiplist_delete(slPendPt
, &ri
->rk
,
1042 /* linear walk: might need
1044 listnode_delete(lPendCost
,
1054 #if DEBUG_PROCESS_PENDING_NODE
1055 vnc_zlog_debug_verbose(
1056 "%s: slRibPt ri %p matched in pending list, %s",
1059 : "different info"));
1064 * Go back and delete items from RIB
1067 for (ALL_LIST_ELEMENTS_RO(delete_list
, node
, ri
)) {
1068 vnc_zlog_debug_verbose(
1069 "%s: deleting ri %p from slRibPt",
1071 assert(!skiplist_delete(slRibPt
, &ri
->rk
,
1074 if (skiplist_empty(slRibPt
)) {
1075 skiplist_free(slRibPt
);
1076 slRibPt
= rn
->info
= NULL
;
1077 agg_unlock_node(rn
);
1082 RFAPI_RIB_CHECK_COUNTS(0, (delete_list
? delete_list
->count
: 0));
1085 * Iterate over routes at Pending Node
1087 * If {vn} found at RIB Node, update RIB Node route contents to match PN
1088 * If {vn} NOT found at RIB Node, add copy to RIB Node
1091 for (ALL_LIST_ELEMENTS_RO(lPendCost
, node
, ri
)) {
1093 struct rfapi_info
*ori
;
1096 && !skiplist_search(slRibPt
, &ri
->rk
,
1099 /* found: update contents of existing route in
1102 ori
->cost
= ri
->cost
;
1103 ori
->lifetime
= ri
->lifetime
;
1104 rfapiFreeBgpTeaOptionChain(ori
->tea_options
);
1106 rfapiOptionsDup(ri
->tea_options
);
1107 ori
->last_sent_time
= rfapi_time(NULL
);
1109 rfapiFreeRfapiVnOptionChain(ori
->vn_options
);
1111 rfapiVnOptionsDup(ri
->vn_options
);
1113 rfapiFreeRfapiUnOptionChain(ori
->un_options
);
1115 rfapiUnOptionsDup(ri
->un_options
);
1117 vnc_zlog_debug_verbose(
1118 "%s: matched lPendCost item %p in slRibPt, rewrote",
1123 char buf_rd
[RD_ADDRSTRLEN
];
1125 /* not found: add new route to RIB */
1126 ori
= rfapi_info_new();
1129 ori
->cost
= ri
->cost
;
1130 ori
->lifetime
= ri
->lifetime
;
1132 rfapiOptionsDup(ri
->tea_options
);
1133 ori
->last_sent_time
= rfapi_time(NULL
);
1135 rfapiVnOptionsDup(ri
->vn_options
);
1137 rfapiUnOptionsDup(ri
->un_options
);
1140 slRibPt
= skiplist_new(
1141 0, rfapi_rib_key_cmp
, NULL
);
1145 skiplist_insert(slRibPt
, &ori
->rk
, ori
);
1148 prefix_rd2str(&ori
->rk
.rd
, buf_rd
,
1154 vnc_zlog_debug_verbose(
1155 "%s: nomatch lPendCost item %p in slRibPt, added (rd=%s)",
1156 __func__
, ri
, buf_rd
);
1162 RFAPI_RIB_CHECK_COUNTS(
1163 0, (delete_list
? delete_list
->count
: 0));
1164 rfapiRibStartTimer(rfd
, ori
, rn
, 0);
1165 RFAPI_RIB_CHECK_COUNTS(
1166 0, (delete_list
? delete_list
->count
: 0));
1173 * Construct NHL as concatenation of pending list + delete list
1177 RFAPI_RIB_CHECK_COUNTS(0, (delete_list
? delete_list
->count
: 0));
1184 vnc_zlog_debug_verbose("%s: lPendCost->count now %d", __func__
,
1186 vnc_zlog_debug_verbose("%s: For prefix %pRN (a)", __func__
, pn
);
1189 for (ALL_LIST_ELEMENTS(lPendCost
, node
, nnode
, ri
)) {
1191 struct rfapi_next_hop_entry
*new;
1192 struct agg_node
*trn
;
1194 new = XCALLOC(MTYPE_RFAPI_NEXTHOP
,
1195 sizeof(struct rfapi_next_hop_entry
));
1198 if (ri
->rk
.aux_prefix
.family
) {
1199 rfapiQprefix2Rprefix(&ri
->rk
.aux_prefix
,
1203 if (AFI_L2VPN
== afi
) {
1204 /* hp is 0; need to set length to match
1206 new->prefix
.length
=
1207 (ri
->rk
.vn
.family
== AF_INET
)
1212 new->prefix
.cost
= ri
->cost
;
1213 new->lifetime
= ri
->lifetime
;
1214 rfapiQprefix2Raddr(&ri
->rk
.vn
, &new->vn_address
);
1215 rfapiQprefix2Raddr(&ri
->un
, &new->un_address
);
1216 /* free option chain from ri */
1217 rfapiFreeBgpTeaOptionChain(ri
->tea_options
);
1220 NULL
; /* option chain was transferred to NHL */
1222 new->vn_options
= ri
->vn_options
;
1224 NULL
; /* option chain was transferred to NHL */
1226 new->un_options
= ri
->un_options
;
1228 NULL
; /* option chain was transferred to NHL */
1231 (*tail
)->next
= new;
1236 sendingsomeroutes
= 1;
1238 ++rfd
->stat_count_nh_reachable
;
1239 ++bgp
->rfapi
->stat
.count_updated_response_updates
;
1242 * update this NVE's timestamp for this prefix
1244 trn
= agg_node_get(rfd
->rsp_times
[afi
],
1246 trn
->info
= (void *)(uintptr_t)bgp_clock();
1248 agg_unlock_node(trn
);
1250 rfapiRfapiIpAddr2Str(&new->vn_address
, buf
, BUFSIZ
);
1251 rfapiRfapiIpAddr2Str(&new->un_address
, buf2
, BUFSIZ
);
1252 vnc_zlog_debug_verbose(
1253 "%s: add vn=%s un=%s cost=%d life=%d",
1254 __func__
, buf
, buf2
, new->prefix
.cost
,
1259 RFAPI_RIB_CHECK_COUNTS(0, (delete_list
? delete_list
->count
: 0));
1266 if (!printedprefix
) {
1267 vnc_zlog_debug_verbose("%s: For prefix %pRN (d)",
1270 vnc_zlog_debug_verbose("%s: delete_list has %d elements",
1271 __func__
, delete_list
->count
);
1273 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1274 if (!CHECK_FLAG(bgp
->rfapi_cfg
->flags
,
1275 BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE
)) {
1277 for (ALL_LIST_ELEMENTS(delete_list
, node
, nnode
, ri
)) {
1279 struct rfapi_next_hop_entry
*new;
1280 struct rfapi_info
*ri_del
;
1282 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1284 MTYPE_RFAPI_NEXTHOP
,
1285 sizeof(struct rfapi_next_hop_entry
));
1288 if (ri
->rk
.aux_prefix
.family
) {
1289 rfapiQprefix2Rprefix(&ri
->rk
.aux_prefix
,
1293 if (AFI_L2VPN
== afi
) {
1294 /* hp is 0; need to set length
1295 * to match AF of vn */
1296 new->prefix
.length
=
1304 new->prefix
.cost
= ri
->cost
;
1305 new->lifetime
= RFAPI_REMOVE_RESPONSE_LIFETIME
;
1306 rfapiQprefix2Raddr(&ri
->rk
.vn
,
1308 rfapiQprefix2Raddr(&ri
->un
, &new->un_address
);
1310 new->vn_options
= ri
->vn_options
;
1311 ri
->vn_options
= NULL
; /* option chain was
1312 transferred to NHL */
1314 new->un_options
= ri
->un_options
;
1315 ri
->un_options
= NULL
; /* option chain was
1316 transferred to NHL */
1319 (*tail
)->next
= new;
1324 ++rfd
->stat_count_nh_removal
;
1326 .count_updated_response_deletes
;
1328 rfapiRfapiIpAddr2Str(&new->vn_address
, buf
,
1330 rfapiRfapiIpAddr2Str(&new->un_address
, buf2
,
1332 vnc_zlog_debug_verbose(
1333 "%s: DEL vn=%s un=%s cost=%d life=%d",
1334 __func__
, buf
, buf2
, new->prefix
.cost
,
1337 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1339 * Update/add to list of recent deletions at
1342 if (!rn
->aggregate
) {
1343 rn
->aggregate
= skiplist_new(
1344 0, rfapi_rib_key_cmp
,
1349 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1351 /* sanity check lifetime */
1353 > RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
1355 RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
;
1357 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1358 /* cancel normal expire timer */
1360 struct rfapi_rib_tcb
*tcb
;
1362 tcb
= ((struct thread
*)ri
->timer
)->arg
;
1364 (struct thread
*)ri
->timer
);
1365 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
1368 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1371 * Look in "recently-deleted" list
1373 if (skiplist_search(
1374 (struct skiplist
*)(rn
->aggregate
),
1375 &ri
->rk
, (void **)&ri_del
)) {
1379 RFAPI_RIB_CHECK_COUNTS(
1380 0, delete_list
->count
);
1382 * NOT in "recently-deleted" list
1386 node
); /* does not free ri */
1387 rc
= skiplist_insert(
1393 RFAPI_RIB_CHECK_COUNTS(
1394 0, delete_list
->count
);
1395 rfapiRibStartTimer(rfd
, ri
, rn
, 1);
1396 RFAPI_RIB_CHECK_COUNTS(
1397 0, delete_list
->count
);
1398 ri
->last_sent_time
= rfapi_time(NULL
);
1401 char buf_rd
[RD_ADDRSTRLEN
];
1403 vnc_zlog_debug_verbose(
1404 "%s: move route to recently deleted list, rd=%s",
1415 * IN "recently-deleted" list
1417 RFAPI_RIB_CHECK_COUNTS(
1418 0, delete_list
->count
);
1419 rfapiRibStartTimer(rfd
, ri_del
, rn
, 1);
1420 RFAPI_RIB_CHECK_COUNTS(
1421 0, delete_list
->count
);
1422 ri
->last_sent_time
= rfapi_time(NULL
);
1426 vnc_zlog_debug_verbose(
1427 "%s: response removal disabled, omitting removals",
1431 delete_list
->del
= (void (*)(void *))rfapi_info_free
;
1432 list_delete(&delete_list
);
1435 RFAPI_RIB_CHECK_COUNTS(0, 0);
1438 * Reset pending lists. The final agg_unlock_node() will probably
1439 * cause the pending node to be released.
1442 skiplist_free(slPendPt
);
1443 pn
->aggregate
= NULL
;
1444 agg_unlock_node(pn
);
1447 list_delete(&lPendCost
);
1449 agg_unlock_node(pn
);
1451 RFAPI_RIB_CHECK_COUNTS(0, 0);
1453 if (rib_node_started_nonempty
) {
1455 RFAPI_RIB_PREFIX_COUNT_DECR(rfd
, bgp
->rfapi
);
1459 RFAPI_RIB_PREFIX_COUNT_INCR(rfd
, bgp
->rfapi
);
1463 if (sendingsomeroutes
)
1464 rfapiMonitorTimersRestart(rfd
, p
);
1466 agg_unlock_node(rn
); /* agg_node_get() */
1472 * regardless of targets, construct a single callback by doing
1473 * only one traversal of the pending RIB
/*
 * rib_do_callback_onepass: build one updated-response next-hop list for
 * an NVE and deliver it through the response callback.
 *
 * rfd  descriptor whose pending RIB is processed
 * afi  address family of the pending table to walk
 *
 * Every node of rfd->rib_pending[afi] is handed to
 * process_pending_node(), which appends entries to the head/tail list.
 * The callback is rfd->response_cb when set, otherwise
 * bgp->rfapi->rfp_methods.response_cb. RFAPI_INCALLBACK is set in
 * bgp->rfapi->flags for the duration of the call (*f)(head, rfd->cookie)
 * and bgp->rfapi->response_updated_count is incremented afterwards.
 *
 * NOTE(review): the early return when the pending table is NULL, the
 * test that the list is non-empty and the else keyword between the two
 * callback assignments are missing from this copy.
 */
1479 static void rib_do_callback_onepass(struct rfapi_descriptor
*rfd
, afi_t afi
)
1481 struct bgp
*bgp
= bgp_get_default();
1482 struct rfapi_next_hop_entry
*head
= NULL
;
1483 struct rfapi_next_hop_entry
*tail
= NULL
;
1484 struct agg_node
*rn
;
1486 #ifdef DEBUG_L2_EXTRA
1487 vnc_zlog_debug_verbose("%s: rfd=%p, afi=%d", __func__
, rfd
, afi
);
1490 if (!rfd
->rib_pending
[afi
])
1495 for (rn
= agg_route_top(rfd
->rib_pending
[afi
]); rn
;
1496 rn
= agg_route_next(rn
)) {
1497 process_pending_node(bgp
, rfd
, afi
, rn
, &head
, &tail
);
1501 rfapi_response_cb_t
*f
;
1504 vnc_zlog_debug_verbose("%s: response callback NHL follows:",
1506 rfapiPrintNhl(NULL
, head
);
1509 if (rfd
->response_cb
)
1510 f
= rfd
->response_cb
;
1512 f
= bgp
->rfapi
->rfp_methods
.response_cb
;
1514 bgp
->rfapi
->flags
|= RFAPI_INCALLBACK
;
1515 vnc_zlog_debug_verbose("%s: invoking updated response callback",
1517 (*f
)(head
, rfd
->cookie
);
1518 bgp
->rfapi
->flags
&= ~RFAPI_INCALLBACK
;
1519 ++bgp
->rfapi
->response_updated_count
;
/*
 * rfapiRibDoQueuedCallback: work-queue handler for a queued "updated
 * responses" item (installed as spec.workfunc by
 * updated_responses_queue_init()).
 *
 * wq    the work queue
 * data  a struct rfapi_updated_responses_queue carrying rfd and afi (the
 *       parameter declaration is cut off in this copy of the signature)
 *
 * After checking with rfapi_check() that the descriptor is still open,
 * it runs rib_do_callback_onepass(rfd, afi) and clears the per-AFI
 * RFAPI_QUEUED_FLAG(afi) bit in rfd->flags so a new item can be queued.
 * RFAPI_RIB_CHECK_COUNTS(1, 0) runs on entry and exit.
 *
 * NOTE(review): the return statements (including the value returned when
 * rfapi_check() fails) are missing from this copy.
 */
1523 static wq_item_status
rfapiRibDoQueuedCallback(struct work_queue
*wq
,
1526 struct rfapi_descriptor
*rfd
;
1528 uint32_t queued_flag
;
1530 RFAPI_RIB_CHECK_COUNTS(1, 0);
1532 rfd
= ((struct rfapi_updated_responses_queue
*)data
)->rfd
;
1533 afi
= ((struct rfapi_updated_responses_queue
*)data
)->afi
;
1535 /* Make sure the HD wasn't closed after the work item was scheduled */
1536 if (rfapi_check(rfd
))
1539 rib_do_callback_onepass(rfd
, afi
);
1541 queued_flag
= RFAPI_QUEUED_FLAG(afi
);
1543 UNSET_FLAG(rfd
->flags
, queued_flag
);
1545 RFAPI_RIB_CHECK_COUNTS(1, 0);
1550 static void rfapiRibQueueItemDelete(struct work_queue
*wq
, void *data
)
1552 XFREE(MTYPE_RFAPI_UPDATED_RESPONSE_QUEUE
, data
);
1555 static void updated_responses_queue_init(struct rfapi_descriptor
*rfd
)
1557 if (rfd
->updated_responses_queue
)
1560 rfd
->updated_responses_queue
=
1561 work_queue_new(bm
->master
, "rfapi updated responses");
1562 assert(rfd
->updated_responses_queue
);
1564 rfd
->updated_responses_queue
->spec
.workfunc
= rfapiRibDoQueuedCallback
;
1565 rfd
->updated_responses_queue
->spec
.del_item_data
=
1566 rfapiRibQueueItemDelete
;
1567 rfd
->updated_responses_queue
->spec
.max_retries
= 0;
1568 rfd
->updated_responses_queue
->spec
.hold
= 1;
1572 * Called when an import table node is modified. Construct a
1573 * new complete nexthop list, sorted by cost (lowest first),
1574 * based on the import table node.
1576 * Filter out duplicate nexthops (vn address). There should be
1577 * only one UN address per VN address from the point of view of
1578 * a given import table, so we can probably ignore UN addresses
1581 * Based on rfapiNhlAddNodeRoutes()
/*
 * Rebuild the pending-RIB entry for it_node's prefix in
 * rfd->rib_pending[afi] from the bgp_path_info chain at it_node->info,
 * then queue an updated-responses work item for the rfd unless the
 * per-afi RFAPI_QUEUED_FLAG bit is already set in rfd->flags.
 *
 * bgp       instance whose rfapi_cfg flags are consulted
 * rfd       descriptor (NVE) whose pending RIB is updated
 * it        import table ("needed for L2"; not referenced in the lines
 *           visible here)
 * it_node   import-table node supplying the prefix and the route chain
 * lifetime  passed through to rfapiRibBi2Ri() for each accepted route
 *
 * Pending node 'pn' storage, as used below:
 *   pn->aggregate  skiplist keyed by &ri->rk, compared with
 *                  rfapi_rib_key_cmp; a failed insert drops the route
 *   pn->info       list of struct rfapi_info whose del callback is
 *                  rfapi_info_free, or the value (void *)1
 * Any previous skiplist/list on pn is released before the rebuild, each
 * followed by an agg_unlock_node(pn).
 *
 * Per route: the nexthop is extracted with rfapiNexthop2Prefix(); when
 * BGP_VNC_CONFIG_FILTER_SELF_FROM_RSP is set it is compared
 * (prefix_same) with the rfd's own VN address; the key takes the
 * import rd and, if its family is non-zero, the aux_prefix; a non-zero
 * result from rfapiGetUnAddrOfVpnBi() frees the new rfapi_info.
 *
 * NOTE(review): this listing omits several source lines (early-return
 * statements, some conditions and closing braces, the initialisation of
 * the queued work item) -- confirm details against the complete file.
 */
1583 void rfapiRibUpdatePendingNode(
1584 struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
1585 struct rfapi_import_table
*it
, /* needed for L2 */
1586 struct agg_node
*it_node
, uint32_t lifetime
)
1588 const struct prefix
*prefix
;
1589 struct bgp_path_info
*bpi
;
1590 struct agg_node
*pn
;
1592 uint32_t queued_flag
;
1594 char buf
[PREFIX_STRLEN
];
1596 vnc_zlog_debug_verbose("%s: entry", __func__
);
1598 if (CHECK_FLAG(bgp
->rfapi_cfg
->flags
, BGP_VNC_CONFIG_CALLBACK_DISABLE
))
1601 vnc_zlog_debug_verbose("%s: callbacks are not disabled", __func__
);
1603 RFAPI_RIB_CHECK_COUNTS(1, 0);
1605 prefix
= agg_node_get_prefix(it_node
);
1606 afi
= family2afi(prefix
->family
);
1607 prefix2str(prefix
, buf
, sizeof(buf
));
1608 vnc_zlog_debug_verbose("%s: prefix=%s", __func__
, buf
);
1610 pn
= agg_node_get(rfd
->rib_pending
[afi
], prefix
);
1613 vnc_zlog_debug_verbose("%s: pn->info=%p, pn->aggregate=%p", __func__
,
1614 pn
->info
, pn
->aggregate
);
1616 if (pn
->aggregate
) {
1618 * free references into the rfapi_info structures before
1619 * freeing the structures themselves
1621 skiplist_free((struct skiplist
*)(pn
->aggregate
));
1622 pn
->aggregate
= NULL
;
1623 agg_unlock_node(pn
); /* skiplist deleted */
1628 * free the rfapi_info structures
1631 if (pn
->info
!= (void *)1) {
1632 list_delete((struct list
**)(&pn
->info
));
1635 agg_unlock_node(pn
); /* linklist or 1 deleted */
1639 * The BPIs in the import table are already sorted by cost
1641 for (bpi
= it_node
->info
; bpi
; bpi
= bpi
->next
) {
1643 struct rfapi_info
*ri
;
1644 struct prefix pfx_nh
;
1647 /* shouldn't happen */
1648 /* TBD increment error stats counter */
1652 rfapiNexthop2Prefix(bpi
->attr
, &pfx_nh
);
1655 * Omit route if nexthop is self
1657 if (CHECK_FLAG(bgp
->rfapi_cfg
->flags
,
1658 BGP_VNC_CONFIG_FILTER_SELF_FROM_RSP
)) {
1660 struct prefix pfx_vn
;
1662 assert(!rfapiRaddr2Qprefix(&rfd
->vn_addr
, &pfx_vn
));
1663 if (prefix_same(&pfx_vn
, &pfx_nh
))
1667 ri
= rfapi_info_new();
1669 ri
->rk
.rd
= bpi
->extra
->vnc
.import
.rd
;
1671 * If there is an auxiliary IP address (L2 can have it), copy it
1673 if (bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
1674 ri
->rk
.aux_prefix
= bpi
->extra
->vnc
.import
.aux_prefix
;
1677 if (rfapiGetUnAddrOfVpnBi(bpi
, &ri
->un
)) {
1678 rfapi_info_free(ri
);
1682 if (!pn
->aggregate
) {
1684 skiplist_new(0, rfapi_rib_key_cmp
, NULL
);
1689 * If we have already added this nexthop, the insert will fail.
1690 * Note that the skiplist key is a pointer INTO the rfapi_info
1691 * structure which will be added to the "info" list.
1692 * The skiplist entry VALUE is not used for anything but
1693 * might be useful during debugging.
1695 if (skiplist_insert((struct skiplist
*)pn
->aggregate
, &ri
->rk
,
1701 rfapi_info_free(ri
);
1705 rfapiRibBi2Ri(bpi
, ri
, lifetime
);
1708 pn
->info
= list_new();
1709 ((struct list
*)(pn
->info
))->del
=
1710 (void (*)(void *))rfapi_info_free
;
1714 listnode_add((struct list
*)(pn
->info
), ri
);
1718 count
= ((struct list
*)(pn
->info
))->count
;
1723 assert(!pn
->aggregate
);
1724 pn
->info
= (void *)1; /* magic value means this node has no
1729 agg_unlock_node(pn
); /* agg_node_get */
1731 queued_flag
= RFAPI_QUEUED_FLAG(afi
);
1733 if (!CHECK_FLAG(rfd
->flags
, queued_flag
)) {
1735 struct rfapi_updated_responses_queue
*urq
;
1737 urq
= XCALLOC(MTYPE_RFAPI_UPDATED_RESPONSE_QUEUE
,
1738 sizeof(struct rfapi_updated_responses_queue
));
1740 if (!rfd
->updated_responses_queue
)
1741 updated_responses_queue_init(rfd
);
1743 SET_FLAG(rfd
->flags
, queued_flag
);
1746 work_queue_add(rfd
->updated_responses_queue
, urq
);
1748 RFAPI_RIB_CHECK_COUNTS(1, 0);
1751 void rfapiRibUpdatePendingNodeSubtree(
1752 struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
1753 struct rfapi_import_table
*it
, struct agg_node
*it_node
,
1754 struct agg_node
*omit_subtree
, /* may be NULL */
1757 /* FIXME: need to find a better way here to work without sticking our
1758 * hands in node->link */
1759 if (agg_node_left(it_node
)
1760 && (agg_node_left(it_node
) != omit_subtree
)) {
1761 if (agg_node_left(it_node
)->info
)
1762 rfapiRibUpdatePendingNode(
1763 bgp
, rfd
, it
, agg_node_left(it_node
), lifetime
);
1764 rfapiRibUpdatePendingNodeSubtree(bgp
, rfd
, it
,
1765 agg_node_left(it_node
),
1766 omit_subtree
, lifetime
);
1769 if (agg_node_right(it_node
)
1770 && (agg_node_right(it_node
) != omit_subtree
)) {
1771 if (agg_node_right(it_node
)->info
)
1772 rfapiRibUpdatePendingNode(bgp
, rfd
, it
,
1773 agg_node_right(it_node
),
1775 rfapiRibUpdatePendingNodeSubtree(bgp
, rfd
, it
,
1776 agg_node_right(it_node
),
1777 omit_subtree
, lifetime
);
1784 * 0 allow prefix to be included in response
1785 * !0 don't allow prefix to be included in response
/*
 * Decide whether the prefix of import-table node it_rn may be included
 * in a response to rfd for a query on pfx_target_original.
 *
 * Checks visible below, in order:
 *   1. bgp->rfapi_cfg->rfp_cfg.download_type is compared with
 *      RFAPI_RFP_DOWNLOAD_FULL ("Not in FTD mode, so allow prefix").
 *   2. p->family is compared with AF_ETHERNET.
 *   3. prefix_match(p, pfx_target_original): the prefix covers the
 *      target address, so it is allowed.
 *   4. The value stored in node->info of rfd->rsp_times[afi] for p is
 *      read as a time_t and compared with rfd->ftd_last_allowed_time.
 * The node obtained from rsp_times is unlocked again right after its
 * info has been read.
 *
 * NOTE(review): none of the return statements appear in this listing;
 * the result for each check should be confirmed against the complete
 * file.
 */
1787 int rfapiRibFTDFilterRecentPrefix(
1788 struct rfapi_descriptor
*rfd
,
1789 struct agg_node
*it_rn
, /* import table node */
1790 struct prefix
*pfx_target_original
) /* query target */
1792 struct bgp
*bgp
= rfd
->bgp
;
1793 const struct prefix
*p
= agg_node_get_prefix(it_rn
);
1794 afi_t afi
= family2afi(p
->family
);
1796 struct agg_node
*trn
;
1799 * Not in FTD mode, so allow prefix
1801 if (bgp
->rfapi_cfg
->rfp_cfg
.download_type
!= RFAPI_RFP_DOWNLOAD_FULL
)
1806 * This matches behavior of now-obsolete rfapiRibFTDFilterRecent(),
1807 * but we need to decide if that is correct.
1809 if (p
->family
== AF_ETHERNET
)
1812 #ifdef DEBUG_FTD_FILTER_RECENT
1814 char buf_pfx
[PREFIX_STRLEN
];
1816 prefix2str(&it_rn
->p
, buf_pfx
, sizeof(buf_pfx
));
1817 vnc_zlog_debug_verbose("%s: prefix %s", __func__
, buf_pfx
);
1822 * prefix covers target address, so allow prefix
1824 if (prefix_match(p
, pfx_target_original
)) {
1825 #ifdef DEBUG_FTD_FILTER_RECENT
1826 vnc_zlog_debug_verbose("%s: prefix covers target, allowed",
1833 * check this NVE's timestamp for this prefix
1835 trn
= agg_node_get(rfd
->rsp_times
[afi
], p
); /* locks trn */
1836 prefix_time
= (time_t)trn
->info
;
1838 agg_unlock_node(trn
);
1840 #ifdef DEBUG_FTD_FILTER_RECENT
1841 vnc_zlog_debug_verbose("%s: last sent time %lu, last allowed time %lu",
1842 __func__
, prefix_time
,
1843 rfd
->ftd_last_allowed_time
);
1847 * haven't sent this prefix, which doesn't cover target address,
1848 * to NVE since ftd_advertisement_interval, so OK to send now.
1850 if (prefix_time
<= rfd
->ftd_last_allowed_time
)
1857 * Call when rfapi returns from rfapi_query() so the RIB reflects
1858 * the routes sent to the NVE before the first updated response
1860 * Also: remove duplicates from response. Caller should use returned
1861 * value of nexthop chain.
/*
 * Record the routes of an initial query response in rfd's RIB.
 *
 * For each entry nhp of 'response' (nhp->next is saved first because
 * the entry may be deleted):
 *   - the RIB prefix pfx comes from the RFAPI_VN_OPTION_TYPE_L2ADDR VN
 *     option via rfapiL2o2Qprefix() or from nhp->prefix via
 *     rfapiRprefix2Qprefix() (use_eth_resolution presumably selects
 *     between them; the else line is not shown here);
 *   - the node for pfx is fetched from rfd->rib[afi] with
 *     agg_node_get(); rn->info holds a skiplist created with
 *     rfapi_rib_key_cmp;
 *   - the lookup key rk takes its vn from nhp->vn_address and, with
 *     use_eth_resolution, its aux_prefix from nhp->prefix (family set
 *     to 0 when RFAPI_0_PREFIX and RFAPI_HOST_PREFIX both hold);
 *   - if skiplist_search() finds the key, the stored un/vn option
 *     chains are freed and rsp_counter/last_sent_time are compared to
 *     filter duplicates; otherwise a new rfapi_info is allocated and
 *     inserted;
 *   - un, cost, lifetime, vn_options, rsp_counter and last_sent_time
 *     are then written on the rfapi_info and rfapiRibStartTimer() is
 *     called;
 *   - node->info of rfd->rsp_times[afi] for pfx is set to bgp_clock().
 * Entries that are dropped have their option chains freed and are
 * released with XFREE(MTYPE_RFAPI_NEXTHOP, ...).
 *
 * NOTE(review): many source lines are missing from this listing
 * (conditions, else branches, the construction of the head/tail result
 * and the return statement) -- confirm control flow against the
 * complete file before relying on this summary.
 */
1863 struct rfapi_next_hop_entry
*
1864 rfapiRibPreload(struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
1865 struct rfapi_next_hop_entry
*response
, int use_eth_resolution
)
1867 struct rfapi_next_hop_entry
*nhp
;
1868 struct rfapi_next_hop_entry
*nhp_next
;
1869 struct rfapi_next_hop_entry
*head
= NULL
;
1870 struct rfapi_next_hop_entry
*tail
= NULL
;
1871 time_t new_last_sent_time
;
1873 vnc_zlog_debug_verbose("%s: loading response=%p, use_eth_resolution=%d",
1874 __func__
, response
, use_eth_resolution
);
1876 new_last_sent_time
= rfapi_time(NULL
);
1878 for (nhp
= response
; nhp
; nhp
= nhp_next
) {
1881 struct rfapi_rib_key rk
;
1883 struct rfapi_info
*ri
;
1885 struct agg_node
*rn
;
1886 int rib_node_started_nonempty
= 0;
1887 struct agg_node
*trn
;
1890 /* save in case we delete nhp */
1891 nhp_next
= nhp
->next
;
1893 if (nhp
->lifetime
== RFAPI_REMOVE_RESPONSE_LIFETIME
) {
1895 * weird, shouldn't happen
1897 vnc_zlog_debug_verbose(
1898 "%s: got nhp->lifetime == RFAPI_REMOVE_RESPONSE_LIFETIME",
1904 if (use_eth_resolution
) {
1905 /* get the prefix of the ethernet address in the L2
1907 struct rfapi_l2address_option
*pL2o
;
1908 struct rfapi_vn_option
*vo
;
1911 * Look for VN option of type
1912 * RFAPI_VN_OPTION_TYPE_L2ADDR
1914 for (pL2o
= NULL
, vo
= nhp
->vn_options
; vo
;
1916 if (RFAPI_VN_OPTION_TYPE_L2ADDR
== vo
->type
) {
1917 pL2o
= &vo
->v
.l2addr
;
1924 * not supposed to happen
1926 vnc_zlog_debug_verbose("%s: missing L2 info",
1932 rfapiL2o2Qprefix(pL2o
, &pfx
);
1934 rfapiRprefix2Qprefix(&nhp
->prefix
, &pfx
);
1935 afi
= family2afi(pfx
.family
);
1939 * TBD for ethernet, rib must know the right way to distinguish
1942 * Current approach: prefix is key to radix tree; then
1943 * each prefix has a set of routes with unique VN addrs
1947 * Look up prefix in RIB
1949 rn
= agg_node_get(rfd
->rib
[afi
], &pfx
); /* locks rn */
1952 rib_node_started_nonempty
= 1;
1954 rn
->info
= skiplist_new(0, rfapi_rib_key_cmp
, NULL
);
1959 * Look up route at prefix
1962 memset((void *)&rk
, 0, sizeof(rk
));
1963 assert(!rfapiRaddr2Qprefix(&nhp
->vn_address
, &rk
.vn
));
1965 if (use_eth_resolution
) {
1966 /* copy what came from aux_prefix to rk.aux_prefix */
1967 rfapiRprefix2Qprefix(&nhp
->prefix
, &rk
.aux_prefix
);
1968 if (RFAPI_0_PREFIX(&rk
.aux_prefix
)
1969 && RFAPI_HOST_PREFIX(&rk
.aux_prefix
)) {
1970 /* mark as "none" if nhp->prefix is 0/32 or
1972 rk
.aux_prefix
.family
= 0;
1978 char str_vn
[PREFIX_STRLEN
];
1979 char str_aux_prefix
[PREFIX_STRLEN
];
1982 str_aux_prefix
[0] = 0;
1984 prefix2str(&rk
.vn
, str_vn
, sizeof(str_vn
));
1985 prefix2str(&rk
.aux_prefix
, str_aux_prefix
,
1986 sizeof(str_aux_prefix
));
1988 if (!rk
.aux_prefix
.family
) {
1990 vnc_zlog_debug_verbose(
1991 "%s: rk.vn=%s rk.aux_prefix=%s", __func__
,
1993 (rk
.aux_prefix
.family
? str_aux_prefix
: "-"));
1995 vnc_zlog_debug_verbose(
1996 "%s: RIB skiplist for this prefix follows", __func__
);
1997 rfapiRibShowRibSl(NULL
, &rn
->p
, (struct skiplist
*)rn
->info
);
2001 if (!skiplist_search((struct skiplist
*)rn
->info
, &rk
,
2004 * Already have this route; make values match
2006 rfapiFreeRfapiUnOptionChain(ri
->un_options
);
2007 ri
->un_options
= NULL
;
2008 rfapiFreeRfapiVnOptionChain(ri
->vn_options
);
2009 ri
->vn_options
= NULL
;
2012 vnc_zlog_debug_verbose("%s: found in RIB", __func__
);
2016 * Filter duplicate routes from initial response.
2017 * Check timestamps to avoid wraparound problems
2019 if ((ri
->rsp_counter
!= rfd
->rsp_counter
)
2020 || (ri
->last_sent_time
!= new_last_sent_time
)) {
2023 vnc_zlog_debug_verbose(
2024 "%s: allowed due to counter/timestamp diff",
2033 vnc_zlog_debug_verbose(
2034 "%s: allowed due to not yet in RIB", __func__
);
2036 /* not found: add new route to RIB */
2037 ri
= rfapi_info_new();
2043 assert(!rfapiRaddr2Qprefix(&nhp
->un_address
, &ri
->un
));
2044 ri
->cost
= nhp
->prefix
.cost
;
2045 ri
->lifetime
= nhp
->lifetime
;
2046 ri
->vn_options
= rfapiVnOptionsDup(nhp
->vn_options
);
2047 ri
->rsp_counter
= rfd
->rsp_counter
;
2048 ri
->last_sent_time
= rfapi_time(NULL
);
2052 rc
= skiplist_insert((struct skiplist
*)rn
->info
,
2057 if (!rib_node_started_nonempty
) {
2058 RFAPI_RIB_PREFIX_COUNT_INCR(rfd
, bgp
->rfapi
);
2061 RFAPI_RIB_CHECK_COUNTS(0, 0);
2062 rfapiRibStartTimer(rfd
, ri
, rn
, 0);
2063 RFAPI_RIB_CHECK_COUNTS(0, 0);
2065 agg_unlock_node(rn
);
2068 * update this NVE's timestamp for this prefix
2070 trn
= agg_node_get(rfd
->rsp_times
[afi
], &pfx
); /* locks trn */
2071 trn
->info
= (void *)(uintptr_t)bgp_clock();
2073 agg_unlock_node(trn
);
2076 char str_pfx
[PREFIX_STRLEN
];
2077 char str_pfx_vn
[PREFIX_STRLEN
];
2079 prefix2str(&pfx
, str_pfx
, sizeof(str_pfx
));
2080 prefix2str(&rk
.vn
, str_pfx_vn
, sizeof(str_pfx_vn
));
2081 vnc_zlog_debug_verbose(
2082 "%s: added pfx=%s nh[vn]=%s, cost=%u, lifetime=%u, allowed=%d",
2083 __func__
, str_pfx
, str_pfx_vn
, nhp
->prefix
.cost
,
2084 nhp
->lifetime
, allowed
);
2095 rfapi_un_options_free(nhp
->un_options
);
2096 nhp
->un_options
= NULL
;
2097 rfapi_vn_options_free(nhp
->vn_options
);
2098 nhp
->vn_options
= NULL
;
2100 XFREE(MTYPE_RFAPI_NEXTHOP
, nhp
);
/*
 * For import-table node it_node, refresh the pending RIB of each
 * interested rfd whose RIB already holds a node for the prefix
 * concerned.
 *
 * In every case below the pattern is the same: agg_node_lookup() on the
 * rfd's rib[afi]; when a node is found, rfapiRibUpdatePendingNode() is
 * called with that rfd's response_lifetime and the looked-up node is
 * unlocked again.
 *
 * AFI_L2VPN: the candidates are the monitors in the
 * RFAPI_MONITOR_ETH(it_node) skiplist ("route-specific monitors") and
 * those on the it->eth0_queries chain ("all-routes/FTD monitors"); each
 * monitor supplies its rfd as m->rfd.
 *
 * Descriptor walk over bgp->rfapi->descriptors (presumably the branch
 * for the other AFIs; the else line is not shown here): each rfd's
 * import_table is compared with 'it' before the lookup.
 *
 * NOTE(review): parts of the loop headers and the statements controlled
 * by some conditions are missing from this listing -- confirm against
 * the complete file.
 */
2109 void rfapiRibPendingDeleteRoute(struct bgp
*bgp
, struct rfapi_import_table
*it
,
2110 afi_t afi
, struct agg_node
*it_node
)
2112 struct rfapi_descriptor
*rfd
;
2113 struct listnode
*node
;
2114 const struct prefix
*p
= agg_node_get_prefix(it_node
);
2116 vnc_zlog_debug_verbose("%s: entry, it=%p, afi=%d, it_node=%p, pfx=%pRN",
2117 __func__
, it
, afi
, it_node
, it_node
);
2119 if (AFI_L2VPN
== afi
) {
2121 * ethernet import tables are per-LNI and each ethernet monitor
2122 * identifies the rfd that owns it.
2124 struct rfapi_monitor_eth
*m
;
2125 struct agg_node
*rn
;
2126 struct skiplist
*sl
;
2131 * route-specific monitors
2133 if ((sl
= RFAPI_MONITOR_ETH(it_node
))) {
2135 vnc_zlog_debug_verbose(
2136 "%s: route-specific skiplist: %p", __func__
,
2140 rc
= skiplist_next(sl
, NULL
, (void **)&m
,
2142 !rc
; rc
= skiplist_next(sl
, NULL
, (void **)&m
,
2143 (void **)&cursor
)) {
2145 #if DEBUG_PENDING_DELETE_ROUTE
2146 vnc_zlog_debug_verbose("%s: eth monitor rfd=%p",
2150 * If we have already sent a route with this
2152 * NVE, it's OK to send an update with the
2155 if ((rn
= agg_node_lookup(m
->rfd
->rib
[afi
],
2157 rfapiRibUpdatePendingNode(
2158 bgp
, m
->rfd
, it
, it_node
,
2159 m
->rfd
->response_lifetime
);
2160 agg_unlock_node(rn
);
2166 * all-routes/FTD monitors
2168 for (m
= it
->eth0_queries
; m
; m
= m
->next
) {
2169 #if DEBUG_PENDING_DELETE_ROUTE
2170 vnc_zlog_debug_verbose("%s: eth0 monitor rfd=%p",
2174 * If we have already sent a route with this prefix to
2176 * NVE, it's OK to send an update with the delete
2178 if ((rn
= agg_node_lookup(m
->rfd
->rib
[afi
], p
))) {
2179 rfapiRibUpdatePendingNode(
2180 bgp
, m
->rfd
, it
, it_node
,
2181 m
->rfd
->response_lifetime
);
2182 agg_unlock_node(rn
);
2188 * Find RFDs that reference this import table
2190 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
,
2193 struct agg_node
*rn
;
2195 vnc_zlog_debug_verbose(
2196 "%s: comparing rfd(%p)->import_table=%p to it=%p",
2197 __func__
, rfd
, rfd
->import_table
, it
);
2199 if (rfd
->import_table
!= it
)
2202 vnc_zlog_debug_verbose("%s: matched rfd %p", __func__
,
2206 * If we have sent a response to this NVE with this
2208 * previously, we should send an updated response.
2210 if ((rn
= agg_node_lookup(rfd
->rib
[afi
], p
))) {
2211 rfapiRibUpdatePendingNode(
2212 bgp
, rfd
, it
, it_node
,
2213 rfd
->response_lifetime
);
2214 agg_unlock_node(rn
);
/*
 * Print a summary of response statistics to 'stream'.
 *
 * The output function fp and its handle out are obtained from
 * rfapiStream2Vty(). Three groups are printed from bgp->rfapi:
 *   Responses (Prefixes): rib_prefix_count_total and
 *                         rib_prefix_count_total_max
 *   (Updated):            stat.count_updated_response_updates,
 *                         stat.count_updated_response_deletes and their
 *                         sum
 *   (NVEs):               the number of descriptors with a non-zero
 *                         rib_prefix_count, and 'nves'
 * "Unable to find default BGP instance" is the message emitted on the
 * path for a missing default instance.
 *
 * NOTE(review): several lines are missing from this listing (some
 * declarations, the returns, the counting of 'nves', and any
 * line-terminating output calls); whitespace inside the literals may
 * also have been collapsed -- confirm against the complete file.
 */
2220 void rfapiRibShowResponsesSummary(void *stream
)
2222 int (*fp
)(void *, const char *, ...);
2225 const char *vty_newline
;
2226 struct bgp
*bgp
= bgp_get_default();
2229 int nves_with_nonempty_ribs
= 0;
2230 struct rfapi_descriptor
*rfd
;
2231 struct listnode
*node
;
2233 if (rfapiStream2Vty(stream
, &fp
, &vty
, &out
, &vty_newline
) == 0)
2236 fp(out
, "Unable to find default BGP instance\n");
2240 fp(out
, "%-24s ", "Responses: (Prefixes)");
2241 fp(out
, "%-8s %-8u ", "Active:", bgp
->rfapi
->rib_prefix_count_total
);
2242 fp(out
, "%-8s %-8u",
2243 "Maximum:", bgp
->rfapi
->rib_prefix_count_total_max
);
2246 fp(out
, "%-24s ", " (Updated)");
2247 fp(out
, "%-8s %-8u ",
2248 "Update:", bgp
->rfapi
->stat
.count_updated_response_updates
);
2249 fp(out
, "%-8s %-8u",
2250 "Remove:", bgp
->rfapi
->stat
.count_updated_response_deletes
);
2251 fp(out
, "%-8s %-8u", "Total:",
2252 bgp
->rfapi
->stat
.count_updated_response_updates
2253 + bgp
->rfapi
->stat
.count_updated_response_deletes
);
2256 fp(out
, "%-24s ", " (NVEs)");
2257 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
, rfd
)) {
2259 if (rfd
->rib_prefix_count
)
2260 ++nves_with_nonempty_ribs
;
2262 fp(out
, "%-8s %-8u ", "Active:", nves_with_nonempty_ribs
);
2263 fp(out
, "%-8s %-8u", "Total:", nves
);
2267 void rfapiRibShowResponsesSummaryClear(void)
2269 struct bgp
*bgp
= bgp_get_default();
2271 bgp
->rfapi
->rib_prefix_count_total_max
=
2272 bgp
->rfapi
->rib_prefix_count_total
;
/*
 * Print one row per rfapi_info held in skiplist 'sl' through fp(out, ...).
 *
 * fp, out       output function and its handle
 * vty           not referenced in the lines visible here
 * sl            skiplist walked with skiplist_next()
 * deleted       non-zero puts 'r' in the first column, otherwise ' '
 * str_pfx       prefix text, printed only while *printedprefix is zero
 * printedprefix in/out flag consulted for the prefix column
 *
 * Each row shows the VN and UN addresses (formatted with prefix2str();
 * the '/' located with index() is presumably used to cut off the
 * length suffix), cost, lifetime, an age or time-remaining string
 * (str_age), and the route distinguisher text (str_rd).
 *
 * Returns routes_displayed.
 *
 * NOTE(review): several statements are missing from this listing
 * (declarations, the truncation after index(), the conditional around
 * the RD formatting, counter updates) -- confirm against the complete
 * file.
 */
2275 static int print_rib_sl(int (*fp
)(void *, const char *, ...), struct vty
*vty
,
2276 void *out
, struct skiplist
*sl
, int deleted
,
2277 char *str_pfx
, int *printedprefix
)
2279 struct rfapi_info
*ri
;
2282 int routes_displayed
= 0;
2285 for (rc
= skiplist_next(sl
, NULL
, (void **)&ri
, &cursor
); !rc
;
2286 rc
= skiplist_next(sl
, NULL
, (void **)&ri
, &cursor
)) {
2288 char str_vn
[PREFIX_STRLEN
];
2289 char str_un
[PREFIX_STRLEN
];
2290 char str_lifetime
[BUFSIZ
];
2291 char str_age
[BUFSIZ
];
2293 char str_rd
[RD_ADDRSTRLEN
];
2297 prefix2str(&ri
->rk
.vn
, str_vn
, sizeof(str_vn
));
2298 p
= index(str_vn
, '/');
2302 prefix2str(&ri
->un
, str_un
, sizeof(str_un
));
2303 p
= index(str_un
, '/');
2307 rfapiFormatSeconds(ri
->lifetime
, str_lifetime
, BUFSIZ
);
2308 #ifdef RFAPI_REGISTRATIONS_REPORT_AGE
2309 rfapiFormatAge(ri
->last_sent_time
, str_age
, BUFSIZ
);
2312 time_t now
= rfapi_time(NULL
);
2314 ri
->last_sent_time
+ (time_t)ri
->lifetime
;
2315 /* allow for delayed/async removal */
2316 rfapiFormatSeconds((expire
> now
? expire
- now
: 1),
2321 str_rd
[0] = 0; /* start empty */
2323 prefix_rd2str(&ri
->rk
.rd
, str_rd
, sizeof(str_rd
));
2326 fp(out
, " %c %-20s %-15s %-15s %-4u %-8s %-8s %s\n",
2327 deleted
? 'r' : ' ', *printedprefix
? "" : str_pfx
, str_vn
,
2328 str_un
, ri
->cost
, str_lifetime
, str_age
, str_rd
);
2330 if (!*printedprefix
)
2333 return routes_displayed
;
2338 * This one is for debugging (set stream to NULL to send output to log)
2340 static void rfapiRibShowRibSl(void *stream
, struct prefix
*pfx
,
2341 struct skiplist
*sl
)
2343 int (*fp
)(void *, const char *, ...);
2346 const char *vty_newline
;
2348 int nhs_displayed
= 0;
2349 char str_pfx
[PREFIX_STRLEN
];
2350 int printedprefix
= 0;
2352 if (rfapiStream2Vty(stream
, &fp
, &vty
, &out
, &vty_newline
) == 0)
2355 prefix2str(pfx
, str_pfx
, sizeof(str_pfx
));
2358 print_rib_sl(fp
, vty
, out
, sl
, 0, str_pfx
, &printedprefix
);
2362 void rfapiRibShowResponses(void *stream
, struct prefix
*pfx_match
,
2365 int (*fp
)(void *, const char *, ...);
2368 const char *vty_newline
;
2370 struct rfapi_descriptor
*rfd
;
2371 struct listnode
*node
;
2373 struct bgp
*bgp
= bgp_get_default();
2374 int printedheader
= 0;
2375 int routes_total
= 0;
2377 int prefixes_total
= 0;
2378 int prefixes_displayed
= 0;
2380 int nves_with_routes
= 0;
2381 int nves_displayed
= 0;
2382 int routes_displayed
= 0;
2383 int nhs_displayed
= 0;
2385 if (rfapiStream2Vty(stream
, &fp
, &vty
, &out
, &vty_newline
) == 0)
2388 fp(out
, "Unable to find default BGP instance\n");
2395 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
, rfd
)) {
2401 if (rfd
->rib_prefix_count
)
2404 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
2406 struct agg_node
*rn
;
2411 for (rn
= agg_route_top(rfd
->rib
[afi
]); rn
;
2412 rn
= agg_route_next(rn
)) {
2413 const struct prefix
*p
=
2414 agg_node_get_prefix(rn
);
2415 struct skiplist
*sl
;
2416 char str_pfx
[PREFIX_STRLEN
];
2417 int printedprefix
= 0;
2428 nhs_total
+= skiplist_count(sl
);
2431 if (pfx_match
&& !prefix_match(pfx_match
, p
)
2432 && !prefix_match(p
, pfx_match
))
2435 ++prefixes_displayed
;
2437 if (!printedheader
) {
2441 show_removed
? "Removed" : "Active");
2442 fp(out
, "%-15s %-15s\n", "Querying VN",
2445 " %-20s %-15s %-15s %4s %-8s %-8s\n",
2446 "Prefix", "Registered VN",
2447 "Registered UN", "Cost", "Lifetime",
2448 #ifdef RFAPI_REGISTRATIONS_REPORT_AGE
2456 char str_vn
[BUFSIZ
];
2457 char str_un
[BUFSIZ
];
2462 fp(out
, "%-15s %-15s\n",
2463 rfapiRfapiIpAddr2Str(&rfd
->vn_addr
,
2465 rfapiRfapiIpAddr2Str(&rfd
->un_addr
,
2469 prefix2str(p
, str_pfx
, sizeof(str_pfx
));
2470 // fp(out, " %s\n", buf); /* prefix */
2473 nhs_displayed
+= print_rib_sl(
2474 fp
, vty
, out
, sl
, show_removed
, str_pfx
,
2482 fp(out
, "Displayed %u NVEs, and %u out of %u %s prefixes",
2483 nves_displayed
, routes_displayed
, routes_total
,
2484 show_removed
? "removed" : "active");
2485 if (nhs_displayed
!= routes_displayed
2486 || nhs_total
!= routes_total
)
2487 fp(out
, " with %u out of %u next hops", nhs_displayed
,