1 // SPDX-License-Identifier: GPL-2.0-or-later
4 * Copyright 2009-2016, LabN Consulting, L.L.C.
10 * Purpose: maintain per-nve ribs and generate change lists
13 #include "lib/zebra.h"
14 #include "lib/prefix.h"
15 #include "lib/agg_table.h"
17 #include "lib/memory.h"
19 #include "lib/skiplist.h"
20 #include "lib/workqueue.h"
22 #include "bgpd/bgpd.h"
23 #include "bgpd/bgp_route.h"
24 #include "bgpd/bgp_ecommunity.h"
25 #include "bgpd/bgp_mplsvpn.h"
26 #include "bgpd/bgp_vnc_types.h"
28 #include "bgpd/rfapi/rfapi.h"
29 #include "bgpd/rfapi/bgp_rfapi_cfg.h"
30 #include "bgpd/rfapi/rfapi_import.h"
31 #include "bgpd/rfapi/rfapi_private.h"
32 #include "bgpd/rfapi/rfapi_vty.h"
33 #include "bgpd/rfapi/vnc_import_bgp.h"
34 #include "bgpd/rfapi/rfapi_rib.h"
35 #include "bgpd/rfapi/rfapi_monitor.h"
36 #include "bgpd/rfapi/rfapi_encap_tlv.h"
37 #include "bgpd/rfapi/vnc_debug.h"
39 #define DEBUG_PROCESS_PENDING_NODE 0
40 #define DEBUG_PENDING_DELETE_ROUTE 0
42 #define DEBUG_RIB_SL_RD 0
46 static void rfapiRibShowRibSl(void *stream
, struct prefix
*pfx
,
53 * Model of the set of routes currently in the NVE's RIB.
55 * node->info ptr to "struct skiplist".
56 * MUST be NULL if there are no routes.
57 * key = ptr to struct prefix {vn}
58 * val = ptr to struct rfapi_info
60 * skiplist.cmp = vnc_prefix_cmp
62 * node->aggregate ptr to "struct skiplist".
63 * key = ptr to struct prefix {vn}
64 * val = ptr to struct rfapi_info
65 * skiplist.del = rfapi_info_free
66 * skiplist.cmp = vnc_prefix_cmp
68 * This skiplist at "aggregate"
69 * contains the routes recently
75 * Sparse list of prefixes that need to be updated. Each node
76 * will have the complete set of routes for the prefix.
78 * node->info ptr to "struct list" (lib/linklist.h)
80 * List of routes sorted lowest cost first.
81 * This list is how the new complete set
82 * of routes should look.
83 * Set if there are updates to the prefix;
84 * MUST be NULL if there are no updates.
86 * .data = ptr to struct rfapi_info
87 * list.cmp = NULL (sorted manually)
88 * list.del = rfapi_info_free
90 * Special case: if node->info is 1, it means
91 * "delete all routes at this prefix".
93 * node->aggregate ptr to struct skiplist
94 * key = ptr to struct prefix {vn} (part of ri)
95 * val = struct rfapi_info
96 * skiplist.cmp = vnc_prefix_cmp
99 * ptlist is rewritten anew each time
100 * rfapiRibUpdatePendingNode() is called
102 * THE ptlist VALUES ARE REFERENCES TO THE
103 * rfapi_info STRUCTS IN THE node->info LIST.
107 * iterate over RIB to count responses, compare with running counters
109 void rfapiRibCheckCounts(
110 int checkstats
, /* validate rfd & global counts */
111 unsigned int offset
) /* number of ri's held separately */
113 struct rfapi_descriptor
*rfd
;
114 struct listnode
*node
;
116 struct bgp
*bgp
= bgp_get_default();
118 uint32_t t_pfx_active
= 0;
119 uint32_t t_pfx_deleted
= 0;
121 uint32_t t_ri_active
= 0;
122 uint32_t t_ri_deleted
= 0;
123 uint32_t t_ri_pend
= 0;
125 unsigned int alloc_count
;
130 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
, rfd
)) {
133 uint32_t pfx_active
= 0;
134 uint32_t pfx_deleted
= 0;
136 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
140 for (rn
= agg_route_top(rfd
->rib
[afi
]); rn
;
141 rn
= agg_route_next(rn
)) {
143 struct skiplist
*sl
= rn
->info
;
144 struct skiplist
*dsl
= rn
->aggregate
;
145 uint32_t ri_active
= 0;
146 uint32_t ri_deleted
= 0;
149 ri_active
= skiplist_count(sl
);
151 t_ri_active
+= ri_active
;
157 ri_deleted
= skiplist_count(dsl
);
158 t_ri_deleted
+= ri_deleted
;
163 for (rn
= agg_route_top(rfd
->rib_pending
[afi
]); rn
;
164 rn
= agg_route_next(rn
)) {
166 struct list
*l
= rn
->info
; /* sorted by cost */
167 struct skiplist
*sl
= rn
->aggregate
;
168 uint32_t ri_pend_cost
= 0;
169 uint32_t ri_pend_uniq
= 0;
172 ri_pend_uniq
= skiplist_count(sl
);
175 if (l
&& (l
!= (void *)1)) {
176 ri_pend_cost
= l
->count
;
177 t_ri_pend
+= l
->count
;
180 assert(ri_pend_uniq
== ri_pend_cost
);
185 if (pfx_active
!= rfd
->rib_prefix_count
) {
186 vnc_zlog_debug_verbose(
187 "%s: rfd %p actual pfx count %u != running %u",
188 __func__
, rfd
, pfx_active
,
189 rfd
->rib_prefix_count
);
195 if (checkstats
&& bgp
->rfapi
) {
196 if (t_pfx_active
!= bgp
->rfapi
->rib_prefix_count_total
) {
197 vnc_zlog_debug_verbose(
198 "%s: actual total pfx count %u != running %u",
199 __func__
, t_pfx_active
,
200 bgp
->rfapi
->rib_prefix_count_total
);
206 * Check against memory allocation count
208 alloc_count
= mtype_stats_alloc(MTYPE_RFAPI_INFO
);
209 assert(t_ri_active
+ t_ri_deleted
+ t_ri_pend
+ offset
== alloc_count
);
212 static struct rfapi_info
*rfapi_info_new(void)
214 return XCALLOC(MTYPE_RFAPI_INFO
, sizeof(struct rfapi_info
));
217 void rfapiFreeRfapiUnOptionChain(struct rfapi_un_option
*p
)
220 struct rfapi_un_option
*next
;
223 XFREE(MTYPE_RFAPI_UN_OPTION
, p
);
228 void rfapiFreeRfapiVnOptionChain(struct rfapi_vn_option
*p
)
231 struct rfapi_vn_option
*next
;
234 XFREE(MTYPE_RFAPI_VN_OPTION
, p
);
240 static void rfapi_info_free(struct rfapi_info
*goner
)
243 if (goner
->tea_options
) {
244 rfapiFreeBgpTeaOptionChain(goner
->tea_options
);
245 goner
->tea_options
= NULL
;
247 if (goner
->un_options
) {
248 rfapiFreeRfapiUnOptionChain(goner
->un_options
);
249 goner
->un_options
= NULL
;
251 if (goner
->vn_options
) {
252 rfapiFreeRfapiVnOptionChain(goner
->vn_options
);
253 goner
->vn_options
= NULL
;
256 struct rfapi_rib_tcb
*tcb
;
258 tcb
= THREAD_ARG(goner
->timer
);
259 THREAD_OFF(goner
->timer
);
260 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
262 XFREE(MTYPE_RFAPI_INFO
, goner
);
267 * Timer control block for recently-deleted and expired routes
269 struct rfapi_rib_tcb
{
270 struct rfapi_descriptor
*rfd
;
272 struct rfapi_info
*ri
;
275 #define RFAPI_RIB_TCB_FLAG_DELETED 0x00000001
279 * remove route from rib
281 static void rfapiRibExpireTimer(struct thread
*t
)
283 struct rfapi_rib_tcb
*tcb
= THREAD_ARG(t
);
285 RFAPI_RIB_CHECK_COUNTS(1, 0);
288 * Forget reference to thread. Otherwise rfapi_info_free() will
289 * attempt to free thread pointer as an option chain
291 tcb
->ri
->timer
= NULL
;
293 /* "deleted" skiplist frees ri, "active" doesn't */
294 assert(!skiplist_delete(tcb
->sl
, &tcb
->ri
->rk
, NULL
));
297 * XXX in this case, skiplist has no delete function: we must
298 * therefore delete rfapi_info explicitly.
300 rfapi_info_free(tcb
->ri
);
303 if (skiplist_empty(tcb
->sl
)) {
304 if (CHECK_FLAG(tcb
->flags
, RFAPI_RIB_TCB_FLAG_DELETED
))
305 tcb
->rn
->aggregate
= NULL
;
307 struct bgp
*bgp
= bgp_get_default();
308 tcb
->rn
->info
= NULL
;
309 RFAPI_RIB_PREFIX_COUNT_DECR(tcb
->rfd
, bgp
->rfapi
);
311 skiplist_free(tcb
->sl
);
312 agg_unlock_node(tcb
->rn
);
315 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
317 RFAPI_RIB_CHECK_COUNTS(1, 0);
320 static void rfapiRibStartTimer(struct rfapi_descriptor
*rfd
,
321 struct rfapi_info
*ri
,
322 struct agg_node
*rn
, /* route node attached to */
325 struct rfapi_rib_tcb
*tcb
= NULL
;
328 tcb
= THREAD_ARG(ri
->timer
);
329 THREAD_OFF(ri
->timer
);
331 tcb
= XCALLOC(MTYPE_RFAPI_RECENT_DELETE
,
332 sizeof(struct rfapi_rib_tcb
));
338 tcb
->sl
= (struct skiplist
*)rn
->aggregate
;
339 SET_FLAG(tcb
->flags
, RFAPI_RIB_TCB_FLAG_DELETED
);
341 tcb
->sl
= (struct skiplist
*)rn
->info
;
342 UNSET_FLAG(tcb
->flags
, RFAPI_RIB_TCB_FLAG_DELETED
);
345 vnc_zlog_debug_verbose("%s: rfd %p pfx %pRN life %u", __func__
, rfd
, rn
,
348 thread_add_timer(bm
->master
, rfapiRibExpireTimer
, tcb
, ri
->lifetime
,
352 extern void rfapi_rib_key_init(struct prefix
*prefix
, /* may be NULL */
353 struct prefix_rd
*rd
, /* may be NULL */
354 struct prefix
*aux
, /* may be NULL */
355 struct rfapi_rib_key
*rk
)
358 memset((void *)rk
, 0, sizeof(struct rfapi_rib_key
));
364 rk
->aux_prefix
= *aux
;
368 * Compares two <struct rfapi_rib_key>s
370 int rfapi_rib_key_cmp(const void *k1
, const void *k2
)
372 const struct rfapi_rib_key
*a
= (struct rfapi_rib_key
*)k1
;
373 const struct rfapi_rib_key
*b
= (struct rfapi_rib_key
*)k2
;
379 ret
= vnc_prefix_cmp(&a
->vn
, &b
->vn
);
383 ret
= vnc_prefix_cmp(&a
->rd
, &b
->rd
);
387 ret
= vnc_prefix_cmp(&a
->aux_prefix
, &b
->aux_prefix
);
394 * Note: this function will claim that two option chains are
395 * different unless their option items are in identical order.
396 * The consequence is that RFP updated responses can be sent
397 * unnecessarily, or that they might contain nexthop items
398 * that are not strictly needed.
400 * This function could be modified to compare option chains more
401 * thoroughly, but it's not clear that the extra computation would
404 static int bgp_tea_options_cmp(struct bgp_tea_options
*a
,
405 struct bgp_tea_options
*b
)
413 if (a
->type
!= b
->type
)
414 return (a
->type
- b
->type
);
415 if (a
->length
!= b
->length
)
416 return (a
->length
= b
->length
);
417 if ((rc
= memcmp(a
->value
, b
->value
, a
->length
)))
419 if (!a
->next
!= !b
->next
) { /* logical xor */
420 return (a
->next
- b
->next
);
423 return bgp_tea_options_cmp(a
->next
, b
->next
);
427 static int rfapi_info_cmp(struct rfapi_info
*a
, struct rfapi_info
*b
)
434 if ((rc
= rfapi_rib_key_cmp(&a
->rk
, &b
->rk
)))
437 if ((rc
= vnc_prefix_cmp(&a
->un
, &b
->un
)))
440 if (a
->cost
!= b
->cost
)
441 return (a
->cost
- b
->cost
);
443 if (a
->lifetime
!= b
->lifetime
)
444 return (a
->lifetime
- b
->lifetime
);
446 if ((rc
= bgp_tea_options_cmp(a
->tea_options
, b
->tea_options
)))
452 void rfapiRibClear(struct rfapi_descriptor
*rfd
)
460 bgp
= bgp_get_default();
461 #ifdef DEBUG_L2_EXTRA
462 vnc_zlog_debug_verbose("%s: rfd=%p", __func__
, rfd
);
465 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
469 if (rfd
->rib_pending
[afi
]) {
470 for (pn
= agg_route_top(rfd
->rib_pending
[afi
]); pn
;
471 pn
= agg_route_next(pn
)) {
474 * free references into the rfapi_info
476 * freeing the structures themselves
481 pn
->aggregate
= NULL
;
483 pn
); /* skiplist deleted */
486 * free the rfapi_info structures
489 if (pn
->info
!= (void *)1) {
495 /* linklist or 1 deleted */
501 for (rn
= agg_route_top(rfd
->rib
[afi
]); rn
;
502 rn
= agg_route_next(rn
)) {
505 struct rfapi_info
*ri
;
507 while (0 == skiplist_first(
514 skiplist_delete_first(
519 (struct skiplist
*)rn
->info
);
522 RFAPI_RIB_PREFIX_COUNT_DECR(rfd
,
527 struct rfapi_info
*ri_del
;
529 /* delete skiplist & contents */
530 while (!skiplist_first(
533 NULL
, (void **)&ri_del
)) {
535 /* sl->del takes care of ri_del
537 skiplist_delete_first((
545 rn
->aggregate
= NULL
;
551 if (rfd
->updated_responses_queue
)
552 work_queue_free_and_null(&rfd
->updated_responses_queue
);
556 * Release all dynamically-allocated memory that is part of an HD's RIB
558 void rfapiRibFree(struct rfapi_descriptor
*rfd
)
564 * NB rfd is typically detached from master list, so is not included
565 * in the count performed by RFAPI_RIB_CHECK_COUNTS
569 * Free routes attached to radix trees
573 /* Now the uncounted rfapi_info's are freed, so the check should succeed
575 RFAPI_RIB_CHECK_COUNTS(1, 0);
580 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
581 if (rfd
->rib_pending
[afi
])
582 agg_table_finish(rfd
->rib_pending
[afi
]);
583 rfd
->rib_pending
[afi
] = NULL
;
586 agg_table_finish(rfd
->rib
[afi
]);
587 rfd
->rib
[afi
] = NULL
;
589 /* NB agg_table_finish frees only prefix nodes, not chained
591 if (rfd
->rsp_times
[afi
])
592 agg_table_finish(rfd
->rsp_times
[afi
]);
593 rfd
->rib
[afi
] = NULL
;
598 * Copies struct bgp_path_info to struct rfapi_info, except for rk fields and un
600 static void rfapiRibBi2Ri(struct bgp_path_info
*bpi
, struct rfapi_info
*ri
,
603 struct bgp_attr_encap_subtlv
*pEncap
;
605 ri
->cost
= rfapiRfpCost(bpi
->attr
);
606 ri
->lifetime
= lifetime
;
608 /* This loop based on rfapiRouteInfo2NextHopEntry() */
609 for (pEncap
= bgp_attr_get_vnc_subtlvs(bpi
->attr
); pEncap
;
610 pEncap
= pEncap
->next
) {
611 struct bgp_tea_options
*hop
;
613 switch (pEncap
->type
) {
614 case BGP_VNC_SUBTLV_TYPE_LIFETIME
:
615 /* use configured lifetime, not attr lifetime */
618 case BGP_VNC_SUBTLV_TYPE_RFPOPTION
:
619 hop
= XCALLOC(MTYPE_BGP_TEA_OPTIONS
,
620 sizeof(struct bgp_tea_options
));
622 hop
->type
= pEncap
->value
[0];
623 hop
->length
= pEncap
->value
[1];
624 hop
->value
= XCALLOC(MTYPE_BGP_TEA_OPTIONS_VALUE
,
627 memcpy(hop
->value
, pEncap
->value
+ 2,
629 if (hop
->length
> pEncap
->length
- 2) {
631 "%s: VNC subtlv length mismatch: RFP option says %d, attr says %d (shrinking)",
632 __func__
, hop
->length
,
634 hop
->length
= pEncap
->length
- 2;
636 hop
->next
= ri
->tea_options
;
637 ri
->tea_options
= hop
;
645 rfapi_un_options_free(ri
->un_options
); /* maybe free old version */
646 ri
->un_options
= rfapi_encap_tlv_to_un_option(bpi
->attr
);
652 && decode_rd_type(bpi
->extra
->vnc
.import
.rd
.val
)
653 == RD_TYPE_VNC_ETH
) {
656 struct rfapi_vn_option
*vo
;
658 vo
= XCALLOC(MTYPE_RFAPI_VN_OPTION
,
659 sizeof(struct rfapi_vn_option
));
662 vo
->type
= RFAPI_VN_OPTION_TYPE_L2ADDR
;
664 /* copy from RD already stored in bpi, so we don't need it_node
666 memcpy(&vo
->v
.l2addr
.macaddr
, bpi
->extra
->vnc
.import
.rd
.val
+ 2,
669 (void)rfapiEcommunityGetLNI(bgp_attr_get_ecommunity(bpi
->attr
),
670 &vo
->v
.l2addr
.logical_net_id
);
671 (void)rfapiEcommunityGetEthernetTag(
672 bgp_attr_get_ecommunity(bpi
->attr
),
673 &vo
->v
.l2addr
.tag_id
);
675 /* local_nve_id comes from RD */
676 vo
->v
.l2addr
.local_nve_id
= bpi
->extra
->vnc
.import
.rd
.val
[1];
678 /* label comes from MP_REACH_NLRI label */
679 vo
->v
.l2addr
.label
= decode_label(&bpi
->extra
->label
[0]);
681 rfapi_vn_options_free(
682 ri
->vn_options
); /* maybe free old version */
687 * If there is an auxiliary IP address (L2 can have it), copy it
689 if (bpi
->extra
&& bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
690 ri
->rk
.aux_prefix
= bpi
->extra
->vnc
.import
.aux_prefix
;
697 * Install route into NVE RIB model so as to be consistent with
698 * caller's response to rfapi_query().
700 * Also: return indication to caller whether this specific route
701 * should be included in the response to the NVE according to
702 * the following tests:
704 * 1. If there were prior duplicates of this route in this same
705 * query response, don't include the route.
709 * 0 OK to include route in response
710 * !0 do not include route in response
712 int rfapiRibPreloadBi(
713 struct agg_node
*rfd_rib_node
, /* NULL = don't preload or filter */
714 struct prefix
*pfx_vn
, struct prefix
*pfx_un
, uint32_t lifetime
,
715 struct bgp_path_info
*bpi
)
717 struct rfapi_descriptor
*rfd
;
718 struct skiplist
*slRibPt
= NULL
;
719 struct rfapi_info
*ori
= NULL
;
720 struct rfapi_rib_key rk
;
721 struct agg_node
*trn
;
723 const struct prefix
*p
= agg_node_get_prefix(rfd_rib_node
);
728 afi
= family2afi(p
->family
);
730 rfd
= agg_get_table_info(agg_get_table(rfd_rib_node
));
732 memset((void *)&rk
, 0, sizeof(rk
));
734 rk
.rd
= bpi
->extra
->vnc
.import
.rd
;
737 * If there is an auxiliary IP address (L2 can have it), copy it
739 if (bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
740 rk
.aux_prefix
= bpi
->extra
->vnc
.import
.aux_prefix
;
744 * is this route already in NVE's RIB?
746 slRibPt
= (struct skiplist
*)rfd_rib_node
->info
;
748 if (slRibPt
&& !skiplist_search(slRibPt
, &rk
, (void **)&ori
)) {
750 if ((ori
->rsp_counter
== rfd
->rsp_counter
)
751 && (ori
->last_sent_time
== rfd
->rsp_time
)) {
752 return -1; /* duplicate in this response */
755 /* found: update contents of existing route in RIB */
757 rfapiRibBi2Ri(bpi
, ori
, lifetime
);
759 /* not found: add new route to RIB */
760 ori
= rfapi_info_new();
763 rfapiRibBi2Ri(bpi
, ori
, lifetime
);
766 slRibPt
= skiplist_new(0, rfapi_rib_key_cmp
, NULL
);
767 rfd_rib_node
->info
= slRibPt
;
768 agg_lock_node(rfd_rib_node
);
769 RFAPI_RIB_PREFIX_COUNT_INCR(rfd
, rfd
->bgp
->rfapi
);
771 skiplist_insert(slRibPt
, &ori
->rk
, ori
);
774 ori
->last_sent_time
= monotime(NULL
);
779 RFAPI_RIB_CHECK_COUNTS(0, 0);
780 rfapiRibStartTimer(rfd
, ori
, rfd_rib_node
, 0);
781 RFAPI_RIB_CHECK_COUNTS(0, 0);
784 * Update last sent time for prefix
786 trn
= agg_node_get(rfd
->rsp_times
[afi
], p
); /* locks trn */
787 trn
->info
= (void *)(uintptr_t)monotime(NULL
);
788 if (agg_node_get_lock_count(trn
) > 1)
789 agg_unlock_node(trn
);
795 * Frees rfapi_info items at node
797 * Adjust 'rib' and 'rib_pending' as follows:
799 * If rib_pending node->info is 1 (magic value):
800 * callback: NHL = RIB NHL with lifetime = withdraw_lifetime_value
801 * RIB = remove all routes at the node
804 * For each item at rib node:
805 * if not present in pending node, move RIB item to "delete list"
807 * For each item at pending rib node:
808 * if present (same vn/un) in rib node with same lifetime & options, drop
809 * matching item from pending node
811 * For each remaining item at pending rib node, add or replace item
814 * Construct NHL as concatenation of pending list + delete list
818 static void process_pending_node(struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
820 struct agg_node
*pn
, /* pending node */
821 struct rfapi_next_hop_entry
**head
,
822 struct rfapi_next_hop_entry
**tail
)
824 struct listnode
*node
= NULL
;
825 struct listnode
*nnode
= NULL
;
826 struct rfapi_info
*ri
= NULL
; /* happy valgrind */
827 struct rfapi_ip_prefix hp
= {0}; /* pfx to put in NHE */
828 struct agg_node
*rn
= NULL
;
829 struct skiplist
*slRibPt
= NULL
; /* rib list */
830 struct skiplist
*slPendPt
= NULL
;
831 struct list
*lPendCost
= NULL
;
832 struct list
*delete_list
= NULL
;
833 int printedprefix
= 0;
834 int rib_node_started_nonempty
= 0;
835 int sendingsomeroutes
= 0;
836 const struct prefix
*p
;
837 #if DEBUG_PROCESS_PENDING_NODE
838 unsigned int count_rib_initial
= 0;
839 unsigned int count_pend_vn_initial
= 0;
840 unsigned int count_pend_cost_initial
= 0;
844 p
= agg_node_get_prefix(pn
);
845 vnc_zlog_debug_verbose("%s: afi=%d, %pRN pn->info=%p", __func__
, afi
,
848 if (AFI_L2VPN
!= afi
) {
849 rfapiQprefix2Rprefix(p
, &hp
);
852 RFAPI_RIB_CHECK_COUNTS(1, 0);
855 * Find corresponding RIB node
857 rn
= agg_node_get(rfd
->rib
[afi
], p
); /* locks rn */
860 * RIB skiplist has key=rfapi_addr={vn,un}, val = rfapi_info,
861 * skiplist.del = NULL
863 slRibPt
= (struct skiplist
*)rn
->info
;
865 rib_node_started_nonempty
= 1;
867 slPendPt
= (struct skiplist
*)(pn
->aggregate
);
868 lPendCost
= (struct list
*)(pn
->info
);
870 #if DEBUG_PROCESS_PENDING_NODE
873 count_rib_initial
= skiplist_count(slRibPt
);
876 count_pend_vn_initial
= skiplist_count(slPendPt
);
878 if (lPendCost
&& lPendCost
!= (struct list
*)1)
879 count_pend_cost_initial
= lPendCost
->count
;
884 * Handle special case: delete all routes at prefix
886 if (lPendCost
== (struct list
*)1) {
887 vnc_zlog_debug_verbose("%s: lPendCost=1 => delete all",
889 if (slRibPt
&& !skiplist_empty(slRibPt
)) {
890 delete_list
= list_new();
892 == skiplist_first(slRibPt
, NULL
, (void **)&ri
)) {
893 listnode_add(delete_list
, ri
);
894 vnc_zlog_debug_verbose(
895 "%s: after listnode_add, delete_list->count=%d",
896 __func__
, delete_list
->count
);
897 rfapiFreeBgpTeaOptionChain(ri
->tea_options
);
898 ri
->tea_options
= NULL
;
901 struct rfapi_rib_tcb
*tcb
;
903 tcb
= THREAD_ARG(ri
->timer
);
904 THREAD_OFF(ri
->timer
);
905 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
908 vnc_zlog_debug_verbose(
909 "%s: put dl pfx=%pRN vn=%pFX un=%pFX cost=%d life=%d vn_options=%p",
910 __func__
, pn
, &ri
->rk
.vn
, &ri
->un
,
911 ri
->cost
, ri
->lifetime
, ri
->vn_options
);
913 skiplist_delete_first(slRibPt
);
916 assert(skiplist_empty(slRibPt
));
918 skiplist_free(slRibPt
);
919 rn
->info
= slRibPt
= NULL
;
922 lPendCost
= pn
->info
= NULL
;
928 skiplist_free(slRibPt
);
934 if (slPendPt
) { /* TBD I think we can toss this block */
935 skiplist_free(slPendPt
);
936 pn
->aggregate
= NULL
;
943 agg_unlock_node(rn
); /* agg_node_get() */
945 if (rib_node_started_nonempty
) {
946 RFAPI_RIB_PREFIX_COUNT_DECR(rfd
, bgp
->rfapi
);
949 RFAPI_RIB_CHECK_COUNTS(1, 0);
954 vnc_zlog_debug_verbose("%s: lPendCost->count=%d, slRibPt->count=%d",
956 (lPendCost
? (int)lPendCost
->count
: -1),
957 (slRibPt
? (int)slRibPt
->count
: -1));
960 * Iterate over routes at RIB Node.
961 * If not found at Pending Node, delete from RIB Node and add to
963 * If found at Pending Node
964 * If identical rfapi_info, delete from Pending Node
968 struct rfapi_info
*ori
;
971 * Iterate over RIB List
974 while (!skiplist_next(slRibPt
, NULL
, (void **)&ori
, &cursor
)) {
976 if (skiplist_search(slPendPt
, &ori
->rk
, (void **)&ri
)) {
978 * Not in Pending list, so it should be deleted
981 delete_list
= list_new();
982 listnode_add(delete_list
, ori
);
983 rfapiFreeBgpTeaOptionChain(ori
->tea_options
);
984 ori
->tea_options
= NULL
;
986 struct rfapi_rib_tcb
*tcb
;
988 tcb
= THREAD_ARG(ori
->timer
);
989 THREAD_OFF(ori
->timer
);
990 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
993 #if DEBUG_PROCESS_PENDING_NODE
994 /* deleted from slRibPt below, after we're done
996 vnc_zlog_debug_verbose(
997 "%s: slRibPt ri %p not matched in pending list, delete",
1003 * Found in pending list. If same lifetime,
1005 * then remove from pending list because the
1009 if (!rfapi_info_cmp(ori
, ri
)) {
1010 skiplist_delete(slPendPt
, &ri
->rk
,
1014 /* linear walk: might need
1016 listnode_delete(lPendCost
,
1026 #if DEBUG_PROCESS_PENDING_NODE
1027 vnc_zlog_debug_verbose(
1028 "%s: slRibPt ri %p matched in pending list, %s",
1031 : "different info"));
1036 * Go back and delete items from RIB
1039 for (ALL_LIST_ELEMENTS_RO(delete_list
, node
, ri
)) {
1040 vnc_zlog_debug_verbose(
1041 "%s: deleting ri %p from slRibPt",
1043 assert(!skiplist_delete(slRibPt
, &ri
->rk
,
1046 if (skiplist_empty(slRibPt
)) {
1047 skiplist_free(slRibPt
);
1048 slRibPt
= rn
->info
= NULL
;
1049 agg_unlock_node(rn
);
1054 RFAPI_RIB_CHECK_COUNTS(0, (delete_list
? delete_list
->count
: 0));
1057 * Iterate over routes at Pending Node
1059 * If {vn} found at RIB Node, update RIB Node route contents to match PN
1060 * If {vn} NOT found at RIB Node, add copy to RIB Node
1063 for (ALL_LIST_ELEMENTS_RO(lPendCost
, node
, ri
)) {
1065 struct rfapi_info
*ori
;
1068 && !skiplist_search(slRibPt
, &ri
->rk
,
1071 /* found: update contents of existing route in
1074 ori
->cost
= ri
->cost
;
1075 ori
->lifetime
= ri
->lifetime
;
1076 rfapiFreeBgpTeaOptionChain(ori
->tea_options
);
1078 rfapiOptionsDup(ri
->tea_options
);
1079 ori
->last_sent_time
= monotime(NULL
);
1081 rfapiFreeRfapiVnOptionChain(ori
->vn_options
);
1083 rfapiVnOptionsDup(ri
->vn_options
);
1085 rfapiFreeRfapiUnOptionChain(ori
->un_options
);
1087 rfapiUnOptionsDup(ri
->un_options
);
1089 vnc_zlog_debug_verbose(
1090 "%s: matched lPendCost item %p in slRibPt, rewrote",
1094 /* not found: add new route to RIB */
1095 ori
= rfapi_info_new();
1098 ori
->cost
= ri
->cost
;
1099 ori
->lifetime
= ri
->lifetime
;
1101 rfapiOptionsDup(ri
->tea_options
);
1102 ori
->last_sent_time
= monotime(NULL
);
1104 rfapiVnOptionsDup(ri
->vn_options
);
1106 rfapiUnOptionsDup(ri
->un_options
);
1109 slRibPt
= skiplist_new(
1110 0, rfapi_rib_key_cmp
, NULL
);
1114 skiplist_insert(slRibPt
, &ori
->rk
, ori
);
1116 vnc_zlog_debug_verbose(
1117 "%s: nomatch lPendCost item %p in slRibPt, added (rd=%pRDP)",
1118 __func__
, ri
, &ori
->rk
.rd
);
1124 RFAPI_RIB_CHECK_COUNTS(
1125 0, (delete_list
? delete_list
->count
: 0));
1126 rfapiRibStartTimer(rfd
, ori
, rn
, 0);
1127 RFAPI_RIB_CHECK_COUNTS(
1128 0, (delete_list
? delete_list
->count
: 0));
1135 * Construct NHL as concatenation of pending list + delete list
1139 RFAPI_RIB_CHECK_COUNTS(0, (delete_list
? delete_list
->count
: 0));
1146 vnc_zlog_debug_verbose("%s: lPendCost->count now %d", __func__
,
1148 vnc_zlog_debug_verbose("%s: For prefix %pRN (a)", __func__
, pn
);
1151 for (ALL_LIST_ELEMENTS(lPendCost
, node
, nnode
, ri
)) {
1153 struct rfapi_next_hop_entry
*new;
1154 struct agg_node
*trn
;
1156 new = XCALLOC(MTYPE_RFAPI_NEXTHOP
,
1157 sizeof(struct rfapi_next_hop_entry
));
1159 if (ri
->rk
.aux_prefix
.family
) {
1160 rfapiQprefix2Rprefix(&ri
->rk
.aux_prefix
,
1164 if (AFI_L2VPN
== afi
) {
1165 /* hp is 0; need to set length to match
1167 new->prefix
.length
=
1168 (ri
->rk
.vn
.family
== AF_INET
)
1173 new->prefix
.cost
= ri
->cost
;
1174 new->lifetime
= ri
->lifetime
;
1175 rfapiQprefix2Raddr(&ri
->rk
.vn
, &new->vn_address
);
1176 rfapiQprefix2Raddr(&ri
->un
, &new->un_address
);
1177 /* free option chain from ri */
1178 rfapiFreeBgpTeaOptionChain(ri
->tea_options
);
1181 NULL
; /* option chain was transferred to NHL */
1183 new->vn_options
= ri
->vn_options
;
1185 NULL
; /* option chain was transferred to NHL */
1187 new->un_options
= ri
->un_options
;
1189 NULL
; /* option chain was transferred to NHL */
1192 (*tail
)->next
= new;
1197 sendingsomeroutes
= 1;
1199 ++rfd
->stat_count_nh_reachable
;
1200 ++bgp
->rfapi
->stat
.count_updated_response_updates
;
1203 * update this NVE's timestamp for this prefix
1205 trn
= agg_node_get(rfd
->rsp_times
[afi
],
1207 trn
->info
= (void *)(uintptr_t)monotime(NULL
);
1208 if (agg_node_get_lock_count(trn
) > 1)
1209 agg_unlock_node(trn
);
1211 rfapiRfapiIpAddr2Str(&new->vn_address
, buf
, BUFSIZ
);
1212 rfapiRfapiIpAddr2Str(&new->un_address
, buf2
, BUFSIZ
);
1213 vnc_zlog_debug_verbose(
1214 "%s: add vn=%s un=%s cost=%d life=%d",
1215 __func__
, buf
, buf2
, new->prefix
.cost
,
1220 RFAPI_RIB_CHECK_COUNTS(0, (delete_list
? delete_list
->count
: 0));
1227 if (!printedprefix
) {
1228 vnc_zlog_debug_verbose("%s: For prefix %pRN (d)",
1231 vnc_zlog_debug_verbose("%s: delete_list has %d elements",
1232 __func__
, delete_list
->count
);
1234 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1235 if (!CHECK_FLAG(bgp
->rfapi_cfg
->flags
,
1236 BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE
)) {
1238 for (ALL_LIST_ELEMENTS(delete_list
, node
, nnode
, ri
)) {
1240 struct rfapi_next_hop_entry
*new;
1241 struct rfapi_info
*ri_del
;
1243 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1245 MTYPE_RFAPI_NEXTHOP
,
1246 sizeof(struct rfapi_next_hop_entry
));
1248 if (ri
->rk
.aux_prefix
.family
) {
1249 rfapiQprefix2Rprefix(&ri
->rk
.aux_prefix
,
1253 if (AFI_L2VPN
== afi
) {
1254 /* hp is 0; need to set length
1255 * to match AF of vn */
1256 new->prefix
.length
=
1264 new->prefix
.cost
= ri
->cost
;
1265 new->lifetime
= RFAPI_REMOVE_RESPONSE_LIFETIME
;
1266 rfapiQprefix2Raddr(&ri
->rk
.vn
,
1268 rfapiQprefix2Raddr(&ri
->un
, &new->un_address
);
1270 new->vn_options
= ri
->vn_options
;
1271 ri
->vn_options
= NULL
; /* option chain was
1272 transferred to NHL */
1274 new->un_options
= ri
->un_options
;
1275 ri
->un_options
= NULL
; /* option chain was
1276 transferred to NHL */
1279 (*tail
)->next
= new;
1284 ++rfd
->stat_count_nh_removal
;
1286 .count_updated_response_deletes
;
1288 rfapiRfapiIpAddr2Str(&new->vn_address
, buf
,
1290 rfapiRfapiIpAddr2Str(&new->un_address
, buf2
,
1292 vnc_zlog_debug_verbose(
1293 "%s: DEL vn=%s un=%s cost=%d life=%d",
1294 __func__
, buf
, buf2
, new->prefix
.cost
,
1297 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1299 * Update/add to list of recent deletions at
1302 if (!rn
->aggregate
) {
1303 rn
->aggregate
= skiplist_new(
1304 0, rfapi_rib_key_cmp
,
1309 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1311 /* sanity check lifetime */
1313 > RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
1315 RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
;
1317 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1318 /* cancel normal expire timer */
1320 struct rfapi_rib_tcb
*tcb
;
1322 tcb
= THREAD_ARG(ri
->timer
);
1323 THREAD_OFF(ri
->timer
);
1324 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
1326 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1329 * Look in "recently-deleted" list
1331 if (skiplist_search(
1332 (struct skiplist
*)(rn
->aggregate
),
1333 &ri
->rk
, (void **)&ri_del
)) {
1337 RFAPI_RIB_CHECK_COUNTS(
1338 0, delete_list
->count
);
1340 * NOT in "recently-deleted" list
1344 node
); /* does not free ri */
1345 rc
= skiplist_insert(
1351 RFAPI_RIB_CHECK_COUNTS(
1352 0, delete_list
->count
);
1353 rfapiRibStartTimer(rfd
, ri
, rn
, 1);
1354 RFAPI_RIB_CHECK_COUNTS(
1355 0, delete_list
->count
);
1356 ri
->last_sent_time
= monotime(NULL
);
1358 vnc_zlog_debug_verbose(
1359 "%s: move route to recently deleted list, rd=%pRDP",
1360 __func__
, &ri
->rk
.rd
);
1365 * IN "recently-deleted" list
1367 RFAPI_RIB_CHECK_COUNTS(
1368 0, delete_list
->count
);
1369 rfapiRibStartTimer(rfd
, ri_del
, rn
, 1);
1370 RFAPI_RIB_CHECK_COUNTS(
1371 0, delete_list
->count
);
1372 ri
->last_sent_time
= monotime(NULL
);
1376 vnc_zlog_debug_verbose(
1377 "%s: response removal disabled, omitting removals",
1381 delete_list
->del
= (void (*)(void *))rfapi_info_free
;
1382 list_delete(&delete_list
);
1385 RFAPI_RIB_CHECK_COUNTS(0, 0);
1388 * Reset pending lists. The final agg_unlock_node() will probably
1389 * cause the pending node to be released.
1392 skiplist_free(slPendPt
);
1393 pn
->aggregate
= NULL
;
1394 agg_unlock_node(pn
);
1397 list_delete(&lPendCost
);
1399 agg_unlock_node(pn
);
1401 RFAPI_RIB_CHECK_COUNTS(0, 0);
1403 if (rib_node_started_nonempty
) {
1405 RFAPI_RIB_PREFIX_COUNT_DECR(rfd
, bgp
->rfapi
);
1409 RFAPI_RIB_PREFIX_COUNT_INCR(rfd
, bgp
->rfapi
);
1413 if (sendingsomeroutes
)
1414 rfapiMonitorTimersRestart(rfd
, p
);
1416 agg_unlock_node(rn
); /* agg_node_get() */
1418 RFAPI_RIB_CHECK_COUNTS(1, 0);
1422 * regardless of targets, construct a single callback by doing
1423 * only one traversal of the pending RIB
1429 static void rib_do_callback_onepass(struct rfapi_descriptor
*rfd
, afi_t afi
)
1431 struct bgp
*bgp
= bgp_get_default();
1432 struct rfapi_next_hop_entry
*head
= NULL
;
1433 struct rfapi_next_hop_entry
*tail
= NULL
;
1434 struct agg_node
*rn
;
1436 #ifdef DEBUG_L2_EXTRA
1437 vnc_zlog_debug_verbose("%s: rfd=%p, afi=%d", __func__
, rfd
, afi
);
1440 if (!rfd
->rib_pending
[afi
])
1445 for (rn
= agg_route_top(rfd
->rib_pending
[afi
]); rn
;
1446 rn
= agg_route_next(rn
)) {
1447 process_pending_node(bgp
, rfd
, afi
, rn
, &head
, &tail
);
1451 rfapi_response_cb_t
*f
;
1454 vnc_zlog_debug_verbose("%s: response callback NHL follows:",
1456 rfapiPrintNhl(NULL
, head
);
1459 if (rfd
->response_cb
)
1460 f
= rfd
->response_cb
;
1462 f
= bgp
->rfapi
->rfp_methods
.response_cb
;
1464 bgp
->rfapi
->flags
|= RFAPI_INCALLBACK
;
1465 vnc_zlog_debug_verbose("%s: invoking updated response callback",
1467 (*f
)(head
, rfd
->cookie
);
1468 bgp
->rfapi
->flags
&= ~RFAPI_INCALLBACK
;
1469 ++bgp
->rfapi
->response_updated_count
;
1473 static wq_item_status
rfapiRibDoQueuedCallback(struct work_queue
*wq
,
1476 struct rfapi_descriptor
*rfd
;
1478 uint32_t queued_flag
;
1480 RFAPI_RIB_CHECK_COUNTS(1, 0);
1482 rfd
= ((struct rfapi_updated_responses_queue
*)data
)->rfd
;
1483 afi
= ((struct rfapi_updated_responses_queue
*)data
)->afi
;
1485 /* Make sure the HD wasn't closed after the work item was scheduled */
1486 if (rfapi_check(rfd
))
1489 rib_do_callback_onepass(rfd
, afi
);
1491 queued_flag
= RFAPI_QUEUED_FLAG(afi
);
1493 UNSET_FLAG(rfd
->flags
, queued_flag
);
1495 RFAPI_RIB_CHECK_COUNTS(1, 0);
1500 static void rfapiRibQueueItemDelete(struct work_queue
*wq
, void *data
)
1502 XFREE(MTYPE_RFAPI_UPDATED_RESPONSE_QUEUE
, data
);
1505 static void updated_responses_queue_init(struct rfapi_descriptor
*rfd
)
1507 if (rfd
->updated_responses_queue
)
1510 rfd
->updated_responses_queue
=
1511 work_queue_new(bm
->master
, "rfapi updated responses");
1512 assert(rfd
->updated_responses_queue
);
1514 rfd
->updated_responses_queue
->spec
.workfunc
= rfapiRibDoQueuedCallback
;
1515 rfd
->updated_responses_queue
->spec
.del_item_data
=
1516 rfapiRibQueueItemDelete
;
1517 rfd
->updated_responses_queue
->spec
.max_retries
= 0;
1518 rfd
->updated_responses_queue
->spec
.hold
= 1;
1522 * Called when an import table node is modified. Construct a
1523 * new complete nexthop list, sorted by cost (lowest first),
1524 * based on the import table node.
1526 * Filter out duplicate nexthops (vn address). There should be
1527 * only one UN address per VN address from the point of view of
1528 * a given import table, so we can probably ignore UN addresses
1531 * Based on rfapiNhlAddNodeRoutes()
/*
 * Rebuild the pending route set for one prefix of one NVE from an
 * import table node, then make sure a work item is queued so the
 * updated response is delivered.
 *
 * bgp       instance whose rfapi_cfg flags are consulted
 * rfd       descriptor whose rib_pending[afi] table is updated
 * it        import table (needed for L2)
 * it_node   import table node; its prefix selects the pending node and
 *           the bgp_path_info chain on its info pointer is walked
 * lifetime  passed through to rfapiRibBi2Ri() for each accepted route
 *
 * Visible steps:
 *  - BGP_VNC_CONFIG_CALLBACK_DISABLE is tested first.  NOTE(review):
 *    the statement under that test is not shown here; the following
 *    "callbacks are not disabled" message suggests an early return.
 *  - The pending node is fetched with agg_node_get().  A previous
 *    skiplist in pn->aggregate is freed and a previous list in pn->info
 *    is deleted (the value (void *)1 is a magic marker, not a list);
 *    each is paired with an agg_unlock_node().
 *  - For every path on it_node->info a new rfapi_info is built: the
 *    next hop is converted with rfapiNexthop2Prefix(); when
 *    BGP_VNC_CONFIG_FILTER_SELF_FROM_RSP is set it is compared with the
 *    VN address of the NVE; rk.rd and, when its family is non-zero,
 *    rk.aux_prefix are copied from bpi->extra->vnc.import; the UN
 *    address comes from rfapiGetUnAddrOfVpnBi().  The rfapi_info is
 *    freed again on the failure test of that lookup and on the
 *    skiplist_insert() test keyed on &ri->rk; the insert is what
 *    filters duplicate next hops.
 *  - Accepted entries are appended to the list in pn->info, whose del
 *    callback is rfapi_info_free.
 *  - pn->info is set to (void *)1 on the path that asserts
 *    !pn->aggregate.  NOTE(review): the controlling condition is not
 *    visible; presumably the case where no routes were accepted.
 *  - Unless RFAPI_QUEUED_FLAG(afi) is already set in rfd->flags, a
 *    struct rfapi_updated_responses_queue is allocated, the queue is
 *    created on demand, the flag is set and the item is handed to
 *    work_queue_add().
 *
 * NOTE(review): several original lines are missing from this listing
 * (declarations, braces, else/continue/return lines and some
 * assignments), so the exact control flow between the visible
 * statements must be checked against the full source.
 */
1533 void rfapiRibUpdatePendingNode(
1534 struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
1535 struct rfapi_import_table
*it
, /* needed for L2 */
1536 struct agg_node
*it_node
, uint32_t lifetime
)
1538 const struct prefix
*prefix
;
1539 struct bgp_path_info
*bpi
;
1540 struct agg_node
*pn
;
1542 uint32_t queued_flag
;
1545 vnc_zlog_debug_verbose("%s: entry", __func__
);
1547 if (CHECK_FLAG(bgp
->rfapi_cfg
->flags
, BGP_VNC_CONFIG_CALLBACK_DISABLE
))
1550 vnc_zlog_debug_verbose("%s: callbacks are not disabled", __func__
);
1552 RFAPI_RIB_CHECK_COUNTS(1, 0);
1554 prefix
= agg_node_get_prefix(it_node
);
1555 afi
= family2afi(prefix
->family
);
1556 vnc_zlog_debug_verbose("%s: prefix=%pFX", __func__
, prefix
);
1558 pn
= agg_node_get(rfd
->rib_pending
[afi
], prefix
);
1561 vnc_zlog_debug_verbose("%s: pn->info=%p, pn->aggregate=%p", __func__
,
1562 pn
->info
, pn
->aggregate
);
1564 if (pn
->aggregate
) {
1566 * free references into the rfapi_info structures before
1567 * freeing the structures themselves
1569 skiplist_free((struct skiplist
*)(pn
->aggregate
));
1570 pn
->aggregate
= NULL
;
1571 agg_unlock_node(pn
); /* skiplist deleted */
1576 * free the rfapi_info structures
1579 if (pn
->info
!= (void *)1) {
1580 list_delete((struct list
**)(&pn
->info
));
1583 agg_unlock_node(pn
); /* linklist or 1 deleted */
1587 * The BPIs in the import table are already sorted by cost
1589 for (bpi
= it_node
->info
; bpi
; bpi
= bpi
->next
) {
1591 struct rfapi_info
*ri
;
1592 struct prefix pfx_nh
;
1595 /* shouldn't happen */
1596 /* TBD increment error stats counter */
1600 rfapiNexthop2Prefix(bpi
->attr
, &pfx_nh
);
1603 * Omit route if nexthop is self
1605 if (CHECK_FLAG(bgp
->rfapi_cfg
->flags
,
1606 BGP_VNC_CONFIG_FILTER_SELF_FROM_RSP
)) {
1608 struct prefix pfx_vn
;
1610 assert(!rfapiRaddr2Qprefix(&rfd
->vn_addr
, &pfx_vn
));
1611 if (prefix_same(&pfx_vn
, &pfx_nh
))
1615 ri
= rfapi_info_new();
1617 ri
->rk
.rd
= bpi
->extra
->vnc
.import
.rd
;
1619 * If there is an auxiliary IP address (L2 can have it), copy it
1621 if (bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
1622 ri
->rk
.aux_prefix
= bpi
->extra
->vnc
.import
.aux_prefix
;
1625 if (rfapiGetUnAddrOfVpnBi(bpi
, &ri
->un
)) {
1626 rfapi_info_free(ri
);
1630 if (!pn
->aggregate
) {
1632 skiplist_new(0, rfapi_rib_key_cmp
, NULL
);
1637 * If we have already added this nexthop, the insert will fail.
1638 * Note that the skiplist key is a pointer INTO the rfapi_info
1639 * structure which will be added to the "info" list.
1640 * The skiplist entry VALUE is not used for anything but
1641 * might be useful during debugging.
1643 if (skiplist_insert((struct skiplist
*)pn
->aggregate
, &ri
->rk
,
1649 rfapi_info_free(ri
);
1653 rfapiRibBi2Ri(bpi
, ri
, lifetime
);
1656 pn
->info
= list_new();
1657 ((struct list
*)(pn
->info
))->del
=
1658 (void (*)(void *))rfapi_info_free
;
1662 listnode_add((struct list
*)(pn
->info
), ri
);
1666 count
= ((struct list
*)(pn
->info
))->count
;
1671 assert(!pn
->aggregate
);
1672 pn
->info
= (void *)1; /* magic value means this node has no
1677 agg_unlock_node(pn
); /* agg_node_get */
1679 queued_flag
= RFAPI_QUEUED_FLAG(afi
);
1681 if (!CHECK_FLAG(rfd
->flags
, queued_flag
)) {
1683 struct rfapi_updated_responses_queue
*urq
;
1685 urq
= XCALLOC(MTYPE_RFAPI_UPDATED_RESPONSE_QUEUE
,
1686 sizeof(struct rfapi_updated_responses_queue
));
1687 if (!rfd
->updated_responses_queue
)
1688 updated_responses_queue_init(rfd
);
1690 SET_FLAG(rfd
->flags
, queued_flag
);
1693 work_queue_add(rfd
->updated_responses_queue
, urq
);
1695 RFAPI_RIB_CHECK_COUNTS(1, 0);
/*
 * Apply rfapiRibUpdatePendingNode() to the populated nodes below
 * it_node in the import table, skipping one subtree.
 *
 * For the left and then the right child of it_node: when the child
 * exists and is not omit_subtree (which may be NULL), the child is
 * updated if its info pointer is set, and the function recurses into
 * that child.  it_node itself is not passed to
 * rfapiRibUpdatePendingNode() here.
 *
 * NOTE(review): the declaration of the lifetime argument is not
 * visible in this listing; the value is forwarded unchanged to both
 * callees.
 */
1698 void rfapiRibUpdatePendingNodeSubtree(
1699 struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
1700 struct rfapi_import_table
*it
, struct agg_node
*it_node
,
1701 struct agg_node
*omit_subtree
, /* may be NULL */
1704 /* FIXME: need to find a better way here to work without sticking our
1705 * hands in node->link */
1706 if (agg_node_left(it_node
)
1707 && (agg_node_left(it_node
) != omit_subtree
)) {
1708 if (agg_node_left(it_node
)->info
)
1709 rfapiRibUpdatePendingNode(
1710 bgp
, rfd
, it
, agg_node_left(it_node
), lifetime
);
1711 rfapiRibUpdatePendingNodeSubtree(bgp
, rfd
, it
,
1712 agg_node_left(it_node
),
1713 omit_subtree
, lifetime
);
1716 if (agg_node_right(it_node
)
1717 && (agg_node_right(it_node
) != omit_subtree
)) {
1718 if (agg_node_right(it_node
)->info
)
1719 rfapiRibUpdatePendingNode(bgp
, rfd
, it
,
1720 agg_node_right(it_node
),
1722 rfapiRibUpdatePendingNodeSubtree(bgp
, rfd
, it
,
1723 agg_node_right(it_node
),
1724 omit_subtree
, lifetime
);
1731 * 0 allow prefix to be included in response
1732 * !0 don't allow prefix to be included in response
/*
 * Decide whether a prefix may be included in a response while in
 * full-table-download (FTD) mode.  Per the note above this definition,
 * 0 allows the prefix and non-zero suppresses it.
 *
 * rfd                  descriptor; supplies bgp, rsp_times[afi] and
 *                      ftd_last_allowed_time
 * it_rn                import table node whose prefix is examined
 * pfx_target_original  query target
 *
 * Checks, in the order they appear:
 *  - download_type other than RFAPI_RFP_DOWNLOAD_FULL: not in FTD mode
 *  - AF_ETHERNET prefixes get their own early decision
 *  - prefix_match(p, pfx_target_original): the prefix covers the target
 *  - otherwise the time stored in rsp_times[afi] for this prefix is
 *    compared with rfd->ftd_last_allowed_time; "<=" means the prefix
 *    has not been sent since the advertisement interval began
 *
 * agg_node_get() locks the timestamp node; it is unlocked again only
 * while its lock count is above 1.
 *
 * NOTE(review): the return statements are not visible in this listing,
 * so the value produced by each branch must be confirmed against the
 * full source.
 */
1734 int rfapiRibFTDFilterRecentPrefix(
1735 struct rfapi_descriptor
*rfd
,
1736 struct agg_node
*it_rn
, /* import table node */
1737 struct prefix
*pfx_target_original
) /* query target */
1739 struct bgp
*bgp
= rfd
->bgp
;
1740 const struct prefix
*p
= agg_node_get_prefix(it_rn
);
1741 afi_t afi
= family2afi(p
->family
);
1743 struct agg_node
*trn
;
1746 * Not in FTD mode, so allow prefix
1748 if (bgp
->rfapi_cfg
->rfp_cfg
.download_type
!= RFAPI_RFP_DOWNLOAD_FULL
)
1753 * This matches behavior of now-obsolete rfapiRibFTDFilterRecent(),
1754 * but we need to decide if that is correct.
1756 if (p
->family
== AF_ETHERNET
)
1759 #ifdef DEBUG_FTD_FILTER_RECENT
1761 vnc_zlog_debug_verbose("%s: prefix %pFX", __func__
,
1762 agg_node_get_prefix(it_rn
));
1767 * prefix covers target address, so allow prefix
1769 if (prefix_match(p
, pfx_target_original
)) {
1770 #ifdef DEBUG_FTD_FILTER_RECENT
1771 vnc_zlog_debug_verbose("%s: prefix covers target, allowed",
1778 * check this NVE's timestamp for this prefix
1780 trn
= agg_node_get(rfd
->rsp_times
[afi
], p
); /* locks trn */
1781 prefix_time
= (time_t)trn
->info
;
1782 if (agg_node_get_lock_count(trn
) > 1)
1783 agg_unlock_node(trn
);
1785 #ifdef DEBUG_FTD_FILTER_RECENT
1786 vnc_zlog_debug_verbose("%s: last sent time %lu, last allowed time %lu",
1787 __func__
, prefix_time
,
1788 rfd
->ftd_last_allowed_time
);
1792 * haven't sent this prefix, which doesn't cover target address,
1793 * to NVE since ftd_advertisement_interval, so OK to send now.
1795 if (prefix_time
<= rfd
->ftd_last_allowed_time
)
1802 * Call when rfapi returns from rfapi_query() so the RIB reflects
1803 * the routes sent to the NVE before the first updated response
1805 * Also: remove duplicates from response. Caller should use returned
1806 * value of nexthop chain.
/*
 * Record an initial query response in the RIB of the NVE and strip
 * duplicates from it.  Per the note above this definition, the caller
 * should use the returned next-hop chain.
 *
 * bgp                 instance whose rfapi counters are updated
 * rfd                 descriptor owning rib[] and rsp_times[]
 * response            chain of rfapi_next_hop_entry to process
 * use_eth_resolution  when set, the RIB prefix is taken from the L2
 *                     address VN option of the entry and nhp->prefix
 *                     is copied into rk.aux_prefix
 *
 * For each entry (the next pointer is saved first because the entry
 * may be deleted):
 *  - an entry with lifetime RFAPI_REMOVE_RESPONSE_LIFETIME is logged as
 *    unexpected
 *  - the prefix is looked up in rfd->rib[afi] with agg_node_get(); a
 *    skiplist ordered by rfapi_rib_key_cmp is created in rn->info on
 *    one of the visible paths
 *  - the key is built from the VN address (plus aux_prefix for
 *    ethernet; an aux_prefix that is both zero and a host prefix is
 *    marked AF_UNSPEC)
 *  - a route already present has its un_options and vn_options chains
 *    freed and is compared on rsp_counter and last_sent_time;
 *    otherwise a new rfapi_info is created
 *  - un, cost, lifetime, vn_options, rsp_counter and last_sent_time are
 *    stored, the prefix count is incremented for a node that started
 *    empty, and rfapiRibStartTimer() is called
 *  - rsp_times[afi] for the prefix is set to the current monotime()
 *  - on the visible freeing path the option chains of the entry are
 *    released and the entry itself is freed with XFREE
 *
 * NOTE(review): last_sent_time is stored from a fresh monotime() call
 * while the duplicate test compares against new_last_sent_time, which
 * is sampled once before the loop -- confirm this is intended.
 * NOTE(review): many original lines (declarations, else/continue/return
 * lines, closing braces) are absent from this listing; the branch
 * structure and the construction of the returned chain are not
 * visible here.
 */
1808 struct rfapi_next_hop_entry
*
1809 rfapiRibPreload(struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
1810 struct rfapi_next_hop_entry
*response
, int use_eth_resolution
)
1812 struct rfapi_next_hop_entry
*nhp
;
1813 struct rfapi_next_hop_entry
*nhp_next
;
1814 struct rfapi_next_hop_entry
*head
= NULL
;
1815 struct rfapi_next_hop_entry
*tail
= NULL
;
1816 time_t new_last_sent_time
;
1818 vnc_zlog_debug_verbose("%s: loading response=%p, use_eth_resolution=%d",
1819 __func__
, response
, use_eth_resolution
);
1821 new_last_sent_time
= monotime(NULL
);
1823 for (nhp
= response
; nhp
; nhp
= nhp_next
) {
1826 struct rfapi_rib_key rk
;
1828 struct rfapi_info
*ri
;
1830 struct agg_node
*rn
;
1831 int rib_node_started_nonempty
= 0;
1832 struct agg_node
*trn
;
1835 /* save in case we delete nhp */
1836 nhp_next
= nhp
->next
;
1838 if (nhp
->lifetime
== RFAPI_REMOVE_RESPONSE_LIFETIME
) {
1840 * weird, shouldn't happen
1842 vnc_zlog_debug_verbose(
1843 "%s: got nhp->lifetime == RFAPI_REMOVE_RESPONSE_LIFETIME",
1849 if (use_eth_resolution
) {
1850 /* get the prefix of the ethernet address in the L2
1852 struct rfapi_l2address_option
*pL2o
;
1853 struct rfapi_vn_option
*vo
;
1856 * Look for VN option of type
1857 * RFAPI_VN_OPTION_TYPE_L2ADDR
1859 for (pL2o
= NULL
, vo
= nhp
->vn_options
; vo
;
1861 if (RFAPI_VN_OPTION_TYPE_L2ADDR
== vo
->type
) {
1862 pL2o
= &vo
->v
.l2addr
;
1869 * not supposed to happen
1871 vnc_zlog_debug_verbose("%s: missing L2 info",
1877 rfapiL2o2Qprefix(pL2o
, &pfx
);
1879 rfapiRprefix2Qprefix(&nhp
->prefix
, &pfx
);
1880 afi
= family2afi(pfx
.family
);
1884 * TBD for ethernet, rib must know the right way to distinguish
1887 * Current approach: prefix is key to radix tree; then
1888 * each prefix has a set of routes with unique VN addrs
1892 * Look up prefix in RIB
1894 rn
= agg_node_get(rfd
->rib
[afi
], &pfx
); /* locks rn */
1897 rib_node_started_nonempty
= 1;
1899 rn
->info
= skiplist_new(0, rfapi_rib_key_cmp
, NULL
);
1904 * Look up route at prefix
1907 memset((void *)&rk
, 0, sizeof(rk
));
1908 assert(!rfapiRaddr2Qprefix(&nhp
->vn_address
, &rk
.vn
));
1910 if (use_eth_resolution
) {
1911 /* copy what came from aux_prefix to rk.aux_prefix */
1912 rfapiRprefix2Qprefix(&nhp
->prefix
, &rk
.aux_prefix
);
1913 if (RFAPI_0_PREFIX(&rk
.aux_prefix
)
1914 && RFAPI_HOST_PREFIX(&rk
.aux_prefix
)) {
1915 /* mark as "none" if nhp->prefix is 0/32 or
1917 rk
.aux_prefix
.family
= AF_UNSPEC
;
1923 char str_aux_prefix
[PREFIX_STRLEN
];
1925 str_aux_prefix
[0] = 0;
1927 prefix2str(&rk
.aux_prefix
, str_aux_prefix
,
1928 sizeof(str_aux_prefix
));
1930 if (!rk
.aux_prefix
.family
) {
1932 vnc_zlog_debug_verbose(
1933 "%s: rk.vn=%pFX rk.aux_prefix=%s", __func__
,
1935 (rk
.aux_prefix
.family
? str_aux_prefix
: "-"));
1937 vnc_zlog_debug_verbose(
1938 "%s: RIB skiplist for this prefix follows", __func__
);
1939 rfapiRibShowRibSl(NULL
, agg_node_get_prefix(rn
),
1940 (struct skiplist
*)rn
->info
);
1944 if (!skiplist_search((struct skiplist
*)rn
->info
, &rk
,
1947 * Already have this route; make values match
1949 rfapiFreeRfapiUnOptionChain(ri
->un_options
);
1950 ri
->un_options
= NULL
;
1951 rfapiFreeRfapiVnOptionChain(ri
->vn_options
);
1952 ri
->vn_options
= NULL
;
1955 vnc_zlog_debug_verbose("%s: found in RIB", __func__
);
1959 * Filter duplicate routes from initial response.
1960 * Check timestamps to avoid wraparound problems
1962 if ((ri
->rsp_counter
!= rfd
->rsp_counter
)
1963 || (ri
->last_sent_time
!= new_last_sent_time
)) {
1966 vnc_zlog_debug_verbose(
1967 "%s: allowed due to counter/timestamp diff",
1976 vnc_zlog_debug_verbose(
1977 "%s: allowed due to not yet in RIB", __func__
);
1979 /* not found: add new route to RIB */
1980 ri
= rfapi_info_new();
1986 assert(!rfapiRaddr2Qprefix(&nhp
->un_address
, &ri
->un
));
1987 ri
->cost
= nhp
->prefix
.cost
;
1988 ri
->lifetime
= nhp
->lifetime
;
1989 ri
->vn_options
= rfapiVnOptionsDup(nhp
->vn_options
);
1990 ri
->rsp_counter
= rfd
->rsp_counter
;
1991 ri
->last_sent_time
= monotime(NULL
);
1995 rc
= skiplist_insert((struct skiplist
*)rn
->info
,
2000 if (!rib_node_started_nonempty
) {
2001 RFAPI_RIB_PREFIX_COUNT_INCR(rfd
, bgp
->rfapi
);
2004 RFAPI_RIB_CHECK_COUNTS(0, 0);
2005 rfapiRibStartTimer(rfd
, ri
, rn
, 0);
2006 RFAPI_RIB_CHECK_COUNTS(0, 0);
2008 agg_unlock_node(rn
);
2011 * update this NVE's timestamp for this prefix
2013 trn
= agg_node_get(rfd
->rsp_times
[afi
], &pfx
); /* locks trn */
2014 trn
->info
= (void *)(uintptr_t)monotime(NULL
);
2015 if (agg_node_get_lock_count(trn
) > 1)
2016 agg_unlock_node(trn
);
2018 vnc_zlog_debug_verbose(
2019 "%s: added pfx=%pFX nh[vn]=%pFX, cost=%u, lifetime=%u, allowed=%d",
2020 __func__
, &pfx
, &rk
.vn
, nhp
->prefix
.cost
, nhp
->lifetime
,
2031 rfapi_un_options_free(nhp
->un_options
);
2032 nhp
->un_options
= NULL
;
2033 rfapi_vn_options_free(nhp
->vn_options
);
2034 nhp
->vn_options
= NULL
;
2036 XFREE(MTYPE_RFAPI_NEXTHOP
, nhp
);
/*
 * Schedule updated responses for the NVEs affected by a change at
 * it_node in import table it.
 *
 * Inside the AFI_L2VPN branch the interested descriptors come from
 * ethernet monitors: first the route-specific monitors in the skiplist
 * returned by RFAPI_MONITOR_ETH(it_node), then the all-routes/FTD
 * monitors chained from it->eth0_queries.  The remaining code walks
 * bgp->rfapi->descriptors and considers each descriptor whose
 * import_table equals it.
 *
 * In all three places rfapiRibUpdatePendingNode() is called with the
 * response_lifetime of the descriptor, and only when agg_node_lookup()
 * finds a node in that descriptor's rib[afi]; the node returned by the
 * lookup is unlocked afterwards.
 *
 * NOTE(review): this listing omits lines (declarations of rc and
 * cursor, closing braces, any return or continue statements), so
 * whether the L2VPN branch falls through to the descriptor walk is not
 * visible here -- confirm against the full source.
 */
2045 void rfapiRibPendingDeleteRoute(struct bgp
*bgp
, struct rfapi_import_table
*it
,
2046 afi_t afi
, struct agg_node
*it_node
)
2048 struct rfapi_descriptor
*rfd
;
2049 struct listnode
*node
;
2050 const struct prefix
*p
= agg_node_get_prefix(it_node
);
2052 vnc_zlog_debug_verbose("%s: entry, it=%p, afi=%d, it_node=%p, pfx=%pRN",
2053 __func__
, it
, afi
, it_node
, it_node
);
2055 if (AFI_L2VPN
== afi
) {
2057 * ethernet import tables are per-LNI and each ethernet monitor
2058 * identifies the rfd that owns it.
2060 struct rfapi_monitor_eth
*m
;
2061 struct agg_node
*rn
;
2062 struct skiplist
*sl
;
2067 * route-specific monitors
2069 if ((sl
= RFAPI_MONITOR_ETH(it_node
))) {
2071 vnc_zlog_debug_verbose(
2072 "%s: route-specific skiplist: %p", __func__
,
2076 rc
= skiplist_next(sl
, NULL
, (void **)&m
, &cursor
);
2077 !rc
; rc
= skiplist_next(sl
, NULL
, (void **)&m
,
2080 #if DEBUG_PENDING_DELETE_ROUTE
2081 vnc_zlog_debug_verbose("%s: eth monitor rfd=%p",
2085 * If we have already sent a route with this
2087 * NVE, it's OK to send an update with the
2090 if ((rn
= agg_node_lookup(m
->rfd
->rib
[afi
],
2092 rfapiRibUpdatePendingNode(
2093 bgp
, m
->rfd
, it
, it_node
,
2094 m
->rfd
->response_lifetime
);
2095 agg_unlock_node(rn
);
2101 * all-routes/FTD monitors
2103 for (m
= it
->eth0_queries
; m
; m
= m
->next
) {
2104 #if DEBUG_PENDING_DELETE_ROUTE
2105 vnc_zlog_debug_verbose("%s: eth0 monitor rfd=%p",
2109 * If we have already sent a route with this prefix to
2111 * NVE, it's OK to send an update with the delete
2113 if ((rn
= agg_node_lookup(m
->rfd
->rib
[afi
], p
))) {
2114 rfapiRibUpdatePendingNode(
2115 bgp
, m
->rfd
, it
, it_node
,
2116 m
->rfd
->response_lifetime
);
2117 agg_unlock_node(rn
);
2123 * Find RFDs that reference this import table
2125 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
,
2128 struct agg_node
*rn
;
2130 vnc_zlog_debug_verbose(
2131 "%s: comparing rfd(%p)->import_table=%p to it=%p",
2132 __func__
, rfd
, rfd
->import_table
, it
);
2134 if (rfd
->import_table
!= it
)
2137 vnc_zlog_debug_verbose("%s: matched rfd %p", __func__
,
2141 * If we have sent a response to this NVE with this
2143 * previously, we should send an updated response.
2145 if ((rn
= agg_node_lookup(rfd
->rib
[afi
], p
))) {
2146 rfapiRibUpdatePendingNode(
2147 bgp
, rfd
, it
, it_node
,
2148 rfd
->response_lifetime
);
2149 agg_unlock_node(rn
);
/*
 * Print the response summary in three rows:
 *   Responses (Prefixes): active and maximum prefix counts
 *   (Updated):            update, remove and total counters taken from
 *                         bgp->rfapi->stat
 *   (NVEs):               descriptors with a non-zero rib_prefix_count
 *                         and a total
 *
 * stream is resolved by rfapiStream2Vty() into the output function fp,
 * its handle out, and vty / vty_newline; a result of 0 from that call
 * is tested first.  "Unable to find default BGP instance" is printed on
 * a path whose condition is not visible in this listing.
 * NOTE(review): presumably when bgp_get_default() returns NULL, and the
 * nves total is presumably counted in the descriptor loop -- confirm
 * against the full source.
 */
2155 void rfapiRibShowResponsesSummary(void *stream
)
2157 int (*fp
)(void *, const char *, ...);
2160 const char *vty_newline
;
2161 struct bgp
*bgp
= bgp_get_default();
2164 int nves_with_nonempty_ribs
= 0;
2165 struct rfapi_descriptor
*rfd
;
2166 struct listnode
*node
;
2168 if (rfapiStream2Vty(stream
, &fp
, &vty
, &out
, &vty_newline
) == 0)
2171 fp(out
, "Unable to find default BGP instance\n");
2175 fp(out
, "%-24s ", "Responses: (Prefixes)");
2176 fp(out
, "%-8s %-8u ", "Active:", bgp
->rfapi
->rib_prefix_count_total
);
2177 fp(out
, "%-8s %-8u",
2178 "Maximum:", bgp
->rfapi
->rib_prefix_count_total_max
);
2181 fp(out
, "%-24s ", " (Updated)");
2182 fp(out
, "%-8s %-8u ",
2183 "Update:", bgp
->rfapi
->stat
.count_updated_response_updates
);
2184 fp(out
, "%-8s %-8u",
2185 "Remove:", bgp
->rfapi
->stat
.count_updated_response_deletes
);
2186 fp(out
, "%-8s %-8u", "Total:",
2187 bgp
->rfapi
->stat
.count_updated_response_updates
2188 + bgp
->rfapi
->stat
.count_updated_response_deletes
);
2191 fp(out
, "%-24s ", " (NVEs)");
2192 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
, rfd
)) {
2194 if (rfd
->rib_prefix_count
)
2195 ++nves_with_nonempty_ribs
;
2197 fp(out
, "%-8s %-8u ", "Active:", nves_with_nonempty_ribs
);
2198 fp(out
, "%-8s %-8u", "Total:", nves
);
2202 void rfapiRibShowResponsesSummaryClear(void)
2204 struct bgp
*bgp
= bgp_get_default();
2206 bgp
->rfapi
->rib_prefix_count_total_max
=
2207 bgp
->rfapi
->rib_prefix_count_total
;
/*
 * Print one row per rfapi_info stored in skiplist sl.
 *
 * fp, out        output function and its handle
 * vty            carried in the signature; not used in the visible
 *                lines
 * sl             skiplist walked with skiplist_next()
 * deleted        non-zero marks each row with 'r' instead of a space
 * str_pfx        prefix text, printed only while *printedprefix is zero
 * printedprefix  flag read for every row and tested again after the
 *                row is printed (NOTE(review): the statement that
 *                updates it is not visible in this listing)
 *
 * Each row shows the VN and UN addresses (formatted with prefix2str();
 * the position of '/' is located with index()), cost, lifetime, a time
 * column and the route distinguisher.  Under
 * RFAPI_REGISTRATIONS_REPORT_AGE the time column is produced by
 * rfapiFormatAge() from last_sent_time; the other visible path formats
 * the time left until last_sent_time + lifetime, using 1 when that
 * moment has already passed to allow for delayed/async removal.
 *
 * Returns routes_displayed.
 */
2210 static int print_rib_sl(int (*fp
)(void *, const char *, ...), struct vty
*vty
,
2211 void *out
, struct skiplist
*sl
, int deleted
,
2212 char *str_pfx
, int *printedprefix
)
2214 struct rfapi_info
*ri
;
2217 int routes_displayed
= 0;
2220 for (rc
= skiplist_next(sl
, NULL
, (void **)&ri
, &cursor
); !rc
;
2221 rc
= skiplist_next(sl
, NULL
, (void **)&ri
, &cursor
)) {
2223 char str_vn
[PREFIX_STRLEN
];
2224 char str_un
[PREFIX_STRLEN
];
2225 char str_lifetime
[BUFSIZ
];
2226 char str_age
[BUFSIZ
];
2231 prefix2str(&ri
->rk
.vn
, str_vn
, sizeof(str_vn
));
2232 p
= index(str_vn
, '/');
2236 prefix2str(&ri
->un
, str_un
, sizeof(str_un
));
2237 p
= index(str_un
, '/');
2241 rfapiFormatSeconds(ri
->lifetime
, str_lifetime
, BUFSIZ
);
2242 #ifdef RFAPI_REGISTRATIONS_REPORT_AGE
2243 rfapiFormatAge(ri
->last_sent_time
, str_age
, BUFSIZ
);
2246 time_t now
= monotime(NULL
);
2248 ri
->last_sent_time
+ (time_t)ri
->lifetime
;
2249 /* allow for delayed/async removal */
2250 rfapiFormatSeconds((expire
> now
? expire
- now
: 1),
2255 fp(out
, " %c %-20s %-15s %-15s %-4u %-8s %-8s %pRDP\n",
2256 deleted
? 'r' : ' ', *printedprefix
? "" : str_pfx
, str_vn
,
2257 str_un
, ri
->cost
, str_lifetime
, str_age
, &ri
->rk
.rd
);
2259 if (!*printedprefix
)
2262 return routes_displayed
;
2267 * This one is for debugging (set stream to NULL to send output to log)
/*
 * Debug helper: print the routes held in skiplist sl for prefix pfx.
 *
 * stream is resolved with rfapiStream2Vty() (per the note above this
 * definition, a NULL stream sends the output to the log); a result of
 * 0 from that call is tested first.  pfx is formatted with prefix2str()
 * and the rows are produced by print_rib_sl() with deleted = 0.  The
 * value returned by print_rib_sl() is not used in the visible lines.
 */
2269 static void rfapiRibShowRibSl(void *stream
, struct prefix
*pfx
,
2270 struct skiplist
*sl
)
2272 int (*fp
)(void *, const char *, ...);
2275 const char *vty_newline
;
2277 int nhs_displayed
= 0;
2278 char str_pfx
[PREFIX_STRLEN
];
2279 int printedprefix
= 0;
2281 if (rfapiStream2Vty(stream
, &fp
, &vty
, &out
, &vty_newline
) == 0)
2284 prefix2str(pfx
, str_pfx
, sizeof(str_pfx
));
2287 print_rib_sl(fp
, vty
, out
, sl
, 0, str_pfx
, &printedprefix
);
2291 void rfapiRibShowResponses(void *stream
, struct prefix
*pfx_match
,
2294 int (*fp
)(void *, const char *, ...);
2297 const char *vty_newline
;
2299 struct rfapi_descriptor
*rfd
;
2300 struct listnode
*node
;
2302 struct bgp
*bgp
= bgp_get_default();
2303 int printedheader
= 0;
2304 int routes_total
= 0;
2306 int prefixes_total
= 0;
2307 int prefixes_displayed
= 0;
2309 int nves_with_routes
= 0;
2310 int nves_displayed
= 0;
2311 int routes_displayed
= 0;
2312 int nhs_displayed
= 0;
2314 if (rfapiStream2Vty(stream
, &fp
, &vty
, &out
, &vty_newline
) == 0)
2317 fp(out
, "Unable to find default BGP instance\n");
2324 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
, rfd
)) {
2330 if (rfd
->rib_prefix_count
)
2333 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
2335 struct agg_node
*rn
;
2340 for (rn
= agg_route_top(rfd
->rib
[afi
]); rn
;
2341 rn
= agg_route_next(rn
)) {
2342 const struct prefix
*p
=
2343 agg_node_get_prefix(rn
);
2344 struct skiplist
*sl
;
2345 char str_pfx
[PREFIX_STRLEN
];
2346 int printedprefix
= 0;
2357 nhs_total
+= skiplist_count(sl
);
2360 if (pfx_match
&& !prefix_match(pfx_match
, p
)
2361 && !prefix_match(p
, pfx_match
))
2364 ++prefixes_displayed
;
2366 if (!printedheader
) {
2370 show_removed
? "Removed" : "Active");
2371 fp(out
, "%-15s %-15s\n", "Querying VN",
2374 " %-20s %-15s %-15s %4s %-8s %-8s\n",
2375 "Prefix", "Registered VN",
2376 "Registered UN", "Cost", "Lifetime",
2377 #ifdef RFAPI_REGISTRATIONS_REPORT_AGE
2385 char str_vn
[BUFSIZ
];
2386 char str_un
[BUFSIZ
];
2391 fp(out
, "%-15s %-15s\n",
2392 rfapiRfapiIpAddr2Str(&rfd
->vn_addr
,
2394 rfapiRfapiIpAddr2Str(&rfd
->un_addr
,
2398 prefix2str(p
, str_pfx
, sizeof(str_pfx
));
2399 // fp(out, " %s\n", buf); /* prefix */
2402 nhs_displayed
+= print_rib_sl(
2403 fp
, vty
, out
, sl
, show_removed
, str_pfx
,
2411 fp(out
, "Displayed %u NVEs, and %u out of %u %s prefixes",
2412 nves_displayed
, routes_displayed
, routes_total
,
2413 show_removed
? "removed" : "active");
2414 if (nhs_displayed
!= routes_displayed
2415 || nhs_total
!= routes_total
)
2416 fp(out
, " with %u out of %u next hops", nhs_displayed
,