3 * Copyright 2009-2016, LabN Consulting, L.L.C.
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; see the file COPYING; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 * Purpose: maintain per-nve ribs and generate change lists
26 #include "lib/zebra.h"
27 #include "lib/prefix.h"
28 #include "lib/agg_table.h"
30 #include "lib/memory.h"
32 #include "lib/skiplist.h"
33 #include "lib/workqueue.h"
35 #include "bgpd/bgpd.h"
36 #include "bgpd/bgp_route.h"
37 #include "bgpd/bgp_ecommunity.h"
38 #include "bgpd/bgp_mplsvpn.h"
39 #include "bgpd/bgp_vnc_types.h"
41 #include "bgpd/rfapi/rfapi.h"
42 #include "bgpd/rfapi/bgp_rfapi_cfg.h"
43 #include "bgpd/rfapi/rfapi_import.h"
44 #include "bgpd/rfapi/rfapi_private.h"
45 #include "bgpd/rfapi/rfapi_vty.h"
46 #include "bgpd/rfapi/vnc_import_bgp.h"
47 #include "bgpd/rfapi/rfapi_rib.h"
48 #include "bgpd/rfapi/rfapi_monitor.h"
49 #include "bgpd/rfapi/rfapi_encap_tlv.h"
50 #include "bgpd/rfapi/vnc_debug.h"
52 #define DEBUG_PROCESS_PENDING_NODE 0
53 #define DEBUG_PENDING_DELETE_ROUTE 0
55 #define DEBUG_RIB_SL_RD 0
59 static void rfapiRibShowRibSl(void *stream
, struct prefix
*pfx
,
66 * Model of the set of routes currently in the NVE's RIB.
68 * node->info ptr to "struct skiplist".
69 * MUST be NULL if there are no routes.
70 * key = ptr to struct prefix {vn}
71 * val = ptr to struct rfapi_info
73 * skiplist.cmp = vnc_prefix_cmp
75 * node->aggregate ptr to "struct skiplist".
76 * key = ptr to struct prefix {vn}
77 * val = ptr to struct rfapi_info
78 * skiplist.del = rfapi_info_free
79 * skiplist.cmp = vnc_prefix_cmp
81 * This skiplist at "aggregate"
82 * contains the routes recently
88 * Sparse list of prefixes that need to be updated. Each node
89 * will have the complete set of routes for the prefix.
91 * node->info ptr to "struct list" (lib/linklist.h)
93 * List of routes sorted lowest cost first.
94 * This list is how the new complete set
95 * of routes should look.
96 * Set if there are updates to the prefix;
97 * MUST be NULL if there are no updates.
99 * .data = ptr to struct rfapi_info
100 * list.cmp = NULL (sorted manually)
101 * list.del = rfapi_info_free
103 * Special case: if node->info is 1, it means
104 * "delete all routes at this prefix".
106 * node->aggregate ptr to struct skiplist
107 * key = ptr to struct prefix {vn} (part of ri)
108 * val = struct rfapi_info
109 * skiplist.cmp = vnc_prefix_cmp
110 * skiplist.del = NULL
112 * ptlist is rewritten anew each time
113 * rfapiRibUpdatePendingNode() is called
115 * THE ptlist VALUES ARE REFERENCES TO THE
116 * rfapi_info STRUCTS IN THE node->info LIST.
120 * iterate over RIB to count responses, compare with running counters
122 void rfapiRibCheckCounts(
123 int checkstats
, /* validate rfd & global counts */
124 unsigned int offset
) /* number of ri's held separately */
126 struct rfapi_descriptor
*rfd
;
127 struct listnode
*node
;
129 struct bgp
*bgp
= bgp_get_default();
131 uint32_t t_pfx_active
= 0;
132 uint32_t t_pfx_deleted
= 0;
134 uint32_t t_ri_active
= 0;
135 uint32_t t_ri_deleted
= 0;
136 uint32_t t_ri_pend
= 0;
138 unsigned int alloc_count
;
143 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
, rfd
)) {
146 uint32_t pfx_active
= 0;
147 uint32_t pfx_deleted
= 0;
149 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
153 for (rn
= agg_route_top(rfd
->rib
[afi
]); rn
;
154 rn
= agg_route_next(rn
)) {
156 struct skiplist
*sl
= rn
->info
;
157 struct skiplist
*dsl
= rn
->aggregate
;
158 uint32_t ri_active
= 0;
159 uint32_t ri_deleted
= 0;
162 ri_active
= skiplist_count(sl
);
164 t_ri_active
+= ri_active
;
170 ri_deleted
= skiplist_count(dsl
);
171 t_ri_deleted
+= ri_deleted
;
176 for (rn
= agg_route_top(rfd
->rib_pending
[afi
]); rn
;
177 rn
= agg_route_next(rn
)) {
179 struct list
*l
= rn
->info
; /* sorted by cost */
180 struct skiplist
*sl
= rn
->aggregate
;
181 uint32_t ri_pend_cost
= 0;
182 uint32_t ri_pend_uniq
= 0;
185 ri_pend_uniq
= skiplist_count(sl
);
188 if (l
&& (l
!= (void *)1)) {
189 ri_pend_cost
= l
->count
;
190 t_ri_pend
+= l
->count
;
193 assert(ri_pend_uniq
== ri_pend_cost
);
198 if (pfx_active
!= rfd
->rib_prefix_count
) {
199 vnc_zlog_debug_verbose(
200 "%s: rfd %p actual pfx count %u != running %u",
201 __func__
, rfd
, pfx_active
,
202 rfd
->rib_prefix_count
);
208 if (checkstats
&& bgp
->rfapi
) {
209 if (t_pfx_active
!= bgp
->rfapi
->rib_prefix_count_total
) {
210 vnc_zlog_debug_verbose(
211 "%s: actual total pfx count %u != running %u",
212 __func__
, t_pfx_active
,
213 bgp
->rfapi
->rib_prefix_count_total
);
219 * Check against memory allocation count
221 alloc_count
= mtype_stats_alloc(MTYPE_RFAPI_INFO
);
222 assert(t_ri_active
+ t_ri_deleted
+ t_ri_pend
+ offset
== alloc_count
);
225 static struct rfapi_info
*rfapi_info_new(void)
227 return XCALLOC(MTYPE_RFAPI_INFO
, sizeof(struct rfapi_info
));
230 void rfapiFreeRfapiUnOptionChain(struct rfapi_un_option
*p
)
233 struct rfapi_un_option
*next
;
236 XFREE(MTYPE_RFAPI_UN_OPTION
, p
);
241 void rfapiFreeRfapiVnOptionChain(struct rfapi_vn_option
*p
)
244 struct rfapi_vn_option
*next
;
247 XFREE(MTYPE_RFAPI_VN_OPTION
, p
);
253 static void rfapi_info_free(struct rfapi_info
*goner
)
256 if (goner
->tea_options
) {
257 rfapiFreeBgpTeaOptionChain(goner
->tea_options
);
258 goner
->tea_options
= NULL
;
260 if (goner
->un_options
) {
261 rfapiFreeRfapiUnOptionChain(goner
->un_options
);
262 goner
->un_options
= NULL
;
264 if (goner
->vn_options
) {
265 rfapiFreeRfapiVnOptionChain(goner
->vn_options
);
266 goner
->vn_options
= NULL
;
269 struct rfapi_rib_tcb
*tcb
;
271 tcb
= THREAD_ARG(goner
->timer
);
272 THREAD_OFF(goner
->timer
);
273 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
275 XFREE(MTYPE_RFAPI_INFO
, goner
);
280 * Timer control block for recently-deleted and expired routes
282 struct rfapi_rib_tcb
{
283 struct rfapi_descriptor
*rfd
;
285 struct rfapi_info
*ri
;
288 #define RFAPI_RIB_TCB_FLAG_DELETED 0x00000001
292 * remove route from rib
294 static void rfapiRibExpireTimer(struct thread
*t
)
296 struct rfapi_rib_tcb
*tcb
= THREAD_ARG(t
);
298 RFAPI_RIB_CHECK_COUNTS(1, 0);
301 * Forget reference to thread. Otherwise rfapi_info_free() will
302 * attempt to free thread pointer as an option chain
304 tcb
->ri
->timer
= NULL
;
306 /* "deleted" skiplist frees ri, "active" doesn't */
307 assert(!skiplist_delete(tcb
->sl
, &tcb
->ri
->rk
, NULL
));
310 * XXX in this case, skiplist has no delete function: we must
311 * therefore delete rfapi_info explicitly.
313 rfapi_info_free(tcb
->ri
);
316 if (skiplist_empty(tcb
->sl
)) {
317 if (CHECK_FLAG(tcb
->flags
, RFAPI_RIB_TCB_FLAG_DELETED
))
318 tcb
->rn
->aggregate
= NULL
;
320 struct bgp
*bgp
= bgp_get_default();
321 tcb
->rn
->info
= NULL
;
322 RFAPI_RIB_PREFIX_COUNT_DECR(tcb
->rfd
, bgp
->rfapi
);
324 skiplist_free(tcb
->sl
);
325 agg_unlock_node(tcb
->rn
);
328 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
330 RFAPI_RIB_CHECK_COUNTS(1, 0);
333 static void rfapiRibStartTimer(struct rfapi_descriptor
*rfd
,
334 struct rfapi_info
*ri
,
335 struct agg_node
*rn
, /* route node attached to */
338 struct rfapi_rib_tcb
*tcb
= NULL
;
341 tcb
= THREAD_ARG(ri
->timer
);
342 THREAD_OFF(ri
->timer
);
344 tcb
= XCALLOC(MTYPE_RFAPI_RECENT_DELETE
,
345 sizeof(struct rfapi_rib_tcb
));
351 tcb
->sl
= (struct skiplist
*)rn
->aggregate
;
352 SET_FLAG(tcb
->flags
, RFAPI_RIB_TCB_FLAG_DELETED
);
354 tcb
->sl
= (struct skiplist
*)rn
->info
;
355 UNSET_FLAG(tcb
->flags
, RFAPI_RIB_TCB_FLAG_DELETED
);
358 vnc_zlog_debug_verbose("%s: rfd %p pfx %pRN life %u", __func__
, rfd
, rn
,
361 thread_add_timer(bm
->master
, rfapiRibExpireTimer
, tcb
, ri
->lifetime
,
365 extern void rfapi_rib_key_init(struct prefix
*prefix
, /* may be NULL */
366 struct prefix_rd
*rd
, /* may be NULL */
367 struct prefix
*aux
, /* may be NULL */
368 struct rfapi_rib_key
*rk
)
371 memset((void *)rk
, 0, sizeof(struct rfapi_rib_key
));
377 rk
->aux_prefix
= *aux
;
381 * Compares two <struct rfapi_rib_key>s
383 int rfapi_rib_key_cmp(const void *k1
, const void *k2
)
385 const struct rfapi_rib_key
*a
= (struct rfapi_rib_key
*)k1
;
386 const struct rfapi_rib_key
*b
= (struct rfapi_rib_key
*)k2
;
392 ret
= vnc_prefix_cmp(&a
->vn
, &b
->vn
);
396 ret
= vnc_prefix_cmp(&a
->rd
, &b
->rd
);
400 ret
= vnc_prefix_cmp(&a
->aux_prefix
, &b
->aux_prefix
);
407 * Note: this function will claim that two option chains are
408 * different unless their option items are in identical order.
409 * The consequence is that RFP updated responses can be sent
410 * unnecessarily, or that they might contain nexthop items
411 * that are not strictly needed.
413 * This function could be modified to compare option chains more
414 * thoroughly, but it's not clear that the extra computation would
417 static int bgp_tea_options_cmp(struct bgp_tea_options
*a
,
418 struct bgp_tea_options
*b
)
426 if (a
->type
!= b
->type
)
427 return (a
->type
- b
->type
);
428 if (a
->length
!= b
->length
)
429 return (a
->length
= b
->length
);
430 if ((rc
= memcmp(a
->value
, b
->value
, a
->length
)))
432 if (!a
->next
!= !b
->next
) { /* logical xor */
433 return (a
->next
- b
->next
);
436 return bgp_tea_options_cmp(a
->next
, b
->next
);
440 static int rfapi_info_cmp(struct rfapi_info
*a
, struct rfapi_info
*b
)
447 if ((rc
= rfapi_rib_key_cmp(&a
->rk
, &b
->rk
)))
450 if ((rc
= vnc_prefix_cmp(&a
->un
, &b
->un
)))
453 if (a
->cost
!= b
->cost
)
454 return (a
->cost
- b
->cost
);
456 if (a
->lifetime
!= b
->lifetime
)
457 return (a
->lifetime
- b
->lifetime
);
459 if ((rc
= bgp_tea_options_cmp(a
->tea_options
, b
->tea_options
)))
465 void rfapiRibClear(struct rfapi_descriptor
*rfd
)
473 bgp
= bgp_get_default();
474 #ifdef DEBUG_L2_EXTRA
475 vnc_zlog_debug_verbose("%s: rfd=%p", __func__
, rfd
);
478 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
482 if (rfd
->rib_pending
[afi
]) {
483 for (pn
= agg_route_top(rfd
->rib_pending
[afi
]); pn
;
484 pn
= agg_route_next(pn
)) {
487 * free references into the rfapi_info
489 * freeing the structures themselves
494 pn
->aggregate
= NULL
;
496 pn
); /* skiplist deleted */
499 * free the rfapi_info structures
502 if (pn
->info
!= (void *)1) {
508 /* linklist or 1 deleted */
514 for (rn
= agg_route_top(rfd
->rib
[afi
]); rn
;
515 rn
= agg_route_next(rn
)) {
518 struct rfapi_info
*ri
;
520 while (0 == skiplist_first(
527 skiplist_delete_first(
532 (struct skiplist
*)rn
->info
);
535 RFAPI_RIB_PREFIX_COUNT_DECR(rfd
,
540 struct rfapi_info
*ri_del
;
542 /* delete skiplist & contents */
543 while (!skiplist_first(
546 NULL
, (void **)&ri_del
)) {
548 /* sl->del takes care of ri_del
550 skiplist_delete_first((
558 rn
->aggregate
= NULL
;
564 if (rfd
->updated_responses_queue
)
565 work_queue_free_and_null(&rfd
->updated_responses_queue
);
569 * Release all dynamically-allocated memory that is part of an HD's RIB
571 void rfapiRibFree(struct rfapi_descriptor
*rfd
)
577 * NB rfd is typically detached from master list, so is not included
578 * in the count performed by RFAPI_RIB_CHECK_COUNTS
582 * Free routes attached to radix trees
586 /* Now the uncounted rfapi_info's are freed, so the check should succeed
588 RFAPI_RIB_CHECK_COUNTS(1, 0);
593 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
594 if (rfd
->rib_pending
[afi
])
595 agg_table_finish(rfd
->rib_pending
[afi
]);
596 rfd
->rib_pending
[afi
] = NULL
;
599 agg_table_finish(rfd
->rib
[afi
]);
600 rfd
->rib
[afi
] = NULL
;
602 /* NB agg_table_finish frees only prefix nodes, not chained
604 if (rfd
->rsp_times
[afi
])
605 agg_table_finish(rfd
->rsp_times
[afi
]);
606 rfd
->rib
[afi
] = NULL
;
611 * Copies struct bgp_path_info to struct rfapi_info, except for rk fields and un
613 static void rfapiRibBi2Ri(struct bgp_path_info
*bpi
, struct rfapi_info
*ri
,
616 struct bgp_attr_encap_subtlv
*pEncap
;
618 ri
->cost
= rfapiRfpCost(bpi
->attr
);
619 ri
->lifetime
= lifetime
;
621 /* This loop based on rfapiRouteInfo2NextHopEntry() */
622 for (pEncap
= bgp_attr_get_vnc_subtlvs(bpi
->attr
); pEncap
;
623 pEncap
= pEncap
->next
) {
624 struct bgp_tea_options
*hop
;
626 switch (pEncap
->type
) {
627 case BGP_VNC_SUBTLV_TYPE_LIFETIME
:
628 /* use configured lifetime, not attr lifetime */
631 case BGP_VNC_SUBTLV_TYPE_RFPOPTION
:
632 hop
= XCALLOC(MTYPE_BGP_TEA_OPTIONS
,
633 sizeof(struct bgp_tea_options
));
635 hop
->type
= pEncap
->value
[0];
636 hop
->length
= pEncap
->value
[1];
637 hop
->value
= XCALLOC(MTYPE_BGP_TEA_OPTIONS_VALUE
,
640 memcpy(hop
->value
, pEncap
->value
+ 2,
642 if (hop
->length
> pEncap
->length
- 2) {
644 "%s: VNC subtlv length mismatch: RFP option says %d, attr says %d (shrinking)",
645 __func__
, hop
->length
,
647 hop
->length
= pEncap
->length
- 2;
649 hop
->next
= ri
->tea_options
;
650 ri
->tea_options
= hop
;
658 rfapi_un_options_free(ri
->un_options
); /* maybe free old version */
659 ri
->un_options
= rfapi_encap_tlv_to_un_option(bpi
->attr
);
665 && decode_rd_type(bpi
->extra
->vnc
.import
.rd
.val
)
666 == RD_TYPE_VNC_ETH
) {
669 struct rfapi_vn_option
*vo
;
671 vo
= XCALLOC(MTYPE_RFAPI_VN_OPTION
,
672 sizeof(struct rfapi_vn_option
));
675 vo
->type
= RFAPI_VN_OPTION_TYPE_L2ADDR
;
677 /* copy from RD already stored in bpi, so we don't need it_node
679 memcpy(&vo
->v
.l2addr
.macaddr
, bpi
->extra
->vnc
.import
.rd
.val
+ 2,
682 (void)rfapiEcommunityGetLNI(bgp_attr_get_ecommunity(bpi
->attr
),
683 &vo
->v
.l2addr
.logical_net_id
);
684 (void)rfapiEcommunityGetEthernetTag(
685 bgp_attr_get_ecommunity(bpi
->attr
),
686 &vo
->v
.l2addr
.tag_id
);
688 /* local_nve_id comes from RD */
689 vo
->v
.l2addr
.local_nve_id
= bpi
->extra
->vnc
.import
.rd
.val
[1];
691 /* label comes from MP_REACH_NLRI label */
692 vo
->v
.l2addr
.label
= decode_label(&bpi
->extra
->label
[0]);
694 rfapi_vn_options_free(
695 ri
->vn_options
); /* maybe free old version */
700 * If there is an auxiliary IP address (L2 can have it), copy it
702 if (bpi
->extra
&& bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
703 ri
->rk
.aux_prefix
= bpi
->extra
->vnc
.import
.aux_prefix
;
710 * Install route into NVE RIB model so as to be consistent with
711 * caller's response to rfapi_query().
713 * Also: return indication to caller whether this specific route
714 * should be included in the response to the NVE according to
715 * the following tests:
717 * 1. If there were prior duplicates of this route in this same
718 * query response, don't include the route.
722 * 0 OK to include route in response
723 * !0 do not include route in response
725 int rfapiRibPreloadBi(
726 struct agg_node
*rfd_rib_node
, /* NULL = don't preload or filter */
727 struct prefix
*pfx_vn
, struct prefix
*pfx_un
, uint32_t lifetime
,
728 struct bgp_path_info
*bpi
)
730 struct rfapi_descriptor
*rfd
;
731 struct skiplist
*slRibPt
= NULL
;
732 struct rfapi_info
*ori
= NULL
;
733 struct rfapi_rib_key rk
;
734 struct agg_node
*trn
;
736 const struct prefix
*p
= agg_node_get_prefix(rfd_rib_node
);
741 afi
= family2afi(p
->family
);
743 rfd
= agg_get_table_info(agg_get_table(rfd_rib_node
));
745 memset((void *)&rk
, 0, sizeof(rk
));
747 rk
.rd
= bpi
->extra
->vnc
.import
.rd
;
750 * If there is an auxiliary IP address (L2 can have it), copy it
752 if (bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
753 rk
.aux_prefix
= bpi
->extra
->vnc
.import
.aux_prefix
;
757 * is this route already in NVE's RIB?
759 slRibPt
= (struct skiplist
*)rfd_rib_node
->info
;
761 if (slRibPt
&& !skiplist_search(slRibPt
, &rk
, (void **)&ori
)) {
763 if ((ori
->rsp_counter
== rfd
->rsp_counter
)
764 && (ori
->last_sent_time
== rfd
->rsp_time
)) {
765 return -1; /* duplicate in this response */
768 /* found: update contents of existing route in RIB */
770 rfapiRibBi2Ri(bpi
, ori
, lifetime
);
772 /* not found: add new route to RIB */
773 ori
= rfapi_info_new();
776 rfapiRibBi2Ri(bpi
, ori
, lifetime
);
779 slRibPt
= skiplist_new(0, rfapi_rib_key_cmp
, NULL
);
780 rfd_rib_node
->info
= slRibPt
;
781 agg_lock_node(rfd_rib_node
);
782 RFAPI_RIB_PREFIX_COUNT_INCR(rfd
, rfd
->bgp
->rfapi
);
784 skiplist_insert(slRibPt
, &ori
->rk
, ori
);
787 ori
->last_sent_time
= rfapi_time(NULL
);
792 RFAPI_RIB_CHECK_COUNTS(0, 0);
793 rfapiRibStartTimer(rfd
, ori
, rfd_rib_node
, 0);
794 RFAPI_RIB_CHECK_COUNTS(0, 0);
797 * Update last sent time for prefix
799 trn
= agg_node_get(rfd
->rsp_times
[afi
], p
); /* locks trn */
800 trn
->info
= (void *)(uintptr_t)monotime(NULL
);
801 if (agg_node_get_lock_count(trn
) > 1)
802 agg_unlock_node(trn
);
808 * Frees rfapi_info items at node
810 * Adjust 'rib' and 'rib_pending' as follows:
812 * If rib_pending node->info is 1 (magic value):
813 * callback: NHL = RIB NHL with lifetime = withdraw_lifetime_value
814 * RIB = remove all routes at the node
817 * For each item at rib node:
818 * if not present in pending node, move RIB item to "delete list"
820 * For each item at pending rib node:
821 * if present (same vn/un) in rib node with same lifetime & options, drop
822 * matching item from pending node
824 * For each remaining item at pending rib node, add or replace item
827 * Construct NHL as concatenation of pending list + delete list
831 static void process_pending_node(struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
833 struct agg_node
*pn
, /* pending node */
834 struct rfapi_next_hop_entry
**head
,
835 struct rfapi_next_hop_entry
**tail
)
837 struct listnode
*node
= NULL
;
838 struct listnode
*nnode
= NULL
;
839 struct rfapi_info
*ri
= NULL
; /* happy valgrind */
840 struct rfapi_ip_prefix hp
= {0}; /* pfx to put in NHE */
841 struct agg_node
*rn
= NULL
;
842 struct skiplist
*slRibPt
= NULL
; /* rib list */
843 struct skiplist
*slPendPt
= NULL
;
844 struct list
*lPendCost
= NULL
;
845 struct list
*delete_list
= NULL
;
846 int printedprefix
= 0;
847 int rib_node_started_nonempty
= 0;
848 int sendingsomeroutes
= 0;
849 const struct prefix
*p
;
850 #if DEBUG_PROCESS_PENDING_NODE
851 unsigned int count_rib_initial
= 0;
852 unsigned int count_pend_vn_initial
= 0;
853 unsigned int count_pend_cost_initial
= 0;
857 p
= agg_node_get_prefix(pn
);
858 vnc_zlog_debug_verbose("%s: afi=%d, %pRN pn->info=%p", __func__
, afi
,
861 if (AFI_L2VPN
!= afi
) {
862 rfapiQprefix2Rprefix(p
, &hp
);
865 RFAPI_RIB_CHECK_COUNTS(1, 0);
868 * Find corresponding RIB node
870 rn
= agg_node_get(rfd
->rib
[afi
], p
); /* locks rn */
873 * RIB skiplist has key=rfapi_addr={vn,un}, val = rfapi_info,
874 * skiplist.del = NULL
876 slRibPt
= (struct skiplist
*)rn
->info
;
878 rib_node_started_nonempty
= 1;
880 slPendPt
= (struct skiplist
*)(pn
->aggregate
);
881 lPendCost
= (struct list
*)(pn
->info
);
883 #if DEBUG_PROCESS_PENDING_NODE
886 count_rib_initial
= skiplist_count(slRibPt
);
889 count_pend_vn_initial
= skiplist_count(slPendPt
);
891 if (lPendCost
&& lPendCost
!= (struct list
*)1)
892 count_pend_cost_initial
= lPendCost
->count
;
897 * Handle special case: delete all routes at prefix
899 if (lPendCost
== (struct list
*)1) {
900 vnc_zlog_debug_verbose("%s: lPendCost=1 => delete all",
902 if (slRibPt
&& !skiplist_empty(slRibPt
)) {
903 delete_list
= list_new();
905 == skiplist_first(slRibPt
, NULL
, (void **)&ri
)) {
906 listnode_add(delete_list
, ri
);
907 vnc_zlog_debug_verbose(
908 "%s: after listnode_add, delete_list->count=%d",
909 __func__
, delete_list
->count
);
910 rfapiFreeBgpTeaOptionChain(ri
->tea_options
);
911 ri
->tea_options
= NULL
;
914 struct rfapi_rib_tcb
*tcb
;
916 tcb
= THREAD_ARG(ri
->timer
);
917 THREAD_OFF(ri
->timer
);
918 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
921 vnc_zlog_debug_verbose(
922 "%s: put dl pfx=%pRN vn=%pFX un=%pFX cost=%d life=%d vn_options=%p",
923 __func__
, pn
, &ri
->rk
.vn
, &ri
->un
,
924 ri
->cost
, ri
->lifetime
, ri
->vn_options
);
926 skiplist_delete_first(slRibPt
);
929 assert(skiplist_empty(slRibPt
));
931 skiplist_free(slRibPt
);
932 rn
->info
= slRibPt
= NULL
;
935 lPendCost
= pn
->info
= NULL
;
941 skiplist_free(slRibPt
);
947 if (slPendPt
) { /* TBD I think we can toss this block */
948 skiplist_free(slPendPt
);
949 pn
->aggregate
= NULL
;
956 agg_unlock_node(rn
); /* agg_node_get() */
958 if (rib_node_started_nonempty
) {
959 RFAPI_RIB_PREFIX_COUNT_DECR(rfd
, bgp
->rfapi
);
962 RFAPI_RIB_CHECK_COUNTS(1, 0);
967 vnc_zlog_debug_verbose("%s: lPendCost->count=%d, slRibPt->count=%d",
969 (lPendCost
? (int)lPendCost
->count
: -1),
970 (slRibPt
? (int)slRibPt
->count
: -1));
973 * Iterate over routes at RIB Node.
974 * If not found at Pending Node, delete from RIB Node and add to
976 * If found at Pending Node
977 * If identical rfapi_info, delete from Pending Node
981 struct rfapi_info
*ori
;
984 * Iterate over RIB List
987 while (!skiplist_next(slRibPt
, NULL
, (void **)&ori
, &cursor
)) {
989 if (skiplist_search(slPendPt
, &ori
->rk
, (void **)&ri
)) {
991 * Not in Pending list, so it should be deleted
994 delete_list
= list_new();
995 listnode_add(delete_list
, ori
);
996 rfapiFreeBgpTeaOptionChain(ori
->tea_options
);
997 ori
->tea_options
= NULL
;
999 struct rfapi_rib_tcb
*tcb
;
1001 tcb
= THREAD_ARG(ori
->timer
);
1002 THREAD_OFF(ori
->timer
);
1003 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
1006 #if DEBUG_PROCESS_PENDING_NODE
1007 /* deleted from slRibPt below, after we're done
1009 vnc_zlog_debug_verbose(
1010 "%s: slRibPt ri %p not matched in pending list, delete",
1016 * Found in pending list. If same lifetime,
1018 * then remove from pending list because the
1022 if (!rfapi_info_cmp(ori
, ri
)) {
1023 skiplist_delete(slPendPt
, &ri
->rk
,
1027 /* linear walk: might need
1029 listnode_delete(lPendCost
,
1039 #if DEBUG_PROCESS_PENDING_NODE
1040 vnc_zlog_debug_verbose(
1041 "%s: slRibPt ri %p matched in pending list, %s",
1044 : "different info"));
1049 * Go back and delete items from RIB
1052 for (ALL_LIST_ELEMENTS_RO(delete_list
, node
, ri
)) {
1053 vnc_zlog_debug_verbose(
1054 "%s: deleting ri %p from slRibPt",
1056 assert(!skiplist_delete(slRibPt
, &ri
->rk
,
1059 if (skiplist_empty(slRibPt
)) {
1060 skiplist_free(slRibPt
);
1061 slRibPt
= rn
->info
= NULL
;
1062 agg_unlock_node(rn
);
1067 RFAPI_RIB_CHECK_COUNTS(0, (delete_list
? delete_list
->count
: 0));
1070 * Iterate over routes at Pending Node
1072 * If {vn} found at RIB Node, update RIB Node route contents to match PN
1073 * If {vn} NOT found at RIB Node, add copy to RIB Node
1076 for (ALL_LIST_ELEMENTS_RO(lPendCost
, node
, ri
)) {
1078 struct rfapi_info
*ori
;
1081 && !skiplist_search(slRibPt
, &ri
->rk
,
1084 /* found: update contents of existing route in
1087 ori
->cost
= ri
->cost
;
1088 ori
->lifetime
= ri
->lifetime
;
1089 rfapiFreeBgpTeaOptionChain(ori
->tea_options
);
1091 rfapiOptionsDup(ri
->tea_options
);
1092 ori
->last_sent_time
= rfapi_time(NULL
);
1094 rfapiFreeRfapiVnOptionChain(ori
->vn_options
);
1096 rfapiVnOptionsDup(ri
->vn_options
);
1098 rfapiFreeRfapiUnOptionChain(ori
->un_options
);
1100 rfapiUnOptionsDup(ri
->un_options
);
1102 vnc_zlog_debug_verbose(
1103 "%s: matched lPendCost item %p in slRibPt, rewrote",
1108 char buf_rd
[RD_ADDRSTRLEN
];
1110 /* not found: add new route to RIB */
1111 ori
= rfapi_info_new();
1114 ori
->cost
= ri
->cost
;
1115 ori
->lifetime
= ri
->lifetime
;
1117 rfapiOptionsDup(ri
->tea_options
);
1118 ori
->last_sent_time
= rfapi_time(NULL
);
1120 rfapiVnOptionsDup(ri
->vn_options
);
1122 rfapiUnOptionsDup(ri
->un_options
);
1125 slRibPt
= skiplist_new(
1126 0, rfapi_rib_key_cmp
, NULL
);
1130 skiplist_insert(slRibPt
, &ori
->rk
, ori
);
1133 prefix_rd2str(&ori
->rk
.rd
, buf_rd
,
1139 vnc_zlog_debug_verbose(
1140 "%s: nomatch lPendCost item %p in slRibPt, added (rd=%s)",
1141 __func__
, ri
, buf_rd
);
1147 RFAPI_RIB_CHECK_COUNTS(
1148 0, (delete_list
? delete_list
->count
: 0));
1149 rfapiRibStartTimer(rfd
, ori
, rn
, 0);
1150 RFAPI_RIB_CHECK_COUNTS(
1151 0, (delete_list
? delete_list
->count
: 0));
1158 * Construct NHL as concatenation of pending list + delete list
1162 RFAPI_RIB_CHECK_COUNTS(0, (delete_list
? delete_list
->count
: 0));
1169 vnc_zlog_debug_verbose("%s: lPendCost->count now %d", __func__
,
1171 vnc_zlog_debug_verbose("%s: For prefix %pRN (a)", __func__
, pn
);
1174 for (ALL_LIST_ELEMENTS(lPendCost
, node
, nnode
, ri
)) {
1176 struct rfapi_next_hop_entry
*new;
1177 struct agg_node
*trn
;
1179 new = XCALLOC(MTYPE_RFAPI_NEXTHOP
,
1180 sizeof(struct rfapi_next_hop_entry
));
1182 if (ri
->rk
.aux_prefix
.family
) {
1183 rfapiQprefix2Rprefix(&ri
->rk
.aux_prefix
,
1187 if (AFI_L2VPN
== afi
) {
1188 /* hp is 0; need to set length to match
1190 new->prefix
.length
=
1191 (ri
->rk
.vn
.family
== AF_INET
)
1196 new->prefix
.cost
= ri
->cost
;
1197 new->lifetime
= ri
->lifetime
;
1198 rfapiQprefix2Raddr(&ri
->rk
.vn
, &new->vn_address
);
1199 rfapiQprefix2Raddr(&ri
->un
, &new->un_address
);
1200 /* free option chain from ri */
1201 rfapiFreeBgpTeaOptionChain(ri
->tea_options
);
1204 NULL
; /* option chain was transferred to NHL */
1206 new->vn_options
= ri
->vn_options
;
1208 NULL
; /* option chain was transferred to NHL */
1210 new->un_options
= ri
->un_options
;
1212 NULL
; /* option chain was transferred to NHL */
1215 (*tail
)->next
= new;
1220 sendingsomeroutes
= 1;
1222 ++rfd
->stat_count_nh_reachable
;
1223 ++bgp
->rfapi
->stat
.count_updated_response_updates
;
1226 * update this NVE's timestamp for this prefix
1228 trn
= agg_node_get(rfd
->rsp_times
[afi
],
1230 trn
->info
= (void *)(uintptr_t)monotime(NULL
);
1231 if (agg_node_get_lock_count(trn
) > 1)
1232 agg_unlock_node(trn
);
1234 rfapiRfapiIpAddr2Str(&new->vn_address
, buf
, BUFSIZ
);
1235 rfapiRfapiIpAddr2Str(&new->un_address
, buf2
, BUFSIZ
);
1236 vnc_zlog_debug_verbose(
1237 "%s: add vn=%s un=%s cost=%d life=%d",
1238 __func__
, buf
, buf2
, new->prefix
.cost
,
1243 RFAPI_RIB_CHECK_COUNTS(0, (delete_list
? delete_list
->count
: 0));
1250 if (!printedprefix
) {
1251 vnc_zlog_debug_verbose("%s: For prefix %pRN (d)",
1254 vnc_zlog_debug_verbose("%s: delete_list has %d elements",
1255 __func__
, delete_list
->count
);
1257 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1258 if (!CHECK_FLAG(bgp
->rfapi_cfg
->flags
,
1259 BGP_VNC_CONFIG_RESPONSE_REMOVAL_DISABLE
)) {
1261 for (ALL_LIST_ELEMENTS(delete_list
, node
, nnode
, ri
)) {
1263 struct rfapi_next_hop_entry
*new;
1264 struct rfapi_info
*ri_del
;
1266 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1268 MTYPE_RFAPI_NEXTHOP
,
1269 sizeof(struct rfapi_next_hop_entry
));
1271 if (ri
->rk
.aux_prefix
.family
) {
1272 rfapiQprefix2Rprefix(&ri
->rk
.aux_prefix
,
1276 if (AFI_L2VPN
== afi
) {
1277 /* hp is 0; need to set length
1278 * to match AF of vn */
1279 new->prefix
.length
=
1287 new->prefix
.cost
= ri
->cost
;
1288 new->lifetime
= RFAPI_REMOVE_RESPONSE_LIFETIME
;
1289 rfapiQprefix2Raddr(&ri
->rk
.vn
,
1291 rfapiQprefix2Raddr(&ri
->un
, &new->un_address
);
1293 new->vn_options
= ri
->vn_options
;
1294 ri
->vn_options
= NULL
; /* option chain was
1295 transferred to NHL */
1297 new->un_options
= ri
->un_options
;
1298 ri
->un_options
= NULL
; /* option chain was
1299 transferred to NHL */
1302 (*tail
)->next
= new;
1307 ++rfd
->stat_count_nh_removal
;
1309 .count_updated_response_deletes
;
1311 rfapiRfapiIpAddr2Str(&new->vn_address
, buf
,
1313 rfapiRfapiIpAddr2Str(&new->un_address
, buf2
,
1315 vnc_zlog_debug_verbose(
1316 "%s: DEL vn=%s un=%s cost=%d life=%d",
1317 __func__
, buf
, buf2
, new->prefix
.cost
,
1320 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1322 * Update/add to list of recent deletions at
1325 if (!rn
->aggregate
) {
1326 rn
->aggregate
= skiplist_new(
1327 0, rfapi_rib_key_cmp
,
1332 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1334 /* sanity check lifetime */
1336 > RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
)
1338 RFAPI_LIFETIME_INFINITE_WITHDRAW_DELAY
;
1340 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1341 /* cancel normal expire timer */
1343 struct rfapi_rib_tcb
*tcb
;
1345 tcb
= THREAD_ARG(ri
->timer
);
1346 THREAD_OFF(ri
->timer
);
1347 XFREE(MTYPE_RFAPI_RECENT_DELETE
, tcb
);
1349 RFAPI_RIB_CHECK_COUNTS(0, delete_list
->count
);
1352 * Look in "recently-deleted" list
1354 if (skiplist_search(
1355 (struct skiplist
*)(rn
->aggregate
),
1356 &ri
->rk
, (void **)&ri_del
)) {
1360 RFAPI_RIB_CHECK_COUNTS(
1361 0, delete_list
->count
);
1363 * NOT in "recently-deleted" list
1367 node
); /* does not free ri */
1368 rc
= skiplist_insert(
1374 RFAPI_RIB_CHECK_COUNTS(
1375 0, delete_list
->count
);
1376 rfapiRibStartTimer(rfd
, ri
, rn
, 1);
1377 RFAPI_RIB_CHECK_COUNTS(
1378 0, delete_list
->count
);
1379 ri
->last_sent_time
= rfapi_time(NULL
);
1382 char buf_rd
[RD_ADDRSTRLEN
];
1384 vnc_zlog_debug_verbose(
1385 "%s: move route to recently deleted list, rd=%s",
1396 * IN "recently-deleted" list
1398 RFAPI_RIB_CHECK_COUNTS(
1399 0, delete_list
->count
);
1400 rfapiRibStartTimer(rfd
, ri_del
, rn
, 1);
1401 RFAPI_RIB_CHECK_COUNTS(
1402 0, delete_list
->count
);
1403 ri
->last_sent_time
= rfapi_time(NULL
);
1407 vnc_zlog_debug_verbose(
1408 "%s: response removal disabled, omitting removals",
1412 delete_list
->del
= (void (*)(void *))rfapi_info_free
;
1413 list_delete(&delete_list
);
1416 RFAPI_RIB_CHECK_COUNTS(0, 0);
1419 * Reset pending lists. The final agg_unlock_node() will probably
1420 * cause the pending node to be released.
1423 skiplist_free(slPendPt
);
1424 pn
->aggregate
= NULL
;
1425 agg_unlock_node(pn
);
1428 list_delete(&lPendCost
);
1430 agg_unlock_node(pn
);
1432 RFAPI_RIB_CHECK_COUNTS(0, 0);
1434 if (rib_node_started_nonempty
) {
1436 RFAPI_RIB_PREFIX_COUNT_DECR(rfd
, bgp
->rfapi
);
1440 RFAPI_RIB_PREFIX_COUNT_INCR(rfd
, bgp
->rfapi
);
1444 if (sendingsomeroutes
)
1445 rfapiMonitorTimersRestart(rfd
, p
);
1447 agg_unlock_node(rn
); /* agg_node_get() */
1449 RFAPI_RIB_CHECK_COUNTS(1, 0);
1453 * regardless of targets, construct a single callback by doing
1454 * only one traversal of the pending RIB
1460 static void rib_do_callback_onepass(struct rfapi_descriptor
*rfd
, afi_t afi
)
1462 struct bgp
*bgp
= bgp_get_default();
1463 struct rfapi_next_hop_entry
*head
= NULL
;
1464 struct rfapi_next_hop_entry
*tail
= NULL
;
1465 struct agg_node
*rn
;
1467 #ifdef DEBUG_L2_EXTRA
1468 vnc_zlog_debug_verbose("%s: rfd=%p, afi=%d", __func__
, rfd
, afi
);
1471 if (!rfd
->rib_pending
[afi
])
1476 for (rn
= agg_route_top(rfd
->rib_pending
[afi
]); rn
;
1477 rn
= agg_route_next(rn
)) {
1478 process_pending_node(bgp
, rfd
, afi
, rn
, &head
, &tail
);
1482 rfapi_response_cb_t
*f
;
1485 vnc_zlog_debug_verbose("%s: response callback NHL follows:",
1487 rfapiPrintNhl(NULL
, head
);
1490 if (rfd
->response_cb
)
1491 f
= rfd
->response_cb
;
1493 f
= bgp
->rfapi
->rfp_methods
.response_cb
;
1495 bgp
->rfapi
->flags
|= RFAPI_INCALLBACK
;
1496 vnc_zlog_debug_verbose("%s: invoking updated response callback",
1498 (*f
)(head
, rfd
->cookie
);
1499 bgp
->rfapi
->flags
&= ~RFAPI_INCALLBACK
;
1500 ++bgp
->rfapi
->response_updated_count
;
1504 static wq_item_status
rfapiRibDoQueuedCallback(struct work_queue
*wq
,
1507 struct rfapi_descriptor
*rfd
;
1509 uint32_t queued_flag
;
1511 RFAPI_RIB_CHECK_COUNTS(1, 0);
1513 rfd
= ((struct rfapi_updated_responses_queue
*)data
)->rfd
;
1514 afi
= ((struct rfapi_updated_responses_queue
*)data
)->afi
;
1516 /* Make sure the HD wasn't closed after the work item was scheduled */
1517 if (rfapi_check(rfd
))
1520 rib_do_callback_onepass(rfd
, afi
);
1522 queued_flag
= RFAPI_QUEUED_FLAG(afi
);
1524 UNSET_FLAG(rfd
->flags
, queued_flag
);
1526 RFAPI_RIB_CHECK_COUNTS(1, 0);
1531 static void rfapiRibQueueItemDelete(struct work_queue
*wq
, void *data
)
1533 XFREE(MTYPE_RFAPI_UPDATED_RESPONSE_QUEUE
, data
);
1536 static void updated_responses_queue_init(struct rfapi_descriptor
*rfd
)
1538 if (rfd
->updated_responses_queue
)
1541 rfd
->updated_responses_queue
=
1542 work_queue_new(bm
->master
, "rfapi updated responses");
1543 assert(rfd
->updated_responses_queue
);
1545 rfd
->updated_responses_queue
->spec
.workfunc
= rfapiRibDoQueuedCallback
;
1546 rfd
->updated_responses_queue
->spec
.del_item_data
=
1547 rfapiRibQueueItemDelete
;
1548 rfd
->updated_responses_queue
->spec
.max_retries
= 0;
1549 rfd
->updated_responses_queue
->spec
.hold
= 1;
1553 * Called when an import table node is modified. Construct a
1554 * new complete nexthop list, sorted by cost (lowest first),
1555 * based on the import table node.
1557 * Filter out duplicate nexthops (vn address). There should be
1558 * only one UN address per VN address from the point of view of
1559 * a given import table, so we can probably ignore UN addresses
1562 * Based on rfapiNhlAddNodeRoutes()
1564 void rfapiRibUpdatePendingNode(
1565 struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
1566 struct rfapi_import_table
*it
, /* needed for L2 */
1567 struct agg_node
*it_node
, uint32_t lifetime
)
1569 const struct prefix
*prefix
;
1570 struct bgp_path_info
*bpi
;
1571 struct agg_node
*pn
;
1573 uint32_t queued_flag
;
1576 vnc_zlog_debug_verbose("%s: entry", __func__
);
1578 if (CHECK_FLAG(bgp
->rfapi_cfg
->flags
, BGP_VNC_CONFIG_CALLBACK_DISABLE
))
1581 vnc_zlog_debug_verbose("%s: callbacks are not disabled", __func__
);
1583 RFAPI_RIB_CHECK_COUNTS(1, 0);
1585 prefix
= agg_node_get_prefix(it_node
);
1586 afi
= family2afi(prefix
->family
);
1587 vnc_zlog_debug_verbose("%s: prefix=%pFX", __func__
, prefix
);
1589 pn
= agg_node_get(rfd
->rib_pending
[afi
], prefix
);
1592 vnc_zlog_debug_verbose("%s: pn->info=%p, pn->aggregate=%p", __func__
,
1593 pn
->info
, pn
->aggregate
);
1595 if (pn
->aggregate
) {
1597 * free references into the rfapi_info structures before
1598 * freeing the structures themselves
1600 skiplist_free((struct skiplist
*)(pn
->aggregate
));
1601 pn
->aggregate
= NULL
;
1602 agg_unlock_node(pn
); /* skiplist deleted */
1607 * free the rfapi_info structures
1610 if (pn
->info
!= (void *)1) {
1611 list_delete((struct list
**)(&pn
->info
));
1614 agg_unlock_node(pn
); /* linklist or 1 deleted */
1618 * The BPIs in the import table are already sorted by cost
1620 for (bpi
= it_node
->info
; bpi
; bpi
= bpi
->next
) {
1622 struct rfapi_info
*ri
;
1623 struct prefix pfx_nh
;
1626 /* shouldn't happen */
1627 /* TBD increment error stats counter */
1631 rfapiNexthop2Prefix(bpi
->attr
, &pfx_nh
);
1634 * Omit route if nexthop is self
1636 if (CHECK_FLAG(bgp
->rfapi_cfg
->flags
,
1637 BGP_VNC_CONFIG_FILTER_SELF_FROM_RSP
)) {
1639 struct prefix pfx_vn
;
1641 assert(!rfapiRaddr2Qprefix(&rfd
->vn_addr
, &pfx_vn
));
1642 if (prefix_same(&pfx_vn
, &pfx_nh
))
1646 ri
= rfapi_info_new();
1648 ri
->rk
.rd
= bpi
->extra
->vnc
.import
.rd
;
1650 * If there is an auxiliary IP address (L2 can have it), copy it
1652 if (bpi
->extra
->vnc
.import
.aux_prefix
.family
) {
1653 ri
->rk
.aux_prefix
= bpi
->extra
->vnc
.import
.aux_prefix
;
1656 if (rfapiGetUnAddrOfVpnBi(bpi
, &ri
->un
)) {
1657 rfapi_info_free(ri
);
1661 if (!pn
->aggregate
) {
1663 skiplist_new(0, rfapi_rib_key_cmp
, NULL
);
1668 * If we have already added this nexthop, the insert will fail.
1669 * Note that the skiplist key is a pointer INTO the rfapi_info
1670 * structure which will be added to the "info" list.
1671 * The skiplist entry VALUE is not used for anything but
1672 * might be useful during debugging.
1674 if (skiplist_insert((struct skiplist
*)pn
->aggregate
, &ri
->rk
,
1680 rfapi_info_free(ri
);
1684 rfapiRibBi2Ri(bpi
, ri
, lifetime
);
1687 pn
->info
= list_new();
1688 ((struct list
*)(pn
->info
))->del
=
1689 (void (*)(void *))rfapi_info_free
;
1693 listnode_add((struct list
*)(pn
->info
), ri
);
1697 count
= ((struct list
*)(pn
->info
))->count
;
1702 assert(!pn
->aggregate
);
1703 pn
->info
= (void *)1; /* magic value means this node has no
1708 agg_unlock_node(pn
); /* agg_node_get */
1710 queued_flag
= RFAPI_QUEUED_FLAG(afi
);
1712 if (!CHECK_FLAG(rfd
->flags
, queued_flag
)) {
1714 struct rfapi_updated_responses_queue
*urq
;
1716 urq
= XCALLOC(MTYPE_RFAPI_UPDATED_RESPONSE_QUEUE
,
1717 sizeof(struct rfapi_updated_responses_queue
));
1718 if (!rfd
->updated_responses_queue
)
1719 updated_responses_queue_init(rfd
);
1721 SET_FLAG(rfd
->flags
, queued_flag
);
1724 work_queue_add(rfd
->updated_responses_queue
, urq
);
1726 RFAPI_RIB_CHECK_COUNTS(1, 0);
1729 void rfapiRibUpdatePendingNodeSubtree(
1730 struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
1731 struct rfapi_import_table
*it
, struct agg_node
*it_node
,
1732 struct agg_node
*omit_subtree
, /* may be NULL */
1735 /* FIXME: need to find a better way here to work without sticking our
1736 * hands in node->link */
1737 if (agg_node_left(it_node
)
1738 && (agg_node_left(it_node
) != omit_subtree
)) {
1739 if (agg_node_left(it_node
)->info
)
1740 rfapiRibUpdatePendingNode(
1741 bgp
, rfd
, it
, agg_node_left(it_node
), lifetime
);
1742 rfapiRibUpdatePendingNodeSubtree(bgp
, rfd
, it
,
1743 agg_node_left(it_node
),
1744 omit_subtree
, lifetime
);
1747 if (agg_node_right(it_node
)
1748 && (agg_node_right(it_node
) != omit_subtree
)) {
1749 if (agg_node_right(it_node
)->info
)
1750 rfapiRibUpdatePendingNode(bgp
, rfd
, it
,
1751 agg_node_right(it_node
),
1753 rfapiRibUpdatePendingNodeSubtree(bgp
, rfd
, it
,
1754 agg_node_right(it_node
),
1755 omit_subtree
, lifetime
);
1762 * 0 allow prefix to be included in response
1763 * !0 don't allow prefix to be included in response
1765 int rfapiRibFTDFilterRecentPrefix(
1766 struct rfapi_descriptor
*rfd
,
1767 struct agg_node
*it_rn
, /* import table node */
1768 struct prefix
*pfx_target_original
) /* query target */
1770 struct bgp
*bgp
= rfd
->bgp
;
1771 const struct prefix
*p
= agg_node_get_prefix(it_rn
);
1772 afi_t afi
= family2afi(p
->family
);
1774 struct agg_node
*trn
;
1777 * Not in FTD mode, so allow prefix
1779 if (bgp
->rfapi_cfg
->rfp_cfg
.download_type
!= RFAPI_RFP_DOWNLOAD_FULL
)
1784 * This matches behavior of now-obsolete rfapiRibFTDFilterRecent(),
1785 * but we need to decide if that is correct.
1787 if (p
->family
== AF_ETHERNET
)
1790 #ifdef DEBUG_FTD_FILTER_RECENT
1792 vnc_zlog_debug_verbose("%s: prefix %pFX", __func__
,
1793 agg_node_get_prefix(it_rn
));
1798 * prefix covers target address, so allow prefix
1800 if (prefix_match(p
, pfx_target_original
)) {
1801 #ifdef DEBUG_FTD_FILTER_RECENT
1802 vnc_zlog_debug_verbose("%s: prefix covers target, allowed",
1809 * check this NVE's timestamp for this prefix
1811 trn
= agg_node_get(rfd
->rsp_times
[afi
], p
); /* locks trn */
1812 prefix_time
= (time_t)trn
->info
;
1813 if (agg_node_get_lock_count(trn
) > 1)
1814 agg_unlock_node(trn
);
1816 #ifdef DEBUG_FTD_FILTER_RECENT
1817 vnc_zlog_debug_verbose("%s: last sent time %lu, last allowed time %lu",
1818 __func__
, prefix_time
,
1819 rfd
->ftd_last_allowed_time
);
1823 * haven't sent this prefix, which doesn't cover target address,
1824 * to NVE since ftd_advertisement_interval, so OK to send now.
1826 if (prefix_time
<= rfd
->ftd_last_allowed_time
)
1833 * Call when rfapi returns from rfapi_query() so the RIB reflects
1834 * the routes sent to the NVE before the first updated response
1836 * Also: remove duplicates from response. Caller should use returned
1837 * value of nexthop chain.
1839 struct rfapi_next_hop_entry
*
1840 rfapiRibPreload(struct bgp
*bgp
, struct rfapi_descriptor
*rfd
,
1841 struct rfapi_next_hop_entry
*response
, int use_eth_resolution
)
1843 struct rfapi_next_hop_entry
*nhp
;
1844 struct rfapi_next_hop_entry
*nhp_next
;
1845 struct rfapi_next_hop_entry
*head
= NULL
;
1846 struct rfapi_next_hop_entry
*tail
= NULL
;
1847 time_t new_last_sent_time
;
1849 vnc_zlog_debug_verbose("%s: loading response=%p, use_eth_resolution=%d",
1850 __func__
, response
, use_eth_resolution
);
1852 new_last_sent_time
= rfapi_time(NULL
);
1854 for (nhp
= response
; nhp
; nhp
= nhp_next
) {
1857 struct rfapi_rib_key rk
;
1859 struct rfapi_info
*ri
;
1861 struct agg_node
*rn
;
1862 int rib_node_started_nonempty
= 0;
1863 struct agg_node
*trn
;
1866 /* save in case we delete nhp */
1867 nhp_next
= nhp
->next
;
1869 if (nhp
->lifetime
== RFAPI_REMOVE_RESPONSE_LIFETIME
) {
1871 * weird, shouldn't happen
1873 vnc_zlog_debug_verbose(
1874 "%s: got nhp->lifetime == RFAPI_REMOVE_RESPONSE_LIFETIME",
1880 if (use_eth_resolution
) {
1881 /* get the prefix of the ethernet address in the L2
1883 struct rfapi_l2address_option
*pL2o
;
1884 struct rfapi_vn_option
*vo
;
1887 * Look for VN option of type
1888 * RFAPI_VN_OPTION_TYPE_L2ADDR
1890 for (pL2o
= NULL
, vo
= nhp
->vn_options
; vo
;
1892 if (RFAPI_VN_OPTION_TYPE_L2ADDR
== vo
->type
) {
1893 pL2o
= &vo
->v
.l2addr
;
1900 * not supposed to happen
1902 vnc_zlog_debug_verbose("%s: missing L2 info",
1908 rfapiL2o2Qprefix(pL2o
, &pfx
);
1910 rfapiRprefix2Qprefix(&nhp
->prefix
, &pfx
);
1911 afi
= family2afi(pfx
.family
);
1915 * TBD for ethernet, rib must know the right way to distinguish
1918 * Current approach: prefix is key to radix tree; then
1919 * each prefix has a set of routes with unique VN addrs
1923 * Look up prefix in RIB
1925 rn
= agg_node_get(rfd
->rib
[afi
], &pfx
); /* locks rn */
1928 rib_node_started_nonempty
= 1;
1930 rn
->info
= skiplist_new(0, rfapi_rib_key_cmp
, NULL
);
1935 * Look up route at prefix
1938 memset((void *)&rk
, 0, sizeof(rk
));
1939 assert(!rfapiRaddr2Qprefix(&nhp
->vn_address
, &rk
.vn
));
1941 if (use_eth_resolution
) {
1942 /* copy what came from aux_prefix to rk.aux_prefix */
1943 rfapiRprefix2Qprefix(&nhp
->prefix
, &rk
.aux_prefix
);
1944 if (RFAPI_0_PREFIX(&rk
.aux_prefix
)
1945 && RFAPI_HOST_PREFIX(&rk
.aux_prefix
)) {
1946 /* mark as "none" if nhp->prefix is 0/32 or
1948 rk
.aux_prefix
.family
= AF_UNSPEC
;
1954 char str_aux_prefix
[PREFIX_STRLEN
];
1956 str_aux_prefix
[0] = 0;
1958 prefix2str(&rk
.aux_prefix
, str_aux_prefix
,
1959 sizeof(str_aux_prefix
));
1961 if (!rk
.aux_prefix
.family
) {
1963 vnc_zlog_debug_verbose(
1964 "%s: rk.vn=%pFX rk.aux_prefix=%s", __func__
,
1966 (rk
.aux_prefix
.family
? str_aux_prefix
: "-"));
1968 vnc_zlog_debug_verbose(
1969 "%s: RIB skiplist for this prefix follows", __func__
);
1970 rfapiRibShowRibSl(NULL
, agg_node_get_prefix(rn
),
1971 (struct skiplist
*)rn
->info
);
1975 if (!skiplist_search((struct skiplist
*)rn
->info
, &rk
,
1978 * Already have this route; make values match
1980 rfapiFreeRfapiUnOptionChain(ri
->un_options
);
1981 ri
->un_options
= NULL
;
1982 rfapiFreeRfapiVnOptionChain(ri
->vn_options
);
1983 ri
->vn_options
= NULL
;
1986 vnc_zlog_debug_verbose("%s: found in RIB", __func__
);
1990 * Filter duplicate routes from initial response.
1991 * Check timestamps to avoid wraparound problems
1993 if ((ri
->rsp_counter
!= rfd
->rsp_counter
)
1994 || (ri
->last_sent_time
!= new_last_sent_time
)) {
1997 vnc_zlog_debug_verbose(
1998 "%s: allowed due to counter/timestamp diff",
2007 vnc_zlog_debug_verbose(
2008 "%s: allowed due to not yet in RIB", __func__
);
2010 /* not found: add new route to RIB */
2011 ri
= rfapi_info_new();
2017 assert(!rfapiRaddr2Qprefix(&nhp
->un_address
, &ri
->un
));
2018 ri
->cost
= nhp
->prefix
.cost
;
2019 ri
->lifetime
= nhp
->lifetime
;
2020 ri
->vn_options
= rfapiVnOptionsDup(nhp
->vn_options
);
2021 ri
->rsp_counter
= rfd
->rsp_counter
;
2022 ri
->last_sent_time
= rfapi_time(NULL
);
2026 rc
= skiplist_insert((struct skiplist
*)rn
->info
,
2031 if (!rib_node_started_nonempty
) {
2032 RFAPI_RIB_PREFIX_COUNT_INCR(rfd
, bgp
->rfapi
);
2035 RFAPI_RIB_CHECK_COUNTS(0, 0);
2036 rfapiRibStartTimer(rfd
, ri
, rn
, 0);
2037 RFAPI_RIB_CHECK_COUNTS(0, 0);
2039 agg_unlock_node(rn
);
2042 * update this NVE's timestamp for this prefix
2044 trn
= agg_node_get(rfd
->rsp_times
[afi
], &pfx
); /* locks trn */
2045 trn
->info
= (void *)(uintptr_t)monotime(NULL
);
2046 if (agg_node_get_lock_count(trn
) > 1)
2047 agg_unlock_node(trn
);
2049 vnc_zlog_debug_verbose(
2050 "%s: added pfx=%pFX nh[vn]=%pFX, cost=%u, lifetime=%u, allowed=%d",
2051 __func__
, &pfx
, &rk
.vn
, nhp
->prefix
.cost
, nhp
->lifetime
,
2062 rfapi_un_options_free(nhp
->un_options
);
2063 nhp
->un_options
= NULL
;
2064 rfapi_vn_options_free(nhp
->vn_options
);
2065 nhp
->vn_options
= NULL
;
2067 XFREE(MTYPE_RFAPI_NEXTHOP
, nhp
);
2076 void rfapiRibPendingDeleteRoute(struct bgp
*bgp
, struct rfapi_import_table
*it
,
2077 afi_t afi
, struct agg_node
*it_node
)
2079 struct rfapi_descriptor
*rfd
;
2080 struct listnode
*node
;
2081 const struct prefix
*p
= agg_node_get_prefix(it_node
);
2083 vnc_zlog_debug_verbose("%s: entry, it=%p, afi=%d, it_node=%p, pfx=%pRN",
2084 __func__
, it
, afi
, it_node
, it_node
);
2086 if (AFI_L2VPN
== afi
) {
2088 * ethernet import tables are per-LNI and each ethernet monitor
2089 * identifies the rfd that owns it.
2091 struct rfapi_monitor_eth
*m
;
2092 struct agg_node
*rn
;
2093 struct skiplist
*sl
;
2098 * route-specific monitors
2100 if ((sl
= RFAPI_MONITOR_ETH(it_node
))) {
2102 vnc_zlog_debug_verbose(
2103 "%s: route-specific skiplist: %p", __func__
,
2107 rc
= skiplist_next(sl
, NULL
, (void **)&m
, &cursor
);
2108 !rc
; rc
= skiplist_next(sl
, NULL
, (void **)&m
,
2111 #if DEBUG_PENDING_DELETE_ROUTE
2112 vnc_zlog_debug_verbose("%s: eth monitor rfd=%p",
2116 * If we have already sent a route with this
2118 * NVE, it's OK to send an update with the
2121 if ((rn
= agg_node_lookup(m
->rfd
->rib
[afi
],
2123 rfapiRibUpdatePendingNode(
2124 bgp
, m
->rfd
, it
, it_node
,
2125 m
->rfd
->response_lifetime
);
2126 agg_unlock_node(rn
);
2132 * all-routes/FTD monitors
2134 for (m
= it
->eth0_queries
; m
; m
= m
->next
) {
2135 #if DEBUG_PENDING_DELETE_ROUTE
2136 vnc_zlog_debug_verbose("%s: eth0 monitor rfd=%p",
2140 * If we have already sent a route with this prefix to
2142 * NVE, it's OK to send an update with the delete
2144 if ((rn
= agg_node_lookup(m
->rfd
->rib
[afi
], p
))) {
2145 rfapiRibUpdatePendingNode(
2146 bgp
, m
->rfd
, it
, it_node
,
2147 m
->rfd
->response_lifetime
);
2148 agg_unlock_node(rn
);
2154 * Find RFDs that reference this import table
2156 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
,
2159 struct agg_node
*rn
;
2161 vnc_zlog_debug_verbose(
2162 "%s: comparing rfd(%p)->import_table=%p to it=%p",
2163 __func__
, rfd
, rfd
->import_table
, it
);
2165 if (rfd
->import_table
!= it
)
2168 vnc_zlog_debug_verbose("%s: matched rfd %p", __func__
,
2172 * If we have sent a response to this NVE with this
2174 * previously, we should send an updated response.
2176 if ((rn
= agg_node_lookup(rfd
->rib
[afi
], p
))) {
2177 rfapiRibUpdatePendingNode(
2178 bgp
, rfd
, it
, it_node
,
2179 rfd
->response_lifetime
);
2180 agg_unlock_node(rn
);
2186 void rfapiRibShowResponsesSummary(void *stream
)
2188 int (*fp
)(void *, const char *, ...);
2191 const char *vty_newline
;
2192 struct bgp
*bgp
= bgp_get_default();
2195 int nves_with_nonempty_ribs
= 0;
2196 struct rfapi_descriptor
*rfd
;
2197 struct listnode
*node
;
2199 if (rfapiStream2Vty(stream
, &fp
, &vty
, &out
, &vty_newline
) == 0)
2202 fp(out
, "Unable to find default BGP instance\n");
2206 fp(out
, "%-24s ", "Responses: (Prefixes)");
2207 fp(out
, "%-8s %-8u ", "Active:", bgp
->rfapi
->rib_prefix_count_total
);
2208 fp(out
, "%-8s %-8u",
2209 "Maximum:", bgp
->rfapi
->rib_prefix_count_total_max
);
2212 fp(out
, "%-24s ", " (Updated)");
2213 fp(out
, "%-8s %-8u ",
2214 "Update:", bgp
->rfapi
->stat
.count_updated_response_updates
);
2215 fp(out
, "%-8s %-8u",
2216 "Remove:", bgp
->rfapi
->stat
.count_updated_response_deletes
);
2217 fp(out
, "%-8s %-8u", "Total:",
2218 bgp
->rfapi
->stat
.count_updated_response_updates
2219 + bgp
->rfapi
->stat
.count_updated_response_deletes
);
2222 fp(out
, "%-24s ", " (NVEs)");
2223 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
, rfd
)) {
2225 if (rfd
->rib_prefix_count
)
2226 ++nves_with_nonempty_ribs
;
2228 fp(out
, "%-8s %-8u ", "Active:", nves_with_nonempty_ribs
);
2229 fp(out
, "%-8s %-8u", "Total:", nves
);
2233 void rfapiRibShowResponsesSummaryClear(void)
2235 struct bgp
*bgp
= bgp_get_default();
2237 bgp
->rfapi
->rib_prefix_count_total_max
=
2238 bgp
->rfapi
->rib_prefix_count_total
;
2241 static int print_rib_sl(int (*fp
)(void *, const char *, ...), struct vty
*vty
,
2242 void *out
, struct skiplist
*sl
, int deleted
,
2243 char *str_pfx
, int *printedprefix
)
2245 struct rfapi_info
*ri
;
2248 int routes_displayed
= 0;
2251 for (rc
= skiplist_next(sl
, NULL
, (void **)&ri
, &cursor
); !rc
;
2252 rc
= skiplist_next(sl
, NULL
, (void **)&ri
, &cursor
)) {
2254 char str_vn
[PREFIX_STRLEN
];
2255 char str_un
[PREFIX_STRLEN
];
2256 char str_lifetime
[BUFSIZ
];
2257 char str_age
[BUFSIZ
];
2259 char str_rd
[RD_ADDRSTRLEN
];
2263 prefix2str(&ri
->rk
.vn
, str_vn
, sizeof(str_vn
));
2264 p
= index(str_vn
, '/');
2268 prefix2str(&ri
->un
, str_un
, sizeof(str_un
));
2269 p
= index(str_un
, '/');
2273 rfapiFormatSeconds(ri
->lifetime
, str_lifetime
, BUFSIZ
);
2274 #ifdef RFAPI_REGISTRATIONS_REPORT_AGE
2275 rfapiFormatAge(ri
->last_sent_time
, str_age
, BUFSIZ
);
2278 time_t now
= rfapi_time(NULL
);
2280 ri
->last_sent_time
+ (time_t)ri
->lifetime
;
2281 /* allow for delayed/async removal */
2282 rfapiFormatSeconds((expire
> now
? expire
- now
: 1),
2287 str_rd
[0] = 0; /* start empty */
2289 prefix_rd2str(&ri
->rk
.rd
, str_rd
, sizeof(str_rd
));
2292 fp(out
, " %c %-20s %-15s %-15s %-4u %-8s %-8s %s\n",
2293 deleted
? 'r' : ' ', *printedprefix
? "" : str_pfx
, str_vn
,
2294 str_un
, ri
->cost
, str_lifetime
, str_age
, str_rd
);
2296 if (!*printedprefix
)
2299 return routes_displayed
;
2304 * This one is for debugging (set stream to NULL to send output to log)
2306 static void rfapiRibShowRibSl(void *stream
, struct prefix
*pfx
,
2307 struct skiplist
*sl
)
2309 int (*fp
)(void *, const char *, ...);
2312 const char *vty_newline
;
2314 int nhs_displayed
= 0;
2315 char str_pfx
[PREFIX_STRLEN
];
2316 int printedprefix
= 0;
2318 if (rfapiStream2Vty(stream
, &fp
, &vty
, &out
, &vty_newline
) == 0)
2321 prefix2str(pfx
, str_pfx
, sizeof(str_pfx
));
2324 print_rib_sl(fp
, vty
, out
, sl
, 0, str_pfx
, &printedprefix
);
2328 void rfapiRibShowResponses(void *stream
, struct prefix
*pfx_match
,
2331 int (*fp
)(void *, const char *, ...);
2334 const char *vty_newline
;
2336 struct rfapi_descriptor
*rfd
;
2337 struct listnode
*node
;
2339 struct bgp
*bgp
= bgp_get_default();
2340 int printedheader
= 0;
2341 int routes_total
= 0;
2343 int prefixes_total
= 0;
2344 int prefixes_displayed
= 0;
2346 int nves_with_routes
= 0;
2347 int nves_displayed
= 0;
2348 int routes_displayed
= 0;
2349 int nhs_displayed
= 0;
2351 if (rfapiStream2Vty(stream
, &fp
, &vty
, &out
, &vty_newline
) == 0)
2354 fp(out
, "Unable to find default BGP instance\n");
2361 for (ALL_LIST_ELEMENTS_RO(&bgp
->rfapi
->descriptors
, node
, rfd
)) {
2367 if (rfd
->rib_prefix_count
)
2370 for (afi
= AFI_IP
; afi
< AFI_MAX
; ++afi
) {
2372 struct agg_node
*rn
;
2377 for (rn
= agg_route_top(rfd
->rib
[afi
]); rn
;
2378 rn
= agg_route_next(rn
)) {
2379 const struct prefix
*p
=
2380 agg_node_get_prefix(rn
);
2381 struct skiplist
*sl
;
2382 char str_pfx
[PREFIX_STRLEN
];
2383 int printedprefix
= 0;
2394 nhs_total
+= skiplist_count(sl
);
2397 if (pfx_match
&& !prefix_match(pfx_match
, p
)
2398 && !prefix_match(p
, pfx_match
))
2401 ++prefixes_displayed
;
2403 if (!printedheader
) {
2407 show_removed
? "Removed" : "Active");
2408 fp(out
, "%-15s %-15s\n", "Querying VN",
2411 " %-20s %-15s %-15s %4s %-8s %-8s\n",
2412 "Prefix", "Registered VN",
2413 "Registered UN", "Cost", "Lifetime",
2414 #ifdef RFAPI_REGISTRATIONS_REPORT_AGE
2422 char str_vn
[BUFSIZ
];
2423 char str_un
[BUFSIZ
];
2428 fp(out
, "%-15s %-15s\n",
2429 rfapiRfapiIpAddr2Str(&rfd
->vn_addr
,
2431 rfapiRfapiIpAddr2Str(&rfd
->un_addr
,
2435 prefix2str(p
, str_pfx
, sizeof(str_pfx
));
2436 // fp(out, " %s\n", buf); /* prefix */
2439 nhs_displayed
+= print_rib_sl(
2440 fp
, vty
, out
, sl
, show_removed
, str_pfx
,
2448 fp(out
, "Displayed %u NVEs, and %u out of %u %s prefixes",
2449 nves_displayed
, routes_displayed
, routes_total
,
2450 show_removed
? "removed" : "active");
2451 if (nhs_displayed
!= routes_displayed
2452 || nhs_total
!= routes_total
)
2453 fp(out
, " with %u out of %u next hops", nhs_displayed
,