1 // SPDX-License-Identifier: GPL-2.0-or-later
3 * Zebra GR related helper functions.
6 * Copyright (C) 2019 VMware, Inc.
13 #include "lib/prefix.h"
14 #include "lib/command.h"
17 #include "lib/stream.h"
18 #include "lib/memory.h"
19 #include "lib/table.h"
20 #include "lib/network.h"
21 #include "lib/sockunion.h"
23 #include "lib/zclient.h"
24 #include "lib/privs.h"
25 #include "lib/network.h"
26 #include "lib/buffer.h"
27 #include "lib/nexthop.h"
29 #include "lib/libfrr.h"
30 #include "lib/sockopt.h"
32 #include "zebra/zebra_router.h"
33 #include "zebra/debug.h"
34 #include "zebra/zapi_msg.h"
/*
 * Memory type for this file's allocations.  The XCALLOC() and XFREE() calls
 * in this file pass MTYPE_ZEBRA_GR, presumably the identifier this macro
 * generates from ZEBRA_GR - TODO confirm against lib/memory.h.
 */
36 DEFINE_MTYPE_STATIC(ZEBRA
, ZEBRA_GR
, "GR");
39 * Forward declaration.
/*
 * Prototypes for file-local (static) helpers.  They are declared ahead of
 * their definitions so that functions placed earlier in this file can refer
 * to them.
 */
41 static struct zserv
*zebra_gr_find_stale_client(struct zserv
*client
);
42 static void zebra_gr_route_stale_delete_timer_expiry(struct event
*thread
);
43 static int32_t zebra_gr_delete_stale_routes(struct client_gr_info
*info
);
44 static void zebra_gr_process_client_stale_routes(struct zserv
*client
,
45 struct client_gr_info
*info
);
46 static void zebra_gr_delete_stale_route_table_afi(struct event
*event
);
/*
 * LOG_GR(msg, ...): pass msg and its variadic arguments to zlog_debug(),
 * but only when IS_ZEBRA_DEBUG_EVENT evaluates true.
 * NOTE(review): the embedded line numbers skip 51 and 54, so whatever wraps
 * the if statement is not visible in this listing.
 */
50 #define LOG_GR(msg, ...) \
52 if (IS_ZEBRA_DEBUG_EVENT) \
53 zlog_debug(msg, ##__VA_ARGS__); \
57 * Client connection functions
61 * Function to clean all the stale clients,
62 * function will also clean up all per instance
63 * capabilities that are exchanged.
/*
 * Walk client_list and, for each client on it, log that the stale client is
 * being deleted and scan that client's gr_info_queue.  For every info entry
 * whose t_stale_removal timer is still set, the timer is switched off with
 * EVENT_OFF(), the pointer is cleared and do_delete is set to true.
 * zebra_gr_route_stale_delete_timer_expiry is then handed as an argument to
 * a call whose name is not visible here.
 *
 * client_list: list whose elements are iterated as struct zserv pointers.
 *
 * NOTE(review): the embedded line numbers skip (78-79, 86-87, 89 onwards),
 * so the callee and the closing of the loops cannot be seen in this
 * listing - confirm against the full file.
 */
65 void zebra_gr_stale_client_cleanup(struct list
*client_list
)
67 struct listnode
*node
, *nnode
;
68 struct zserv
*s_client
= NULL
;
69 struct client_gr_info
*info
, *ninfo
;
71 /* Find the stale client */
72 for (ALL_LIST_ELEMENTS(client_list
, node
, nnode
, s_client
)) {
74 LOG_GR("%s: Stale client %s is being deleted", __func__
,
75 zebra_route_string(s_client
->proto
));
77 TAILQ_FOREACH_SAFE (info
, &s_client
->gr_info_queue
, gr_info
,
80 /* Cancel the stale timer */
81 if (info
->t_stale_removal
!= NULL
) {
82 EVENT_OFF(info
->t_stale_removal
);
83 info
->t_stale_removal
= NULL
;
84 info
->do_delete
= true;
85 /* Process the stale routes */
88 zebra_gr_route_stale_delete_timer_expiry
,
96 * A helper function to create client info.
/*
 * Allocate a client_gr_info with XCALLOC() under MTYPE_ZEBRA_GR, record the
 * owning client in stale_client_ptr and append the new entry to the tail of
 * client->gr_info_queue.
 *
 * client: the zserv session the new info entry belongs to.
 * Returns: declared as struct client_gr_info *; the return statement is not
 * visible in this listing - presumably the new entry, TODO confirm.
 */
98 static struct client_gr_info
*zebra_gr_client_info_create(struct zserv
*client
)
100 struct client_gr_info
*info
;
102 info
= XCALLOC(MTYPE_ZEBRA_GR
, sizeof(struct client_gr_info
));
104 info
->stale_client_ptr
= client
;
106 TAILQ_INSERT_TAIL(&(client
->gr_info_queue
), info
, gr_info
);
111 * A helper function to delete and destroy client info.
/*
 * Unlink info from client->gr_info_queue and destroy it.
 *
 * In order: the vrf is looked up by info->vrf_id (it is used in the log
 * message), the entry is removed from the queue, its t_stale_removal timer
 * is switched off, do_delete is set to true, zebra_gr_delete_stale_routes()
 * is called for the entry, and the entry is released with XFREE().
 *
 * client: the zserv session owning the queue.
 * info:   the entry to delete; it must not be used after this call because
 *         it is freed here.
 */
113 static void zebra_gr_client_info_delete(struct zserv
*client
,
114 struct client_gr_info
*info
)
116 struct vrf
*vrf
= vrf_lookup_by_id(info
->vrf_id
);
118 TAILQ_REMOVE(&(client
->gr_info_queue
), info
, gr_info
);
120 EVENT_OFF(info
->t_stale_removal
);
122 LOG_GR("%s: Instance info is being deleted for client %s vrf %s(%u)",
123 __func__
, zebra_route_string(client
->proto
), VRF_LOGNAME(vrf
),
126 /* Delete all the stale routes. */
127 info
->do_delete
= true;
128 zebra_gr_delete_stale_routes(info
);
130 XFREE(MTYPE_ZEBRA_GR
, info
);
134 * Function to handle a client when it disconnects.
/*
 * Handle the disconnect of a client.
 *
 * Looks for an already-registered stale entry for this client with
 * zebra_gr_find_stale_client(), stores the result of monotime() in
 * client->restart_time, and walks client->gr_info_queue.  For each info
 * whose capabilities satisfy ZEBRA_CLIENT_GR_ENABLED() and that has no
 * t_stale_removal timer yet, a call is made with the arguments
 * zebra_gr_route_stale_delete_timer_expiry, info, info->stale_removal_time
 * and &info->t_stale_removal (presumably a timer add - the callee name sits
 * on a line missing from this listing).  The info is then marked with
 * stale_client_ptr = client and stale_client = true.  The client is also
 * added to zrouter.stale_client_list.
 *
 * restart_time is what zebra_gr_process_route_entry() later compares
 * against a route entry's uptime.
 *
 * client: the zserv session that disconnected.
 * Returns: int32_t; no return statement is visible in this listing - TODO
 * confirm the values against the full file.
 *
 * NOTE(review): the condition guarding the "we should not be here" log and
 * the declaration of tv are also on missing lines.
 */
136 int32_t zebra_gr_client_disconnect(struct zserv
*client
)
138 struct zserv
*stale_client
;
140 struct client_gr_info
*info
= NULL
;
142 /* Find the stale client */
143 stale_client
= zebra_gr_find_stale_client(client
);
146 * We should never be here.
149 LOG_GR("%s: Stale client %s exist, we should not be here!",
150 __func__
, zebra_route_string(client
->proto
));
154 client
->restart_time
= monotime(&tv
);
156 /* For all the GR instance start the stale removal timer. */
157 TAILQ_FOREACH (info
, &client
->gr_info_queue
, gr_info
) {
158 if (ZEBRA_CLIENT_GR_ENABLED(info
->capabilities
)
159 && (info
->t_stale_removal
== NULL
)) {
160 struct vrf
*vrf
= vrf_lookup_by_id(info
->vrf_id
);
164 zebra_gr_route_stale_delete_timer_expiry
, info
,
165 info
->stale_removal_time
,
166 &info
->t_stale_removal
);
167 info
->stale_client_ptr
= client
;
168 info
->stale_client
= true;
169 LOG_GR("%s: Client %s vrf %s(%u) Stale timer update to %d",
170 __func__
, zebra_route_string(client
->proto
),
171 VRF_LOGNAME(vrf
), info
->vrf_id
,
172 info
->stale_removal_time
);
176 listnode_add(zrouter
.stale_client_list
, client
);
182 * Function to delete stale client
/*
 * Retire one client_gr_info of a stale client and, as far as the visible
 * statements show, the stale client itself.
 *
 * The owning client is taken from info->stale_client_ptr.  A guard tests for
 * a NULL client or an info not flagged stale_client (its action is not
 * visible here).  gr_instance_count is decremented when it is positive and
 * info->gr_enable is set, and info is removed from the client's
 * gr_info_queue.  The remaining entries are then scanned for a pending
 * t_stale_removal timer (the action taken on a hit is not visible; the
 * original comment says the stale client is not deleted in that case).
 * After that the queue is re-initialised, the client is removed from
 * zrouter.stale_client_list, zserv_client_delete() is called when
 * info->stale_client is set, and info is freed with XFREE().
 *
 * info: the entry being retired; freed by this function.
 *
 * NOTE(review): several branch bodies are on lines missing from this
 * listing - confirm the early exits against the full file.
 */
184 static void zebra_gr_delete_stale_client(struct client_gr_info
*info
)
186 struct client_gr_info
*bgp_info
;
187 struct zserv
*s_client
= NULL
;
188 struct vrf
*vrf
= vrf_lookup_by_id(info
->vrf_id
);
190 s_client
= info
->stale_client_ptr
;
192 if (!s_client
|| !info
->stale_client
)
196 * If there are bgp instances with the stale delete timer pending
197 * then stale client is not deleted
199 if ((s_client
->gr_instance_count
> 0) && info
->gr_enable
)
200 s_client
->gr_instance_count
--;
202 TAILQ_REMOVE(&(s_client
->gr_info_queue
), info
, gr_info
);
204 LOG_GR("%s: Client %s gr count %d", __func__
,
205 zebra_route_string(s_client
->proto
),
206 s_client
->gr_instance_count
);
208 TAILQ_FOREACH (bgp_info
, &s_client
->gr_info_queue
, gr_info
) {
209 if (bgp_info
->t_stale_removal
!= NULL
)
213 LOG_GR("%s: Client %s vrf %s(%u) is being deleted", __func__
,
214 zebra_route_string(s_client
->proto
), VRF_LOGNAME(vrf
),
217 TAILQ_INIT(&(s_client
->gr_info_queue
));
218 listnode_delete(zrouter
.stale_client_list
, s_client
);
219 if (info
->stale_client
)
220 zserv_client_delete(s_client
);
221 XFREE(MTYPE_ZEBRA_GR
, info
);
225 * Function to find stale client.
/*
 * Search zrouter.stale_client_list for an entry whose proto and instance
 * both equal those of client.
 *
 * client: the session whose proto/instance pair is looked up.
 * Returns: declared as struct zserv *; the return statements are not
 * visible in this listing - presumably the matching stale client or NULL,
 * TODO confirm.
 */
227 static struct zserv
*zebra_gr_find_stale_client(struct zserv
*client
)
229 struct listnode
*node
, *nnode
;
230 struct zserv
*stale_client
;
232 /* Find the stale client */
233 for (ALL_LIST_ELEMENTS(zrouter
.stale_client_list
, node
, nnode
,
235 if (client
->proto
== stale_client
->proto
236 && client
->instance
== stale_client
->instance
) {
245 * Function to handle reconnect of client post restart.
/*
 * Handle a client that reconnects after a restart.
 *
 * zrouter.stale_client_list is searched for an old session with the same
 * proto and instance.  gr_instance_count and restart_time are copied from
 * the old session to the new one.  When the old session's gr_info_queue is
 * non-empty it is concatenated onto client->gr_info_queue and the old queue
 * is re-initialised.  Every info now on the new client's queue is
 * re-pointed at client and has stale_client cleared.  The old session is
 * then removed from the stale list and passed to zserv_client_delete().
 *
 * client: the newly connected zserv session.
 *
 * NOTE(review): the lines that leave the search loop and handle the
 * "no old client" case are missing from this listing - confirm against the
 * full file.
 */
247 void zebra_gr_client_reconnect(struct zserv
*client
)
249 struct listnode
*node
, *nnode
;
250 struct zserv
*old_client
= NULL
;
251 struct client_gr_info
*info
= NULL
;
253 /* Find the stale client */
254 for (ALL_LIST_ELEMENTS(zrouter
.stale_client_list
, node
, nnode
,
256 if (client
->proto
== old_client
->proto
257 && client
->instance
== old_client
->instance
)
261 /* Copy the timers */
265 client
->gr_instance_count
= old_client
->gr_instance_count
;
266 client
->restart_time
= old_client
->restart_time
;
268 LOG_GR("%s : old client %s, gr_instance_count %d", __func__
,
269 zebra_route_string(old_client
->proto
),
270 old_client
->gr_instance_count
);
272 if (TAILQ_FIRST(&old_client
->gr_info_queue
)) {
273 TAILQ_CONCAT(&client
->gr_info_queue
, &old_client
->gr_info_queue
,
275 TAILQ_INIT(&old_client
->gr_info_queue
);
278 TAILQ_FOREACH (info
, &client
->gr_info_queue
, gr_info
) {
279 info
->stale_client_ptr
= client
;
280 info
->stale_client
= false;
283 /* Delete the stale client */
284 listnode_delete(zrouter
.stale_client_list
, old_client
);
285 /* Delete old client */
286 zserv_client_delete(old_client
);
/*
 * Context handed as the event argument to
 * zebra_gr_delete_stale_route_table_afi().
 * Only the info member is visible in this listing.  Code elsewhere in this
 * file also reads afi, proto, instance and t_gac through pointers of this
 * type, so those members are presumably declared on the missing lines -
 * TODO confirm.
 */
289 struct zebra_gr_afi_clean
{
290 struct client_gr_info
*info
;
299 * Functions to deal with capabilities
303 * Function to decode and call appropriate functions
304 * to handle client capabilities.
/*
 * ZAPI handler that decodes a client capability message and acts on it.
 *
 * The message is decoded with zapi_capabilities_decode(s, &api); a decode
 * failure is logged.  Messages are then checked for api.safi !=
 * SAFI_UNICAST (the action taken is not visible here) and for
 * client->proto <= ZEBRA_ROUTE_CONNECT, which is logged as unsupported.
 * The client's gr_info_queue is searched for the entry whose vrf_id equals
 * api.vrf_id.  The visible case labels do the following:
 *
 *  ZEBRA_CLIENT_GR_DISABLE            - decrement gr_instance_count when the
 *                                       info had gr_enable set and the count
 *                                       is positive, then delete the info.
 *  ZEBRA_CLIENT_GR_CAPABILITIES       - create an info entry (the guarding
 *                                       condition is not visible), bump
 *                                       gr_instance_count if gr_enable was
 *                                       not already set, and store cap,
 *                                       stale_removal_time and vrf_id from
 *                                       the message.
 *  ZEBRA_CLIENT_RIB_STALE_TIME        - if an info exists and no stale timer
 *                                       is pending, adopt the new
 *                                       stale_removal_time.
 *  ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE - set route_sync[api.afi], call
 *                                       rib_add_gr_run() and then
 *                                       zebra_gr_process_client_stale_routes().
 *  ZEBRA_CLIENT_ROUTE_UPDATE_PENDING  - set af_enabled[api.afi].
 *
 * NOTE(review): the switch header, the declarations of s, api and vrf, the
 * NULL-info guards that precede two of the log calls, and all break/return
 * statements are on lines missing from this listing - confirm against the
 * full file before relying on the exact control flow.
 */
306 void zread_client_capabilities(ZAPI_HANDLER_ARGS
)
309 struct client_gr_info
*info
= NULL
;
315 if (zapi_capabilities_decode(s
, &api
)) {
316 LOG_GR("%s: Error in reading capabilities for client %s",
317 __func__
, zebra_route_string(client
->proto
));
321 vrf
= vrf_lookup_by_id(api
.vrf_id
);
324 * If this ever matters uncomment and add safi to the
325 * arrays as needed to track
327 if (api
.safi
!= SAFI_UNICAST
)
330 /* GR only for dynamic clients */
331 if (client
->proto
<= ZEBRA_ROUTE_CONNECT
) {
332 LOG_GR("%s: GR capabilities for client %s not supported",
333 __func__
, zebra_route_string(client
->proto
));
337 /* Find the bgp information for the specified vrf id */
338 TAILQ_FOREACH (info
, &client
->gr_info_queue
, gr_info
) {
339 if (info
->vrf_id
== api
.vrf_id
)
344 * If the command is delete, then cancel the stale timer and
345 * delete the bgp info
348 case ZEBRA_CLIENT_GR_DISABLE
:
352 LOG_GR("%s: Client %s instance GR disabled count %d", __func__
,
353 zebra_route_string(client
->proto
),
354 client
->gr_instance_count
);
356 if ((info
->gr_enable
) && (client
->gr_instance_count
> 0))
357 client
->gr_instance_count
--;
359 zebra_gr_client_info_delete(client
, info
);
361 case ZEBRA_CLIENT_GR_CAPABILITIES
:
362 /* Allocate bgp info */
364 info
= zebra_gr_client_info_create(client
);
366 /* Update other parameters */
367 if (!info
->gr_enable
) {
368 client
->gr_instance_count
++;
370 LOG_GR("%s: Cient %s vrf %s(%u) GR enabled count %d",
371 __func__
, zebra_route_string(client
->proto
),
372 VRF_LOGNAME(vrf
), api
.vrf_id
,
373 client
->gr_instance_count
);
375 info
->capabilities
= api
.cap
;
376 info
->stale_removal_time
= api
.stale_removal_time
;
377 info
->vrf_id
= api
.vrf_id
;
378 info
->gr_enable
= true;
381 case ZEBRA_CLIENT_RIB_STALE_TIME
:
382 LOG_GR("%s: Client %s stale time update event", __func__
,
383 zebra_route_string(client
->proto
));
385 /* Update the stale removal timer */
386 if (info
&& info
->t_stale_removal
== NULL
) {
388 LOG_GR("%s: vrf %s(%u) Stale time: %d is now update to: %d",
389 __func__
, VRF_LOGNAME(vrf
), info
->vrf_id
,
390 info
->stale_removal_time
,
391 api
.stale_removal_time
);
393 info
->stale_removal_time
= api
.stale_removal_time
;
397 case ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE
:
399 LOG_GR("%s: Client %s route update complete for AFI %d, SAFI %d, no Graceful Restart communication, returning",
400 __func__
, zebra_route_string(client
->proto
),
405 LOG_GR("%s: Client %s vrf %s(%u) route update complete for AFI %d, SAFI %d",
406 __func__
, zebra_route_string(client
->proto
),
407 VRF_LOGNAME(vrf
), info
->vrf_id
, api
.afi
, api
.safi
);
408 info
->route_sync
[api
.afi
] = true;
411 * Schedule for after anything already in the meta Q
413 rib_add_gr_run(api
.afi
, api
.vrf_id
, client
->proto
,
415 zebra_gr_process_client_stale_routes(client
, info
);
417 case ZEBRA_CLIENT_ROUTE_UPDATE_PENDING
:
419 LOG_GR("%s: Client %s route update pending for AFI %d, SAFI %d",
420 __func__
, zebra_route_string(client
->proto
),
423 LOG_GR("%s: Client %s vrf %s(%u) route update pending for AFI %d, SAFI %d",
424 __func__
, zebra_route_string(client
->proto
),
425 VRF_LOGNAME(vrf
), info
->vrf_id
, api
.afi
,
428 info
->af_enabled
[api
.afi
] = true;
435 * Stale route handling
439 * Delete all the stale routes that have not been refreshed
/*
 * Event callback for the stale-removal timer of one client_gr_info.
 *
 * The info is taken from EVENT_ARG(thread) and the owning client from
 * info->stale_client_ptr.  zebra_gr_delete_stale_routes(info) is called and
 * its result stored in cnt.  Two outcomes are visible: one logs
 * "Start timer again" and re-arms this same callback on zrouter.master with
 * ZEBRA_DEFAULT_STALE_UPDATE_DELAY; the other logs that all stale routes
 * were processed and calls zebra_gr_delete_stale_client(info).
 *
 * thread: the event whose argument is the client_gr_info to process.
 *
 * NOTE(review): the declaration of cnt and the condition choosing between
 * the two outcomes are on lines missing from this listing - presumably the
 * timer is re-armed while cnt is positive, TODO confirm.
 */
442 static void zebra_gr_route_stale_delete_timer_expiry(struct event
*thread
)
444 struct client_gr_info
*info
= EVENT_ARG(thread
);
446 struct zserv
*client
;
447 struct vrf
*vrf
= vrf_lookup_by_id(info
->vrf_id
);
449 client
= (struct zserv
*)info
->stale_client_ptr
;
451 cnt
= zebra_gr_delete_stale_routes(info
);
453 /* Restart the timer */
455 LOG_GR("%s: Client %s vrf %s(%u) processed %d routes. Start timer again",
456 __func__
, zebra_route_string(client
->proto
),
457 VRF_LOGNAME(vrf
), info
->vrf_id
, cnt
);
459 event_add_timer(zrouter
.master
,
460 zebra_gr_route_stale_delete_timer_expiry
, info
,
461 ZEBRA_DEFAULT_STALE_UPDATE_DELAY
,
462 &info
->t_stale_removal
);
464 /* No routes to delete for the VRF */
465 LOG_GR("%s: Client %s vrf %s(%u) all stale routes processed",
466 __func__
, zebra_route_string(client
->proto
),
467 VRF_LOGNAME(vrf
), info
->vrf_id
);
469 zebra_gr_delete_stale_client(info
);
475 * Function to check whether a route entry is stale
476 * or has been updated.
478 * Returns true when a node is deleted else false
/*
 * Decide whether route entry re on node rn is stale for client.
 *
 * An entry counts as stale when re->uptime is earlier than
 * client->restart_time, i.e. it was not refreshed after the restart.  In
 * that case a debug message is emitted when IS_ZEBRA_DEBUG_RIB is set.
 *
 * client: session whose restart_time is the staleness cut-off.
 * rn:     route node holding the entry.
 * re:     route entry being examined.
 * Returns: bool; the return statements and the deletion call are on lines
 * missing from this listing - TODO confirm against the full file.
 */
480 static bool zebra_gr_process_route_entry(struct zserv
*client
,
481 struct route_node
*rn
,
482 struct route_entry
*re
)
484 /* If the route is not refreshed after restart, delete the entry */
485 if (re
->uptime
< client
->restart_time
) {
486 if (IS_ZEBRA_DEBUG_RIB
)
487 zlog_debug("%s: Client %s stale route %pFX is deleted",
488 __func__
, zebra_route_string(client
->proto
),
/*
 * Event callback that scans one unicast route table for stale routes.
 *
 * The context (struct zebra_gr_afi_clean) comes from EVENT_ARG(event).  The
 * table is zvrf->table[gac->afi][SAFI_UNICAST] of the zebra_vrf found by
 * gac->info->vrf_id.  Nodes are visited with route_top() and
 * srcdest_route_next(), and every entry on a node with
 * RNODE_FOREACH_RE_SAFE.  Entries flagged ROUTE_ENTRY_REMOVED are tested
 * first (the action is not visible here).  Entries whose type and instance
 * match gac->proto and gac->instance are passed to
 * zebra_gr_process_route_entry().  When n reaches
 * ZEBRA_MAX_STALE_ROUTE_COUNT while gac->info->do_delete is false, this
 * same callback is handed to a call together with gac and
 * ZEBRA_DEFAULT_STALE_UPDATE_DELAY (presumably a timer re-arm - the callee
 * name is not visible).  gac is released with XFREE().
 *
 * event: the event whose argument is the zebra_gr_afi_clean context.
 *
 * NOTE(review): the declaration and updates of n, the NULL checks on zvrf
 * and table, and the statements that follow the re-arm are on lines missing
 * from this listing - confirm against the full file.
 */
498 static void zebra_gr_delete_stale_route_table_afi(struct event
*event
)
500 struct zebra_gr_afi_clean
*gac
= EVENT_ARG(event
);
501 struct route_table
*table
;
502 struct route_node
*rn
;
503 struct route_entry
*re
, *next
;
504 struct zebra_vrf
*zvrf
= zebra_vrf_lookup_by_id(gac
->info
->vrf_id
);
510 table
= zvrf
->table
[gac
->afi
][SAFI_UNICAST
];
514 for (rn
= route_top(table
); rn
; rn
= srcdest_route_next(rn
)) {
515 RNODE_FOREACH_RE_SAFE (rn
, re
, next
) {
516 if (CHECK_FLAG(re
->status
, ROUTE_ENTRY_REMOVED
))
519 /* If the route refresh is received
520 * after restart then do not delete
524 if (re
->type
== gac
->proto
&&
525 re
->instance
== gac
->instance
&&
526 zebra_gr_process_route_entry(
527 gac
->info
->stale_client_ptr
, rn
, re
))
530 /* If the max route count is reached
531 * then timer thread will be restarted
532 * Store the current prefix and afi
534 if ((n
>= ZEBRA_MAX_STALE_ROUTE_COUNT
) &&
535 (gac
->info
->do_delete
== false)) {
538 zebra_gr_delete_stale_route_table_afi
,
539 gac
, ZEBRA_DEFAULT_STALE_UPDATE_DELAY
,
546 XFREE(MTYPE_ZEBRA_GR
, gac
);
550 * This function walks through the route table for all vrf and deletes
551 * the stale routes for the restarted client specified by the protocol
/*
 * Queue stale-route processing for one VRF of a restarted client.
 *
 * The client is taken from info->stale_client_ptr; a NULL client is logged.
 * Otherwise proto and instance are copied from the client and, for every
 * afi from AFI_IP up to (not including) AFI_MAX,
 * rib_add_gr_run(afi, info->vrf_id, proto, instance) is called.
 *
 * info: GR state of the client for this VRF.
 * zvrf: zebra VRF; its vrf->name and vrf->vrf_id are used in log messages.
 * Returns: int32_t; the return statements are on lines missing from this
 * listing - TODO confirm the values against the full file.
 */
554 static int32_t zebra_gr_delete_stale_route(struct client_gr_info
*info
,
555 struct zebra_vrf
*zvrf
)
560 struct zserv
*s_client
;
562 s_client
= info
->stale_client_ptr
;
563 if (s_client
== NULL
) {
564 LOG_GR("%s: Stale client %s(%u) not present", __func__
,
565 zvrf
->vrf
->name
, zvrf
->vrf
->vrf_id
);
569 proto
= s_client
->proto
;
570 instance
= s_client
->instance
;
572 LOG_GR("%s: Client %s %s(%u) stale routes are being deleted", __func__
,
573 zebra_route_string(proto
), zvrf
->vrf
->name
, zvrf
->vrf
->vrf_id
);
575 /* Process routes for all AFI */
576 for (afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
579 * Schedule for immediately after anything in the
582 rib_add_gr_run(afi
, info
->vrf_id
, proto
, instance
);
588 * Delete the stale routes when client is restarted and routes are not
589 * refreshed within the stale timeout
/*
 * Resolve the zebra_vrf for info->vrf_id and hand it, together with info,
 * to zebra_gr_delete_stale_route(); the result is stored in cnt.  An
 * "Invalid VRF entry" message is logged on a path whose condition is not
 * visible here (presumably a failed lookup - TODO confirm).
 *
 * info: GR state whose vrf_id selects the VRF to process.
 * Returns: int32_t; the return statements are on lines missing from this
 * listing - presumably cnt, TODO confirm.
 */
591 static int32_t zebra_gr_delete_stale_routes(struct client_gr_info
*info
)
593 struct zebra_vrf
*zvrf
;
599 zvrf
= zebra_vrf_lookup_by_id(info
->vrf_id
);
601 LOG_GR("%s: Invalid VRF entry %u", __func__
, info
->vrf_id
);
605 cnt
= zebra_gr_delete_stale_route(info
, zvrf
);
610 * This function checks if route update for all AFI, SAFI is completed
611 * and cancels the stale timer
/*
 * Check whether the client has finished its route update for every AFI it
 * enabled and, if so, stop the stale-removal timer.
 *
 * For each afi from AFI_IP up to (not including) AFI_MAX, an AFI that has
 * af_enabled set but route_sync clear is logged as "not completed" (the
 * statement that follows the log is not visible - presumably an early
 * return, TODO confirm).  When a t_stale_removal timer is set, the
 * cancellation is logged and the timer is switched off with EVENT_OFF().
 *
 * client: session used for the log messages.
 * info:   GR state holding af_enabled, route_sync and the timer.
 */
613 static void zebra_gr_process_client_stale_routes(struct zserv
*client
,
614 struct client_gr_info
*info
)
621 /* Check if route update completed for all AFI, SAFI */
622 for (afi
= AFI_IP
; afi
< AFI_MAX
; afi
++) {
623 if (info
->af_enabled
[afi
] && !info
->route_sync
[afi
]) {
624 struct vrf
*vrf
= vrf_lookup_by_id(info
->vrf_id
);
626 LOG_GR("%s: Client %s vrf: %s(%u) route update not completed for AFI %d",
627 __func__
, zebra_route_string(client
->proto
),
628 VRF_LOGNAME(vrf
), info
->vrf_id
, afi
);
634 * Route update completed for all AFI, SAFI
635 * Cancel the stale timer, routes are already being processed
637 if (info
->t_stale_removal
) {
638 struct vrf
*vrf
= vrf_lookup_by_id(info
->vrf_id
);
640 LOG_GR("%s: Client %s canceled stale delete timer vrf %s(%d)",
641 __func__
, zebra_route_string(client
->proto
),
642 VRF_LOGNAME(vrf
), info
->vrf_id
);
643 EVENT_OFF(info
->t_stale_removal
);
/*
 * Schedule a stale-route scan for one client, VRF and address family.
 *
 * The client is found with zserv_find_client(proto, instance) and its
 * gr_info_queue is searched for the entry whose vrf_id matches.  A
 * zebra_gr_afi_clean context is allocated with XCALLOC(), filled in, and
 * zebra_gr_delete_stale_route_table_afi() is queued on zrouter.master via
 * event_add_event() with that context as its argument.
 *
 * afi:    address family to scan.
 * vrf_id: VRF whose info entry is looked up.
 * proto:  client protocol.
 *
 * NOTE(review): the rest of the parameter list (instance is used but its
 * declaration is not visible), the NULL checks on client and info, and the
 * other gac assignments are on lines missing from this listing - confirm
 * against the full file.
 */
647 void zebra_gr_process_client(afi_t afi
, vrf_id_t vrf_id
, uint8_t proto
,
650 struct zserv
*client
= zserv_find_client(proto
, instance
);
651 struct client_gr_info
*info
= NULL
;
652 struct zebra_gr_afi_clean
*gac
;
657 TAILQ_FOREACH (info
, &client
->gr_info_queue
, gr_info
) {
658 if (info
->vrf_id
== vrf_id
)
665 gac
= XCALLOC(MTYPE_ZEBRA_GR
, sizeof(*gac
));
669 gac
->instance
= instance
;
671 event_add_event(zrouter
.master
, zebra_gr_delete_stale_route_table_afi
,
672 gac
, 0, &gac
->t_gac
);