]> git.proxmox.com Git - mirror_frr.git/blob - zebra/zebra_gr.c
zebra: Fix dp_out_queued counter to actually reflect real life
[mirror_frr.git] / zebra / zebra_gr.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Zebra GR related helper functions.
4 *
5 * Portions:
6 * Copyright (C) 2019 VMware, Inc.
7 * et al.
8 */
9
10 #include <zebra.h>
11 #include <libgen.h>
12
13 #include "lib/prefix.h"
14 #include "lib/command.h"
15 #include "lib/if.h"
16 #include "frrevent.h"
17 #include "lib/stream.h"
18 #include "lib/memory.h"
19 #include "lib/table.h"
20 #include "lib/network.h"
21 #include "lib/sockunion.h"
22 #include "lib/log.h"
23 #include "lib/zclient.h"
24 #include "lib/privs.h"
25 #include "lib/network.h"
26 #include "lib/buffer.h"
27 #include "lib/nexthop.h"
28 #include "lib/vrf.h"
29 #include "lib/libfrr.h"
30 #include "lib/sockopt.h"
31
32 #include "zebra/zebra_router.h"
33 #include "zebra/debug.h"
34 #include "zebra/zapi_msg.h"
35
36 DEFINE_MTYPE_STATIC(ZEBRA, ZEBRA_GR, "GR");
37
38 /*
39 * Forward declaration.
40 */
41 static struct zserv *zebra_gr_find_stale_client(struct zserv *client);
42 static void zebra_gr_route_stale_delete_timer_expiry(struct event *thread);
43 static int32_t zebra_gr_delete_stale_routes(struct client_gr_info *info);
44 static void zebra_gr_process_client_stale_routes(struct zserv *client,
45 struct client_gr_info *info);
46 static void zebra_gr_delete_stale_route_table_afi(struct event *event);
/*
 * GR debug logging helper: hands its printf-style arguments to
 * zlog_debug(), but only while IS_ZEBRA_DEBUG_EVENT evaluates true.
 */
#define LOG_GR(fmt, ...)                                                       \
	do {                                                                   \
		if (IS_ZEBRA_DEBUG_EVENT)                                      \
			zlog_debug(fmt, ##__VA_ARGS__);                        \
	} while (0)
55
56 /*
57 * Client connection functions
58 */
59
60 /*
61 * Function to clean all the stale clients,
62 * function will also clean up all per instance
63 * capabilities that are exchanged.
64 */
65 void zebra_gr_stale_client_cleanup(struct list *client_list)
66 {
67 struct listnode *node, *nnode;
68 struct zserv *s_client = NULL;
69 struct client_gr_info *info, *ninfo;
70
71 /* Find the stale client */
72 for (ALL_LIST_ELEMENTS(client_list, node, nnode, s_client)) {
73
74 LOG_GR("%s: Stale client %s is being deleted", __func__,
75 zebra_route_string(s_client->proto));
76
77 TAILQ_FOREACH_SAFE (info, &s_client->gr_info_queue, gr_info,
78 ninfo) {
79
80 /* Cancel the stale timer */
81 if (info->t_stale_removal != NULL) {
82 EVENT_OFF(info->t_stale_removal);
83 info->t_stale_removal = NULL;
84 info->do_delete = true;
85 /* Process the stale routes */
86 event_execute(
87 zrouter.master,
88 zebra_gr_route_stale_delete_timer_expiry,
89 info, 0);
90 }
91 }
92 }
93 }
94
95 /*
96 * A helper function to create client info.
97 */
98 static struct client_gr_info *zebra_gr_client_info_create(struct zserv *client)
99 {
100 struct client_gr_info *info;
101
102 info = XCALLOC(MTYPE_ZEBRA_GR, sizeof(struct client_gr_info));
103
104 info->stale_client_ptr = client;
105
106 TAILQ_INSERT_TAIL(&(client->gr_info_queue), info, gr_info);
107 return info;
108 }
109
110 /*
111 * A helper function to delete and destroy client info.
112 */
113 static void zebra_gr_client_info_delete(struct zserv *client,
114 struct client_gr_info *info)
115 {
116 struct vrf *vrf = vrf_lookup_by_id(info->vrf_id);
117
118 TAILQ_REMOVE(&(client->gr_info_queue), info, gr_info);
119
120 EVENT_OFF(info->t_stale_removal);
121
122 LOG_GR("%s: Instance info is being deleted for client %s vrf %s(%u)",
123 __func__, zebra_route_string(client->proto), VRF_LOGNAME(vrf),
124 info->vrf_id);
125
126 /* Delete all the stale routes. */
127 info->do_delete = true;
128 zebra_gr_delete_stale_routes(info);
129
130 XFREE(MTYPE_ZEBRA_GR, info);
131 }
132
133 /*
134 * Function to handle client when it disconnect.
135 */
136 int32_t zebra_gr_client_disconnect(struct zserv *client)
137 {
138 struct zserv *stale_client;
139 struct timeval tv;
140 struct client_gr_info *info = NULL;
141
142 /* Find the stale client */
143 stale_client = zebra_gr_find_stale_client(client);
144
145 /*
146 * We should never be here.
147 */
148 if (stale_client) {
149 LOG_GR("%s: Stale client %s exist, we should not be here!",
150 __func__, zebra_route_string(client->proto));
151 assert(0);
152 }
153
154 client->restart_time = monotime(&tv);
155
156 /* For all the GR instance start the stale removal timer. */
157 TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {
158 if (ZEBRA_CLIENT_GR_ENABLED(info->capabilities)
159 && (info->t_stale_removal == NULL)) {
160 struct vrf *vrf = vrf_lookup_by_id(info->vrf_id);
161
162 event_add_timer(
163 zrouter.master,
164 zebra_gr_route_stale_delete_timer_expiry, info,
165 info->stale_removal_time,
166 &info->t_stale_removal);
167 info->stale_client_ptr = client;
168 info->stale_client = true;
169 LOG_GR("%s: Client %s vrf %s(%u) Stale timer update to %d",
170 __func__, zebra_route_string(client->proto),
171 VRF_LOGNAME(vrf), info->vrf_id,
172 info->stale_removal_time);
173 }
174 }
175
176 listnode_add(zrouter.stale_client_list, client);
177
178 return 0;
179 }
180
181 /*
182 * Function to delete stale client
183 */
184 static void zebra_gr_delete_stale_client(struct client_gr_info *info)
185 {
186 struct client_gr_info *bgp_info;
187 struct zserv *s_client = NULL;
188 struct vrf *vrf = vrf_lookup_by_id(info->vrf_id);
189
190 s_client = info->stale_client_ptr;
191
192 if (!s_client || !info->stale_client)
193 return;
194
195 /*
196 * If there are bgp instances with the stale delete timer pending
197 * then stale client is not deleted
198 */
199 if ((s_client->gr_instance_count > 0) && info->gr_enable)
200 s_client->gr_instance_count--;
201
202 TAILQ_REMOVE(&(s_client->gr_info_queue), info, gr_info);
203
204 LOG_GR("%s: Client %s gr count %d", __func__,
205 zebra_route_string(s_client->proto),
206 s_client->gr_instance_count);
207
208 TAILQ_FOREACH (bgp_info, &s_client->gr_info_queue, gr_info) {
209 if (bgp_info->t_stale_removal != NULL)
210 return;
211 }
212
213 LOG_GR("%s: Client %s vrf %s(%u) is being deleted", __func__,
214 zebra_route_string(s_client->proto), VRF_LOGNAME(vrf),
215 info->vrf_id);
216
217 TAILQ_INIT(&(s_client->gr_info_queue));
218 listnode_delete(zrouter.stale_client_list, s_client);
219 if (info->stale_client)
220 zserv_client_delete(s_client);
221 XFREE(MTYPE_ZEBRA_GR, info);
222 }
223
224 /*
225 * Function to find stale client.
226 */
227 static struct zserv *zebra_gr_find_stale_client(struct zserv *client)
228 {
229 struct listnode *node, *nnode;
230 struct zserv *stale_client;
231
232 /* Find the stale client */
233 for (ALL_LIST_ELEMENTS(zrouter.stale_client_list, node, nnode,
234 stale_client)) {
235 if (client->proto == stale_client->proto
236 && client->instance == stale_client->instance) {
237 return stale_client;
238 }
239 }
240
241 return NULL;
242 }
243
244 /*
245 * Function to handle reconnect of client post restart.
246 */
247 void zebra_gr_client_reconnect(struct zserv *client)
248 {
249 struct listnode *node, *nnode;
250 struct zserv *old_client = NULL;
251 struct client_gr_info *info = NULL;
252
253 /* Find the stale client */
254 for (ALL_LIST_ELEMENTS(zrouter.stale_client_list, node, nnode,
255 old_client)) {
256 if (client->proto == old_client->proto
257 && client->instance == old_client->instance)
258 break;
259 }
260
261 /* Copy the timers */
262 if (!old_client)
263 return;
264
265 client->gr_instance_count = old_client->gr_instance_count;
266 client->restart_time = old_client->restart_time;
267
268 LOG_GR("%s : old client %s, gr_instance_count %d", __func__,
269 zebra_route_string(old_client->proto),
270 old_client->gr_instance_count);
271
272 if (TAILQ_FIRST(&old_client->gr_info_queue)) {
273 TAILQ_CONCAT(&client->gr_info_queue, &old_client->gr_info_queue,
274 gr_info);
275 TAILQ_INIT(&old_client->gr_info_queue);
276 }
277
278 TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {
279 info->stale_client_ptr = client;
280 info->stale_client = false;
281 }
282
283 /* Delete the stale client */
284 listnode_delete(zrouter.stale_client_list, old_client);
285 /* Delete old client */
286 zserv_client_delete(old_client);
287 }
288
289 struct zebra_gr_afi_clean {
290 struct client_gr_info *info;
291 afi_t afi;
292 uint8_t proto;
293 uint8_t instance;
294
295 struct event *t_gac;
296 };
297
298 /*
299 * Functions to deal with capabilities
300 */
301
302 /*
303 * Function to decode and call appropriate functions
304 * to handle client capabilities.
305 */
306 void zread_client_capabilities(ZAPI_HANDLER_ARGS)
307 {
308 struct zapi_cap api;
309 struct client_gr_info *info = NULL;
310 struct stream *s;
311 struct vrf *vrf;
312
313 s = msg;
314
315 if (zapi_capabilities_decode(s, &api)) {
316 LOG_GR("%s: Error in reading capabilities for client %s",
317 __func__, zebra_route_string(client->proto));
318 return;
319 }
320
321 vrf = vrf_lookup_by_id(api.vrf_id);
322
323 /*
324 * If this ever matters uncomment and add safi to the
325 * arrays as needed to track
326 */
327 if (api.safi != SAFI_UNICAST)
328 return;
329
330 /* GR only for dynamic clients */
331 if (client->proto <= ZEBRA_ROUTE_CONNECT) {
332 LOG_GR("%s: GR capabilities for client %s not supported",
333 __func__, zebra_route_string(client->proto));
334 return;
335 }
336
337 /* Find the bgp information for the specified vrf id */
338 TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {
339 if (info->vrf_id == api.vrf_id)
340 break;
341 }
342
343 /*
344 * If the command is delete, then cancel the stale timer and
345 * delete the bgp info
346 */
347 switch (api.cap) {
348 case ZEBRA_CLIENT_GR_DISABLE:
349 if (!info)
350 return;
351
352 LOG_GR("%s: Client %s instance GR disabled count %d", __func__,
353 zebra_route_string(client->proto),
354 client->gr_instance_count);
355
356 if ((info->gr_enable) && (client->gr_instance_count > 0))
357 client->gr_instance_count--;
358
359 zebra_gr_client_info_delete(client, info);
360 break;
361 case ZEBRA_CLIENT_GR_CAPABILITIES:
362 /* Allocate bgp info */
363 if (!info)
364 info = zebra_gr_client_info_create(client);
365
366 /* Update other parameters */
367 if (!info->gr_enable) {
368 client->gr_instance_count++;
369
370 LOG_GR("%s: Cient %s vrf %s(%u) GR enabled count %d",
371 __func__, zebra_route_string(client->proto),
372 VRF_LOGNAME(vrf), api.vrf_id,
373 client->gr_instance_count);
374
375 info->capabilities = api.cap;
376 info->stale_removal_time = api.stale_removal_time;
377 info->vrf_id = api.vrf_id;
378 info->gr_enable = true;
379 }
380 break;
381 case ZEBRA_CLIENT_RIB_STALE_TIME:
382 LOG_GR("%s: Client %s stale time update event", __func__,
383 zebra_route_string(client->proto));
384
385 /* Update the stale removal timer */
386 if (info && info->t_stale_removal == NULL) {
387
388 LOG_GR("%s: vrf %s(%u) Stale time: %d is now update to: %d",
389 __func__, VRF_LOGNAME(vrf), info->vrf_id,
390 info->stale_removal_time,
391 api.stale_removal_time);
392
393 info->stale_removal_time = api.stale_removal_time;
394 }
395
396 break;
397 case ZEBRA_CLIENT_ROUTE_UPDATE_COMPLETE:
398 if (!info) {
399 LOG_GR("%s: Client %s route update complete for AFI %d, SAFI %d, no Graceful Restart communication, returning",
400 __func__, zebra_route_string(client->proto),
401 api.afi, api.safi);
402 return;
403 }
404
405 LOG_GR("%s: Client %s vrf %s(%u) route update complete for AFI %d, SAFI %d",
406 __func__, zebra_route_string(client->proto),
407 VRF_LOGNAME(vrf), info->vrf_id, api.afi, api.safi);
408 info->route_sync[api.afi] = true;
409
410 /*
411 * Schedule for after anything already in the meta Q
412 */
413 rib_add_gr_run(api.afi, api.vrf_id, client->proto,
414 client->instance);
415 zebra_gr_process_client_stale_routes(client, info);
416 break;
417 case ZEBRA_CLIENT_ROUTE_UPDATE_PENDING:
418 if (!info) {
419 LOG_GR("%s: Client %s route update pending for AFI %d, SAFI %d",
420 __func__, zebra_route_string(client->proto),
421 api.afi, api.safi);
422 } else {
423 LOG_GR("%s: Client %s vrf %s(%u) route update pending for AFI %d, SAFI %d",
424 __func__, zebra_route_string(client->proto),
425 VRF_LOGNAME(vrf), info->vrf_id, api.afi,
426 api.safi);
427
428 info->af_enabled[api.afi] = true;
429 }
430 break;
431 }
432 }
433
434 /*
435 * Stale route handling
436 */
437
438 /*
439 * Delete all the stale routes that have not been refreshed
440 * post restart.
441 */
442 static void zebra_gr_route_stale_delete_timer_expiry(struct event *thread)
443 {
444 struct client_gr_info *info = EVENT_ARG(thread);
445 int32_t cnt = 0;
446 struct zserv *client;
447 struct vrf *vrf = vrf_lookup_by_id(info->vrf_id);
448
449 client = (struct zserv *)info->stale_client_ptr;
450
451 cnt = zebra_gr_delete_stale_routes(info);
452
453 /* Restart the timer */
454 if (cnt > 0) {
455 LOG_GR("%s: Client %s vrf %s(%u) processed %d routes. Start timer again",
456 __func__, zebra_route_string(client->proto),
457 VRF_LOGNAME(vrf), info->vrf_id, cnt);
458
459 event_add_timer(zrouter.master,
460 zebra_gr_route_stale_delete_timer_expiry, info,
461 ZEBRA_DEFAULT_STALE_UPDATE_DELAY,
462 &info->t_stale_removal);
463 } else {
464 /* No routes to delete for the VRF */
465 LOG_GR("%s: Client %s vrf %s(%u) all stale routes processed",
466 __func__, zebra_route_string(client->proto),
467 VRF_LOGNAME(vrf), info->vrf_id);
468
469 zebra_gr_delete_stale_client(info);
470 }
471 }
472
473
474 /*
475 * Function to process to check if route entry is stale
476 * or has been updated.
477 *
478 * Returns true when a node is deleted else false
479 */
480 static bool zebra_gr_process_route_entry(struct zserv *client,
481 struct route_node *rn,
482 struct route_entry *re)
483 {
484 /* If the route is not refreshed after restart, delete the entry */
485 if (re->uptime < client->restart_time) {
486 if (IS_ZEBRA_DEBUG_RIB)
487 zlog_debug("%s: Client %s stale route %pFX is deleted",
488 __func__, zebra_route_string(client->proto),
489 &rn->p);
490 rib_delnode(rn, re);
491
492 return true;
493 }
494
495 return false;
496 }
497
498 static void zebra_gr_delete_stale_route_table_afi(struct event *event)
499 {
500 struct zebra_gr_afi_clean *gac = EVENT_ARG(event);
501 struct route_table *table;
502 struct route_node *rn;
503 struct route_entry *re, *next;
504 struct zebra_vrf *zvrf = zebra_vrf_lookup_by_id(gac->info->vrf_id);
505 int32_t n = 0;
506
507 if (!zvrf)
508 goto done;
509
510 table = zvrf->table[gac->afi][SAFI_UNICAST];
511 if (!table)
512 goto done;
513
514 for (rn = route_top(table); rn; rn = srcdest_route_next(rn)) {
515 RNODE_FOREACH_RE_SAFE (rn, re, next) {
516 if (CHECK_FLAG(re->status, ROUTE_ENTRY_REMOVED))
517 continue;
518
519 /* If the route refresh is received
520 * after restart then do not delete
521 * the route
522 */
523
524 if (re->type == gac->proto &&
525 re->instance == gac->instance &&
526 zebra_gr_process_route_entry(
527 gac->info->stale_client_ptr, rn, re))
528 n++;
529
530 /* If the max route count is reached
531 * then timer thread will be restarted
532 * Store the current prefix and afi
533 */
534 if ((n >= ZEBRA_MAX_STALE_ROUTE_COUNT) &&
535 (gac->info->do_delete == false)) {
536 event_add_timer(
537 zrouter.master,
538 zebra_gr_delete_stale_route_table_afi,
539 gac, ZEBRA_DEFAULT_STALE_UPDATE_DELAY,
540 &gac->t_gac);
541 }
542 }
543 }
544
545 done:
546 XFREE(MTYPE_ZEBRA_GR, gac);
547 }
548
549 /*
550 * This function walks through the route table for all vrf and deletes
551 * the stale routes for the restarted client specified by the protocol
552 * type
553 */
554 static int32_t zebra_gr_delete_stale_route(struct client_gr_info *info,
555 struct zebra_vrf *zvrf)
556 {
557 afi_t afi;
558 uint8_t proto;
559 uint16_t instance;
560 struct zserv *s_client;
561
562 s_client = info->stale_client_ptr;
563 if (s_client == NULL) {
564 LOG_GR("%s: Stale client %s(%u) not present", __func__,
565 zvrf->vrf->name, zvrf->vrf->vrf_id);
566 return -1;
567 }
568
569 proto = s_client->proto;
570 instance = s_client->instance;
571
572 LOG_GR("%s: Client %s %s(%u) stale routes are being deleted", __func__,
573 zebra_route_string(proto), zvrf->vrf->name, zvrf->vrf->vrf_id);
574
575 /* Process routes for all AFI */
576 for (afi = AFI_IP; afi < AFI_MAX; afi++) {
577
578 /*
579 * Schedule for immediately after anything in the
580 * meta-Q
581 */
582 rib_add_gr_run(afi, info->vrf_id, proto, instance);
583 }
584 return 0;
585 }
586
587 /*
588 * Delete the stale routes when client is restarted and routes are not
589 * refreshed within the stale timeout
590 */
591 static int32_t zebra_gr_delete_stale_routes(struct client_gr_info *info)
592 {
593 struct zebra_vrf *zvrf;
594 uint64_t cnt = 0;
595
596 if (info == NULL)
597 return -1;
598
599 zvrf = zebra_vrf_lookup_by_id(info->vrf_id);
600 if (zvrf == NULL) {
601 LOG_GR("%s: Invalid VRF entry %u", __func__, info->vrf_id);
602 return -1;
603 }
604
605 cnt = zebra_gr_delete_stale_route(info, zvrf);
606 return cnt;
607 }
608
609 /*
610 * This function checks if route update for all AFI, SAFI is completed
611 * and cancels the stale timer
612 */
613 static void zebra_gr_process_client_stale_routes(struct zserv *client,
614 struct client_gr_info *info)
615 {
616 afi_t afi;
617
618 if (info == NULL)
619 return;
620
621 /* Check if route update completed for all AFI, SAFI */
622 for (afi = AFI_IP; afi < AFI_MAX; afi++) {
623 if (info->af_enabled[afi] && !info->route_sync[afi]) {
624 struct vrf *vrf = vrf_lookup_by_id(info->vrf_id);
625
626 LOG_GR("%s: Client %s vrf: %s(%u) route update not completed for AFI %d",
627 __func__, zebra_route_string(client->proto),
628 VRF_LOGNAME(vrf), info->vrf_id, afi);
629 return;
630 }
631 }
632
633 /*
634 * Route update completed for all AFI, SAFI
635 * Cancel the stale timer, routes are already being processed
636 */
637 if (info->t_stale_removal) {
638 struct vrf *vrf = vrf_lookup_by_id(info->vrf_id);
639
640 LOG_GR("%s: Client %s canceled stale delete timer vrf %s(%d)",
641 __func__, zebra_route_string(client->proto),
642 VRF_LOGNAME(vrf), info->vrf_id);
643 EVENT_OFF(info->t_stale_removal);
644 }
645 }
646
647 void zebra_gr_process_client(afi_t afi, vrf_id_t vrf_id, uint8_t proto,
648 uint8_t instance)
649 {
650 struct zserv *client = zserv_find_client(proto, instance);
651 struct client_gr_info *info = NULL;
652 struct zebra_gr_afi_clean *gac;
653
654 if (client == NULL)
655 return;
656
657 TAILQ_FOREACH (info, &client->gr_info_queue, gr_info) {
658 if (info->vrf_id == vrf_id)
659 break;
660 }
661
662 if (info == NULL)
663 return;
664
665 gac = XCALLOC(MTYPE_ZEBRA_GR, sizeof(*gac));
666 gac->info = info;
667 gac->afi = afi;
668 gac->proto = proto;
669 gac->instance = instance;
670
671 event_add_event(zrouter.master, zebra_gr_delete_stale_route_table_afi,
672 gac, 0, &gac->t_gac);
673 }