/*
 * Zebra dataplane layer.
 * Copyright (c) 2018 Volta Networks, Inc.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; see the file COPYING; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
20 #include "lib/libfrr.h"
21 #include "lib/debug.h"
22 #include "lib/frratomic.h"
23 #include "lib/frr_pthread.h"
24 #include "lib/memory.h"
25 #include "lib/queue.h"
26 #include "lib/zebra.h"
27 #include "zebra/zebra_memory.h"
28 #include "zebra/zserv.h"
29 #include "zebra/zebra_dplane.h"
31 #include "zebra/debug.h"
33 /* Memory type for context blocks */
34 DEFINE_MTYPE(ZEBRA
, DP_CTX
, "Zebra DPlane Ctx")
35 DEFINE_MTYPE(ZEBRA
, DP_PROV
, "Zebra DPlane Provider")
41 /* Enable test dataplane provider */
42 /*#define DPLANE_TEST_PROVIDER 1 */
/* Default value for max queued incoming updates */
const uint32_t DPLANE_DEFAULT_MAX_QUEUED = 200;

/* Default value for new work per cycle */
const uint32_t DPLANE_DEFAULT_NEW_WORK = 100;
50 /* Validation check macro for context blocks */
51 /* #define DPLANE_DEBUG 1 */
55 # define DPLANE_CTX_VALID(p) \
60 # define DPLANE_CTX_VALID(p)
62 #endif /* DPLANE_DEBUG */
65 * The context block used to exchange info about route updates across
66 * the boundary between the zebra main context (and pthread) and the
67 * dataplane layer (and pthread).
69 struct zebra_dplane_ctx
{
72 enum dplane_op_e zd_op
;
74 /* Status on return */
75 enum zebra_dplane_result zd_status
;
77 /* Dplane provider id */
80 /* Flags - used by providers, e.g. */
83 /* TODO -- internal/sub-operation status? */
84 enum zebra_dplane_result zd_remote_status
;
85 enum zebra_dplane_result zd_kernel_status
;
87 /* Dest and (optional) source prefixes */
88 struct prefix zd_dest
;
105 route_tag_t zd_old_tag
;
107 uint32_t zd_old_metric
;
108 uint16_t zd_instance
;
109 uint16_t zd_old_instance
;
112 uint8_t zd_old_distance
;
115 uint32_t zd_nexthop_mtu
;
118 struct zebra_dplane_info zd_ns_info
;
121 struct nexthop_group zd_ng
;
123 /* "Previous" nexthops, used only in route updates without netlink */
124 struct nexthop_group zd_old_ng
;
126 /* TODO -- use fixed array of nexthops, to avoid mallocs? */
128 /* Embedded list linkage */
129 TAILQ_ENTRY(zebra_dplane_ctx
) zd_q_entries
;
132 /* Flag that can be set by a pre-kernel provider as a signal that an update
133 * should bypass the kernel.
135 #define DPLANE_CTX_FLAG_NO_KERNEL 0x01
139 * Registration block for one dataplane provider.
141 struct zebra_dplane_provider
{
143 char dp_name
[DPLANE_PROVIDER_NAMELEN
+ 1];
145 /* Priority, for ordering among providers */
152 pthread_mutex_t dp_mutex
;
154 /* Plugin-provided extra data */
160 dplane_provider_process_fp dp_fp
;
162 dplane_provider_fini_fp dp_fini
;
164 _Atomic
uint32_t dp_in_counter
;
165 _Atomic
uint32_t dp_in_max
;
166 _Atomic
uint32_t dp_out_counter
;
167 _Atomic
uint32_t dp_out_max
;
168 _Atomic
uint32_t dp_error_counter
;
170 /* Queue of contexts inbound to the provider */
171 struct dplane_ctx_q dp_ctx_in_q
;
173 /* Queue of completed contexts outbound from the provider back
174 * towards the dataplane module.
176 struct dplane_ctx_q dp_ctx_out_q
;
178 /* Embedded list linkage for provider objects */
179 TAILQ_ENTRY(zebra_dplane_provider
) dp_prov_link
;
185 static struct zebra_dplane_globals
{
186 /* Mutex to control access to dataplane components */
187 pthread_mutex_t dg_mutex
;
189 /* Results callback registered by zebra 'core' */
190 dplane_results_fp dg_results_cb
;
192 /* Sentinel for beginning of shutdown */
193 volatile bool dg_is_shutdown
;
195 /* Sentinel for end of shutdown */
196 volatile bool dg_run
;
198 /* Route-update context queue inbound to the dataplane */
199 TAILQ_HEAD(zdg_ctx_q
, zebra_dplane_ctx
) dg_route_ctx_q
;
201 /* Ordered list of providers */
202 TAILQ_HEAD(zdg_prov_q
, zebra_dplane_provider
) dg_providers_q
;
204 /* Counter used to assign internal ids to providers */
205 uint32_t dg_provider_id
;
207 /* Limit number of pending, unprocessed updates */
208 _Atomic
uint32_t dg_max_queued_updates
;
210 /* Limit number of new updates dequeued at once, to pace an
213 uint32_t dg_updates_per_cycle
;
215 _Atomic
uint32_t dg_routes_in
;
216 _Atomic
uint32_t dg_routes_queued
;
217 _Atomic
uint32_t dg_routes_queued_max
;
218 _Atomic
uint32_t dg_route_errors
;
219 _Atomic
uint32_t dg_update_yields
;
221 /* Dataplane pthread */
222 struct frr_pthread
*dg_pthread
;
224 /* Event-delivery context 'master' for the dplane */
225 struct thread_master
*dg_master
;
227 /* Event/'thread' pointer for queued updates */
228 struct thread
*dg_t_update
;
230 /* Event pointer for pending shutdown check loop */
231 struct thread
*dg_t_shutdown_check
;
/*
 * Lock and unlock for interactions with the zebra 'core' pthread.
 * Both operate on the mutex held in the file-level zdplane_info object.
 */
#define DPLANE_LOCK() pthread_mutex_lock(&zdplane_info.dg_mutex)
#define DPLANE_UNLOCK() pthread_mutex_unlock(&zdplane_info.dg_mutex)

/*
 * Lock and unlock for individual providers; 'p' is a pointer to an
 * object that carries a 'dp_mutex' member.
 */
#define DPLANE_PROV_LOCK(p) pthread_mutex_lock(&((p)->dp_mutex))
#define DPLANE_PROV_UNLOCK(p) pthread_mutex_unlock(&((p)->dp_mutex))
/*
 * Forward declaration: event handler scheduled on the dataplane's
 * thread master; the definition appears later in this file.
 */
static int dplane_thread_loop(struct thread *event);
256 * Allocate a dataplane update context
258 static struct zebra_dplane_ctx
*dplane_ctx_alloc(void)
260 struct zebra_dplane_ctx
*p
;
262 /* TODO -- just alloc'ing memory, but would like to maintain
265 p
= XCALLOC(MTYPE_DP_CTX
, sizeof(struct zebra_dplane_ctx
));
271 * Free a dataplane results context.
273 static void dplane_ctx_free(struct zebra_dplane_ctx
**pctx
)
276 DPLANE_CTX_VALID(*pctx
);
278 /* TODO -- just freeing memory, but would like to maintain
282 /* Free embedded nexthops */
283 if ((*pctx
)->zd_ng
.nexthop
) {
284 /* This deals with recursive nexthops too */
285 nexthops_free((*pctx
)->zd_ng
.nexthop
);
288 if ((*pctx
)->zd_old_ng
.nexthop
) {
289 /* This deals with recursive nexthops too */
290 nexthops_free((*pctx
)->zd_old_ng
.nexthop
);
293 XFREE(MTYPE_DP_CTX
, *pctx
);
/*
 * Return a context block to the dplane module after processing.
 *
 * pctx: address of the caller's context pointer; it is handed straight
 *       to dplane_ctx_free().
 */
void dplane_ctx_fini(struct zebra_dplane_ctx **pctx)
{
	/* TODO -- maintain pool; for now, just free */
	dplane_ctx_free(pctx);
}
307 /* Enqueue a context block */
308 void dplane_ctx_enqueue_tail(struct dplane_ctx_q
*q
,
309 const struct zebra_dplane_ctx
*ctx
)
311 TAILQ_INSERT_TAIL(q
, (struct zebra_dplane_ctx
*)ctx
, zd_q_entries
);
314 /* Append a list of context blocks to another list */
315 void dplane_ctx_list_append(struct dplane_ctx_q
*to_list
,
316 struct dplane_ctx_q
*from_list
)
318 if (TAILQ_FIRST(from_list
)) {
319 TAILQ_CONCAT(to_list
, from_list
, zd_q_entries
);
321 /* And clear 'from' list */
322 TAILQ_INIT(from_list
);
326 /* Dequeue a context block from the head of a list */
327 struct zebra_dplane_ctx
*dplane_ctx_dequeue(struct dplane_ctx_q
*q
)
329 struct zebra_dplane_ctx
*ctx
= TAILQ_FIRST(q
);
332 TAILQ_REMOVE(q
, ctx
, zd_q_entries
);
/*
 * Accessors for information from the context object
 */
340 enum zebra_dplane_result
dplane_ctx_get_status(
341 const struct zebra_dplane_ctx
*ctx
)
343 DPLANE_CTX_VALID(ctx
);
345 return ctx
->zd_status
;
348 void dplane_ctx_set_status(struct zebra_dplane_ctx
*ctx
,
349 enum zebra_dplane_result status
)
351 DPLANE_CTX_VALID(ctx
);
353 ctx
->zd_status
= status
;
356 /* Retrieve last/current provider id */
357 uint32_t dplane_ctx_get_provider(const struct zebra_dplane_ctx
*ctx
)
359 DPLANE_CTX_VALID(ctx
);
360 return ctx
->zd_provider
;
363 /* Providers run before the kernel can control whether a kernel
364 * update should be done.
366 void dplane_ctx_set_skip_kernel(struct zebra_dplane_ctx
*ctx
)
368 DPLANE_CTX_VALID(ctx
);
370 SET_FLAG(ctx
->zd_flags
, DPLANE_CTX_FLAG_NO_KERNEL
);
373 bool dplane_ctx_is_skip_kernel(const struct zebra_dplane_ctx
*ctx
)
375 DPLANE_CTX_VALID(ctx
);
377 return CHECK_FLAG(ctx
->zd_flags
, DPLANE_CTX_FLAG_NO_KERNEL
);
380 enum dplane_op_e
dplane_ctx_get_op(const struct zebra_dplane_ctx
*ctx
)
382 DPLANE_CTX_VALID(ctx
);
387 const char *dplane_op2str(enum dplane_op_e op
)
389 const char *ret
= "UNKNOWN";
397 case DPLANE_OP_ROUTE_INSTALL
:
398 ret
= "ROUTE_INSTALL";
400 case DPLANE_OP_ROUTE_UPDATE
:
401 ret
= "ROUTE_UPDATE";
403 case DPLANE_OP_ROUTE_DELETE
:
404 ret
= "ROUTE_DELETE";
412 const char *dplane_res2str(enum zebra_dplane_result res
)
414 const char *ret
= "<Unknown>";
417 case ZEBRA_DPLANE_REQUEST_FAILURE
:
420 case ZEBRA_DPLANE_REQUEST_QUEUED
:
423 case ZEBRA_DPLANE_REQUEST_SUCCESS
:
431 const struct prefix
*dplane_ctx_get_dest(const struct zebra_dplane_ctx
*ctx
)
433 DPLANE_CTX_VALID(ctx
);
435 return &(ctx
->zd_dest
);
438 /* Source prefix is a little special - return NULL for "no src prefix" */
439 const struct prefix
*dplane_ctx_get_src(const struct zebra_dplane_ctx
*ctx
)
441 DPLANE_CTX_VALID(ctx
);
443 if (ctx
->zd_src
.prefixlen
== 0 &&
444 IN6_IS_ADDR_UNSPECIFIED(&(ctx
->zd_src
.u
.prefix6
))) {
447 return &(ctx
->zd_src
);
451 bool dplane_ctx_is_update(const struct zebra_dplane_ctx
*ctx
)
453 DPLANE_CTX_VALID(ctx
);
455 return ctx
->zd_is_update
;
458 uint32_t dplane_ctx_get_seq(const struct zebra_dplane_ctx
*ctx
)
460 DPLANE_CTX_VALID(ctx
);
465 uint32_t dplane_ctx_get_old_seq(const struct zebra_dplane_ctx
*ctx
)
467 DPLANE_CTX_VALID(ctx
);
469 return ctx
->zd_old_seq
;
472 vrf_id_t
dplane_ctx_get_vrf(const struct zebra_dplane_ctx
*ctx
)
474 DPLANE_CTX_VALID(ctx
);
476 return ctx
->zd_vrf_id
;
479 int dplane_ctx_get_type(const struct zebra_dplane_ctx
*ctx
)
481 DPLANE_CTX_VALID(ctx
);
486 int dplane_ctx_get_old_type(const struct zebra_dplane_ctx
*ctx
)
488 DPLANE_CTX_VALID(ctx
);
490 return ctx
->zd_old_type
;
493 afi_t
dplane_ctx_get_afi(const struct zebra_dplane_ctx
*ctx
)
495 DPLANE_CTX_VALID(ctx
);
500 safi_t
dplane_ctx_get_safi(const struct zebra_dplane_ctx
*ctx
)
502 DPLANE_CTX_VALID(ctx
);
507 uint32_t dplane_ctx_get_table(const struct zebra_dplane_ctx
*ctx
)
509 DPLANE_CTX_VALID(ctx
);
511 return ctx
->zd_table_id
;
514 route_tag_t
dplane_ctx_get_tag(const struct zebra_dplane_ctx
*ctx
)
516 DPLANE_CTX_VALID(ctx
);
521 route_tag_t
dplane_ctx_get_old_tag(const struct zebra_dplane_ctx
*ctx
)
523 DPLANE_CTX_VALID(ctx
);
525 return ctx
->zd_old_tag
;
528 uint16_t dplane_ctx_get_instance(const struct zebra_dplane_ctx
*ctx
)
530 DPLANE_CTX_VALID(ctx
);
532 return ctx
->zd_instance
;
535 uint16_t dplane_ctx_get_old_instance(const struct zebra_dplane_ctx
*ctx
)
537 DPLANE_CTX_VALID(ctx
);
539 return ctx
->zd_old_instance
;
542 uint32_t dplane_ctx_get_metric(const struct zebra_dplane_ctx
*ctx
)
544 DPLANE_CTX_VALID(ctx
);
546 return ctx
->zd_metric
;
549 uint32_t dplane_ctx_get_old_metric(const struct zebra_dplane_ctx
*ctx
)
551 DPLANE_CTX_VALID(ctx
);
553 return ctx
->zd_old_metric
;
556 uint32_t dplane_ctx_get_mtu(const struct zebra_dplane_ctx
*ctx
)
558 DPLANE_CTX_VALID(ctx
);
563 uint32_t dplane_ctx_get_nh_mtu(const struct zebra_dplane_ctx
*ctx
)
565 DPLANE_CTX_VALID(ctx
);
567 return ctx
->zd_nexthop_mtu
;
570 uint8_t dplane_ctx_get_distance(const struct zebra_dplane_ctx
*ctx
)
572 DPLANE_CTX_VALID(ctx
);
574 return ctx
->zd_distance
;
577 uint8_t dplane_ctx_get_old_distance(const struct zebra_dplane_ctx
*ctx
)
579 DPLANE_CTX_VALID(ctx
);
581 return ctx
->zd_old_distance
;
584 const struct nexthop_group
*dplane_ctx_get_ng(
585 const struct zebra_dplane_ctx
*ctx
)
587 DPLANE_CTX_VALID(ctx
);
589 return &(ctx
->zd_ng
);
592 const struct nexthop_group
*dplane_ctx_get_old_ng(
593 const struct zebra_dplane_ctx
*ctx
)
595 DPLANE_CTX_VALID(ctx
);
597 return &(ctx
->zd_old_ng
);
600 const struct zebra_dplane_info
*dplane_ctx_get_ns(
601 const struct zebra_dplane_ctx
*ctx
)
603 DPLANE_CTX_VALID(ctx
);
605 return &(ctx
->zd_ns_info
);
/*
 * End of dplane context accessors
 */
614 * Retrieve the limit on the number of pending, unprocessed updates.
616 uint32_t dplane_get_in_queue_limit(void)
618 return atomic_load_explicit(&zdplane_info
.dg_max_queued_updates
,
619 memory_order_relaxed
);
623 * Configure limit on the number of pending, queued updates.
625 void dplane_set_in_queue_limit(uint32_t limit
, bool set
)
627 /* Reset to default on 'unset' */
629 limit
= DPLANE_DEFAULT_MAX_QUEUED
;
631 atomic_store_explicit(&zdplane_info
.dg_max_queued_updates
, limit
,
632 memory_order_relaxed
);
636 * Retrieve the current queue depth of incoming, unprocessed updates
638 uint32_t dplane_get_in_queue_len(void)
640 return atomic_load_explicit(&zdplane_info
.dg_routes_queued
,
641 memory_order_seq_cst
);
645 * Initialize a context block for a route update from zebra data structs.
647 static int dplane_ctx_route_init(struct zebra_dplane_ctx
*ctx
,
649 struct route_node
*rn
,
650 struct route_entry
*re
)
653 const struct route_table
*table
= NULL
;
654 const rib_table_info_t
*info
;
655 const struct prefix
*p
, *src_p
;
656 struct zebra_ns
*zns
;
657 struct zebra_vrf
*zvrf
;
658 struct nexthop
*nexthop
;
660 if (!ctx
|| !rn
|| !re
)
664 ctx
->zd_status
= ZEBRA_DPLANE_REQUEST_SUCCESS
;
666 ctx
->zd_type
= re
->type
;
667 ctx
->zd_old_type
= re
->type
;
669 /* Prefixes: dest, and optional source */
670 srcdest_rnode_prefixes(rn
, &p
, &src_p
);
672 prefix_copy(&(ctx
->zd_dest
), p
);
675 prefix_copy(&(ctx
->zd_src
), src_p
);
677 memset(&(ctx
->zd_src
), 0, sizeof(ctx
->zd_src
));
679 ctx
->zd_table_id
= re
->table
;
681 ctx
->zd_metric
= re
->metric
;
682 ctx
->zd_old_metric
= re
->metric
;
683 ctx
->zd_vrf_id
= re
->vrf_id
;
684 ctx
->zd_mtu
= re
->mtu
;
685 ctx
->zd_nexthop_mtu
= re
->nexthop_mtu
;
686 ctx
->zd_instance
= re
->instance
;
687 ctx
->zd_tag
= re
->tag
;
688 ctx
->zd_old_tag
= re
->tag
;
689 ctx
->zd_distance
= re
->distance
;
691 table
= srcdest_rnode_table(rn
);
694 ctx
->zd_afi
= info
->afi
;
695 ctx
->zd_safi
= info
->safi
;
697 /* Extract ns info - can't use pointers to 'core' structs */
698 zvrf
= vrf_info_lookup(re
->vrf_id
);
701 zebra_dplane_info_from_zns(&(ctx
->zd_ns_info
), zns
, true /*is_cmd*/);
703 #if defined(HAVE_NETLINK)
704 /* Increment message counter after copying to context struct - may need
705 * two messages in some 'update' cases.
707 if (op
== DPLANE_OP_ROUTE_UPDATE
)
708 zns
->netlink_cmd
.seq
+= 2;
710 zns
->netlink_cmd
.seq
++;
713 /* Copy nexthops; recursive info is included too */
714 copy_nexthops(&(ctx
->zd_ng
.nexthop
), re
->ng
.nexthop
, NULL
);
716 /* TODO -- maybe use array of nexthops to avoid allocs? */
718 /* Ensure that the dplane's nexthops flags are clear. */
719 for (ALL_NEXTHOPS(ctx
->zd_ng
, nexthop
))
720 UNSET_FLAG(nexthop
->flags
, NEXTHOP_FLAG_FIB
);
722 /* Trying out the sequence number idea, so we can try to detect
723 * when a result is stale.
725 re
->dplane_sequence
++;
726 ctx
->zd_seq
= re
->dplane_sequence
;
735 * Enqueue a new route update,
736 * and ensure an event is active for the dataplane thread.
738 static int dplane_route_enqueue(struct zebra_dplane_ctx
*ctx
)
743 /* Enqueue for processing by the dataplane thread */
746 TAILQ_INSERT_TAIL(&zdplane_info
.dg_route_ctx_q
, ctx
,
751 curr
= atomic_add_fetch_explicit(
753 /* TODO -- issue with the clang atomic/intrinsics currently;
754 * casting away the 'Atomic'-ness of the variable works.
756 (uint32_t *)&(zdplane_info
.dg_routes_queued
),
758 &(zdplane_info
.dg_routes_queued
),
760 1, memory_order_seq_cst
);
762 /* Maybe update high-water counter also */
763 high
= atomic_load_explicit(&zdplane_info
.dg_routes_queued_max
,
764 memory_order_seq_cst
);
765 while (high
< curr
) {
766 if (atomic_compare_exchange_weak_explicit(
767 &zdplane_info
.dg_routes_queued_max
,
769 memory_order_seq_cst
,
770 memory_order_seq_cst
))
774 /* Ensure that an event for the dataplane thread is active */
775 ret
= dplane_provider_work_ready();
781 * Utility that prepares a route update and enqueues it for processing
783 static enum zebra_dplane_result
784 dplane_route_update_internal(struct route_node
*rn
,
785 struct route_entry
*re
,
786 struct route_entry
*old_re
,
789 enum zebra_dplane_result result
= ZEBRA_DPLANE_REQUEST_FAILURE
;
791 struct zebra_dplane_ctx
*ctx
= NULL
;
793 /* Obtain context block */
794 ctx
= dplane_ctx_alloc();
800 /* Init context with info from zebra data structs */
801 ret
= dplane_ctx_route_init(ctx
, op
, rn
, re
);
803 /* Capture some extra info for update case
804 * where there's a different 'old' route.
806 if ((op
== DPLANE_OP_ROUTE_UPDATE
) &&
807 old_re
&& (old_re
!= re
)) {
808 ctx
->zd_is_update
= true;
810 old_re
->dplane_sequence
++;
811 ctx
->zd_old_seq
= old_re
->dplane_sequence
;
813 ctx
->zd_old_tag
= old_re
->tag
;
814 ctx
->zd_old_type
= old_re
->type
;
815 ctx
->zd_old_instance
= old_re
->instance
;
816 ctx
->zd_old_distance
= old_re
->distance
;
817 ctx
->zd_old_metric
= old_re
->metric
;
820 /* For bsd, capture previous re's nexthops too, sigh.
821 * We'll need these to do per-nexthop deletes.
823 copy_nexthops(&(ctx
->zd_old_ng
.nexthop
),
824 old_re
->ng
.nexthop
, NULL
);
825 #endif /* !HAVE_NETLINK */
828 /* Enqueue context for processing */
829 ret
= dplane_route_enqueue(ctx
);
834 atomic_fetch_add_explicit(&zdplane_info
.dg_routes_in
, 1,
835 memory_order_relaxed
);
838 result
= ZEBRA_DPLANE_REQUEST_QUEUED
;
840 atomic_fetch_add_explicit(&zdplane_info
.dg_route_errors
, 1,
841 memory_order_relaxed
);
842 dplane_ctx_free(&ctx
);
849 * Enqueue a route 'add' for the dataplane.
851 enum zebra_dplane_result
dplane_route_add(struct route_node
*rn
,
852 struct route_entry
*re
)
854 enum zebra_dplane_result ret
= ZEBRA_DPLANE_REQUEST_FAILURE
;
856 if (rn
== NULL
|| re
== NULL
)
859 ret
= dplane_route_update_internal(rn
, re
, NULL
,
860 DPLANE_OP_ROUTE_INSTALL
);
867 * Enqueue a route update for the dataplane.
869 enum zebra_dplane_result
dplane_route_update(struct route_node
*rn
,
870 struct route_entry
*re
,
871 struct route_entry
*old_re
)
873 enum zebra_dplane_result ret
= ZEBRA_DPLANE_REQUEST_FAILURE
;
875 if (rn
== NULL
|| re
== NULL
)
878 ret
= dplane_route_update_internal(rn
, re
, old_re
,
879 DPLANE_OP_ROUTE_UPDATE
);
885 * Enqueue a route removal for the dataplane.
887 enum zebra_dplane_result
dplane_route_delete(struct route_node
*rn
,
888 struct route_entry
*re
)
890 enum zebra_dplane_result ret
= ZEBRA_DPLANE_REQUEST_FAILURE
;
892 if (rn
== NULL
|| re
== NULL
)
895 ret
= dplane_route_update_internal(rn
, re
, NULL
,
896 DPLANE_OP_ROUTE_DELETE
);
903 * Handler for 'show dplane'
905 int dplane_show_helper(struct vty
*vty
, bool detailed
)
907 uint64_t queued
, queue_max
, limit
, errs
, incoming
, yields
;
909 /* Using atomics because counters are being changed in different
912 incoming
= atomic_load_explicit(&zdplane_info
.dg_routes_in
,
913 memory_order_relaxed
);
914 limit
= atomic_load_explicit(&zdplane_info
.dg_max_queued_updates
,
915 memory_order_relaxed
);
916 queued
= atomic_load_explicit(&zdplane_info
.dg_routes_queued
,
917 memory_order_relaxed
);
918 queue_max
= atomic_load_explicit(&zdplane_info
.dg_routes_queued_max
,
919 memory_order_relaxed
);
920 errs
= atomic_load_explicit(&zdplane_info
.dg_route_errors
,
921 memory_order_relaxed
);
922 yields
= atomic_load_explicit(&zdplane_info
.dg_update_yields
,
923 memory_order_relaxed
);
925 vty_out(vty
, "Zebra dataplane:\nRoute updates: %"PRIu64
"\n",
927 vty_out(vty
, "Route update errors: %"PRIu64
"\n", errs
);
928 vty_out(vty
, "Route update queue limit: %"PRIu64
"\n", limit
);
929 vty_out(vty
, "Route update queue depth: %"PRIu64
"\n", queued
);
930 vty_out(vty
, "Route update queue max: %"PRIu64
"\n", queue_max
);
931 vty_out(vty
, "Route update yields: %"PRIu64
"\n", yields
);
937 * Handler for 'show dplane providers'
939 int dplane_show_provs_helper(struct vty
*vty
, bool detailed
)
941 struct zebra_dplane_provider
*prov
;
942 uint64_t in
, in_max
, out
, out_max
;
944 vty_out(vty
, "Zebra dataplane providers:\n");
947 prov
= TAILQ_FIRST(&zdplane_info
.dg_providers_q
);
950 /* Show counters, useful info from each registered provider */
953 in
= atomic_load_explicit(&prov
->dp_in_counter
,
954 memory_order_relaxed
);
955 in_max
= atomic_load_explicit(&prov
->dp_in_max
,
956 memory_order_relaxed
);
957 out
= atomic_load_explicit(&prov
->dp_out_counter
,
958 memory_order_relaxed
);
959 out_max
= atomic_load_explicit(&prov
->dp_out_max
,
960 memory_order_relaxed
);
962 vty_out(vty
, "%s (%u): in: %"PRIu64
", max: %"PRIu64
", "
963 "out: %"PRIu64
", max: %"PRIu64
"\n",
964 prov
->dp_name
, prov
->dp_id
, in
, in_max
, out
, out_max
);
967 prov
= TAILQ_NEXT(prov
, dp_prov_link
);
975 * Provider registration
977 int dplane_provider_register(const char *name
,
978 enum dplane_provider_prio prio
,
980 dplane_provider_process_fp fp
,
981 dplane_provider_fini_fp fini_fp
,
985 struct zebra_dplane_provider
*p
, *last
;
993 if (prio
<= DPLANE_PRIO_NONE
||
994 prio
> DPLANE_PRIO_LAST
) {
999 /* Allocate and init new provider struct */
1000 p
= XCALLOC(MTYPE_DP_PROV
, sizeof(struct zebra_dplane_provider
));
1006 pthread_mutex_init(&(p
->dp_mutex
), NULL
);
1007 TAILQ_INIT(&(p
->dp_ctx_in_q
));
1008 TAILQ_INIT(&(p
->dp_ctx_out_q
));
1010 p
->dp_priority
= prio
;
1012 p
->dp_fini
= fini_fp
;
1015 /* Lock - the dplane pthread may be running */
1018 p
->dp_id
= ++zdplane_info
.dg_provider_id
;
1021 strlcpy(p
->dp_name
, name
, DPLANE_PROVIDER_NAMELEN
);
1023 snprintf(p
->dp_name
, DPLANE_PROVIDER_NAMELEN
,
1024 "provider-%u", p
->dp_id
);
1026 /* Insert into list ordered by priority */
1027 TAILQ_FOREACH(last
, &zdplane_info
.dg_providers_q
, dp_prov_link
) {
1028 if (last
->dp_priority
> p
->dp_priority
)
1033 TAILQ_INSERT_BEFORE(last
, p
, dp_prov_link
);
1035 TAILQ_INSERT_TAIL(&zdplane_info
.dg_providers_q
, p
,
1041 if (IS_ZEBRA_DEBUG_DPLANE
)
1042 zlog_debug("dplane: registered new provider '%s' (%u), prio %d",
1043 p
->dp_name
, p
->dp_id
, p
->dp_priority
);
1049 /* Accessors for provider attributes */
1050 const char *dplane_provider_get_name(const struct zebra_dplane_provider
*prov
)
1052 return prov
->dp_name
;
1055 uint32_t dplane_provider_get_id(const struct zebra_dplane_provider
*prov
)
1060 void *dplane_provider_get_data(const struct zebra_dplane_provider
*prov
)
1062 return prov
->dp_data
;
1065 int dplane_provider_get_work_limit(const struct zebra_dplane_provider
*prov
)
1067 return zdplane_info
.dg_updates_per_cycle
;
1071 * Dequeue and maintain associated counter
1073 struct zebra_dplane_ctx
*dplane_provider_dequeue_in_ctx(
1074 struct zebra_dplane_provider
*prov
)
1076 struct zebra_dplane_ctx
*ctx
= NULL
;
1078 if (dplane_provider_is_threaded(prov
))
1079 DPLANE_PROV_LOCK(prov
);
1081 ctx
= TAILQ_FIRST(&(prov
->dp_ctx_in_q
));
1083 TAILQ_REMOVE(&(prov
->dp_ctx_in_q
), ctx
, zd_q_entries
);
1086 if (dplane_provider_is_threaded(prov
))
1087 DPLANE_PROV_UNLOCK(prov
);
1093 * Dequeue work to a list, return count
1095 int dplane_provider_dequeue_in_list(struct zebra_dplane_provider
*prov
,
1096 struct dplane_ctx_q
*listp
)
1099 struct zebra_dplane_ctx
*ctx
;
1101 limit
= zdplane_info
.dg_updates_per_cycle
;
1103 if (dplane_provider_is_threaded(prov
))
1104 DPLANE_PROV_LOCK(prov
);
1106 for (ret
= 0; ret
< limit
; ret
++) {
1107 ctx
= TAILQ_FIRST(&(prov
->dp_ctx_in_q
));
1109 TAILQ_REMOVE(&(prov
->dp_ctx_in_q
), ctx
, zd_q_entries
);
1111 TAILQ_INSERT_TAIL(listp
, ctx
, zd_q_entries
);
1117 if (dplane_provider_is_threaded(prov
))
1118 DPLANE_PROV_UNLOCK(prov
);
1124 * Enqueue and maintain associated counter
1126 void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider
*prov
,
1127 struct zebra_dplane_ctx
*ctx
)
1129 if (dplane_provider_is_threaded(prov
))
1130 DPLANE_PROV_LOCK(prov
);
1132 TAILQ_INSERT_TAIL(&(prov
->dp_ctx_out_q
), ctx
,
1135 if (dplane_provider_is_threaded(prov
))
1136 DPLANE_PROV_UNLOCK(prov
);
1138 atomic_fetch_add_explicit(&(prov
->dp_out_counter
), 1,
1139 memory_order_relaxed
);
1142 bool dplane_provider_is_threaded(const struct zebra_dplane_provider
*prov
)
1144 return (prov
->dp_flags
& DPLANE_PROV_FLAG_THREADED
);
1148 * Provider api to signal that work/events are available
1149 * for the dataplane pthread.
1151 int dplane_provider_work_ready(void)
1153 /* Note that during zebra startup, we may be offered work before
1154 * the dataplane pthread (and thread-master) are ready. We want to
1155 * enqueue the work, but the event-scheduling machinery may not be
1158 if (zdplane_info
.dg_run
) {
1159 thread_add_event(zdplane_info
.dg_master
,
1160 dplane_thread_loop
, NULL
, 0,
1161 &zdplane_info
.dg_t_update
);
1168 * Zebra registers a results callback with the dataplane system
1170 int dplane_results_register(dplane_results_fp fp
)
1172 zdplane_info
.dg_results_cb
= fp
;
1177 * Kernel dataplane provider
1181 * Kernel provider callback
1183 static int kernel_dplane_process_func(struct zebra_dplane_provider
*prov
)
1185 enum zebra_dplane_result res
;
1186 struct zebra_dplane_ctx
*ctx
;
1189 limit
= dplane_provider_get_work_limit(prov
);
1191 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL
)
1192 zlog_debug("dplane provider '%s': processing",
1193 dplane_provider_get_name(prov
));
1195 for (counter
= 0; counter
< limit
; counter
++) {
1197 ctx
= dplane_provider_dequeue_in_ctx(prov
);
1201 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL
) {
1202 char dest_str
[PREFIX_STRLEN
];
1204 prefix2str(dplane_ctx_get_dest(ctx
),
1205 dest_str
, sizeof(dest_str
));
1207 zlog_debug("%u:%s Dplane route update ctx %p op %s",
1208 dplane_ctx_get_vrf(ctx
), dest_str
,
1209 ctx
, dplane_op2str(dplane_ctx_get_op(ctx
)));
1212 /* Call into the synchronous kernel-facing code here */
1213 res
= kernel_route_update(ctx
);
1215 if (res
!= ZEBRA_DPLANE_REQUEST_SUCCESS
)
1216 atomic_fetch_add_explicit(
1217 &zdplane_info
.dg_route_errors
, 1,
1218 memory_order_relaxed
);
1220 dplane_ctx_set_status(ctx
, res
);
1222 dplane_provider_enqueue_out_ctx(prov
, ctx
);
1225 /* Ensure that we'll run the work loop again if there's still
1228 if (counter
>= limit
) {
1229 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL
)
1230 zlog_debug("dplane provider '%s' reached max updates %d",
1231 dplane_provider_get_name(prov
), counter
);
1233 atomic_fetch_add_explicit(&zdplane_info
.dg_update_yields
,
1234 1, memory_order_relaxed
);
1236 dplane_provider_work_ready();
1242 #if DPLANE_TEST_PROVIDER
1245 * Test dataplane provider plugin
1249 * Test provider process callback
1251 static int test_dplane_process_func(struct zebra_dplane_provider
*prov
)
1253 struct zebra_dplane_ctx
*ctx
;
1256 /* Just moving from 'in' queue to 'out' queue */
1258 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL
)
1259 zlog_debug("dplane provider '%s': processing",
1260 dplane_provider_get_name(prov
));
1262 limit
= dplane_provider_get_work_limit(prov
);
1264 for (counter
= 0; counter
< limit
; counter
++) {
1266 ctx
= dplane_provider_dequeue_in_ctx(prov
);
1270 dplane_ctx_set_status(ctx
, ZEBRA_DPLANE_REQUEST_SUCCESS
);
1272 dplane_provider_enqueue_out_ctx(prov
, ctx
);
1275 /* Ensure that we'll run the work loop again if there's still
1278 if (counter
>= limit
)
1279 dplane_provider_work_ready();
1285 * Test provider shutdown/fini callback
1287 static int test_dplane_shutdown_func(struct zebra_dplane_provider
*prov
,
1290 if (IS_ZEBRA_DEBUG_DPLANE
)
1291 zlog_debug("dplane provider '%s': %sshutdown",
1292 dplane_provider_get_name(prov
),
1293 early
? "early " : "");
1297 #endif /* DPLANE_TEST_PROVIDER */
1300 * Register default kernel provider
1302 static void dplane_provider_init(void)
1306 ret
= dplane_provider_register("Kernel",
1308 DPLANE_PROV_FLAGS_DEFAULT
,
1309 kernel_dplane_process_func
,
1314 zlog_err("Unable to register kernel dplane provider: %d",
1317 #if DPLANE_TEST_PROVIDER
1318 /* Optional test provider ... */
1319 ret
= dplane_provider_register("Test",
1320 DPLANE_PRIO_PRE_KERNEL
,
1321 DPLANE_PROV_FLAGS_DEFAULT
,
1322 test_dplane_process_func
,
1323 test_dplane_shutdown_func
,
1327 zlog_err("Unable to register test dplane provider: %d",
1329 #endif /* DPLANE_TEST_PROVIDER */
1332 /* Indicates zebra shutdown/exit is in progress. Some operations may be
1333 * simplified or skipped during shutdown processing.
1335 bool dplane_is_in_shutdown(void)
1337 return zdplane_info
.dg_is_shutdown
;
1341 * Early or pre-shutdown, de-init notification api. This runs pretty
1342 * early during zebra shutdown, as a signal to stop new work and prepare
1343 * for updates generated by shutdown/cleanup activity, as zebra tries to
1344 * remove everything it's responsible for.
1345 * NB: This runs in the main zebra thread context.
1347 void zebra_dplane_pre_finish(void)
1349 if (IS_ZEBRA_DEBUG_DPLANE
)
1350 zlog_debug("Zebra dataplane pre-fini called");
1352 zdplane_info
.dg_is_shutdown
= true;
1354 /* Notify provider(s) of pending shutdown */
1358 * Utility to determine whether work remains enqueued within the dplane;
1359 * used during system shutdown processing.
1361 static bool dplane_work_pending(void)
1363 struct zebra_dplane_ctx
*ctx
;
1365 /* TODO -- just checking incoming/pending work for now, must check
1370 ctx
= TAILQ_FIRST(&zdplane_info
.dg_route_ctx_q
);
1374 return (ctx
!= NULL
);
1378 * Shutdown-time intermediate callback, used to determine when all pending
1379 * in-flight updates are done. If there's still work to do, reschedules itself.
1380 * If all work is done, schedules an event to the main zebra thread for
1381 * final zebra shutdown.
1382 * This runs in the dplane pthread context.
1384 static int dplane_check_shutdown_status(struct thread
*event
)
1386 if (IS_ZEBRA_DEBUG_DPLANE
)
1387 zlog_debug("Zebra dataplane shutdown status check called");
1389 if (dplane_work_pending()) {
1390 /* Reschedule dplane check on a short timer */
1391 thread_add_timer_msec(zdplane_info
.dg_master
,
1392 dplane_check_shutdown_status
,
1394 &zdplane_info
.dg_t_shutdown_check
);
1396 /* TODO - give up and stop waiting after a short time? */
1399 /* We appear to be done - schedule a final callback event
1400 * for the zebra main pthread.
1402 thread_add_event(zebrad
.master
, zebra_finalize
, NULL
, 0, NULL
);
1409 * Shutdown, de-init api. This runs pretty late during shutdown,
1410 * after zebra has tried to free/remove/uninstall all routes during shutdown.
1411 * At this point, dplane work may still remain to be done, so we can't just
1412 * blindly terminate. If there's still work to do, we'll periodically check
1413 * and when done, we'll enqueue a task to the zebra main thread for final
1414 * termination processing.
1416 * NB: This runs in the main zebra thread context.
1418 void zebra_dplane_finish(void)
1420 if (IS_ZEBRA_DEBUG_DPLANE
)
1421 zlog_debug("Zebra dataplane fini called");
1423 thread_add_event(zdplane_info
.dg_master
,
1424 dplane_check_shutdown_status
, NULL
, 0,
1425 &zdplane_info
.dg_t_shutdown_check
);
1429 * Main dataplane pthread event loop. The thread takes new incoming work
1430 * and offers it to the first provider. It then iterates through the
1431 * providers, taking complete work from each one and offering it
1432 * to the next in order. At each step, a limited number of updates are
1433 * processed during a cycle in order to provide some fairness.
1435 * This loop through the providers is only run once, so that the dataplane
1436 * pthread can look for other pending work - such as i/o work on behalf of
/*
 * dplane_thread_loop() - a single pass of the dataplane event loop.
 *
 * event: the struct thread event being run; none of the statements visible
 *        in this block reference it.
 *
 * Visible flow:
 *  - up to dg_updates_per_cycle contexts are moved from
 *    zdplane_info.dg_route_ctx_q onto a local work list and tagged with the
 *    first provider's dp_id;
 *  - for each provider, contexts whose status is
 *    ZEBRA_DPLANE_REQUEST_SUCCESS are re-tagged with that provider's dp_id,
 *    while the others appear to be diverted to a local error list;
 *  - the work list is concatenated onto the provider's dp_ctx_in_q (under
 *    DPLANE_PROV_LOCK only when the provider is threaded), the provider's
 *    dp_fp callback is invoked, and contexts are then taken from its
 *    dp_ctx_out_q for the next provider;
 *  - finally, every context left on the error list and on the work list is
 *    handed to dg_results_cb.
 *
 * NOTE(review): the return value, the shutdown exit path and the locking
 * around dg_route_ctx_q are not visible in this block - confirm against the
 * full file.
 */
1439 static int dplane_thread_loop(struct thread
*event
)
1441 struct dplane_ctx_q work_list
;
1442 struct dplane_ctx_q error_list
;
1443 struct zebra_dplane_provider
*prov
;
1444 struct zebra_dplane_ctx
*ctx
, *tctx
;
1445 int limit
, counter
, error_counter
;
1448 /* Capture work limit per cycle */
1449 limit
= zdplane_info
.dg_updates_per_cycle
;
1451 /* Init temporary lists used to move contexts among providers */
1452 TAILQ_INIT(&work_list
);
1453 TAILQ_INIT(&error_list
);
1456 /* Check for zebra shutdown */
1457 if (!zdplane_info
.dg_run
)
1460 /* Dequeue some incoming work from zebra (if any) onto the temporary
1465 /* Locate initial registered provider */
1466 prov
= TAILQ_FIRST(&zdplane_info
.dg_providers_q
);
1468 /* Move new work from incoming list to temp list */
1469 for (counter
= 0; counter
< limit
; counter
++) {
1470 ctx
= TAILQ_FIRST(&zdplane_info
.dg_route_ctx_q
);
1472 TAILQ_REMOVE(&zdplane_info
.dg_route_ctx_q
, ctx
,
1475 ctx
->zd_provider
= prov
->dp_id
;
1477 TAILQ_INSERT_TAIL(&work_list
, ctx
, zd_q_entries
);
/* Subtract the loop's final 'counter' value from the queued-routes count;
 * presumably this equals the number of contexts just dequeued - confirm.
 */
1485 atomic_fetch_sub_explicit(&zdplane_info
.dg_routes_queued
, counter
,
1486 memory_order_relaxed
);
1488 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL
)
1489 zlog_debug("dplane: incoming new work counter: %d", counter
);
1491 /* Iterate through the registered providers, offering new incoming
1492 * work. If the provider has outgoing work in its queue, take that
1493 * work for the next provider
1497 /* At each iteration, the temporary work list has 'counter'
1501 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL
)
1502 zlog_debug("dplane enqueues %d new work to provider '%s'",
1503 counter
, dplane_provider_get_name(prov
));
1505 /* Capture current provider id in each context; check for
1508 TAILQ_FOREACH_SAFE(ctx
, &work_list
, zd_q_entries
, tctx
) {
1509 if (dplane_ctx_get_status(ctx
) ==
1510 ZEBRA_DPLANE_REQUEST_SUCCESS
) {
1511 ctx
->zd_provider
= prov
->dp_id
;
1514 * TODO -- improve error-handling: recirc
1515 * errors backwards so that providers can
1516 * 'undo' their work (if they want to)
1519 /* Move to error list; will be returned
1522 TAILQ_REMOVE(&work_list
, ctx
, zd_q_entries
);
1523 TAILQ_INSERT_TAIL(&error_list
,
1529 /* Enqueue new work to the provider */
1530 if (dplane_provider_is_threaded(prov
))
1531 DPLANE_PROV_LOCK(prov
);
1533 if (TAILQ_FIRST(&work_list
))
1534 TAILQ_CONCAT(&(prov
->dp_ctx_in_q
), &work_list
,
/* Add this batch to the provider's input counter and raise dp_in_max when
 * the new total exceeds it.
 */
1537 lval
= atomic_add_fetch_explicit(&prov
->dp_in_counter
, counter
,
1538 memory_order_relaxed
);
1539 if (lval
> prov
->dp_in_max
)
1540 atomic_store_explicit(&prov
->dp_in_max
, lval
,
1541 memory_order_relaxed
);
1543 if (dplane_provider_is_threaded(prov
))
1544 DPLANE_PROV_UNLOCK(prov
);
1546 /* Reset the temp list (though the 'concat' may have done this
1547 * already), and the counter
1549 TAILQ_INIT(&work_list
);
1552 /* Call into the provider code. Note that this is
1553 * unconditional: we offer to do work even if we don't enqueue
1556 (*prov
->dp_fp
)(prov
);
1558 /* Check for zebra shutdown */
1559 if (!zdplane_info
.dg_run
)
1562 /* Dequeue completed work from the provider */
1563 if (dplane_provider_is_threaded(prov
))
1564 DPLANE_PROV_LOCK(prov
);
/* Take contexts from the provider's output queue while 'counter' is below
 * the per-cycle limit. NOTE(review): the counter reset and increment are
 * not visible in this block - confirm.
 */
1566 while (counter
< limit
) {
1567 ctx
= TAILQ_FIRST(&(prov
->dp_ctx_out_q
));
1569 TAILQ_REMOVE(&(prov
->dp_ctx_out_q
), ctx
,
1572 TAILQ_INSERT_TAIL(&work_list
,
1579 if (dplane_provider_is_threaded(prov
))
1580 DPLANE_PROV_UNLOCK(prov
);
1582 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL
)
1583 zlog_debug("dplane dequeues %d completed work from provider %s",
1584 counter
, dplane_provider_get_name(prov
));
1586 /* Locate next provider */
1588 prov
= TAILQ_NEXT(prov
, dp_prov_link
);
1592 /* After all providers have been serviced, enqueue any completed
1593 * work and any errors back to zebra so it can process the results.
1595 if (IS_ZEBRA_DEBUG_DPLANE_DETAIL
)
1596 zlog_debug("dplane has %d completed, %d errors, for zebra main",
1597 counter
, error_counter
);
1600 * TODO -- I'd rather hand lists through the api to zebra main,
1601 * to reduce the number of lock/unlock cycles
1603 for (ctx
= TAILQ_FIRST(&error_list
); ctx
; ) {
1604 TAILQ_REMOVE(&error_list
, ctx
, zd_q_entries
);
1606 /* Call through to zebra main */
1607 (*zdplane_info
.dg_results_cb
)(ctx
);
1609 ctx
= TAILQ_FIRST(&error_list
);
/* Error contexts were returned first; now drain the remaining work list
 * through the same results callback.
 */
1613 for (ctx
= TAILQ_FIRST(&work_list
); ctx
; ) {
1614 TAILQ_REMOVE(&work_list
, ctx
, zd_q_entries
);
1616 /* Call through to zebra main */
1617 (*zdplane_info
.dg_results_cb
)(ctx
);
1619 ctx
= TAILQ_FIRST(&work_list
);
1627 * Final phase of shutdown, after all work enqueued to dplane has been
1628 * processed. This is called from the zebra main pthread context.
1630 void zebra_dplane_shutdown(void)
1632 if (IS_ZEBRA_DEBUG_DPLANE
)
1633 zlog_debug("Zebra dataplane shutdown called");
1635 /* Stop dplane thread, if it's running */
1637 zdplane_info
.dg_run
= false;
1639 THREAD_OFF(zdplane_info
.dg_t_update
);
1641 frr_pthread_stop(zdplane_info
.dg_pthread
, NULL
);
1643 /* Destroy pthread */
1644 frr_pthread_destroy(zdplane_info
.dg_pthread
);
1645 zdplane_info
.dg_pthread
= NULL
;
1646 zdplane_info
.dg_master
= NULL
;
1648 /* TODO -- Notify provider(s) of final shutdown */
1650 /* TODO -- Clean-up provider objects */
1652 /* TODO -- Clean queue(s), free memory */
1656 * Initialize the dataplane module during startup, internal/private version
1658 static void zebra_dplane_init_internal(struct zebra_t
*zebra
)
1660 memset(&zdplane_info
, 0, sizeof(zdplane_info
));
1662 pthread_mutex_init(&zdplane_info
.dg_mutex
, NULL
);
1664 TAILQ_INIT(&zdplane_info
.dg_route_ctx_q
);
1665 TAILQ_INIT(&zdplane_info
.dg_providers_q
);
1667 zdplane_info
.dg_updates_per_cycle
= DPLANE_DEFAULT_NEW_WORK
;
1669 zdplane_info
.dg_max_queued_updates
= DPLANE_DEFAULT_MAX_QUEUED
;
1671 /* Register default kernel 'provider' during init */
1672 dplane_provider_init();
/*
 * Start the dataplane pthread. This step needs to be run later than the
 * 'init' step, in case zebra has fork-ed.
 */
/*
 * zebra_dplane_start() - create and launch the dataplane pthread.
 *
 * Builds pthread attributes from frr_pthread_attr_default's start/stop
 * handlers, creates the pthread with frr_pthread_new(), caches its event
 * master in dg_master, sets dg_run, queues dplane_thread_loop() as the
 * first event on that master, and then calls frr_pthread_run().
 */
1679 void zebra_dplane_start(void)
1681 /* Start dataplane pthread */
1683 struct frr_pthread_attr pattr
= {
1684 .start
= frr_pthread_attr_default
.start
,
1685 .stop
= frr_pthread_attr_default
.stop
1688 zdplane_info
.dg_pthread
= frr_pthread_new(&pattr
, "Zebra dplane thread",
/* Cache the new pthread's event master; dplane events are scheduled on it */
1691 zdplane_info
.dg_master
= zdplane_info
.dg_pthread
->master
;
/* dg_run is the flag dplane_thread_loop() tests to detect zebra shutdown */
1693 zdplane_info
.dg_run
= true;
1695 /* Enqueue an initial event for the dataplane pthread */
1696 thread_add_event(zdplane_info
.dg_master
, dplane_thread_loop
, NULL
, 0,
1697 &zdplane_info
.dg_t_update
);
1699 frr_pthread_run(zdplane_info
.dg_pthread
, NULL
);
1703 * Initialize the dataplane module at startup; called by zebra rib_init()
1705 void zebra_dplane_init(void)
1707 zebra_dplane_init_internal(&zebrad
);