From 62b8bb7a1776ea98e5874a4a057617e335a7f77c Mon Sep 17 00:00:00 2001 From: Mark Stapp Date: Mon, 12 Nov 2018 15:57:03 -0500 Subject: [PATCH] zebra: separate netlink socket for dataplane Use a separate netlink socket for the dataplane's updates, to avoid races between the dataplane pthread and the zebra main pthread. Revise zebra shutdown so that the dataplane netlink socket is cleaned-up later, after all shutdown-time dataplane work has been done. Signed-off-by: Mark Stapp --- zebra/kernel_netlink.c | 65 +++++++++++++++++++++++++++++++++--------- zebra/kernel_socket.c | 2 +- zebra/main.c | 5 +++- zebra/rt.h | 2 +- zebra/zebra_dplane.c | 27 ++++++++++++++++-- zebra/zebra_ns.c | 39 +++++++++++++++++++++---- zebra/zebra_ns.h | 8 ++++-- 7 files changed, 120 insertions(+), 28 deletions(-) diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c index 0772c59b9..afc398585 100644 --- a/zebra/kernel_netlink.c +++ b/zebra/kernel_netlink.c @@ -396,7 +396,7 @@ static int kernel_read(struct thread *thread) /* * Filter out messages from self that occur on listener socket, - * caused by our actions on the command socket + * caused by our actions on the command socket(s) * * When we add new Netlink message types we probably * do not need to add them here as that we are filtering @@ -407,7 +407,7 @@ static int kernel_read(struct thread *thread) * so that we only had to write one way to handle incoming * address add/delete changes. */ -static void netlink_install_filter(int sock, __u32 pid) +static void netlink_install_filter(int sock, __u32 pid, __u32 dplane_pid) { /* * BPF_JUMP instructions and where you jump to are based upon @@ -418,7 +418,8 @@ static void netlink_install_filter(int sock, __u32 pid) struct sock_filter filter[] = { /* * Logic: - * if (nlmsg_pid == pid) { + * if (nlmsg_pid == pid || + * nlmsg_pid == dplane_pid) { * if (the incoming nlmsg_type == * RTM_NEWADDR | RTM_DELADDR) * keep this message @@ -435,26 +436,30 @@ static void netlink_install_filter(int sock, __u32 pid) /* * 1: Compare to pid */ - BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 0, 4), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(pid), 1, 0), /* - * 2: Load the nlmsg_type into BPF register + * 2: Compare to dplane pid + */ + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htonl(dplane_pid), 0, 4), + /* + * 3: Load the nlmsg_type into BPF register */ BPF_STMT(BPF_LD | BPF_ABS | BPF_H, offsetof(struct nlmsghdr, nlmsg_type)), /* - * 3: Compare to RTM_NEWADDR + * 4: Compare to RTM_NEWADDR */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_NEWADDR), 2, 0), /* - * 4: Compare to RTM_DELADDR + * 5: Compare to RTM_DELADDR */ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, htons(RTM_DELADDR), 1, 0), /* - * 5: This is the end state of we want to skip the + * 6: This is the end state of we want to skip the * message */ BPF_STMT(BPF_RET | BPF_K, 0), - /* 6: This is the end state of we want to keep + /* 7: This is the end state of we want to keep * the message */ BPF_STMT(BPF_RET | BPF_K, 0xffff), @@ -1102,6 +1107,15 @@ void kernel_init(struct zebra_ns *zns) exit(-1); } + snprintf(zns->netlink_dplane.name, sizeof(zns->netlink_dplane.name), + "netlink-dp (NS %u)", zns->ns_id); + zns->netlink_dplane.sock = -1; + if (netlink_socket(&zns->netlink_dplane, 0, zns->ns_id) < 0) { + zlog_err("Failure to create %s socket", + zns->netlink_dplane.name); + exit(-1); + } + /* * SOL_NETLINK is not available on all platforms yet * apparently. It's in bits/socket.h which I am not @@ -1110,14 +1124,22 @@ void kernel_init(struct zebra_ns *zns) #if defined SOL_NETLINK /* * Let's tell the kernel that we want to receive extended - * ACKS over our command socket + * ACKS over our command socket(s) */ one = 1; ret = setsockopt(zns->netlink_cmd.sock, SOL_NETLINK, NETLINK_EXT_ACK, &one, sizeof(one)); if (ret < 0) - zlog_notice("Registration for extended ACK failed : %d %s", + zlog_notice("Registration for extended cmd ACK failed : %d %s", + errno, safe_strerror(errno)); + + one = 1; + ret = setsockopt(zns->netlink_dplane.sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + + if (ret < 0) + zlog_notice("Registration for extended dp ACK failed : %d %s", errno, safe_strerror(errno)); #endif @@ -1130,12 +1152,18 @@ void kernel_init(struct zebra_ns *zns) zlog_err("Can't set %s socket error: %s(%d)", zns->netlink_cmd.name, safe_strerror(errno), errno); + if (fcntl(zns->netlink_dplane.sock, F_SETFL, O_NONBLOCK) < 0) + zlog_err("Can't set %s socket error: %s(%d)", + zns->netlink_dplane.name, safe_strerror(errno), errno); + /* Set receive buffer size if it's set from command line */ if (nl_rcvbufsize) netlink_recvbuf(&zns->netlink, nl_rcvbufsize); netlink_install_filter(zns->netlink.sock, - zns->netlink_cmd.snl.nl_pid); + zns->netlink_cmd.snl.nl_pid, + zns->netlink_dplane.snl.nl_pid); + zns->t_netlink = NULL; thread_add_read(zebrad.master, kernel_read, zns, @@ -1144,7 +1172,7 @@ void kernel_init(struct zebra_ns *zns) rt_netlink_init(); } -void kernel_terminate(struct zebra_ns *zns) +void kernel_terminate(struct zebra_ns *zns, bool complete) { THREAD_READ_OFF(zns->t_netlink); @@ -1157,6 +1185,15 @@ void kernel_terminate(struct zebra_ns *zns) close(zns->netlink_cmd.sock); zns->netlink_cmd.sock = -1; } -} + /* During zebra shutdown, we need to leave the dataplane socket + * around until all work is done. + */ + if (complete) { + if (zns->netlink_dplane.sock >= 0) { + close(zns->netlink_dplane.sock); + zns->netlink_dplane.sock = -1; + } + } +} #endif /* HAVE_NETLINK */ diff --git a/zebra/kernel_socket.c b/zebra/kernel_socket.c index 7af3083fd..ff739ae79 100644 --- a/zebra/kernel_socket.c +++ b/zebra/kernel_socket.c @@ -1415,7 +1415,7 @@ void kernel_init(struct zebra_ns *zns) routing_socket(zns); } -void kernel_terminate(struct zebra_ns *zns) +void kernel_terminate(struct zebra_ns *zns, bool complete) { return; } diff --git a/zebra/main.c b/zebra/main.c index e5160c1a5..5b5ee8259 100644 --- a/zebra/main.c +++ b/zebra/main.c @@ -172,7 +172,7 @@ static void sigint(void) work_queue_free_and_null(&zebrad.lsp_process_q); vrf_terminate(); - ns_walk_func(zebra_ns_disabled); + ns_walk_func(zebra_ns_early_shutdown); zebra_ns_notify_close(); access_list_reset(); @@ -196,6 +196,9 @@ int zebra_finalize(struct thread *dummy) { zlog_info("Zebra final shutdown"); + /* Final shutdown of ns resources */ + ns_walk_func(zebra_ns_final_shutdown); + /* Stop dplane thread and finish any cleanup */ zebra_dplane_shutdown(); diff --git a/zebra/rt.h b/zebra/rt.h index 70ac6f635..0317dc85b 100644 --- a/zebra/rt.h +++ b/zebra/rt.h @@ -86,7 +86,7 @@ extern int kernel_del_neigh(struct interface *ifp, struct ipaddr *ip); */ extern void interface_list(struct zebra_ns *zns); extern void kernel_init(struct zebra_ns *zns); -extern void kernel_terminate(struct zebra_ns *zns); +extern void kernel_terminate(struct zebra_ns *zns, bool complete); extern void macfdb_read(struct zebra_ns *zns); extern void macfdb_read_for_bridge(struct zebra_ns *zns, struct interface *ifp, struct interface *br_if); diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c index 95df24382..ba0f1b41a 100644 --- a/zebra/zebra_dplane.c +++ b/zebra/zebra_dplane.c @@ -249,6 +249,8 @@ static struct zebra_dplane_globals { /* Prototypes */ static int dplane_thread_loop(struct thread *event); +static void dplane_info_from_zns(struct zebra_dplane_info *ns_info, + struct zebra_ns *zns); /* * Public APIs @@ -706,16 +708,17 @@ static int dplane_ctx_route_init(struct zebra_dplane_ctx *ctx, zvrf = vrf_info_lookup(re->vrf_id); zns = zvrf->zns; - zebra_dplane_info_from_zns(&(ctx->zd_ns_info), zns, true /*is_cmd*/); + /* Internal copy helper */ + dplane_info_from_zns(&(ctx->zd_ns_info), zns); #if defined(HAVE_NETLINK) /* Increment message counter after copying to context struct - may need * two messages in some 'update' cases. */ if (op == DPLANE_OP_ROUTE_UPDATE) - zns->netlink_cmd.seq += 2; + zns->netlink_dplane.seq += 2; else - zns->netlink_cmd.seq++; + zns->netlink_dplane.seq++; #endif /* NETLINK*/ /* Copy nexthops; recursive info is included too */ @@ -1163,11 +1166,29 @@ void dplane_provider_enqueue_out_ctx(struct zebra_dplane_provider *prov, memory_order_relaxed); } +/* + * Accessor for provider object + */ bool dplane_provider_is_threaded(const struct zebra_dplane_provider *prov) { return (prov->dp_flags & DPLANE_PROV_FLAG_THREADED); } +/* + * Internal helper that copies information from a zebra ns object; this is + * called in the zebra main pthread context as part of dplane ctx init. + */ +static void dplane_info_from_zns(struct zebra_dplane_info *ns_info, + struct zebra_ns *zns) +{ + ns_info->ns_id = zns->ns_id; + +#if defined(HAVE_NETLINK) + ns_info->is_cmd = true; + ns_info->nls = zns->netlink_dplane; +#endif /* NETLINK */ +} + /* * Provider api to signal that work/events are available * for the dataplane pthread. diff --git a/zebra/zebra_ns.c b/zebra/zebra_ns.c index e65f23dc8..965c8c206 100644 --- a/zebra/zebra_ns.c +++ b/zebra/zebra_ns.c @@ -47,6 +47,7 @@ DEFINE_MTYPE(ZEBRA, ZEBRA_NS, "Zebra Name Space") static struct zebra_ns *dzns; static int logicalrouter_config_write(struct vty *vty); +static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete); struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id) { @@ -111,7 +112,7 @@ int zebra_ns_disabled(struct ns *ns) zlog_info("ZNS %s with id %u (disabled)", ns->name, ns->ns_id); if (!zns) return 0; - return zebra_ns_disable(ns->ns_id, (void **)&zns); + return zebra_ns_disable_internal(zns, true); } /* Do global enable actions - open sockets, read kernel config etc. */ @@ -135,17 +136,18 @@ int zebra_ns_enable(ns_id_t ns_id, void **info) return 0; } -int zebra_ns_disable(ns_id_t ns_id, void **info) +/* Common handler for ns disable - this can be called during ns config, + * or during zebra shutdown. + */ +static int zebra_ns_disable_internal(struct zebra_ns *zns, bool complete) { - struct zebra_ns *zns = (struct zebra_ns *)(*info); - route_table_finish(zns->if_table); zebra_vxlan_ns_disable(zns); #if defined(HAVE_RTADV) rtadv_terminate(zns); #endif - kernel_terminate(zns); + kernel_terminate(zns, complete); table_manager_disable(zns->ns_id); @@ -154,6 +156,33 @@ int zebra_ns_disable(ns_id_t ns_id, void **info) return 0; } +/* During zebra shutdown, do partial cleanup while the async dataplane + * is still running. + */ +int zebra_ns_early_shutdown(struct ns *ns) +{ + struct zebra_ns *zns = ns->info; + + if (zns == NULL) + return 0; + + return zebra_ns_disable_internal(zns, false); +} + +/* During zebra shutdown, do final cleanup + * after all dataplane work is complete. + */ +int zebra_ns_final_shutdown(struct ns *ns) +{ + struct zebra_ns *zns = ns->info; + + if (zns == NULL) + return 0; + + kernel_terminate(zns, true); + + return 0; +} int zebra_ns_init(void) { diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h index c1a9b41b8..d3592f8f3 100644 --- a/zebra/zebra_ns.h +++ b/zebra/zebra_ns.h @@ -46,8 +46,9 @@ struct zebra_ns { ns_id_t ns_id; #ifdef HAVE_NETLINK - struct nlsock netlink; /* kernel messages */ - struct nlsock netlink_cmd; /* command channel */ + struct nlsock netlink; /* kernel messages */ + struct nlsock netlink_cmd; /* command channel */ + struct nlsock netlink_dplane; /* dataplane channel */ struct thread *t_netlink; #endif @@ -62,7 +63,8 @@ struct zebra_ns *zebra_ns_lookup(ns_id_t ns_id); int zebra_ns_init(void); int zebra_ns_enable(ns_id_t ns_id, void **info); int zebra_ns_disabled(struct ns *ns); -int zebra_ns_disable(ns_id_t ns_id, void **info); +int zebra_ns_early_shutdown(struct ns *ns); +int zebra_ns_final_shutdown(struct ns *ns); int zebra_ns_config_write(struct vty *vty, struct ns *ns); -- 2.39.2