1 From 2cf7651f0b1b0123dc5568ebad00ac84a9b3c348 Mon Sep 17 00:00:00 2001
2 From: Donald Sharp <sharpd@nvidia.com>
3 Date: Wed, 2 Feb 2022 13:28:42 -0500
4 Subject: [PATCH] zebra: Make netlink buffer reads resizeable when needed
6 Currently when the kernel sends netlink messages to FRR
7 the buffers used to receive this data are of fixed length.
8 The kernel, with certain configurations, will send
9 netlink messages that are larger than this fixed length.
10 This leads to situations where, on startup, zebra gets
11 really confused about the state of the kernel. Effectively
12 the current algorithm is this:
14 read up to buffer in size
16 get netlink message header, look at size
19 The problem is that there is a 32k buffer we read.
20 We get the first message that is, say, 1k in size, and
21 subtract that 1k, leaving 31k left to parse. We then
22 get the next header and notice that the length
23 of the message is 33k, which is obviously larger
24 than what we read in. FRR has no recovery mechanism,
25 nor is there a way to know, a priori, the maximum
26 size of message the kernel will send us.
28 Modify FRR to look at the kernel message and see if the
29 buffer is large enough, if not, make it large enough to
32 This code has to be per netlink socket because of the usage
33 of pthreads. So add to `struct nlsock` the buffer and current
34 buffer length, growing the buffer as necessary.
37 Signed-off-by: Donald Sharp <sharpd@nvidia.com>
39 zebra/kernel_netlink.c | 68 +++++++++++++++++++++++++-----------------
40 zebra/kernel_netlink.h | 2 +-
41 zebra/zebra_dplane.c | 4 +++
42 zebra/zebra_ns.h | 3 ++
43 4 files changed, 49 insertions(+), 28 deletions(-)
45 diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
46 index 3650d87e0fb..d0c86a6bb0e 100644
47 --- a/zebra/kernel_netlink.c
48 +++ b/zebra/kernel_netlink.c
51 #define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
53 -#define NL_BATCH_RX_BUFSIZE NL_RCV_PKT_BUF_SIZE
55 static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
56 {RTM_DELROUTE, "RTM_DELROUTE"},
57 {RTM_GETROUTE, "RTM_GETROUTE"},
58 @@ -165,8 +163,6 @@ struct hash *nlsock_hash;
59 size_t nl_batch_tx_bufsize;
60 char *nl_batch_tx_buf;
62 -char nl_batch_rx_buf[NL_BATCH_RX_BUFSIZE];
64 _Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE;
65 _Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
67 @@ -320,6 +316,9 @@ static int netlink_socket(struct nlsock *nl, unsigned long groups,
71 + nl->buflen = NL_RCV_PKT_BUF_SIZE;
72 + nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen);
77 @@ -785,19 +784,29 @@ static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf,
79 * Returns -1 on error, 0 if read would block or the number of bytes received.
81 -static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
82 - void *buf, size_t buflen)
83 +static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
89 - iov.iov_len = buflen;
92 + iov.iov_base = nl->buf;
93 + iov.iov_len = nl->buflen;
94 + msg->msg_iov = &iov;
95 + msg->msg_iovlen = 1;
98 - status = recvmsg(nl->sock, &msg, 0);
101 + bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC);
103 + if (bytes >= 0 && (size_t)bytes > nl->buflen) {
104 + nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes);
105 + nl->buflen = bytes;
106 + iov.iov_base = nl->buf;
107 + iov.iov_len = nl->buflen;
110 + status = recvmsg(nl->sock, msg, 0);
111 } while (status == -1 && errno == EINTR);
114 @@ -817,19 +826,19 @@ static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
118 - if (msg.msg_namelen != sizeof(struct sockaddr_nl)) {
119 + if (msg->msg_namelen != sizeof(struct sockaddr_nl)) {
120 flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
121 "%s sender address length error: length %d", nl->name,
127 if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
128 zlog_debug("%s: << netlink message dump [recv]", __func__);
130 - nl_dump(buf, status);
131 + nl_dump(nl->buf, status);
133 - zlog_hexdump(buf, status);
134 + zlog_hexdump(nl->buf, status);
135 #endif /* NETLINK_DEBUG */
138 @@ -932,8 +941,7 @@ static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h,
141 int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
142 - const struct nlsock *nl,
143 - const struct zebra_dplane_info *zns,
144 + struct nlsock *nl, const struct zebra_dplane_info *zns,
145 int count, bool startup)
148 @@ -942,7 +950,6 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
152 - char buf[NL_RCV_PKT_BUF_SIZE];
153 struct sockaddr_nl snl;
154 struct msghdr msg = {.msg_name = (void *)&snl,
155 .msg_namelen = sizeof(snl)};
156 @@ -951,14 +958,14 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
157 if (count && read_in >= count)
160 - status = netlink_recv_msg(nl, msg, buf, sizeof(buf));
161 + status = netlink_recv_msg(nl, &msg);
164 else if (status == 0)
168 - for (h = (struct nlmsghdr *)buf;
169 + for (h = (struct nlmsghdr *)nl->buf;
170 (status >= 0 && NLMSG_OK(h, (unsigned int)status));
171 h = NLMSG_NEXT(h, status)) {
172 /* Finish of reading. */
173 @@ -1034,10 +1041,10 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
174 * startup -> Are we reading in under startup conditions
175 * This is passed through eventually to filter.
178 -netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
179 - struct nlmsghdr *n, const struct zebra_dplane_info *dp_info,
181 +static int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t,
183 + struct nlmsghdr *n,
184 + struct zebra_dplane_info *dp_info, bool startup)
188 @@ -1127,8 +1134,7 @@ static int nl_batch_read_resp(struct nl_batch *bth)
192 - status = netlink_recv_msg(nl, msg, nl_batch_rx_buf,
193 - sizeof(nl_batch_rx_buf));
194 + status = netlink_recv_msg(nl, &msg);
196 * status == -1 is a full on failure somewhere
197 * since we don't know where the problem happened
198 @@ -1149,7 +1155,7 @@ static int nl_batch_read_resp(struct nl_batch *bth)
202 - h = (struct nlmsghdr *)nl_batch_rx_buf;
203 + h = (struct nlmsghdr *)nl->buf;
207 @@ -1708,18 +1714,24 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
208 hash_release(nlsock_hash, &zns->netlink);
209 close(zns->netlink.sock);
210 zns->netlink.sock = -1;
211 + XFREE(MTYPE_NL_BUF, zns->netlink.buf);
212 + zns->netlink.buflen = 0;
215 if (zns->netlink_cmd.sock >= 0) {
216 hash_release(nlsock_hash, &zns->netlink_cmd);
217 close(zns->netlink_cmd.sock);
218 zns->netlink_cmd.sock = -1;
219 + XFREE(MTYPE_NL_BUF, zns->netlink_cmd.buf);
220 + zns->netlink_cmd.buflen = 0;
223 if (zns->netlink_dplane_in.sock >= 0) {
224 hash_release(nlsock_hash, &zns->netlink_dplane_in);
225 close(zns->netlink_dplane_in.sock);
226 zns->netlink_dplane_in.sock = -1;
227 + XFREE(MTYPE_NL_BUF, zns->netlink_dplane_in.buf);
228 + zns->netlink_dplane_in.buflen = 0;
231 /* During zebra shutdown, we need to leave the dataplane socket
232 @@ -1730,6 +1742,8 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
233 hash_release(nlsock_hash, &zns->netlink_dplane_out);
234 close(zns->netlink_dplane_out.sock);
235 zns->netlink_dplane_out.sock = -1;
236 + XFREE(MTYPE_NL_BUF, zns->netlink_dplane_out.buf);
237 + zns->netlink_dplane_out.buflen = 0;
240 hash_free(nlsock_hash);
241 diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h
242 index ae88f3372b1..9421ea1c611 100644
243 --- a/zebra/kernel_netlink.h
244 +++ b/zebra/kernel_netlink.h
245 @@ -96,7 +96,7 @@ extern const char *nl_family_to_str(uint8_t family);
246 extern const char *nl_rttype_to_str(uint8_t rttype);
248 extern int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
249 - const struct nlsock *nl,
251 const struct zebra_dplane_info *dp_info,
252 int count, bool startup);
253 extern int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns, int startup);
254 diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
255 index 05297e143b5..4d32e54d1fb 100644
256 --- a/zebra/zebra_dplane.c
257 +++ b/zebra/zebra_dplane.c
258 @@ -1469,7 +1469,11 @@ int dplane_ctx_get_ns_sock(const struct zebra_dplane_ctx *ctx)
260 DPLANE_CTX_VALID(ctx);
263 return ctx->zd_ns_info.sock;
269 /* Accessors for nexthop information */
270 diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h
271 index 0519e1d5b33..7a0ffbc1ee6 100644
272 --- a/zebra/zebra_ns.h
273 +++ b/zebra/zebra_ns.h
274 @@ -39,6 +39,9 @@ struct nlsock {
276 struct sockaddr_nl snl;