]> git.proxmox.com Git - frr.git/blob - debian/patches/frr/0003-zebra-buffering.patch
add frr zebra buffering patches
[frr.git] / debian / patches / frr / 0003-zebra-buffering.patch
1 From 2cf7651f0b1b0123dc5568ebad00ac84a9b3c348 Mon Sep 17 00:00:00 2001
2 From: Donald Sharp <sharpd@nvidia.com>
3 Date: Wed, 2 Feb 2022 13:28:42 -0500
4 Subject: [PATCH] zebra: Make netlink buffer reads resizeable when needed
5
6 Currently when the kernel sends netlink messages to FRR
7 the buffer used to receive this data is of fixed length.
8 The kernel, with certain configurations, will send
9 netlink messages that are larger than this fixed length.
10 This leads to situations where, on startup, zebra gets
11 really confused about the state of the kernel. Effectively
12 the current algorithm is this:
13
14 read up to buffer in size
15 while (data to parse)
16 get netlink message header, look at size
17 parse if you can
18
19 The problem is that there is a 32k buffer we read.
20 We get the first message that is say 1k in size,
21 subtract that 1k, leaving 31k to parse. We then
22 get the next header and notice that the length
23 of the message is 33k, which is obviously larger
24 than what we read in. FRR has no recovery mechanism
25 nor is there a way to know, a priori, the maximum
26 size the kernel will send us.
27
28 Modify FRR to look at the kernel message and see if the
29 buffer is large enough, if not, make it large enough to
30 read in the message.
31
32 This code has to be per netlink socket because of the usage
33 of pthreads. So add to `struct nlsock` the buffer and current
34 buffer length, growing it as necessary.
35
36 Fixes: #10404
37 Signed-off-by: Donald Sharp <sharpd@nvidia.com>
38 ---
39 zebra/kernel_netlink.c | 68 +++++++++++++++++++++++++-----------------
40 zebra/kernel_netlink.h | 2 +-
41 zebra/zebra_dplane.c | 4 +++
42 zebra/zebra_ns.h | 3 ++
43 4 files changed, 49 insertions(+), 28 deletions(-)
44
45 diff --git a/zebra/kernel_netlink.c b/zebra/kernel_netlink.c
46 index 3650d87e0fb..d0c86a6bb0e 100644
47 --- a/zebra/kernel_netlink.c
48 +++ b/zebra/kernel_netlink.c
49 @@ -90,8 +90,6 @@
50 */
51 #define NL_DEFAULT_BATCH_SEND_THRESHOLD (15 * NL_PKT_BUF_SIZE)
52
53 -#define NL_BATCH_RX_BUFSIZE NL_RCV_PKT_BUF_SIZE
54 -
55 static const struct message nlmsg_str[] = {{RTM_NEWROUTE, "RTM_NEWROUTE"},
56 {RTM_DELROUTE, "RTM_DELROUTE"},
57 {RTM_GETROUTE, "RTM_GETROUTE"},
58 @@ -165,8 +163,6 @@ struct hash *nlsock_hash;
59 size_t nl_batch_tx_bufsize;
60 char *nl_batch_tx_buf;
61
62 -char nl_batch_rx_buf[NL_BATCH_RX_BUFSIZE];
63 -
64 _Atomic uint32_t nl_batch_bufsize = NL_DEFAULT_BATCH_BUFSIZE;
65 _Atomic uint32_t nl_batch_send_threshold = NL_DEFAULT_BATCH_SEND_THRESHOLD;
66
67 @@ -320,6 +316,9 @@ static int netlink_socket(struct nlsock *nl, unsigned long groups,
68
69 nl->snl = snl;
70 nl->sock = sock;
71 + nl->buflen = NL_RCV_PKT_BUF_SIZE;
72 + nl->buf = XMALLOC(MTYPE_NL_BUF, nl->buflen);
73 +
74 return ret;
75 }
76
77 @@ -785,19 +784,29 @@ static ssize_t netlink_send_msg(const struct nlsock *nl, void *buf,
78 *
79 * Returns -1 on error, 0 if read would block or the number of bytes received.
80 */
81 -static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
82 - void *buf, size_t buflen)
83 +static int netlink_recv_msg(struct nlsock *nl, struct msghdr *msg)
84 {
85 struct iovec iov;
86 int status;
87
88 - iov.iov_base = buf;
89 - iov.iov_len = buflen;
90 - msg.msg_iov = &iov;
91 - msg.msg_iovlen = 1;
92 + iov.iov_base = nl->buf;
93 + iov.iov_len = nl->buflen;
94 + msg->msg_iov = &iov;
95 + msg->msg_iovlen = 1;
96
97 do {
98 - status = recvmsg(nl->sock, &msg, 0);
99 + int bytes;
100 +
101 + bytes = recv(nl->sock, NULL, 0, MSG_PEEK | MSG_TRUNC);
102 +
103 + if (bytes >= 0 && (size_t)bytes > nl->buflen) {
104 + nl->buf = XREALLOC(MTYPE_NL_BUF, nl->buf, bytes);
105 + nl->buflen = bytes;
106 + iov.iov_base = nl->buf;
107 + iov.iov_len = nl->buflen;
108 + }
109 +
110 + status = recvmsg(nl->sock, msg, 0);
111 } while (status == -1 && errno == EINTR);
112
113 if (status == -1) {
114 @@ -817,19 +826,19 @@ static int netlink_recv_msg(const struct nlsock *nl, struct msghdr msg,
115 return -1;
116 }
117
118 - if (msg.msg_namelen != sizeof(struct sockaddr_nl)) {
119 + if (msg->msg_namelen != sizeof(struct sockaddr_nl)) {
120 flog_err(EC_ZEBRA_NETLINK_LENGTH_ERROR,
121 "%s sender address length error: length %d", nl->name,
122 - msg.msg_namelen);
123 + msg->msg_namelen);
124 return -1;
125 }
126
127 if (IS_ZEBRA_DEBUG_KERNEL_MSGDUMP_RECV) {
128 zlog_debug("%s: << netlink message dump [recv]", __func__);
129 #ifdef NETLINK_DEBUG
130 - nl_dump(buf, status);
131 + nl_dump(nl->buf, status);
132 #else
133 - zlog_hexdump(buf, status);
134 + zlog_hexdump(nl->buf, status);
135 #endif /* NETLINK_DEBUG */
136 }
137
138 @@ -932,8 +941,7 @@ static int netlink_parse_error(const struct nlsock *nl, struct nlmsghdr *h,
139 * the filter.
140 */
141 int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
142 - const struct nlsock *nl,
143 - const struct zebra_dplane_info *zns,
144 + struct nlsock *nl, const struct zebra_dplane_info *zns,
145 int count, bool startup)
146 {
147 int status;
148 @@ -942,7 +950,6 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
149 int read_in = 0;
150
151 while (1) {
152 - char buf[NL_RCV_PKT_BUF_SIZE];
153 struct sockaddr_nl snl;
154 struct msghdr msg = {.msg_name = (void *)&snl,
155 .msg_namelen = sizeof(snl)};
156 @@ -951,14 +958,14 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
157 if (count && read_in >= count)
158 return 0;
159
160 - status = netlink_recv_msg(nl, msg, buf, sizeof(buf));
161 + status = netlink_recv_msg(nl, &msg);
162 if (status == -1)
163 return -1;
164 else if (status == 0)
165 break;
166
167 read_in++;
168 - for (h = (struct nlmsghdr *)buf;
169 + for (h = (struct nlmsghdr *)nl->buf;
170 (status >= 0 && NLMSG_OK(h, (unsigned int)status));
171 h = NLMSG_NEXT(h, status)) {
172 /* Finish of reading. */
173 @@ -1034,10 +1041,10 @@ int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
174 * startup -> Are we reading in under startup conditions
175 * This is passed through eventually to filter.
176 */
177 -static int
178 -netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t, int startup),
179 - struct nlmsghdr *n, const struct zebra_dplane_info *dp_info,
180 - bool startup)
181 +static int netlink_talk_info(int (*filter)(struct nlmsghdr *, ns_id_t,
182 + int startup),
183 + struct nlmsghdr *n,
184 + struct zebra_dplane_info *dp_info, bool startup)
185 {
186 struct nlsock *nl;
187
188 @@ -1127,8 +1134,7 @@ static int nl_batch_read_resp(struct nl_batch *bth)
189 * message at a time.
190 */
191 while (true) {
192 - status = netlink_recv_msg(nl, msg, nl_batch_rx_buf,
193 - sizeof(nl_batch_rx_buf));
194 + status = netlink_recv_msg(nl, &msg);
195 /*
196 * status == -1 is a full on failure somewhere
197 * since we don't know where the problem happened
198 @@ -1149,7 +1155,7 @@ static int nl_batch_read_resp(struct nl_batch *bth)
199 return status;
200 }
201
202 - h = (struct nlmsghdr *)nl_batch_rx_buf;
203 + h = (struct nlmsghdr *)nl->buf;
204 ignore_msg = false;
205 seq = h->nlmsg_seq;
206 /*
207 @@ -1708,18 +1714,24 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
208 hash_release(nlsock_hash, &zns->netlink);
209 close(zns->netlink.sock);
210 zns->netlink.sock = -1;
211 + XFREE(MTYPE_NL_BUF, zns->netlink.buf);
212 + zns->netlink.buflen = 0;
213 }
214
215 if (zns->netlink_cmd.sock >= 0) {
216 hash_release(nlsock_hash, &zns->netlink_cmd);
217 close(zns->netlink_cmd.sock);
218 zns->netlink_cmd.sock = -1;
219 + XFREE(MTYPE_NL_BUF, zns->netlink_cmd.buf);
220 + zns->netlink_cmd.buflen = 0;
221 }
222
223 if (zns->netlink_dplane_in.sock >= 0) {
224 hash_release(nlsock_hash, &zns->netlink_dplane_in);
225 close(zns->netlink_dplane_in.sock);
226 zns->netlink_dplane_in.sock = -1;
227 + XFREE(MTYPE_NL_BUF, zns->netlink_dplane_in.buf);
228 + zns->netlink_dplane_in.buflen = 0;
229 }
230
231 /* During zebra shutdown, we need to leave the dataplane socket
232 @@ -1730,6 +1742,8 @@ void kernel_terminate(struct zebra_ns *zns, bool complete)
233 hash_release(nlsock_hash, &zns->netlink_dplane_out);
234 close(zns->netlink_dplane_out.sock);
235 zns->netlink_dplane_out.sock = -1;
236 + XFREE(MTYPE_NL_BUF, zns->netlink_dplane_out.buf);
237 + zns->netlink_dplane_out.buflen = 0;
238 }
239
240 hash_free(nlsock_hash);
241 diff --git a/zebra/kernel_netlink.h b/zebra/kernel_netlink.h
242 index ae88f3372b1..9421ea1c611 100644
243 --- a/zebra/kernel_netlink.h
244 +++ b/zebra/kernel_netlink.h
245 @@ -96,7 +96,7 @@ extern const char *nl_family_to_str(uint8_t family);
246 extern const char *nl_rttype_to_str(uint8_t rttype);
247
248 extern int netlink_parse_info(int (*filter)(struct nlmsghdr *, ns_id_t, int),
249 - const struct nlsock *nl,
250 + struct nlsock *nl,
251 const struct zebra_dplane_info *dp_info,
252 int count, bool startup);
253 extern int netlink_talk_filter(struct nlmsghdr *h, ns_id_t ns, int startup);
254 diff --git a/zebra/zebra_dplane.c b/zebra/zebra_dplane.c
255 index 05297e143b5..4d32e54d1fb 100644
256 --- a/zebra/zebra_dplane.c
257 +++ b/zebra/zebra_dplane.c
258 @@ -1469,7 +1469,11 @@ int dplane_ctx_get_ns_sock(const struct zebra_dplane_ctx *ctx)
259 {
260 DPLANE_CTX_VALID(ctx);
261
262 +#ifdef HAVE_NETLINK
263 return ctx->zd_ns_info.sock;
264 +#else
265 + return -1;
266 +#endif
267 }
268
269 /* Accessors for nexthop information */
270 diff --git a/zebra/zebra_ns.h b/zebra/zebra_ns.h
271 index 0519e1d5b33..7a0ffbc1ee6 100644
272 --- a/zebra/zebra_ns.h
273 +++ b/zebra/zebra_ns.h
274 @@ -39,6 +39,9 @@ struct nlsock {
275 int seq;
276 struct sockaddr_nl snl;
277 char name[64];
278 +
279 + uint8_t *buf;
280 + size_t buflen;
281 };
282 #endif
283