]> git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/dpdk/drivers/net/tap/tap_netlink.c
update sources to ceph Nautilus 14.2.1
[ceph.git] / ceph / src / seastar / dpdk / drivers / net / tap / tap_netlink.c
1 /*-
2 * BSD LICENSE
3 *
4 * Copyright 2017 6WIND S.A.
5 * Copyright 2017 Mellanox.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 #include <errno.h>
35 #include <inttypes.h>
36 #include <linux/netlink.h>
37 #include <string.h>
38 #include <sys/socket.h>
39 #include <unistd.h>
40
41 #include <rte_malloc.h>
42 #include <tap_netlink.h>
43 #include <rte_random.h>
44
/*
 * Sizes of the buffers involved in netlink exchanges with the kernel.
 */

/*
 * Size of the buffer to receive kernel messages.
 * Must be quite large to support dumping a huge list of QDISC or filters.
 */
#define BUF_SIZE (32 * 1024)

/* Send buffer size for the netlink socket (SO_SNDBUF). */
#define SNDBUF_SIZE 32768

/* Receive buffer size for the netlink socket (SO_RCVBUF). */
#define RCVBUF_SIZE 32768
49
/*
 * One pending nested attribute, kept as an element of a singly linked stack
 * (most recently opened attribute first).
 */
struct nested_tail {
	/* Start of the nested attribute inside the message being built. */
	struct rtattr *tail;
	/* Enclosing nested attribute record, or NULL for the outermost one. */
	struct nested_tail *prev;
};
54
55 /**
56 * Initialize a netlink socket for communicating with the kernel.
57 *
58 * @param nl_groups
59 * Set it to a netlink group value (e.g. RTMGRP_LINK) to receive messages for
60 * specific netlink multicast groups. Otherwise, no subscription will be made.
61 *
62 * @return
63 * netlink socket file descriptor on success, -1 otherwise.
64 */
65 int
66 nl_init(uint32_t nl_groups)
67 {
68 int fd, sndbuf_size = SNDBUF_SIZE, rcvbuf_size = RCVBUF_SIZE;
69 struct sockaddr_nl local = {
70 .nl_family = AF_NETLINK,
71 .nl_groups = nl_groups,
72 };
73
74 fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
75 if (fd < 0) {
76 RTE_LOG(ERR, PMD, "Unable to create a netlink socket\n");
77 return -1;
78 }
79 if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf_size, sizeof(int))) {
80 RTE_LOG(ERR, PMD, "Unable to set socket buffer send size\n");
81 return -1;
82 }
83 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf_size, sizeof(int))) {
84 RTE_LOG(ERR, PMD, "Unable to set socket buffer receive size\n");
85 return -1;
86 }
87 if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
88 RTE_LOG(ERR, PMD, "Unable to bind to the netlink socket\n");
89 return -1;
90 }
91 return fd;
92 }
93
94 /**
95 * Clean up a netlink socket once all communicating with the kernel is finished.
96 *
97 * @param[in] nlsk_fd
98 * The netlink socket file descriptor used for communication.
99 *
100 * @return
101 * 0 on success, -1 otherwise.
102 */
103 int
104 nl_final(int nlsk_fd)
105 {
106 if (close(nlsk_fd)) {
107 RTE_LOG(ERR, PMD, "Failed to close netlink socket: %s (%d)\n",
108 strerror(errno), errno);
109 return -1;
110 }
111 return 0;
112 }
113
114 /**
115 * Send a message to the kernel on the netlink socket.
116 *
117 * @param[in] nlsk_fd
118 * The netlink socket file descriptor used for communication.
119 * @param[in] nh
120 * The netlink message send to the kernel.
121 *
122 * @return
123 * the number of sent bytes on success, -1 otherwise.
124 */
125 int
126 nl_send(int nlsk_fd, struct nlmsghdr *nh)
127 {
128 /* man 7 netlink EXAMPLE */
129 struct sockaddr_nl sa = {
130 .nl_family = AF_NETLINK,
131 };
132 struct iovec iov = {
133 .iov_base = nh,
134 .iov_len = nh->nlmsg_len,
135 };
136 struct msghdr msg = {
137 .msg_name = &sa,
138 .msg_namelen = sizeof(sa),
139 .msg_iov = &iov,
140 .msg_iovlen = 1,
141 };
142 int send_bytes;
143
144 nh->nlmsg_pid = 0; /* communication with the kernel uses pid 0 */
145 nh->nlmsg_seq = (uint32_t)rte_rand();
146 send_bytes = sendmsg(nlsk_fd, &msg, 0);
147 if (send_bytes < 0) {
148 RTE_LOG(ERR, PMD, "Failed to send netlink message: %s (%d)\n",
149 strerror(errno), errno);
150 return -1;
151 }
152 return send_bytes;
153 }
154
/**
 * Wait for the kernel's answer to a previous nl_send() when only the
 * acknowledgment matters.
 *
 * This is nl_recv() invoked without a callback, so received messages are
 * inspected for an ACK or an error and otherwise discarded.
 *
 * @param[in] nlsk_fd
 *   The netlink socket file descriptor used for communication.
 *
 * @return
 *   0 on success, -1 otherwise with errno set.
 */
int
nl_recv_ack(int nlsk_fd)
{
	int status = nl_recv(nlsk_fd, NULL, NULL);

	return status;
}
169
170 /**
171 * Receive a message from the kernel on the netlink socket, following an
172 * nl_send().
173 *
174 * @param[in] nlsk_fd
175 * The netlink socket file descriptor used for communication.
176 * @param[in] cb
177 * The callback function to call for each netlink message received.
178 * @param[in, out] arg
179 * Custom arguments for the callback.
180 *
181 * @return
182 * 0 on success, -1 otherwise with errno set.
183 */
184 int
185 nl_recv(int nlsk_fd, int (*cb)(struct nlmsghdr *, void *arg), void *arg)
186 {
187 /* man 7 netlink EXAMPLE */
188 struct sockaddr_nl sa;
189 char buf[BUF_SIZE];
190 struct iovec iov = {
191 .iov_base = buf,
192 .iov_len = sizeof(buf),
193 };
194 struct msghdr msg = {
195 .msg_name = &sa,
196 .msg_namelen = sizeof(sa),
197 .msg_iov = &iov,
198 /* One message at a time */
199 .msg_iovlen = 1,
200 };
201 int multipart = 0;
202 int ret = 0;
203
204 do {
205 struct nlmsghdr *nh;
206 int recv_bytes = 0;
207
208 recv_bytes = recvmsg(nlsk_fd, &msg, 0);
209 if (recv_bytes < 0)
210 return -1;
211 for (nh = (struct nlmsghdr *)buf;
212 NLMSG_OK(nh, (unsigned int)recv_bytes);
213 nh = NLMSG_NEXT(nh, recv_bytes)) {
214 if (nh->nlmsg_type == NLMSG_ERROR) {
215 struct nlmsgerr *err_data = NLMSG_DATA(nh);
216
217 if (err_data->error < 0) {
218 errno = -err_data->error;
219 return -1;
220 }
221 /* Ack message. */
222 return 0;
223 }
224 /* Multi-part msgs and their trailing DONE message. */
225 if (nh->nlmsg_flags & NLM_F_MULTI) {
226 if (nh->nlmsg_type == NLMSG_DONE)
227 return 0;
228 multipart = 1;
229 }
230 if (cb)
231 ret = cb(nh, arg);
232 }
233 } while (multipart);
234 return ret;
235 }
236
/**
 * Append a netlink attribute to a message.
 *
 * The attribute header and payload are written at the aligned end of the
 * message and nh->nlmsg_len is grown accordingly. No bounds check is done
 * here: the caller must provide a buffer with enough room after the message.
 *
 * @param[in, out] nh
 *   The netlink message being built, to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data_len
 *   The length of the data to append. May be 0 for an attribute without
 *   payload (e.g. the header of a nested attribute).
 * @param[in] data
 *   The data to append. Ignored, and allowed to be NULL, when data_len is 0.
 */
void
nlattr_add(struct nlmsghdr *nh, unsigned short type,
	   unsigned int data_len, const void *data)
{
	/* see man 3 rtnetlink */
	struct rtattr *rta;

	rta = (struct rtattr *)NLMSG_TAIL(nh);
	rta->rta_len = RTA_LENGTH(data_len);
	rta->rta_type = type;
	/*
	 * Passing a NULL pointer to memcpy() is undefined behaviour even for
	 * a zero length, so skip the copy for payload-less attributes.
	 */
	if (data_len > 0)
		memcpy(RTA_DATA(rta), data, data_len);
	nh->nlmsg_len = NLMSG_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
}
262
/**
 * Append a uint8_t netlink attribute to a message.
 *
 * Convenience wrapper around nlattr_add() for a one-byte payload.
 *
 * @param[in, out] nh
 *   The netlink message to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data
 *   The data to append.
 */
void
nlattr_add8(struct nlmsghdr *nh, unsigned short type, uint8_t data)
{
	nlattr_add(nh, type, sizeof(data), &data);
}
278
/**
 * Append a uint16_t netlink attribute to a message.
 *
 * Convenience wrapper around nlattr_add() for a two-byte payload; the value
 * is copied as passed, without any byte-order conversion.
 *
 * @param[in, out] nh
 *   The netlink message to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data
 *   The data to append.
 */
void
nlattr_add16(struct nlmsghdr *nh, unsigned short type, uint16_t data)
{
	nlattr_add(nh, type, sizeof(data), &data);
}
294
/**
 * Append a uint32_t netlink attribute to a message.
 *
 * Convenience wrapper around nlattr_add() for a four-byte payload; the value
 * is copied as passed, without any byte-order conversion.
 *
 * @param[in, out] nh
 *   The netlink message to which the attribute is appended.
 * @param[in] type
 *   The type of attribute to append.
 * @param[in] data
 *   The data to append.
 */
void
nlattr_add32(struct nlmsghdr *nh, unsigned short type, uint32_t data)
{
	nlattr_add(nh, type, sizeof(data), &data);
}
310
311 /**
312 * Start a nested netlink attribute.
313 * It must be followed later by a call to nlattr_nested_finish().
314 *
315 * @param[in, out] msg
316 * The netlink message where to edit the nested_tails metadata.
317 * @param[in] type
318 * The nested attribute type to append.
319 *
320 * @return
321 * -1 if adding a nested netlink attribute failed, 0 otherwise.
322 */
323 int
324 nlattr_nested_start(struct nlmsg *msg, uint16_t type)
325 {
326 struct nested_tail *tail;
327
328 tail = rte_zmalloc(NULL, sizeof(struct nested_tail), 0);
329 if (!tail) {
330 RTE_LOG(ERR, PMD,
331 "Couldn't allocate memory for nested netlink"
332 " attribute\n");
333 return -1;
334 }
335
336 tail->tail = (struct rtattr *)NLMSG_TAIL(&msg->nh);
337
338 nlattr_add(&msg->nh, type, 0, NULL);
339
340 tail->prev = msg->nested_tails;
341
342 msg->nested_tails = tail;
343
344 return 0;
345 }
346
347 /**
348 * End a nested netlink attribute.
349 * It follows a call to nlattr_nested_start().
350 * In effect, it will modify the nested attribute length to include every bytes
351 * from the nested attribute start, up to here.
352 *
353 * @param[in, out] msg
354 * The netlink message where to edit the nested_tails metadata.
355 */
356 void
357 nlattr_nested_finish(struct nlmsg *msg)
358 {
359 struct nested_tail *tail = msg->nested_tails;
360
361 tail->tail->rta_len = (char *)NLMSG_TAIL(&msg->nh) - (char *)tail->tail;
362
363 if (tail->prev)
364 msg->nested_tails = tail->prev;
365
366 rte_free(tail);
367 }