]>
git.proxmox.com Git - ceph.git/blob - ceph/src/seastar/dpdk/drivers/net/tap/tap_netlink.c
4 * Copyright 2017 6WIND S.A.
5 * Copyright 2017 Mellanox.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
17 * * Neither the name of 6WIND S.A. nor the names of its
18 * contributors may be used to endorse or promote products derived
19 * from this software without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 #include <linux/netlink.h>
38 #include <sys/socket.h>
41 #include <rte_malloc.h>
42 #include <tap_netlink.h>
43 #include <rte_random.h>
45 /* Must be quite large to support dumping a huge list of QDISC or filters. */
46 #define BUF_SIZE (32 * 1024) /* Size of the buffer to receive kernel messages */
47 #define SNDBUF_SIZE 32768 /* Send buffer size for the netlink socket */
48 #define RCVBUF_SIZE 32768 /* Receive buffer size for the netlink socket */
52 struct nested_tail
*prev
;
56 * Initialize a netlink socket for communicating with the kernel.
59 * Set it to a netlink group value (e.g. RTMGRP_LINK) to receive messages for
60 * specific netlink multicast groups. Otherwise, no subscription will be made.
63 * netlink socket file descriptor on success, -1 otherwise.
66 nl_init(uint32_t nl_groups
)
68 int fd
, sndbuf_size
= SNDBUF_SIZE
, rcvbuf_size
= RCVBUF_SIZE
;
69 struct sockaddr_nl local
= {
70 .nl_family
= AF_NETLINK
,
71 .nl_groups
= nl_groups
,
74 fd
= socket(AF_NETLINK
, SOCK_RAW
| SOCK_CLOEXEC
, NETLINK_ROUTE
);
76 RTE_LOG(ERR
, PMD
, "Unable to create a netlink socket\n");
79 if (setsockopt(fd
, SOL_SOCKET
, SO_SNDBUF
, &sndbuf_size
, sizeof(int))) {
80 RTE_LOG(ERR
, PMD
, "Unable to set socket buffer send size\n");
83 if (setsockopt(fd
, SOL_SOCKET
, SO_RCVBUF
, &rcvbuf_size
, sizeof(int))) {
84 RTE_LOG(ERR
, PMD
, "Unable to set socket buffer receive size\n");
87 if (bind(fd
, (struct sockaddr
*)&local
, sizeof(local
)) < 0) {
88 RTE_LOG(ERR
, PMD
, "Unable to bind to the netlink socket\n");
95 * Clean up a netlink socket once all communication with the kernel is finished.
98 * The netlink socket file descriptor used for communication.
101 * 0 on success, -1 otherwise.
104 nl_final(int nlsk_fd
)
106 if (close(nlsk_fd
)) {
107 RTE_LOG(ERR
, PMD
, "Failed to close netlink socket: %s (%d)\n",
108 strerror(errno
), errno
);
115 * Send a message to the kernel on the netlink socket.
118 * The netlink socket file descriptor used for communication.
120 * The netlink message to send to the kernel.
123 * the number of sent bytes on success, -1 otherwise.
126 nl_send(int nlsk_fd
, struct nlmsghdr
*nh
)
128 /* man 7 netlink EXAMPLE */
129 struct sockaddr_nl sa
= {
130 .nl_family
= AF_NETLINK
,
134 .iov_len
= nh
->nlmsg_len
,
136 struct msghdr msg
= {
138 .msg_namelen
= sizeof(sa
),
144 nh
->nlmsg_pid
= 0; /* communication with the kernel uses pid 0 */
145 nh
->nlmsg_seq
= (uint32_t)rte_rand();
146 send_bytes
= sendmsg(nlsk_fd
, &msg
, 0);
147 if (send_bytes
< 0) {
148 RTE_LOG(ERR
, PMD
, "Failed to send netlink message: %s (%d)\n",
149 strerror(errno
), errno
);
156 * Check that the kernel sends an appropriate ACK in response to an nl_send().
159 * The netlink socket file descriptor used for communication.
162 * 0 on success, -1 otherwise with errno set.
165 nl_recv_ack(int nlsk_fd
)
167 return nl_recv(nlsk_fd
, NULL
, NULL
);
171 * Receive a message from the kernel on the netlink socket, following an
175 * The netlink socket file descriptor used for communication.
177 * The callback function to call for each netlink message received.
178 * @param[in, out] arg
179 * Custom arguments for the callback.
182 * 0 on success, -1 otherwise with errno set.
185 nl_recv(int nlsk_fd
, int (*cb
)(struct nlmsghdr
*, void *arg
), void *arg
)
187 /* man 7 netlink EXAMPLE */
188 struct sockaddr_nl sa
;
192 .iov_len
= sizeof(buf
),
194 struct msghdr msg
= {
196 .msg_namelen
= sizeof(sa
),
198 /* One message at a time */
208 recv_bytes
= recvmsg(nlsk_fd
, &msg
, 0);
211 for (nh
= (struct nlmsghdr
*)buf
;
212 NLMSG_OK(nh
, (unsigned int)recv_bytes
);
213 nh
= NLMSG_NEXT(nh
, recv_bytes
)) {
214 if (nh
->nlmsg_type
== NLMSG_ERROR
) {
215 struct nlmsgerr
*err_data
= NLMSG_DATA(nh
);
217 if (err_data
->error
< 0) {
218 errno
= -err_data
->error
;
224 /* Multi-part msgs and their trailing DONE message. */
225 if (nh
->nlmsg_flags
& NLM_F_MULTI
) {
226 if (nh
->nlmsg_type
== NLMSG_DONE
)
238 * Append a netlink attribute to a message.
241 * The netlink message to append the attribute to.
243 * The type of attribute to append.
244 * @param[in] data_len
245 * The length of the data to append.
247 * The data to append.
250 nlattr_add(struct nlmsghdr
*nh
, unsigned short type
,
251 unsigned int data_len
, const void *data
)
253 /* see man 3 rtnetlink */
256 rta
= (struct rtattr
*)NLMSG_TAIL(nh
);
257 rta
->rta_len
= RTA_LENGTH(data_len
);
258 rta
->rta_type
= type
;
259 memcpy(RTA_DATA(rta
), data
, data_len
);
260 nh
->nlmsg_len
= NLMSG_ALIGN(nh
->nlmsg_len
) + RTA_ALIGN(rta
->rta_len
);
264 * Append a uint8_t netlink attribute to a message.
267 * The netlink message to append the attribute to.
269 * The type of attribute to append.
271 * The data to append.
274 nlattr_add8(struct nlmsghdr
*nh
, unsigned short type
, uint8_t data
)
276 nlattr_add(nh
, type
, sizeof(uint8_t), &data
);
280 * Append a uint16_t netlink attribute to a message.
283 * The netlink message to append the attribute to.
285 * The type of attribute to append.
287 * The data to append.
290 nlattr_add16(struct nlmsghdr
*nh
, unsigned short type
, uint16_t data
)
292 nlattr_add(nh
, type
, sizeof(uint16_t), &data
);
296 * Append a uint32_t netlink attribute to a message.
299 * The netlink message to append the attribute to.
301 * The type of attribute to append.
303 * The data to append.
306 nlattr_add32(struct nlmsghdr
*nh
, unsigned short type
, uint32_t data
)
308 nlattr_add(nh
, type
, sizeof(uint32_t), &data
);
312 * Start a nested netlink attribute.
313 * It must be followed later by a call to nlattr_nested_finish().
315 * @param[in, out] msg
316 * The netlink message whose nested_tails metadata is updated.
318 * The nested attribute type to append.
321 * -1 if adding a nested netlink attribute failed, 0 otherwise.
324 nlattr_nested_start(struct nlmsg
*msg
, uint16_t type
)
326 struct nested_tail
*tail
;
328 tail
= rte_zmalloc(NULL
, sizeof(struct nested_tail
), 0);
331 "Couldn't allocate memory for nested netlink"
336 tail
->tail
= (struct rtattr
*)NLMSG_TAIL(&msg
->nh
);
338 nlattr_add(&msg
->nh
, type
, 0, NULL
);
340 tail
->prev
= msg
->nested_tails
;
342 msg
->nested_tails
= tail
;
348 * End a nested netlink attribute.
349 * It follows a call to nlattr_nested_start().
350 * In effect, it will modify the nested attribute length to include every byte
351 * from the nested attribute start, up to here.
353 * @param[in, out] msg
354 * The netlink message whose nested_tails metadata is updated.
357 nlattr_nested_finish(struct nlmsg
*msg
)
359 struct nested_tail
*tail
= msg
->nested_tails
;
361 tail
->tail
->rta_len
= (char *)NLMSG_TAIL(&msg
->nh
) - (char *)tail
->tail
;
364 msg
->nested_tails
= tail
->prev
;