]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
attach: cleanup various helpers
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
d7b58715 39#include "raw_syscalls.h"
59524108 40#include "syscall_wrappers.h"
0d204771 41#include "utils.h"
0ad19a3f 42
9de31d5a
CB
43#ifndef HAVE_STRLCPY
44#include "include/strlcpy.h"
45#endif
46
ac2cecc4 47lxc_log_define(network, lxc);
f8fee0e2 48
/* Per-type callback taking the container handler and the network device. */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);

/* Per-type callback taking only the network device. */
typedef int (*instantiate_ns_cb)(struct lxc_netdev *);

/* Name of the loopback interface. */
static const char loop_device[] = "lo";
811ef482 52
b670016a 53static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 54{
55 int addrlen, err;
56 struct nl_handler nlh;
57 struct rtmsg *rt;
58 struct nlmsg *answer = NULL, *nlmsg = NULL;
59
60 addrlen = family == AF_INET ? sizeof(struct in_addr)
61 : sizeof(struct in6_addr);
62
63 err = netlink_open(&nlh, NETLINK_ROUTE);
64 if (err)
65 return err;
66
67 err = -ENOMEM;
68 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
69 if (!nlmsg)
70 goto out;
71
72 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
73 if (!answer)
74 goto out;
75
76 nlmsg->nlmsghdr->nlmsg_flags =
77 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 78 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 79
80 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
81 if (!rt)
82 goto out;
83 rt->rtm_family = family;
84 rt->rtm_table = RT_TABLE_MAIN;
85 rt->rtm_scope = RT_SCOPE_LINK;
86 rt->rtm_protocol = RTPROT_BOOT;
87 rt->rtm_type = RTN_UNICAST;
88 rt->rtm_dst_len = netmask;
89
90 err = -EINVAL;
91 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
92 goto out;
93 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
94 goto out;
95 err = netlink_transaction(&nlh, nlmsg, answer);
96out:
97 netlink_close(&nlh);
98 nlmsg_free(answer);
99 nlmsg_free(nlmsg);
100 return err;
101}
102
/* Add an IPv4 route for @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_NEWROUTE;

	return lxc_ip_route_dest(request, AF_INET, ifindex, dest, netmask);
}
107
/* Add an IPv6 route for @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_NEWROUTE;

	return lxc_ip_route_dest(request, AF_INET6, ifindex, dest, netmask);
}
112
/* Delete the IPv4 route for @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_DELROUTE;

	return lxc_ip_route_dest(request, AF_INET, ifindex, dest, netmask);
}
117
/* Delete the IPv6 route for @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_DELROUTE;

	return lxc_ip_route_dest(request, AF_INET6, ifindex, dest, netmask);
}
122
d4a7da46 123static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
124{
125 struct lxc_list *iterator;
126 int err;
127
128 lxc_list_for_each(iterator, ip) {
129 struct lxc_inetdev *inetdev = iterator->elem;
130
131 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
132 if (err) {
133 SYSERROR("Failed to setup ipv4 route for network device "
134 "with ifindex %d", ifindex);
596a002c 135 return ret_set_errno(-1, -err);
d4a7da46 136 }
137 }
138
139 return 0;
140}
141
142static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
143{
144 struct lxc_list *iterator;
145 int err;
146
147 lxc_list_for_each(iterator, ip) {
148 struct lxc_inet6dev *inet6dev = iterator->elem;
149
150 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
151 if (err) {
152 SYSERROR("Failed to setup ipv6 route for network device "
153 "with ifindex %d", ifindex);
596a002c 154 return ret_set_errno(-1, -err);
d4a7da46 155 }
156 }
157
158 return 0;
159}
160
6dfa9581
TP
161static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
162{
163 struct lxc_list *iterator;
164 int err;
165
166 lxc_list_for_each(iterator, ip) {
167 struct lxc_inetdev *inetdev = iterator->elem;
168
169 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
170
171 if (err)
596a002c 172 return log_error_errno(-1, err,
6dfa9581
TP
173 "Failed to setup ipv4 address route for network device with eifindex %d",
174 ifindex);
175 }
176
177 return 0;
178}
179
180static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
181{
182 struct lxc_list *iterator;
183 int err;
184
185 lxc_list_for_each(iterator, ip) {
186 struct lxc_inet6dev *inet6dev = iterator->elem;
187
188 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
189 if (err)
596a002c 190 return log_error_errno(-1, err,
6dfa9581
TP
191 "Failed to setup ipv6 address route for network device with eifindex %d",
192 ifindex);
193 }
194
195 return 0;
196}
197
/*
 * Pair of strings naming an IP address and a device.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * presumably it carries arguments for a neighbour-proxy helper - confirm.
 */
struct ip_proxy_args {
	const char *ip;  /* address, as text */
	const char *dev; /* device name */
};
202
5fe147e9 203static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 204{
5fe147e9
TP
205 int addrlen, err;
206 struct nl_handler nlh;
207 struct ndmsg *rt;
208 struct nlmsg *answer = NULL, *nlmsg = NULL;
6dfa9581 209
5fe147e9 210 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 211
5fe147e9
TP
212 err = netlink_open(&nlh, NETLINK_ROUTE);
213 if (err)
214 return err;
6dfa9581 215
5fe147e9
TP
216 err = -ENOMEM;
217 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
218 if (!nlmsg)
219 goto out;
6dfa9581 220
5fe147e9
TP
221 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
222 if (!answer)
223 goto out;
6dfa9581 224
5fe147e9
TP
225 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
226 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 227
5fe147e9
TP
228 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
229 if (!rt)
230 goto out;
231 rt->ndm_ifindex = ifindex;
232 rt->ndm_flags = NTF_PROXY;
233 rt->ndm_type = NDA_DST;
234 rt->ndm_family = family;
6dfa9581 235
5fe147e9
TP
236 err = -EINVAL;
237 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
238 goto out;
6dfa9581 239
5fe147e9
TP
240 err = netlink_transaction(&nlh, nlmsg, answer);
241out:
242 netlink_close(&nlh);
243 nlmsg_free(answer);
244 nlmsg_free(nlmsg);
245 return err;
6dfa9581
TP
246}
247
/*
 * Check the "forwarding" entry under /proc/sys/net/<ipv4|ipv6>/conf/@ifname
 * by handing the path and the expected content "1" to
 * lxc_read_file_expect() and returning its result.
 *
 * Returns -1 with errno EINVAL for a family other than AF_INET/AF_INET6 and
 * -1 with errno E2BIG when the path does not fit into PATH_MAX.
 */
static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
{
	char path[PATH_MAX];
	char buf[1] = "";
	const char *proto;
	int len;

	if (family == AF_INET)
		proto = "ipv4";
	else if (family == AF_INET6)
		proto = "ipv6";
	else
		return ret_set_errno(-1, EINVAL);

	len = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
		       proto, ifname, "forwarding");
	if (len < 0 || (size_t)len >= sizeof(path))
		return ret_set_errno(-1, E2BIG);

	return lxc_read_file_expect(path, buf, 1, "1");
}
265
811ef482
CB
266static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
267{
54256301 268 int err;
a00fbab5 269 unsigned int mtu = 1500;
811ef482
CB
270 char *veth1, *veth2;
271 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 272
de4855a8 273 if (netdev->priv.veth_attr.pair[0] != '\0') {
811ef482
CB
274 veth1 = netdev->priv.veth_attr.pair;
275 if (handler->conf->reboot)
276 lxc_netdev_delete_by_name(veth1);
277 } else {
278 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
279 if (err < 0 || (size_t)err >= sizeof(veth1buf))
280 return -1;
281
282 veth1 = lxc_mkifname(veth1buf);
283 if (!veth1)
284 return -1;
285
286 /* store away for deconf */
287 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
288 }
289
d34212ad
CB
290 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
291 if (err < 0 || (size_t)err >= sizeof(veth2buf))
292 return -1;
293
811ef482
CB
294 veth2 = lxc_mkifname(veth2buf);
295 if (!veth2)
54256301
CB
296 return -1;
297
a00fbab5
TP
298 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
299 if (netdev->mtu) {
300 if (lxc_safe_uint(netdev->mtu, &mtu))
301 return log_error_errno(-1, errno, "Failed to parse mtu");
54256301
CB
302 } else if (netdev->link[0] != '\0') {
303 int ifindex_mtu;
811ef482 304
54256301
CB
305 ifindex_mtu = if_nametoindex(netdev->link);
306 if (ifindex_mtu) {
307 mtu = netdev_get_mtu(ifindex_mtu);
308 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
309 }
310 }
311
312 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
811ef482 313 if (err) {
6d1400b5 314 errno = -err;
315 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
54256301 316 return -1;
811ef482
CB
317 }
318
24190194
CB
319 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
320
811ef482
CB
321 /* changing the high byte of the mac address to 0xfe, the bridge interface
322 * will always keep the host's mac address and not take the mac address
323 * of a container */
324 err = setup_private_host_hw_addr(veth1);
325 if (err) {
6d1400b5 326 errno = -err;
327 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
328 goto out_delete;
329 }
330
8da62485
CB
331 /* Retrieve ifindex of the host's veth device. */
332 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
333 if (!netdev->priv.veth_attr.ifindex) {
334 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
335 goto out_delete;
336 }
337
811ef482
CB
338 if (mtu) {
339 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 340 if (err) {
6d1400b5 341 errno = -err;
54256301 342 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
343 goto out_delete;
344 }
345 }
346
6dfa9581 347 if (netdev->link[0] != '\0' && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
811ef482
CB
348 err = lxc_bridge_attach(netdev->link, veth1);
349 if (err) {
6d1400b5 350 errno = -err;
351 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
352 veth1, netdev->link);
811ef482
CB
353 goto out_delete;
354 }
355 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
356 }
357
358 err = lxc_netdev_up(veth1);
359 if (err) {
6d1400b5 360 errno = -err;
361 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
362 goto out_delete;
363 }
364
d4a7da46 365 /* setup ipv4 routes on the host interface */
366 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
367 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
368 goto out_delete;
369 }
370
371 /* setup ipv6 routes on the host interface */
372 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
373 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
374 goto out_delete;
375 }
376
6dfa9581 377 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
378 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
379 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
380 appear to be added successfully but then do not appear in the proxy list. The length of time
381 slept doesn't appear to be important, only that the process sleeps for a short period of time.
382 */
383 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
384
6dfa9581
TP
385 if (netdev->ipv4_gateway) {
386 char bufinet4[INET_ADDRSTRLEN];
387 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
596a002c 388 log_error_errno(-1, -errno, "Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
389 goto out_delete;
390 }
391
392 err = lxc_ip_forwarding_on(veth1, AF_INET);
393 if (err) {
596a002c 394 log_error_errno(-1, err, "Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
395 goto out_delete;
396 }
397
5fe147e9 398 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 399 if (err) {
596a002c 400 log_error_errno(-1, err, "Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
401 goto out_delete;
402 }
403 }
404
405 if (netdev->ipv6_gateway) {
406 char bufinet6[INET6_ADDRSTRLEN];
407
408 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
596a002c 409 log_error_errno(-1, -errno, "Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
410 goto out_delete;
411 }
412
413 /* Check for sysctl net.ipv6.conf.all.forwarding=1
414 Kernel requires this to route any packets for IPv6.
415 */
416 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
417 if (err) {
596a002c 418 log_error_errno(-1, err, "Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
419 goto out_delete;
420 }
421
422 err = lxc_ip_forwarding_on(veth1, AF_INET6);
423 if (err) {
596a002c 424 log_error_errno(-1, err, "Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
425 goto out_delete;
426 }
427
428 err = lxc_neigh_proxy_on(veth1, AF_INET6);
429 if (err) {
596a002c 430 log_error_errno(-1, err, "Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
431 goto out_delete;
432 }
433
5fe147e9 434 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 435 if (err) {
596a002c 436 log_error_errno(-1, err, "Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
437 goto out_delete;
438 }
439 }
440
441 /* setup ipv4 address routes on the host interface */
442 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
443 if (err) {
596a002c 444 log_error_errno(-1, err, "Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
445 goto out_delete;
446 }
447
448 /* setup ipv6 address routes on the host interface */
449 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
450 if (err) {
596a002c 451 log_error_errno(-1, err, "Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
452 goto out_delete;
453 }
454 }
455
811ef482 456 if (netdev->upscript) {
14a7b0f9
CB
457 char *argv[] = {
458 "veth",
459 netdev->link,
990b9ac3 460 veth1,
14a7b0f9
CB
461 NULL,
462 };
463
464 err = run_script_argv(handler->name,
465 handler->conf->hooks_version, "net",
466 netdev->upscript, "up", argv);
467 if (err < 0)
811ef482
CB
468 goto out_delete;
469 }
470
54256301 471 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
472
473 return 0;
474
475out_delete:
54256301 476 lxc_netdev_delete_by_name(veth1);
811ef482
CB
477 return -1;
478}
479
480static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
481{
8021de25 482 char peer[IFNAMSIZ];
811ef482
CB
483 int err;
484
de4855a8 485 if (netdev->link[0] == '\0') {
811ef482
CB
486 ERROR("No link for macvlan network device specified");
487 return -1;
488 }
489
8021de25
CB
490 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
491 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
492 return -1;
493
8021de25 494 if (!lxc_mkifname(peer))
811ef482
CB
495 return -1;
496
497 err = lxc_macvlan_create(netdev->link, peer,
498 netdev->priv.macvlan_attr.mode);
499 if (err) {
6d1400b5 500 errno = -err;
501 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
502 peer, netdev->link);
966e9f1f 503 goto on_error;
811ef482
CB
504 }
505
a9704f05 506 strlcpy(netdev->created_name, peer, IFNAMSIZ);
8bf64b77
CB
507 if (netdev->name[0] == '\0')
508 (void)strlcpy(netdev->name, peer, IFNAMSIZ);
a9704f05 509
811ef482
CB
510 netdev->ifindex = if_nametoindex(peer);
511 if (!netdev->ifindex) {
512 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 513 goto on_error;
811ef482
CB
514 }
515
3bef7b7b 516 if (netdev->mtu) {
54256301
CB
517 unsigned int mtu;
518
3bef7b7b
TP
519 err = lxc_safe_uint(netdev->mtu, &mtu);
520 if (err < 0) {
521 errno = -err;
522 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
523 goto on_error;
524 }
525
526 err = lxc_netdev_set_mtu(peer, mtu);
527 if (err < 0) {
528 errno = -err;
529 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
530 goto on_error;
531 }
532 }
533
811ef482 534 if (netdev->upscript) {
14a7b0f9
CB
535 char *argv[] = {
536 "macvlan",
537 netdev->link,
538 NULL,
539 };
540
541 err = run_script_argv(handler->name,
542 handler->conf->hooks_version, "net",
543 netdev->upscript, "up", argv);
544 if (err < 0)
966e9f1f 545 goto on_error;
811ef482
CB
546 }
547
548 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
549 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
550
551 return 0;
966e9f1f
CB
552
553on_error:
811ef482 554 lxc_netdev_delete_by_name(peer);
811ef482
CB
555 return -1;
556}
557
c9f52382 558static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
559{
560 int err, index, len;
561 struct ifinfomsg *ifi;
562 struct nl_handler nlh;
563 struct rtattr *nest, *nest2;
564 struct nlmsg *answer = NULL, *nlmsg = NULL;
565
566 len = strlen(master);
567 if (len == 1 || len >= IFNAMSIZ)
596a002c 568 return ret_set_errno(-1, EINVAL);
c9f52382 569
570 len = strlen(name);
571 if (len == 1 || len >= IFNAMSIZ)
596a002c 572 return ret_set_errno(-1, EINVAL);
c9f52382 573
574 index = if_nametoindex(master);
575 if (!index)
596a002c 576 return ret_set_errno(-1, EINVAL);
c9f52382 577
578 err = netlink_open(&nlh, NETLINK_ROUTE);
579 if (err)
596a002c 580 return ret_set_errno(-1, -err);
c9f52382 581
582 err = -ENOMEM;
583 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
584 if (!nlmsg)
585 goto out;
586
587 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
588 if (!answer)
589 goto out;
590
591 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
592 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
593
594 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
595 if (!ifi) {
596 goto out;
597 }
598 ifi->ifi_family = AF_UNSPEC;
599
600 err = -EPROTO;
601 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
602 if (!nest)
603 goto out;
604
605 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
606 goto out;
607
608 if (mode) {
609 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
610 if (!nest2)
611 goto out;
612
613 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
614 goto out;
615
616 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
617 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
618 */
619 if (isolation > 0) {
620 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
621 goto out;
622 }
623
624 nla_end_nested(nlmsg, nest2);
625 }
626
627 nla_end_nested(nlmsg, nest);
628
629 if (nla_put_u32(nlmsg, IFLA_LINK, index))
630 goto out;
631
632 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
633 goto out;
634
635 err = netlink_transaction(&nlh, nlmsg, answer);
636out:
637 netlink_close(&nlh);
638 nlmsg_free(answer);
639 nlmsg_free(nlmsg);
640 if (err < 0)
596a002c 641 return ret_set_errno(-1, -err);
c9f52382 642 return 0;
643}
644
645static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
646{
dd119206 647 char peer[IFNAMSIZ];
c9f52382 648 int err;
649
650 if (netdev->link[0] == '\0') {
651 ERROR("No link for ipvlan network device specified");
652 return -1;
653 }
654
dd119206
CB
655 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
656 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 657 return -1;
658
dd119206 659 if (!lxc_mkifname(peer))
c9f52382 660 return -1;
661
dd119206
CB
662 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
663 netdev->priv.ipvlan_attr.isolation);
c9f52382 664 if (err) {
dd119206
CB
665 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
666 peer, netdev->link);
c9f52382 667 goto on_error;
668 }
669
e7fdd504 670 strlcpy(netdev->created_name, peer, IFNAMSIZ);
8bf64b77
CB
671 if (netdev->name[0] == '\0')
672 (void)strlcpy(netdev->name, peer, IFNAMSIZ);
e7fdd504 673
c9f52382 674 netdev->ifindex = if_nametoindex(peer);
675 if (!netdev->ifindex) {
676 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
677 goto on_error;
678 }
679
006e135e 680 if (netdev->mtu) {
54256301
CB
681 unsigned int mtu;
682
006e135e 683 err = lxc_safe_uint(netdev->mtu, &mtu);
684 if (err < 0) {
685 errno = -err;
54256301 686 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 687 goto on_error;
688 }
689
690 err = lxc_netdev_set_mtu(peer, mtu);
691 if (err < 0) {
692 errno = -err;
54256301 693 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 694 goto on_error;
695 }
696 }
697
c9f52382 698 if (netdev->upscript) {
699 char *argv[] = {
700 "ipvlan",
701 netdev->link,
702 NULL,
703 };
704
dd119206
CB
705 err = run_script_argv(handler->name, handler->conf->hooks_version,
706 "net", netdev->upscript, "up", argv);
c9f52382 707 if (err < 0)
708 goto on_error;
709 }
710
dd119206
CB
711 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
712 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 713
714 return 0;
715
716on_error:
717 lxc_netdev_delete_by_name(peer);
718 return -1;
719}
720
811ef482
CB
721static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
722{
723 char peer[IFNAMSIZ];
724 int err;
725 static uint16_t vlan_cntr = 0;
811ef482 726
de4855a8 727 if (netdev->link[0] == '\0') {
811ef482
CB
728 ERROR("No link for vlan network device specified");
729 return -1;
730 }
731
d4d68410
CB
732 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
733 netdev->priv.vlan_attr.vid, vlan_cntr++);
811ef482
CB
734 if (err < 0 || (size_t)err >= sizeof(peer))
735 return -1;
736
737 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
738 if (err) {
6d1400b5 739 errno = -err;
740 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
741 peer, netdev->link);
811ef482
CB
742 return -1;
743 }
744
83530dba 745 strlcpy(netdev->created_name, peer, IFNAMSIZ);
8bf64b77
CB
746 if (netdev->name[0] == '\0')
747 (void)strlcpy(netdev->name, peer, IFNAMSIZ);
83530dba 748
811ef482
CB
749 netdev->ifindex = if_nametoindex(peer);
750 if (!netdev->ifindex) {
751 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 752 goto on_error;
753 }
754
755 if (netdev->mtu) {
54256301
CB
756 unsigned int mtu;
757
3e2a7b08 758 err = lxc_safe_uint(netdev->mtu, &mtu);
759 if (err < 0) {
760 errno = -err;
54256301 761 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 762 goto on_error;
763 }
764
765 err = lxc_netdev_set_mtu(peer, mtu);
54256301 766 if (err < 0) {
3e2a7b08 767 errno = -err;
54256301 768 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 769 goto on_error;
770 }
811ef482
CB
771 }
772
3a73d9f1 773 if (netdev->upscript) {
774 char *argv[] = {
775 "vlan",
776 netdev->link,
777 NULL,
778 };
779
d4d68410
CB
780 err = run_script_argv(handler->name, handler->conf->hooks_version,
781 "net", netdev->upscript, "up", argv);
19abca58 782 if (err < 0) {
3e2a7b08 783 goto on_error;
19abca58 784 }
3a73d9f1 785 }
786
d4d68410
CB
787 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
788 netdev->ifindex);
811ef482
CB
789
790 return 0;
3e2a7b08 791
792on_error:
793 lxc_netdev_delete_by_name(peer);
794 return -1;
811ef482
CB
795}
796
797static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
798{
0b154989 799 int err, mtu_orig = 0;
14a7b0f9 800
de4855a8 801 if (netdev->link[0] == '\0') {
811ef482
CB
802 ERROR("No link for physical interface specified");
803 return -1;
804 }
805
75b074ee
CB
806 /*
807 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
808 * network namespace because we need it to move the device from the
809 * host's network namespace to the container's network namespace later
810 * on.
811 * Note that netdev->link will contain the name of the physical network
812 * device in the host's namespace.
813 */
811ef482
CB
814 netdev->ifindex = if_nametoindex(netdev->link);
815 if (!netdev->ifindex) {
816 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
817 return -1;
818 }
819
61302ef7 820 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
8bf64b77
CB
821 if (netdev->name[0] == '\0')
822 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 823
75b074ee
CB
824 /*
825 * Store the ifindex of the host's network device in the host's
790255cf
CB
826 * namespace.
827 */
828 netdev->priv.phys_attr.ifindex = netdev->ifindex;
829
75b074ee
CB
830 /*
831 * Get original device MTU setting and store for restoration after
832 * container shutdown.
833 */
0b154989
TP
834 mtu_orig = netdev_get_mtu(netdev->ifindex);
835 if (mtu_orig < 0) {
836 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
596a002c 837 return ret_set_errno(-1, -mtu_orig);
0b154989
TP
838 }
839
840 netdev->priv.phys_attr.mtu = mtu_orig;
841
3bef7b7b 842 if (netdev->mtu) {
54256301
CB
843 unsigned int mtu;
844
3bef7b7b
TP
845 err = lxc_safe_uint(netdev->mtu, &mtu);
846 if (err < 0) {
847 errno = -err;
75b074ee
CB
848 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
849 netdev->mtu, netdev->link);
3bef7b7b
TP
850 return -1;
851 }
14a7b0f9 852
3bef7b7b
TP
853 err = lxc_netdev_set_mtu(netdev->link, mtu);
854 if (err < 0) {
855 errno = -err;
54256301 856 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
857 return -1;
858 }
859 }
860
861 if (netdev->upscript) {
862 char *argv[] = {
863 "phys",
864 netdev->link,
865 NULL,
866 };
867
75b074ee
CB
868 err = run_script_argv(handler->name, handler->conf->hooks_version,
869 "net", netdev->upscript, "up", argv);
3bef7b7b
TP
870 if (err < 0) {
871 return -1;
872 }
873 }
874
75b074ee
CB
875 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
876 netdev->ifindex);
811ef482
CB
877
878 return 0;
879}
880
881static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
882{
14a7b0f9
CB
883 int ret;
884 char *argv[] = {
885 "empty",
886 NULL,
887 };
888
811ef482 889 netdev->ifindex = 0;
14a7b0f9
CB
890 if (!netdev->upscript)
891 return 0;
892
893 ret = run_script_argv(handler->name, handler->conf->hooks_version,
894 "net", netdev->upscript, "up", argv);
895 if (ret < 0)
896 return -1;
897
811ef482
CB
898 return 0;
899}
900
901static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
902{
903 netdev->ifindex = 0;
904 return 0;
905}
906
907static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
908 [LXC_NET_VETH] = instantiate_veth,
909 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 910 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
911 [LXC_NET_VLAN] = instantiate_vlan,
912 [LXC_NET_PHYS] = instantiate_phys,
913 [LXC_NET_EMPTY] = instantiate_empty,
914 [LXC_NET_NONE] = instantiate_none,
915};
916
8bf64b77
CB
917static int instantiate_ns_veth(struct lxc_netdev *netdev)
918{
919 char current_ifname[IFNAMSIZ];
920
921 netdev->ifindex = if_nametoindex(netdev->created_name);
922 if (!netdev->ifindex)
923 return log_error_errno(-1,
924 errno, "Failed to retrieve ifindex for network device with name %s",
925 netdev->created_name);
926
927 if (netdev->name[0] == '\0')
928 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
929
930 if (strcmp(netdev->created_name, netdev->name) != 0) {
931 int ret;
932
933 ret = lxc_netdev_rename_by_name(netdev->created_name, netdev->name);
934 if (ret)
935 return log_error_errno(-1,
936 -ret, "Failed to rename network device \"%s\" to \"%s\"",
937 netdev->created_name,
938 netdev->name);
939
940 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->created_name, netdev->name);
941 }
942
943 /*
944 * Re-read the name of the interface because its name has changed and
945 * would be automatically allocated by the system
946 */
947 if (!if_indextoname(netdev->ifindex, current_ifname))
948 return log_error_errno(-1,
949 errno, "Failed get name for network device with ifindex %d",
950 netdev->ifindex);
951
952 /*
953 * Now update the recorded name of the network device to reflect the
954 * name of the network device in the child's network namespace. We will
955 * later on send this information back to the parent.
956 */
957 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
958
959 return 0;
960}
961
962static int __instantiate_common(struct lxc_netdev *netdev)
963{
964 netdev->ifindex = if_nametoindex(netdev->name);
965 if (!netdev->ifindex)
966 return log_error_errno(-1,
967 errno, "Failed to retrieve ifindex for network device with name %s",
968 netdev->name);
969
970 return 0;
971}
972
/* macvlan: only the common ifindex lookup is performed. */
static int instantiate_ns_macvlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
977
/* ipvlan: only the common ifindex lookup is performed. */
static int instantiate_ns_ipvlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
982
/* vlan: only the common ifindex lookup is performed. */
static int instantiate_ns_vlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
987
/* phys: only the common ifindex lookup is performed. */
static int instantiate_ns_phys(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
992
/* empty: nothing to do; always returns 0. */
static int instantiate_ns_empty(struct lxc_netdev *netdev)
{
	(void)netdev;

	return 0;
}
997
/* none: nothing to do; always returns 0. */
static int instantiate_ns_none(struct lxc_netdev *netdev)
{
	(void)netdev;

	return 0;
}
1002
1003static instantiate_ns_cb netdev_ns_conf[LXC_NET_MAXCONFTYPE + 1] = {
1004 [LXC_NET_VETH] = instantiate_ns_veth,
1005 [LXC_NET_MACVLAN] = instantiate_ns_macvlan,
1006 [LXC_NET_IPVLAN] = instantiate_ns_ipvlan,
1007 [LXC_NET_VLAN] = instantiate_ns_vlan,
1008 [LXC_NET_PHYS] = instantiate_ns_phys,
1009 [LXC_NET_EMPTY] = instantiate_ns_empty,
1010 [LXC_NET_NONE] = instantiate_ns_none,
1011};
1012
811ef482
CB
1013static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
1014{
14a7b0f9
CB
1015 int ret;
1016 char *argv[] = {
1017 "veth",
1018 netdev->link,
1019 NULL,
1020 NULL,
1021 };
1022
1023 if (!netdev->downscript)
1024 return 0;
811ef482 1025
de4855a8 1026 if (netdev->priv.veth_attr.pair[0] != '\0')
14a7b0f9 1027 argv[2] = netdev->priv.veth_attr.pair;
811ef482 1028 else
14a7b0f9
CB
1029 argv[2] = netdev->priv.veth_attr.veth1;
1030
1031 ret = run_script_argv(handler->name,
1032 handler->conf->hooks_version, "net",
1033 netdev->downscript, "down", argv);
1034 if (ret < 0)
1035 return -1;
811ef482 1036
811ef482
CB
1037 return 0;
1038}
1039
1040static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1041{
14a7b0f9
CB
1042 int ret;
1043 char *argv[] = {
1044 "macvlan",
1045 netdev->link,
1046 NULL,
1047 };
1048
1049 if (!netdev->downscript)
1050 return 0;
1051
1052 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1053 "net", netdev->downscript, "down", argv);
1054 if (ret < 0)
1055 return -1;
811ef482 1056
811ef482
CB
1057 return 0;
1058}
1059
c9f52382 1060static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1061{
1062 int ret;
1063 char *argv[] = {
1064 "ipvlan",
1065 netdev->link,
1066 NULL,
1067 };
1068
1069 if (!netdev->downscript)
1070 return 0;
1071
1072 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1073 "net", netdev->downscript, "down", argv);
1074 if (ret < 0)
1075 return -1;
1076
1077 return 0;
1078}
1079
811ef482
CB
1080static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1081{
3a73d9f1 1082 int ret;
1083 char *argv[] = {
1084 "vlan",
1085 netdev->link,
1086 NULL,
1087 };
1088
1089 if (!netdev->downscript)
1090 return 0;
1091
1092 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1093 "net", netdev->downscript, "down", argv);
1094 if (ret < 0)
1095 return -1;
1096
811ef482
CB
1097 return 0;
1098}
1099
1100static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1101{
14a7b0f9
CB
1102 int ret;
1103 char *argv[] = {
1104 "phys",
1105 netdev->link,
1106 NULL,
1107 };
1108
1109 if (!netdev->downscript)
1110 return 0;
1111
1112 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1113 "net", netdev->downscript, "down", argv);
1114 if (ret < 0)
1115 return -1;
811ef482 1116
811ef482
CB
1117 return 0;
1118}
1119
1120static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1121{
14a7b0f9
CB
1122 int ret;
1123 char *argv[] = {
1124 "empty",
1125 NULL,
1126 };
1127
1128 if (!netdev->downscript)
1129 return 0;
1130
1131 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1132 "net", netdev->downscript, "down", argv);
1133 if (ret < 0)
1134 return -1;
811ef482 1135
811ef482
CB
1136 return 0;
1137}
1138
/* Shutdown hook for LXC_NET_NONE: nothing to tear down, always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
1143
1144static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1145 [LXC_NET_VETH] = shutdown_veth,
1146 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 1147 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
1148 [LXC_NET_VLAN] = shutdown_vlan,
1149 [LXC_NET_PHYS] = shutdown_phys,
1150 [LXC_NET_EMPTY] = shutdown_empty,
1151 [LXC_NET_NONE] = shutdown_none,
1152};
1153
0037ab49
TP
1154static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1155{
1156 int err;
1157 struct nl_handler nlh;
1158 struct ifinfomsg *ifi;
1159 struct nlmsg *nlmsg = NULL;
1160
1161 err = netlink_open(&nlh, NETLINK_ROUTE);
1162 if (err)
1163 return err;
1164
1165 err = -ENOMEM;
1166 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1167 if (!nlmsg)
1168 goto out;
1169
1170 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1171 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1172
1173 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1174 if (!ifi)
1175 goto out;
1176 ifi->ifi_family = AF_UNSPEC;
1177 ifi->ifi_index = ifindex;
1178
1179 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
1180 goto out;
1181
1182 if (ifname != NULL) {
1183 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
1184 goto out;
1185 }
1186
1187 err = netlink_transaction(&nlh, nlmsg, nlmsg);
1188out:
1189 netlink_close(&nlh);
1190 nlmsg_free(nlmsg);
1191 return err;
1192}
1193
ebc73a67 1194int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1195{
ebc73a67 1196 int err;
0ad19a3f 1197 struct nl_handler nlh;
06f976ca 1198 struct ifinfomsg *ifi;
ebc73a67 1199 struct nlmsg *nlmsg = NULL;
0ad19a3f 1200
3cfc0f3a
MN
1201 err = netlink_open(&nlh, NETLINK_ROUTE);
1202 if (err)
1203 return err;
0ad19a3f 1204
3cfc0f3a 1205 err = -ENOMEM;
0ad19a3f 1206 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1207 if (!nlmsg)
1208 goto out;
1209
ebc73a67 1210 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1211 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1212
1213 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1214 if (!ifi)
1215 goto out;
06f976ca
SZ
1216 ifi->ifi_family = AF_UNSPEC;
1217 ifi->ifi_index = ifindex;
0ad19a3f 1218
1219 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
1220 goto out;
1221
8d357196
DY
1222 if (ifname != NULL) {
1223 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
1224 goto out;
1225 }
1226
3cfc0f3a 1227 err = netlink_transaction(&nlh, nlmsg, nlmsg);
0ad19a3f 1228out:
1229 netlink_close(&nlh);
1230 nlmsg_free(nlmsg);
1231 return err;
1232}
1233
ebc73a67
CB
1234/* If we are asked to move a wireless interface, then we must actually move its
1235 * phyN device. Detect that condition and return the physname here. The physname
1236 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1237 */
1238#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1239char *is_wlan(const char *ifname)
e5848d39 1240{
b0293710 1241 __do_free char *path = NULL;
ebc73a67 1242 int i, ret;
e5848d39 1243 long physlen;
ebc73a67 1244 size_t len;
e5848d39 1245 FILE *f;
ebc73a67 1246 char *physname = NULL;
e5848d39 1247
ebc73a67 1248 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1249 path = must_realloc(NULL, len + 1);
e5848d39 1250 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 1251 if (ret < 0 || (size_t)ret >= len)
e5848d39 1252 goto bad;
ebc73a67 1253
ebc73a67
CB
1254 f = fopen(path, "r");
1255 if (!f)
e5848d39 1256 goto bad;
ebc73a67 1257
1a0e70ac 1258 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1259 fseek(f, 0, SEEK_END);
1260 physlen = ftell(f);
1261 fseek(f, 0, SEEK_SET);
7d1cde93
SX
1262 if (physlen < 0) {
1263 fclose(f);
0382c0da 1264 goto bad;
7d1cde93 1265 }
ebc73a67
CB
1266
1267 physname = malloc(physlen + 1);
ee54ea9a 1268 if (!physname) {
acf47e1b 1269 fclose(f);
e5848d39 1270 goto bad;
ee54ea9a 1271 }
ebc73a67
CB
1272
1273 memset(physname, 0, physlen + 1);
e5848d39
SH
1274 ret = fread(physname, 1, physlen, f);
1275 fclose(f);
1276 if (ret < 0)
1277 goto bad;
1278
ebc73a67 1279 for (i = 0; i < physlen; i++) {
e5848d39
SH
1280 if (physname[i] == '\n')
1281 physname[i] = '\0';
ebc73a67 1282
e5848d39
SH
1283 if (physname[i] == '\0')
1284 break;
1285 }
1286
1287 return physname;
1288
1289bad:
f10fad2f 1290 free(physname);
e5848d39
SH
1291 return NULL;
1292}
1293
ebc73a67
CB
/* Rename interface @old to @new inside the network namespace of process @pid.
 *
 * The work is done in a forked child so the calling process never changes its
 * own network namespace. Returns -1 if fork() fails, otherwise the result of
 * wait_for_pid() on the child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child: it must never return into the caller's code, otherwise two
	 * processes would continue executing the parent's logic. Always leave
	 * via _exit().
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1311
e4103cf6 1312int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1313 const char *newname)
e5848d39 1314{
3dd78294 1315 __do_free char *cmd = NULL;
ebc73a67 1316 pid_t fpid;
e5848d39
SH
1317
1318 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1319 * However, IIUC this involves a bit more complicated work to talk to
1320 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1321 */
1322 cmd = on_path("iw", NULL);
3dd78294
CB
1323 if (!cmd) {
1324 return -1;
1325 }
e5848d39
SH
1326
1327 fpid = fork();
1328 if (fpid < 0)
3dd78294 1329 return -1;
ebc73a67 1330
e5848d39
SH
1331 if (fpid == 0) {
1332 char pidstr[30];
1333 sprintf(pidstr, "%d", pid);
ebc73a67
CB
1334 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
1335 (char *)NULL);
ebd582ae 1336 _exit(EXIT_FAILURE);
e5848d39 1337 }
ebc73a67 1338
e5848d39 1339 if (wait_for_pid(fpid))
3dd78294 1340 return -1;
e5848d39 1341
e5848d39 1342 if (newname)
3dd78294 1343 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1344
3dd78294 1345 return 0;
e5848d39
SH
1346}
1347
8d357196 1348int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1349{
3dd78294 1350 __do_free char *physname = NULL;
8befa924
SH
1351 int index;
1352
8befa924
SH
1353 if (!ifname)
1354 return -EINVAL;
1355
32571606 1356 index = if_nametoindex(ifname);
49428bf3
DY
1357 if (!index)
1358 return -EINVAL;
32571606 1359
ebc73a67
CB
1360 physname = is_wlan(ifname);
1361 if (physname)
e5848d39
SH
1362 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1363
8d357196 1364 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1365}
1366
b84f58b9 1367int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1368{
b84f58b9 1369 int err;
ebc73a67
CB
1370 struct ifinfomsg *ifi;
1371 struct nl_handler nlh;
1372 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1373
3cfc0f3a
MN
1374 err = netlink_open(&nlh, NETLINK_ROUTE);
1375 if (err)
1376 return err;
0ad19a3f 1377
3cfc0f3a 1378 err = -ENOMEM;
0ad19a3f 1379 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1380 if (!nlmsg)
1381 goto out;
1382
06f976ca 1383 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1384 if (!answer)
1385 goto out;
1386
ebc73a67 1387 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1388 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1389
1390 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1391 if (!ifi)
1392 goto out;
06f976ca
SZ
1393 ifi->ifi_family = AF_UNSPEC;
1394 ifi->ifi_index = ifindex;
0ad19a3f 1395
3cfc0f3a 1396 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1397out:
1398 netlink_close(&nlh);
1399 nlmsg_free(answer);
1400 nlmsg_free(nlmsg);
1401 return err;
1402}
1403
b84f58b9
DL
/* Delete the interface called @name. Returns -EINVAL when the name has no
 * interface index, otherwise the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1414
1415int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1416{
ebc73a67 1417 int err, len;
06f976ca 1418 struct ifinfomsg *ifi;
ebc73a67
CB
1419 struct nl_handler nlh;
1420 struct nlmsg *answer = NULL, *nlmsg = NULL;
b9a5bb58 1421
3cfc0f3a
MN
1422 err = netlink_open(&nlh, NETLINK_ROUTE);
1423 if (err)
1424 return err;
b9a5bb58 1425
b84f58b9 1426 len = strlen(newname);
90d79629
CB
1427 if (len == 1 || len >= IFNAMSIZ) {
1428 err = -EINVAL;
b84f58b9 1429 goto out;
90d79629 1430 }
b84f58b9 1431
3cfc0f3a 1432 err = -ENOMEM;
b9a5bb58
DL
1433 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1434 if (!nlmsg)
1435 goto out;
1436
06f976ca 1437 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58
DL
1438 if (!answer)
1439 goto out;
1440
ebc73a67 1441 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1442 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1443
1444 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1445 if (!ifi)
1446 goto out;
06f976ca
SZ
1447 ifi->ifi_family = AF_UNSPEC;
1448 ifi->ifi_index = ifindex;
b84f58b9
DL
1449
1450 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1451 goto out;
b9a5bb58 1452
3cfc0f3a 1453 err = netlink_transaction(&nlh, nlmsg, answer);
b9a5bb58
DL
1454out:
1455 netlink_close(&nlh);
1456 nlmsg_free(answer);
1457 nlmsg_free(nlmsg);
1458 return err;
1459}
1460
b84f58b9
DL
1461int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1462{
1463 int len, index;
1464
1465 len = strlen(oldname);
dae3fdf6 1466 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1467 return -EINVAL;
1468
1469 index = if_nametoindex(oldname);
1470 if (!index)
1471 return -EINVAL;
1472
1473 return lxc_netdev_rename_by_index(index, newname);
1474}
1475
8befa924 1476int netdev_set_flag(const char *name, int flag)
0ad19a3f 1477{
ebc73a67 1478 int err, index, len;
06f976ca 1479 struct ifinfomsg *ifi;
ebc73a67
CB
1480 struct nl_handler nlh;
1481 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1482
3cfc0f3a
MN
1483 err = netlink_open(&nlh, NETLINK_ROUTE);
1484 if (err)
1485 return err;
0ad19a3f 1486
3cfc0f3a 1487 err = -EINVAL;
0ad19a3f 1488 len = strlen(name);
dae3fdf6 1489 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1490 goto out;
1491
3cfc0f3a 1492 err = -ENOMEM;
0ad19a3f 1493 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1494 if (!nlmsg)
1495 goto out;
1496
06f976ca 1497 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1498 if (!answer)
1499 goto out;
1500
3cfc0f3a 1501 err = -EINVAL;
0ad19a3f 1502 index = if_nametoindex(name);
1503 if (!index)
1504 goto out;
1505
ebc73a67 1506 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1507 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1508
1509 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1510 if (!ifi) {
1511 err = -ENOMEM;
1512 goto out;
1513 }
06f976ca
SZ
1514 ifi->ifi_family = AF_UNSPEC;
1515 ifi->ifi_index = index;
1516 ifi->ifi_change |= IFF_UP;
1517 ifi->ifi_flags |= flag;
0ad19a3f 1518
1519 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1520out:
1521 netlink_close(&nlh);
1522 nlmsg_free(nlmsg);
1523 nlmsg_free(answer);
1524 return err;
1525}
1526
ebc73a67 1527int netdev_get_flag(const char *name, int *flag)
efa1cf45 1528{
ebc73a67 1529 int err, index, len;
a4318300 1530 struct ifinfomsg *ifi;
ebc73a67
CB
1531 struct nl_handler nlh;
1532 struct nlmsg *answer = NULL, *nlmsg = NULL;
efa1cf45
DY
1533
1534 if (!name)
1535 return -EINVAL;
1536
1537 err = netlink_open(&nlh, NETLINK_ROUTE);
1538 if (err)
1539 return err;
1540
1541 err = -EINVAL;
1542 len = strlen(name);
1543 if (len == 1 || len >= IFNAMSIZ)
1544 goto out;
1545
1546 err = -ENOMEM;
1547 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1548 if (!nlmsg)
1549 goto out;
1550
06f976ca 1551 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45
DY
1552 if (!answer)
1553 goto out;
1554
1555 err = -EINVAL;
1556 index = if_nametoindex(name);
1557 if (!index)
1558 goto out;
1559
06f976ca
SZ
1560 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1561 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1562
1563 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1564 if (!ifi) {
1565 err = -ENOMEM;
1566 goto out;
1567 }
06f976ca
SZ
1568 ifi->ifi_family = AF_UNSPEC;
1569 ifi->ifi_index = index;
efa1cf45
DY
1570
1571 err = netlink_transaction(&nlh, nlmsg, answer);
1572 if (err)
1573 goto out;
1574
06f976ca 1575 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1576
1577 *flag = ifi->ifi_flags;
1578out:
1579 netlink_close(&nlh);
1580 nlmsg_free(nlmsg);
1581 nlmsg_free(answer);
1582 return err;
1583}
1584
1585/*
1586 * \brief Check a interface is up or not.
1587 *
1588 * \param name: name for the interface.
1589 *
1590 * \return int.
1591 * 0 means interface is down.
1592 * 1 means interface is up.
1593 * Others means error happened, and ret-value is the error number.
1594 */
ebc73a67 1595int lxc_netdev_isup(const char *name)
efa1cf45 1596{
ebc73a67 1597 int err, flag;
efa1cf45
DY
1598
1599 err = netdev_get_flag(name, &flag);
1600 if (err)
ebc73a67
CB
1601 return err;
1602
efa1cf45
DY
1603 if (flag & IFF_UP)
1604 return 1;
ebc73a67 1605
efa1cf45 1606 return 0;
efa1cf45
DY
1607}
1608
0130df54
SH
1609int netdev_get_mtu(int ifindex)
1610{
ebc73a67 1611 int answer_len, err, res;
0130df54 1612 struct nl_handler nlh;
06f976ca 1613 struct ifinfomsg *ifi;
0130df54 1614 struct nlmsghdr *msg;
ebc73a67
CB
1615 int readmore = 0, recv_len = 0;
1616 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
1617
1618 err = netlink_open(&nlh, NETLINK_ROUTE);
1619 if (err)
1620 return err;
1621
1622 err = -ENOMEM;
1623 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1624 if (!nlmsg)
1625 goto out;
1626
06f976ca 1627 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
1628 if (!answer)
1629 goto out;
1630
1631 /* Save the answer buffer length, since it will be overwritten
1632 * on the first receive (and we might need to receive more than
ebc73a67
CB
1633 * once.
1634 */
06f976ca
SZ
1635 answer_len = answer->nlmsghdr->nlmsg_len;
1636
ebc73a67 1637 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1638 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1639
06f976ca 1640 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1641 if (!ifi)
1642 goto out;
06f976ca 1643 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1644
1645 /* Send the request for addresses, which returns all addresses
1646 * on all interfaces. */
1647 err = netlink_send(&nlh, nlmsg);
1648 if (err < 0)
1649 goto out;
1650
6ce39620
CB
1651#pragma GCC diagnostic push
1652#pragma GCC diagnostic ignored "-Wcast-align"
1653
0130df54
SH
1654 do {
1655 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1656 * overwritten by a previous receive.
1657 */
06f976ca 1658 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1659
1660 /* Get the (next) batch of reply messages */
1661 err = netlink_rcv(&nlh, answer);
1662 if (err < 0)
1663 goto out;
1664
1665 recv_len = err;
0130df54
SH
1666
1667 /* Satisfy the typing for the netlink macros */
06f976ca 1668 msg = answer->nlmsghdr;
0130df54
SH
1669
1670 while (NLMSG_OK(msg, recv_len)) {
1671
1672 /* Stop reading if we see an error message */
1673 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1674 struct nlmsgerr *errmsg =
1675 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
1676 err = errmsg->error;
1677 goto out;
1678 }
1679
1680 /* Stop reading if we see a NLMSG_DONE message */
1681 if (msg->nlmsg_type == NLMSG_DONE) {
1682 readmore = 0;
1683 break;
1684 }
1685
06f976ca 1686 ifi = NLMSG_DATA(msg);
0130df54
SH
1687 if (ifi->ifi_index == ifindex) {
1688 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
1689 int attr_len =
1690 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 1691 res = 0;
ebc73a67
CB
1692 while (RTA_OK(rta, attr_len)) {
1693 /* Found a local address for the
1694 * requested interface, return it.
1695 */
0130df54 1696 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
1697 memcpy(&res, RTA_DATA(rta),
1698 sizeof(int));
0130df54
SH
1699 err = res;
1700 goto out;
1701 }
1702 rta = RTA_NEXT(rta, attr_len);
1703 }
0130df54
SH
1704 }
1705
ebc73a67
CB
1706 /* Keep reading more data from the socket if the last
1707 * message had the NLF_F_MULTI flag set.
1708 */
0130df54
SH
1709 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1710
ebc73a67 1711 /* Look at the next message received in this buffer. */
0130df54
SH
1712 msg = NLMSG_NEXT(msg, recv_len);
1713 }
1714 } while (readmore);
1715
6ce39620
CB
1716#pragma GCC diagnostic pop
1717
ebc73a67 1718 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
1719 err = -1;
1720
1721out:
1722 netlink_close(&nlh);
1723 nlmsg_free(answer);
1724 nlmsg_free(nlmsg);
1725 return err;
1726}
1727
d472214b 1728int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1729{
54256301 1730 int err, len;
06f976ca 1731 struct ifinfomsg *ifi;
ebc73a67
CB
1732 struct nl_handler nlh;
1733 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1734
3cfc0f3a
MN
1735 err = netlink_open(&nlh, NETLINK_ROUTE);
1736 if (err)
1737 return err;
75d09f83 1738
3cfc0f3a 1739 err = -EINVAL;
75d09f83 1740 len = strlen(name);
dae3fdf6 1741 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1742 goto out;
1743
3cfc0f3a 1744 err = -ENOMEM;
75d09f83
DL
1745 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1746 if (!nlmsg)
1747 goto out;
1748
06f976ca 1749 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1750 if (!answer)
1751 goto out;
1752
ebc73a67 1753 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1754 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1755
1756 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1757 if (!ifi) {
1758 err = -ENOMEM;
1759 goto out;
1760 }
06f976ca 1761 ifi->ifi_family = AF_UNSPEC;
54256301
CB
1762
1763 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1764 goto out;
75d09f83
DL
1765
1766 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1767 goto out;
1768
1769 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1770out:
1771 netlink_close(&nlh);
1772 nlmsg_free(nlmsg);
1773 nlmsg_free(answer);
1774 return err;
1775}
1776
d472214b 1777int lxc_netdev_up(const char *name)
0ad19a3f 1778{
d472214b 1779 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1780}
1781
/* Call netdev_set_flag() with no flags set for interface @name; returns what
 * netdev_set_flag() returns.
 */
int lxc_netdev_down(const char *name)
{
	int ret = netdev_set_flag(name, 0);

	return ret;
}
1786
54256301 1787int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 1788{
ebc73a67 1789 int err, len;
06f976ca 1790 struct ifinfomsg *ifi;
ebc73a67 1791 struct nl_handler nlh;
0ad19a3f 1792 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1793 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1794
3cfc0f3a
MN
1795 err = netlink_open(&nlh, NETLINK_ROUTE);
1796 if (err)
1797 return err;
0ad19a3f 1798
3cfc0f3a 1799 err = -EINVAL;
0ad19a3f 1800 len = strlen(name1);
dae3fdf6 1801 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1802 goto out;
1803
1804 len = strlen(name2);
dae3fdf6 1805 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1806 goto out;
1807
3cfc0f3a 1808 err = -ENOMEM;
0ad19a3f 1809 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1810 if (!nlmsg)
1811 goto out;
1812
06f976ca 1813 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1814 if (!answer)
1815 goto out;
1816
06f976ca 1817 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1818 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1819 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1820
1821 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1822 if (!ifi)
1823 goto out;
06f976ca 1824 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1825
3cfc0f3a 1826 err = -EINVAL;
79e68309 1827 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1828 if (!nest1)
1829 goto out;
1830
1831 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1832 goto out;
1833
1834 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1835 if (!nest2)
1836 goto out;
1837
1838 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1839 if (!nest3)
1840 goto out;
1841
06f976ca 1842 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1843 if (!ifi) {
1844 err = -ENOMEM;
06f976ca 1845 goto out;
25a9939b 1846 }
0ad19a3f 1847
1848 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1849 goto out;
1850
54256301
CB
1851 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
1852 goto out;
1853
1854 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
1855 goto out;
1856
0ad19a3f 1857 nla_end_nested(nlmsg, nest3);
0ad19a3f 1858 nla_end_nested(nlmsg, nest2);
0ad19a3f 1859 nla_end_nested(nlmsg, nest1);
1860
1861 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1862 goto out;
1863
3cfc0f3a 1864 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1865out:
1866 netlink_close(&nlh);
1867 nlmsg_free(answer);
1868 nlmsg_free(nlmsg);
1869 return err;
1870}
1871
ebc73a67 1872/* TODO: merge with lxc_macvlan_create */
7c11d57a 1873int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1874{
ebc73a67 1875 int err, len, lindex;
06f976ca 1876 struct ifinfomsg *ifi;
ebc73a67 1877 struct nl_handler nlh;
26c39028 1878 struct rtattr *nest, *nest2;
ebc73a67 1879 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1880
3cfc0f3a
MN
1881 err = netlink_open(&nlh, NETLINK_ROUTE);
1882 if (err)
1883 return err;
26c39028 1884
3cfc0f3a 1885 err = -EINVAL;
26c39028 1886 len = strlen(master);
dae3fdf6 1887 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1888 goto err3;
1889
1890 len = strlen(name);
dae3fdf6 1891 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1892 goto err3;
1893
3cfc0f3a 1894 err = -ENOMEM;
26c39028
JHS
1895 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1896 if (!nlmsg)
1897 goto err3;
1898
06f976ca 1899 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1900 if (!answer)
1901 goto err2;
1902
3cfc0f3a 1903 err = -EINVAL;
26c39028
JHS
1904 lindex = if_nametoindex(master);
1905 if (!lindex)
1906 goto err1;
1907
06f976ca 1908 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1909 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1910 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1911
1912 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1913 if (!ifi) {
1914 err = -ENOMEM;
1915 goto err1;
1916 }
06f976ca 1917 ifi->ifi_family = AF_UNSPEC;
26c39028 1918
79e68309 1919 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1920 if (!nest)
1921 goto err1;
1922
1923 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1924 goto err1;
1925
1926 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1927 if (!nest2)
1928 goto err1;
e892973e 1929
26c39028
JHS
1930 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1931 goto err1;
e892973e 1932
26c39028 1933 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1934 nla_end_nested(nlmsg, nest);
1935
1936 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1937 goto err1;
1938
1939 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1940 goto err1;
1941
3cfc0f3a 1942 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1943err1:
1944 nlmsg_free(answer);
1945err2:
1946 nlmsg_free(nlmsg);
1947err3:
1948 netlink_close(&nlh);
1949 return err;
1950}
1951
e892973e 1952int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1953{
ebc73a67 1954 int err, index, len;
06f976ca 1955 struct ifinfomsg *ifi;
ebc73a67 1956 struct nl_handler nlh;
e892973e 1957 struct rtattr *nest, *nest2;
ebc73a67 1958 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1959
3cfc0f3a
MN
1960 err = netlink_open(&nlh, NETLINK_ROUTE);
1961 if (err)
1962 return err;
0ad19a3f 1963
3cfc0f3a 1964 err = -EINVAL;
0ad19a3f 1965 len = strlen(master);
dae3fdf6 1966 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1967 goto out;
1968
1969 len = strlen(name);
dae3fdf6 1970 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1971 goto out;
1972
3cfc0f3a 1973 err = -ENOMEM;
0ad19a3f 1974 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1975 if (!nlmsg)
1976 goto out;
1977
06f976ca 1978 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1979 if (!answer)
1980 goto out;
1981
3cfc0f3a 1982 err = -EINVAL;
0ad19a3f 1983 index = if_nametoindex(master);
1984 if (!index)
1985 goto out;
1986
06f976ca 1987 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1988 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1989 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1990
1991 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1992 if (!ifi) {
1993 err = -ENOMEM;
1994 goto out;
1995 }
06f976ca 1996 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1997
79e68309 1998 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1999 if (!nest)
2000 goto out;
2001
2002 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
2003 goto out;
2004
e892973e
DL
2005 if (mode) {
2006 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2007 if (!nest2)
2008 goto out;
2009
2010 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
2011 goto out;
2012
2013 nla_end_nested(nlmsg, nest2);
2014 }
2015
0ad19a3f 2016 nla_end_nested(nlmsg, nest);
2017
2018 if (nla_put_u32(nlmsg, IFLA_LINK, index))
2019 goto out;
2020
2021 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
2022 goto out;
2023
3cfc0f3a 2024 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 2025out:
2026 netlink_close(&nlh);
2027 nlmsg_free(answer);
2028 nlmsg_free(nlmsg);
2029 return err;
2030}
2031
/* Open @path write-only and write the string @value to it.
 *
 * Returns 0 on success, or -errno from the failing open() or
 * lxc_write_nointr() call.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	/* Capture errno before close() has a chance to change it. */
	if (lxc_write_nointr(fd, value, strlen(value)) < 0)
		ret = -errno;

	close(fd);
	return ret;
}
2047
6dfa9581 2048static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 2049{
2050 int ret;
2051 char path[PATH_MAX];
6509154d 2052
2053 if (family != AF_INET && family != AF_INET6)
6dfa9581 2054 return -EINVAL;
6509154d 2055
2056 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
6dfa9581 2057 family == AF_INET ? "ipv4" : "ipv6", ifname, "forwarding");
6509154d 2058 if (ret < 0 || (size_t)ret >= PATH_MAX)
6dfa9581 2059 return -E2BIG;
6509154d 2060
6dfa9581
TP
2061 return proc_sys_net_write(path, flag ? "1" : "0");
2062}
2063
/* Enable forwarding for interface @name and address @family; returns what
 * ip_forwarding_set() returns.
 */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
2068
/* Disable forwarding for interface @name and address @family; returns what
 * ip_forwarding_set() returns.
 */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int enable = 0;

	return ip_forwarding_set(name, family, enable);
}
2073
0ad19a3f 2074static int neigh_proxy_set(const char *ifname, int family, int flag)
2075{
9ba8130c 2076 int ret;
419590da 2077 char path[PATH_MAX];
0ad19a3f 2078
2079 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 2080 return -EINVAL;
0ad19a3f 2081
419590da 2082 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
2083 family == AF_INET ? "ipv4" : "ipv6", ifname,
2084 family == AF_INET ? "proxy_arp" : "proxy_ndp");
419590da 2085 if (ret < 0 || (size_t)ret >= PATH_MAX)
9ba8130c 2086 return -E2BIG;
0ad19a3f 2087
ebc73a67 2088 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 2089}
2090
6509154d 2091static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2092{
2093 int ret;
2094 char path[PATH_MAX];
2095 char buf[1] = "";
2096
2097 if (family != AF_INET && family != AF_INET6)
596a002c 2098 return ret_set_errno(-1, EINVAL);
6509154d 2099
2100 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
2101 family == AF_INET ? "ipv4" : "ipv6", ifname,
2102 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2103 if (ret < 0 || (size_t)ret >= PATH_MAX)
596a002c 2104 return ret_set_errno(-1, E2BIG);
6509154d 2105
2106 return lxc_read_file_expect(path, buf, 1, "1");
2107}
2108
/* Enable neighbour proxying for interface @name and address @family; returns
 * what neigh_proxy_set() returns.
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2113
/* Disable neighbour proxying for interface @name and address @family; returns
 * what neigh_proxy_set() returns.
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
2118
/* Return the value of the hexadecimal digit @c, or -1 if it is not one. */
static int mac_hex_value(char c)
{
	/* <ctype.h> functions require a value representable as unsigned char;
	 * passing a negative plain char is undefined behaviour.
	 */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are stored in
 * sa_data. Each octet is one or two hexadecimal digits; octets may be
 * separated by ':'. Parsing stops at the end of the string or after ETH_ALEN
 * octets, whichever comes first.
 *
 * Returns 0 on success and -EINVAL on an unexpected character; octets parsed
 * before the error have already been written to @sockaddr.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		unsigned val;
		int hi, lo;
		char c;

		/* First digit of the octet is mandatory. */
		hi = mac_hex_value(*macaddr++);
		if (hi < 0)
			return -EINVAL;
		val = (unsigned)hi;

		/* Second digit is optional: a ':' or the end of the string
		 * leaves a single-digit octet.
		 */
		c = *macaddr;
		lo = mac_hex_value(c);
		if (lo >= 0)
			val = (val << 4) | (unsigned)lo;
		else if (c != ':' && c != '\0')
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator following a two-digit octet. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2163
ebc73a67
CB
2164static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2165 void *acast, int prefix)
0ad19a3f 2166{
ebc73a67 2167 int addrlen, err;
06f976ca 2168 struct ifaddrmsg *ifa;
ebc73a67
CB
2169 struct nl_handler nlh;
2170 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 2171
ebc73a67
CB
2172 addrlen = family == AF_INET ? sizeof(struct in_addr)
2173 : sizeof(struct in6_addr);
4bf1968d 2174
3cfc0f3a
MN
2175 err = netlink_open(&nlh, NETLINK_ROUTE);
2176 if (err)
2177 return err;
0ad19a3f 2178
3cfc0f3a 2179 err = -ENOMEM;
0ad19a3f 2180 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2181 if (!nlmsg)
2182 goto out;
2183
06f976ca 2184 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2185 if (!answer)
2186 goto out;
2187
06f976ca 2188 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2189 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2190 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2191
2192 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2193 if (!ifa)
25a9939b 2194 goto out;
06f976ca
SZ
2195 ifa->ifa_prefixlen = prefix;
2196 ifa->ifa_index = ifindex;
2197 ifa->ifa_family = family;
2198 ifa->ifa_scope = 0;
acf47e1b 2199
3cfc0f3a 2200 err = -EINVAL;
4bf1968d 2201 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 2202 goto out;
2203
4bf1968d 2204 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 2205 goto out;
2206
d8948a52 2207 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
2208 goto out;
2209
ebc73a67 2210 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 2211 err = -EPROTONOSUPPORT;
79881dc6
DL
2212 if (family == AF_INET6 &&
2213 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2214 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 2215 goto out;
0ad19a3f 2216
3cfc0f3a 2217 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 2218out:
2219 netlink_close(&nlh);
2220 nlmsg_free(answer);
2221 nlmsg_free(nlmsg);
2222 return err;
2223}
2224
/* IPv6 front end for ip_addr_add(): @mcast is forwarded in the broadcast
 * slot and @acast in the anycast slot. Returns whatever ip_addr_add()
 * returns.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	int ret;

	ret = ip_addr_add(AF_INET6, ifindex, addr, mcast, acast, prefix);

	return ret;
}
2231
ebc73a67
CB
/* IPv4 front end for ip_addr_add(): no anycast address is passed (NULL).
 * Returns whatever ip_addr_add() returns.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	int ret;

	ret = ip_addr_add(AF_INET, ifindex, addr, bcast, NULL, prefix);

	return ret;
}
2237
ebc73a67
CB
/* Extract the address carried by an RTM_NEWADDR message.
 *
 * Walks the message's attributes and copies the payload of every IFA_ADDRESS
 * or IFA_LOCAL attribute into *res, stopping at the first IFA_LOCAL. An
 * IFA_ADDRESS seen earlier is therefore overwritten by a later IFA_LOCAL.
 * If *res is NULL on the first hit, a buffer of the address size is
 * allocated with malloc() and stored in *res (so res should be an in_addr**
 * or in6_addr**).
 *
 * Returns 0 when the message is for a different family (nothing is touched)
 * or after the walk completes, and -1 on a payload-size mismatch or
 * allocation failure.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *attr;
	int addrlen;

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* RTA_NEXT() also decrements the remaining byte count. */
	for (attr = IFA_RTA(ifa); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * payload size; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* Reuse a buffer filled by an earlier IFA_ADDRESS. */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2291
19a26f82
MK
2292static int ip_addr_get(int family, int ifindex, void **res)
2293{
ebc73a67 2294 int answer_len, err;
06f976ca 2295 struct ifaddrmsg *ifa;
ebc73a67 2296 struct nl_handler nlh;
19a26f82 2297 struct nlmsghdr *msg;
ebc73a67
CB
2298 int readmore = 0, recv_len = 0;
2299 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
2300
2301 err = netlink_open(&nlh, NETLINK_ROUTE);
2302 if (err)
2303 return err;
2304
2305 err = -ENOMEM;
2306 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2307 if (!nlmsg)
2308 goto out;
2309
06f976ca 2310 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
2311 if (!answer)
2312 goto out;
2313
ebc73a67
CB
2314 /* Save the answer buffer length, since it will be overwritten on the
2315 * first receive (and we might need to receive more than once).
2316 */
06f976ca
SZ
2317 answer_len = answer->nlmsghdr->nlmsg_len;
2318
ebc73a67 2319 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2320 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2321
06f976ca 2322 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
2323 if (!ifa)
2324 goto out;
06f976ca 2325 ifa->ifa_family = family;
19a26f82 2326
ebc73a67
CB
2327 /* Send the request for addresses, which returns all addresses on all
2328 * interfaces.
2329 */
19a26f82
MK
2330 err = netlink_send(&nlh, nlmsg);
2331 if (err < 0)
2332 goto out;
19a26f82 2333
6ce39620
CB
2334#pragma GCC diagnostic push
2335#pragma GCC diagnostic ignored "-Wcast-align"
2336
19a26f82
MK
2337 do {
2338 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2339 * overwritten by a previous receive.
2340 */
06f976ca 2341 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2342
ebc73a67 2343 /* Get the (next) batch of reply messages. */
19a26f82
MK
2344 err = netlink_rcv(&nlh, answer);
2345 if (err < 0)
2346 goto out;
2347
2348 recv_len = err;
2349 err = 0;
2350
ebc73a67 2351 /* Satisfy the typing for the netlink macros. */
06f976ca 2352 msg = answer->nlmsghdr;
19a26f82
MK
2353
2354 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2355 /* Stop reading if we see an error message. */
19a26f82 2356 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
2357 struct nlmsgerr *errmsg =
2358 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
2359 err = errmsg->error;
2360 goto out;
2361 }
2362
ebc73a67 2363 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2364 if (msg->nlmsg_type == NLMSG_DONE) {
2365 readmore = 0;
2366 break;
2367 }
2368
2369 if (msg->nlmsg_type != RTM_NEWADDR) {
2370 err = -1;
2371 goto out;
2372 }
2373
06f976ca
SZ
2374 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2375 if (ifa->ifa_index == ifindex) {
2376 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
2377 err = -1;
2378 goto out;
2379 }
2380
ebc73a67 2381 /* Found a result, stop searching. */
19a26f82
MK
2382 if (*res)
2383 goto out;
2384 }
2385
ebc73a67
CB
2386 /* Keep reading more data from the socket if the last
2387 * message had the NLF_F_MULTI flag set.
2388 */
19a26f82
MK
2389 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2390
ebc73a67 2391 /* Look at the next message received in this buffer. */
19a26f82
MK
2392 msg = NLMSG_NEXT(msg, recv_len);
2393 }
2394 } while (readmore);
2395
6ce39620
CB
2396#pragma GCC diagnostic pop
2397
19a26f82 2398 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2399 * error.
2400 */
19a26f82
MK
2401 err = -1;
2402
2403out:
2404 netlink_close(&nlh);
2405 nlmsg_free(answer);
2406 nlmsg_free(nlmsg);
2407 return err;
2408}
2409
/* IPv6 front end for ip_addr_get(); the result is stored in *res. Returns
 * whatever ip_addr_get() returns.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2414
/* IPv4 front end for ip_addr_get(); the result is stored in *res. Returns
 * whatever ip_addr_get() returns.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2419
f8fee0e2
MK
2420static int ip_gateway_add(int family, int ifindex, void *gw)
2421{
ebc73a67 2422 int addrlen, err;
f8fee0e2 2423 struct nl_handler nlh;
06f976ca 2424 struct rtmsg *rt;
ebc73a67 2425 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2426
ebc73a67
CB
2427 addrlen = family == AF_INET ? sizeof(struct in_addr)
2428 : sizeof(struct in6_addr);
f8fee0e2
MK
2429
2430 err = netlink_open(&nlh, NETLINK_ROUTE);
2431 if (err)
2432 return err;
2433
2434 err = -ENOMEM;
2435 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2436 if (!nlmsg)
2437 goto out;
2438
06f976ca 2439 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
2440 if (!answer)
2441 goto out;
2442
06f976ca 2443 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2444 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2445 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2446
2447 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
2448 if (!rt)
2449 goto out;
06f976ca
SZ
2450 rt->rtm_family = family;
2451 rt->rtm_table = RT_TABLE_MAIN;
2452 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2453 rt->rtm_protocol = RTPROT_BOOT;
2454 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2455 /* "default" destination */
06f976ca 2456 rt->rtm_dst_len = 0;
f8fee0e2
MK
2457
2458 err = -EINVAL;
a2f9a670 2459
2460 /* If gateway address not supplied, then a device route will be created instead */
2461 if (gw != NULL) {
2462 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2463 goto out;
2464 }
f8fee0e2
MK
2465
2466 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2467 * addresses for the gateway.
2468 */
f8fee0e2
MK
2469 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2470 goto out;
2471
2472 err = netlink_transaction(&nlh, nlmsg, answer);
2473out:
2474 netlink_close(&nlh);
2475 nlmsg_free(answer);
2476 nlmsg_free(nlmsg);
2477 return err;
2478}
2479
/* IPv4 front end for ip_gateway_add(). Returns whatever it returns. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	int ret;

	ret = ip_gateway_add(AF_INET, ifindex, gw);

	return ret;
}
2484
/* IPv6 front end for ip_gateway_add(). Returns whatever it returns. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	int ret;

	ret = ip_gateway_add(AF_INET6, ifindex, gw);

	return ret;
}
/* Report whether @bridge should be treated as an openvswitch bridge.
 *
 * The test is purely negative: the function returns true exactly when
 * stat() on "/sys/class/net/<bridge>/bridge" fails with ENOENT. It returns
 * false when that path exists, when stat() fails for any other reason, or
 * when the path does not fit into the buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	struct stat sb;
	/* 22 == strlen("/sys/class/net/") + strlen("/bridge") */
	char brdirname[22 + IFNAMSIZ + 1] = {0};
	int len;

	len = snprintf(brdirname, sizeof(brdirname), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(brdirname))
		return false;

	if (stat(brdirname, &sb) == 0)
		return false;

	return errno == ENOENT;
}
2506
581c75e7
CB
/* Argument bundle handed through run_command()'s void pointer to the
 * ovs-vsctl exec callbacks.
 */
struct ovs_veth_args {
	const char *bridge; /* bridge name passed to ovs-vsctl */
	const char *nic;    /* port (interface) name passed to ovs-vsctl */
};
2511
cb0dc11b
CB
2512/* Called from a background thread - when nic goes away, remove it from the
2513 * bridge.
c43cbc04 2514 */
581c75e7 2515static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2516{
581c75e7 2517 struct ovs_veth_args *args = data;
cb0dc11b 2518
581c75e7
CB
2519 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2520 (char *)NULL);
2521 return -1;
c43cbc04
SH
2522}
2523
581c75e7 2524int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2525{
c43cbc04 2526 int ret;
419590da 2527 char cmd_output[PATH_MAX];
581c75e7 2528 struct ovs_veth_args args;
6ad22d06 2529
581c75e7
CB
2530 args.bridge = bridge;
2531 args.nic = nic;
2532 ret = run_command(cmd_output, sizeof(cmd_output),
2533 lxc_ovs_delete_port_exec, (void *)&args);
2534 if (ret < 0) {
2535 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
53796b94 2536 "%s", nic, bridge, cmd_output);
6ad22d06 2537 return -1;
581c75e7 2538 }
0d204771 2539
581c75e7
CB
2540 return 0;
2541}
ebc73a67 2542
581c75e7
CB
2543static int lxc_ovs_attach_bridge_exec(void *data)
2544{
2545 struct ovs_veth_args *args = data;
ebc73a67 2546
581c75e7
CB
2547 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2548 (char *)NULL);
2549 return -1;
2550}
ebc73a67 2551
581c75e7
CB
2552static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2553{
2554 int ret;
419590da 2555 char cmd_output[PATH_MAX];
581c75e7 2556 struct ovs_veth_args args;
ebc73a67 2557
581c75e7
CB
2558 args.bridge = bridge;
2559 args.nic = nic;
2560 ret = run_command(cmd_output, sizeof(cmd_output),
2561 lxc_ovs_attach_bridge_exec, (void *)&args);
2562 if (ret < 0) {
2563 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
53796b94 2564 nic, bridge, cmd_output);
581c75e7 2565 return -1;
c43cbc04 2566 }
0d204771 2567
581c75e7 2568 return 0;
0d204771 2569}
0d204771 2570
581c75e7 2571int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2572{
ebc73a67 2573 int err, fd, index;
9de31d5a 2574 size_t retlen;
0ad19a3f 2575 struct ifreq ifr;
2576
dae3fdf6 2577 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2578 return -EINVAL;
0ad19a3f 2579
2580 index = if_nametoindex(ifname);
2581 if (!index)
3cfc0f3a 2582 return -EINVAL;
0ad19a3f 2583
0d204771 2584 if (is_ovs_bridge(bridge))
581c75e7 2585 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2586
ad9429e5 2587 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2588 if (fd < 0)
3cfc0f3a 2589 return -errno;
0ad19a3f 2590
9de31d5a 2591 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2592 if (retlen >= IFNAMSIZ) {
2593 close(fd);
9de31d5a 2594 return -E2BIG;
42cc4083 2595 }
9de31d5a 2596
ebc73a67 2597 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2598 ifr.ifr_ifindex = index;
7d163508 2599 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2600 close(fd);
3cfc0f3a
MN
2601 if (err)
2602 err = -errno;
0ad19a3f 2603
2604 return err;
2605}
72d0e1cb 2606
ebc73a67 2607static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2608 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2609 [LXC_NET_VETH] = "veth",
2610 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2611 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2612 [LXC_NET_PHYS] = "phys",
b343592b
BP
2613 [LXC_NET_VLAN] = "vlan",
2614 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2615};
2616
2617const char *lxc_net_type_to_str(int type)
2618{
2619 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2620 return NULL;
ebc73a67 2621
72d0e1cb
SG
2622 return lxc_network_types[type];
2623}
8befa924 2624
/* Alphabet from which lxc_mkifname() draws replacements for 'X'. */
static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2626
966e9f1f 2627char *lxc_mkifname(char *template)
a0265685 2628{
2d7bf744 2629 int ret;
b1e44ed1 2630 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2631 char name[IFNAMSIZ];
2632 bool exists = false;
2633 size_t i = 0;
280cc35f 2634#ifdef HAVE_RAND_R
2635 unsigned int seed;
2636
2637 seed = randseed(false);
2638#else
2639
2640 (void)randseed(true);
2641#endif
a0265685 2642
535e8859
CB
2643 if (strlen(template) >= IFNAMSIZ)
2644 return NULL;
2645
ebc73a67 2646 /* Get all the network interfaces. */
b1e44ed1 2647 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2d7bf744 2648 if (ret < 0) {
6d1400b5 2649 SYSERROR("Failed to get network interfaces");
2d7bf744
CB
2650 return NULL;
2651 }
a0265685 2652
ebc73a67 2653 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2654 for (;;) {
966e9f1f 2655 name[0] = '\0';
94b1cade 2656 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2657
966e9f1f 2658 exists = false;
280cc35f 2659
a0265685
SG
2660 for (i = 0; i < strlen(name); i++) {
2661 if (name[i] == 'X') {
2662#ifdef HAVE_RAND_R
8523344a 2663 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2664#else
8523344a 2665 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2666#endif
2667 }
2668 }
2669
2670 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2671 if (!strcmp(ifa->ifa_name, name)) {
2672 exists = true;
a0265685
SG
2673 break;
2674 }
2675 }
2676
966e9f1f 2677 if (!exists)
a0265685 2678 break;
a0265685
SG
2679 }
2680
b1e44ed1 2681 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2682 (void)strlcpy(template, name, strlen(template) + 1);
2683
2684 return template;
a0265685
SG
2685}
2686
8befa924
SH
/* Rewrite the hardware address of interface @veth1 so that its first byte
 * is 0xfe, leaving the remaining bytes as reported by SIOCGIFHWADDR.
 *
 * Returns 0 on success, -E2BIG if @veth1 does not fit into an interface
 * name (checked before any socket is opened), or a negative errno value if
 * socket() or one of the ioctl() calls fails.
 */
int setup_private_host_hw_addr(char *veth1)
{
	int err, saved_errno, sockfd;
	struct ifreq ifr = {0};

	/* Validate the name first so this failure needs no cleanup. */
	err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (err < 0 || (size_t)err >= IFNAMSIZ)
		return -E2BIG;

	sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sockfd < 0)
		return -errno;

	err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (err < 0)
		goto on_error;

	ifr.ifr_hwaddr.sa_data[0] = 0xfe;
	err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	if (err < 0)
		goto on_error;

	close(sockfd);
	return 0;

on_error:
	/* close() may overwrite errno, so keep the ioctl's value. */
	saved_errno = errno;
	close(sockfd);
	return -saved_errno;
}
811ef482
CB
2716
2717int lxc_find_gateway_addresses(struct lxc_handler *handler)
2718{
2719 struct lxc_list *network = &handler->conf->network;
2720 struct lxc_list *iterator;
2721 struct lxc_netdev *netdev;
2722 int link_index;
2723
2724 lxc_list_for_each(iterator, network) {
2725 netdev = iterator->elem;
2726
2727 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2728 continue;
2729
2730 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
03ca4af8 2731 ERROR("Automatic gateway detection is only supported for veth and macvlan");
811ef482
CB
2732 return -1;
2733 }
2734
de4855a8 2735 if (netdev->link[0] == '\0') {
811ef482
CB
2736 ERROR("Automatic gateway detection needs a link interface");
2737 return -1;
2738 }
2739
2740 link_index = if_nametoindex(netdev->link);
2741 if (!link_index)
2742 return -EINVAL;
2743
2744 if (netdev->ipv4_gateway_auto) {
2745 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
03ca4af8
TP
2746 ERROR("Failed to automatically find ipv4 gateway address from link interface \"%s\"",
2747 netdev->link);
811ef482
CB
2748 return -1;
2749 }
2750 }
2751
2752 if (netdev->ipv6_gateway_auto) {
2753 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
03ca4af8
TP
2754 ERROR("Failed to automatically find ipv6 gateway address from link interface \"%s\"",
2755 netdev->link);
811ef482
CB
2756 return -1;
2757 }
2758 }
2759 }
2760
2761 return 0;
2762}
2763
2764#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2765static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2766 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2767{
2768 int ret;
2769 pid_t child;
2770 int bytes, pipefd[2];
2771 char *token, *saveptr = NULL;
095ead80 2772 char netdev_link[IFNAMSIZ];
419590da 2773 char buffer[PATH_MAX] = {0};
94b1cade 2774 size_t retlen;
811ef482
CB
2775
2776 if (netdev->type != LXC_NET_VETH) {
2777 ERROR("Network type %d not support for unprivileged use", netdev->type);
2778 return -1;
2779 }
2780
2781 ret = pipe(pipefd);
2782 if (ret < 0) {
2783 SYSERROR("Failed to create pipe");
2784 return -1;
2785 }
2786
2787 child = fork();
2788 if (child < 0) {
2789 SYSERROR("Failed to create new process");
2790 close(pipefd[0]);
2791 close(pipefd[1]);
2792 return -1;
2793 }
2794
2795 if (child == 0) {
8335fd40 2796 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2797
2798 close(pipefd[0]);
2799
2800 ret = dup2(pipefd[1], STDOUT_FILENO);
2801 if (ret >= 0)
2802 ret = dup2(pipefd[1], STDERR_FILENO);
2803 close(pipefd[1]);
2804 if (ret < 0) {
2805 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2806 _exit(EXIT_FAILURE);
811ef482
CB
2807 }
2808
de4855a8 2809 if (netdev->link[0] != '\0')
9de31d5a 2810 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2811 else
9de31d5a
CB
2812 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2813 if (retlen >= IFNAMSIZ) {
2814 SYSERROR("Invalid network device name");
2815 _exit(EXIT_FAILURE);
2816 }
811ef482 2817
8335fd40
CB
2818 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2819 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2820 _exit(EXIT_FAILURE);
8335fd40 2821 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2822
2823 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2824 lxcname, pidstr, netdev_link,
de4855a8
CB
2825 netdev->name[0] != '\0' ? netdev->name : "(null)");
2826 if (netdev->name[0] != '\0')
811ef482
CB
2827 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2828 lxcpath, lxcname, pidstr, "veth", netdev_link,
2829 netdev->name, (char *)NULL);
2830 else
2831 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2832 lxcpath, lxcname, pidstr, "veth", netdev_link,
2833 (char *)NULL);
2834 SYSERROR("Failed to execute lxc-user-nic");
78070056 2835 _exit(EXIT_FAILURE);
811ef482
CB
2836 }
2837
2838 /* close the write-end of the pipe */
2839 close(pipefd[1]);
2840
419590da 2841 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482 2842 if (bytes < 0) {
74c6e2b0 2843 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2844 close(pipefd[0]);
6b9f82a9
CB
2845 } else {
2846 buffer[bytes - 1] = '\0';
811ef482 2847 }
811ef482
CB
2848
2849 ret = wait_for_pid(child);
2850 close(pipefd[0]);
6b9f82a9 2851 if (ret != 0 || bytes < 0) {
811ef482
CB
2852 ERROR("lxc-user-nic failed to configure requested network: %s",
2853 buffer[0] != '\0' ? buffer : "(null)");
2854 return -1;
2855 }
2856 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2857
2858 /* netdev->name */
2859 token = strtok_r(buffer, ":", &saveptr);
74c6e2b0
CB
2860 if (!token) {
2861 ERROR("Failed to parse lxc-user-nic output");
811ef482 2862 return -1;
74c6e2b0 2863 }
811ef482 2864
e389f2af
CB
2865 /*
2866 * lxc-user-nic will take care of proper network device naming. So
2867 * netdev->name and netdev->created_name need to be identical to not
2868 * trigger another rename later on.
2869 */
2870 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2871 if (retlen < IFNAMSIZ)
2872 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2873 if (retlen >= IFNAMSIZ) {
2874 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2875 return -E2BIG;
2876 }
811ef482 2877
74c6e2b0 2878 /* netdev->ifindex */
811ef482 2879 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2880 if (!token) {
2881 ERROR("Failed to parse lxc-user-nic output");
811ef482 2882 return -1;
74c6e2b0 2883 }
811ef482 2884
74c6e2b0
CB
2885 ret = lxc_safe_int(token, &netdev->ifindex);
2886 if (ret < 0) {
6d1400b5 2887 errno = -ret;
2888 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2889 return -1;
2890 }
2891
74c6e2b0 2892 /* netdev->priv.veth_attr.veth1 */
811ef482 2893 token = strtok_r(NULL, ":", &saveptr);
74c6e2b0
CB
2894 if (!token) {
2895 ERROR("Failed to parse lxc-user-nic output");
811ef482 2896 return -1;
74c6e2b0 2897 }
811ef482 2898
94b1cade
DJ
2899 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2900 if (retlen >= IFNAMSIZ) {
74c6e2b0
CB
2901 ERROR("Host side veth device name returned by lxc-user-nic is "
2902 "too long");
2903 return -E2BIG;
2904 }
74c6e2b0
CB
2905
2906 /* netdev->priv.veth_attr.ifindex */
2907 token = strtok_r(NULL, ":", &saveptr);
2908 if (!token) {
2909 ERROR("Failed to parse lxc-user-nic output");
2910 return -1;
2911 }
2912
2913 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
811ef482 2914 if (ret < 0) {
6d1400b5 2915 errno = -ret;
2916 SYSERROR("Failed to convert string \"%s\" to integer", token);
811ef482
CB
2917 return -1;
2918 }
2919
4d781681 2920 if (netdev->upscript) {
2921 char *argv[] = {
2922 "veth",
2923 netdev->link,
2924 netdev->priv.veth_attr.veth1,
2925 NULL,
2926 };
2927
e389f2af
CB
2928 ret = run_script_argv(lxcname, hooks_version, "net",
2929 netdev->upscript, "up", argv);
4d781681 2930 if (ret < 0)
2931 return -1;
2932 }
2933
811ef482
CB
2934 return 0;
2935}
2936
f0ecc19d 2937static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2938 struct lxc_netdev *netdev,
2939 const char *netns_path)
811ef482
CB
2940{
2941 int bytes, ret;
2942 pid_t child;
2943 int pipefd[2];
419590da 2944 char buffer[PATH_MAX] = {0};
811ef482
CB
2945
2946 if (netdev->type != LXC_NET_VETH) {
2947 ERROR("Network type %d not support for unprivileged use", netdev->type);
2948 return -1;
2949 }
2950
2951 ret = pipe(pipefd);
2952 if (ret < 0) {
2953 SYSERROR("Failed to create pipe");
2954 return -1;
2955 }
2956
2957 child = fork();
2958 if (child < 0) {
2959 SYSERROR("Failed to create new process");
2960 close(pipefd[0]);
2961 close(pipefd[1]);
2962 return -1;
2963 }
2964
2965 if (child == 0) {
8843fde4 2966 char *hostveth;
811ef482
CB
2967
2968 close(pipefd[0]);
2969
2970 ret = dup2(pipefd[1], STDOUT_FILENO);
2971 if (ret >= 0)
2972 ret = dup2(pipefd[1], STDERR_FILENO);
2973 close(pipefd[1]);
2974 if (ret < 0) {
2975 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2976 _exit(EXIT_FAILURE);
811ef482
CB
2977 }
2978
8843fde4
CB
2979 if (netdev->priv.veth_attr.pair[0] != '\0')
2980 hostveth = netdev->priv.veth_attr.pair;
2981 else
2982 hostveth = netdev->priv.veth_attr.veth1;
2983 if (hostveth[0] == '\0') {
74c6e2b0 2984 SYSERROR("Host side veth device name is missing");
a30b9023 2985 _exit(EXIT_FAILURE);
74c6e2b0
CB
2986 }
2987
de4855a8 2988 if (netdev->link[0] == '\0') {
811ef482 2989 SYSERROR("Network link for network device \"%s\" is "
74c6e2b0 2990 "missing", netdev->priv.veth_attr.veth1);
a30b9023 2991 _exit(EXIT_FAILURE);
74c6e2b0 2992 }
811ef482 2993
811ef482 2994 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2995 lxcname, netns_path, netdev->link, hostveth);
811ef482 2996 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2997 lxcname, netns_path, "veth", netdev->link, hostveth,
2998 (char *)NULL);
811ef482 2999 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 3000 _exit(EXIT_FAILURE);
811ef482
CB
3001 }
3002
3003 close(pipefd[1]);
3004
419590da 3005 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
811ef482
CB
3006 if (bytes < 0) {
3007 SYSERROR("Failed to read from pipe file descriptor.");
3008 close(pipefd[0]);
6b9f82a9
CB
3009 } else {
3010 buffer[bytes - 1] = '\0';
811ef482 3011 }
811ef482 3012
6b9f82a9
CB
3013 ret = wait_for_pid(child);
3014 close(pipefd[0]);
3015 if (ret != 0 || bytes < 0) {
811ef482
CB
3016 ERROR("lxc-user-nic failed to delete requested network: %s",
3017 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
3018 return -1;
3019 }
3020
811ef482
CB
3021 return 0;
3022}
3023
1bd8d726
CB
3024bool lxc_delete_network_unpriv(struct lxc_handler *handler)
3025{
3026 int ret;
3027 struct lxc_list *iterator;
3028 struct lxc_list *network = &handler->conf->network;
3029 /* strlen("/proc/") = 6
3030 * +
8335fd40 3031 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
3032 * +
3033 * strlen("/fd/") = 4
3034 * +
8335fd40 3035 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
3036 * +
3037 * \0
3038 */
8335fd40 3039 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
3040
3041 *netns_path = '\0';
3042
28d9e29e 3043 if (handler->nsfd[LXC_NS_NET] < 0) {
1bd8d726
CB
3044 DEBUG("Cannot not guarantee safe deletion of network devices. "
3045 "Manual cleanup maybe needed");
3046 return false;
3047 }
3048
3049 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 3050 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
3051 if (ret < 0 || ret >= sizeof(netns_path))
3052 return false;
3053
3054 lxc_list_for_each(iterator, network) {
3055 char *hostveth = NULL;
3056 struct lxc_netdev *netdev = iterator->elem;
3057
3058 /* We can only delete devices whose ifindex we have. If we don't
3059 * have the index it means that we didn't create it.
3060 */
3061 if (!netdev->ifindex)
3062 continue;
3063
3064 if (netdev->type == LXC_NET_PHYS) {
3065 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3066 netdev->link);
3067 if (ret < 0)
3068 WARN("Failed to rename interface with index %d "
3069 "to its initial name \"%s\"",
3070 netdev->ifindex, netdev->link);
3071 else
3072 TRACE("Renamed interface with index %d to its "
3073 "initial name \"%s\"",
3074 netdev->ifindex, netdev->link);
b3259dc6
TP
3075
3076 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3077 goto clear_ifindices;
1bd8d726
CB
3078 }
3079
3080 ret = netdev_deconf[netdev->type](handler, netdev);
3081 if (ret < 0)
3082 WARN("Failed to deconfigure network device");
3083
3084 if (netdev->type != LXC_NET_VETH)
66a7c406 3085 goto clear_ifindices;
1bd8d726 3086
c869be20 3087 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
66a7c406 3088 goto clear_ifindices;
1bd8d726 3089
8843fde4
CB
3090 if (netdev->priv.veth_attr.pair[0] != '\0')
3091 hostveth = netdev->priv.veth_attr.pair;
3092 else
3093 hostveth = netdev->priv.veth_attr.veth1;
3094 if (hostveth[0] == '\0')
66a7c406 3095 goto clear_ifindices;
8843fde4 3096
1bd8d726
CB
3097 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3098 handler->name, netdev,
3099 netns_path);
3100 if (ret < 0) {
1bd8d726 3101 WARN("Failed to remove port \"%s\" from openvswitch "
8843fde4 3102 "bridge \"%s\"", hostveth, netdev->link);
66a7c406 3103 goto clear_ifindices;
1bd8d726
CB
3104 }
3105 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
3106 netdev->link);
66a7c406
CB
3107
3108clear_ifindices:
0858c829
CB
3109 /*
3110 * We need to clear any ifindices we recorded so liblxc won't
3111 * have cached stale data which would cause it to fail on
3112 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3113 */
3114 netdev->ifindex = 0;
3115 if (netdev->type == LXC_NET_PHYS) {
3116 netdev->priv.phys_attr.ifindex = 0;
3117 } else if (netdev->type == LXC_NET_VETH) {
3118 netdev->priv.veth_attr.veth1[0] = '\0';
3119 netdev->priv.veth_attr.ifindex = 0;
3120 }
1bd8d726
CB
3121 }
3122
bb84beda 3123 return true;
1bd8d726
CB
3124}
3125
6509154d 3126static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
3127 struct lxc_list *cur, *next;
3128 struct lxc_inetdev *inet4dev;
3129 struct lxc_inet6dev *inet6dev;
3130 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3131 int err = 0;
5fe147e9
TP
3132 unsigned int lo_ifindex = 0, link_ifindex = 0;
3133
3134 link_ifindex = if_nametoindex(netdev->link);
3135 if (link_ifindex == 0) {
3136 ERROR("Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
3137 return ret_set_errno(-1, EINVAL);
3138 }
3139
6509154d 3140
3141 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3142 if (!lxc_list_empty(&netdev->ipv4)) {
3143 /* Check for net.ipv4.conf.[link].forwarding=1 */
3144 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
3145 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
596a002c 3146 return ret_set_errno(-1, EINVAL);
6509154d 3147 }
3148 }
3149
3150 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3151 if (!lxc_list_empty(&netdev->ipv6)) {
3152 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
3153 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
3154 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
596a002c 3155 return ret_set_errno(-1, EINVAL);
6509154d 3156 }
3157
3158 /* Check for net.ipv6.conf.[link].forwarding=1 */
3159 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
3160 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
596a002c 3161 return ret_set_errno(-1, EINVAL);
6509154d 3162 }
3163 }
3164
b670016a 3165 /* Perform IPVLAN specific checks. */
3166 if (netdev->type == LXC_NET_IPVLAN) {
3167 /* Check mode is l3s as other modes do not work with l2proxy. */
3168 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
3169 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
596a002c 3170 return ret_set_errno(-1, EINVAL);
b670016a 3171 }
3172
3173 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3174 lo_ifindex = if_nametoindex(loop_device);
b670016a 3175 if (lo_ifindex == 0) {
3ebffb98 3176 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
596a002c 3177 return ret_set_errno(-1, EINVAL);
b670016a 3178 }
3179 }
3180
6509154d 3181 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3182 inet4dev = cur->elem;
3183 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3184 return ret_set_errno(-1, -errno);
6509154d 3185
5fe147e9 3186 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3187 return ret_set_errno(-1, EINVAL);
b670016a 3188
3189 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3190 if (netdev->type == LXC_NET_IPVLAN) {
3191 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
3192 if (err < 0) {
3ebffb98 3193 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
596a002c 3194 return ret_set_errno(-1, -err);
b670016a 3195 }
3196 }
6509154d 3197 }
3198
3199 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3200 inet6dev = cur->elem;
3201 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3202 return ret_set_errno(-1, -errno);
6509154d 3203
5fe147e9 3204 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3205 return ret_set_errno(-1, EINVAL);
b670016a 3206
3207 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3208 if (netdev->type == LXC_NET_IPVLAN) {
3209 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
3210 if (err < 0) {
3ebffb98 3211 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
596a002c 3212 return ret_set_errno(-1, -err);
b670016a 3213 }
3214 }
6509154d 3215 }
3216
3217 return 0;
3218}
3219
b670016a 3220static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex) {
3221 char bufinet4[INET_ADDRSTRLEN];
5fe147e9 3222 unsigned int errCount = 0, link_ifindex = 0;
b670016a 3223
3224 if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4))) {
3225 SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
596a002c 3226 return ret_set_errno(-1, EINVAL);
b670016a 3227 }
3228
3229 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3230 if (lo_ifindex > 0) {
3231 if (lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
3232 errCount++;
3233 ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
3234 }
3235 }
3236
3237 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3238 if (link[0] != '\0') {
5fe147e9
TP
3239 link_ifindex = if_nametoindex(link);
3240 if (link_ifindex == 0) {
3241 ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
3242 return ret_set_errno(-1, EINVAL);
3243 }
3244
3245 if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, link_ifindex, ip) < 0)
b670016a 3246 errCount++;
3247 }
3248
3249 if (errCount > 0)
596a002c 3250 return ret_set_errno(-1, EINVAL);
b670016a 3251
3252 return 0;
3253}
3254
3255static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex) {
3256 char bufinet6[INET6_ADDRSTRLEN];
5fe147e9 3257 unsigned int errCount = 0, link_ifindex = 0;
b670016a 3258
3259 if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6))) {
3260 SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
596a002c 3261 return ret_set_errno(-1, EINVAL);
b670016a 3262 }
3263
3264 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3265 if (lo_ifindex > 0) {
3266 if (lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
3267 errCount++;
3268 ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
3269 }
3270 }
3271
3272 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3273 if (link[0] != '\0') {
5fe147e9
TP
3274 link_ifindex = if_nametoindex(link);
3275 if (link_ifindex == 0) {
3276 ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
3277 return ret_set_errno(-1, EINVAL);
3278 }
3279
3280 if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, link_ifindex, ip) < 0)
b670016a 3281 errCount++;
3282 }
3283
3284 if (errCount > 0)
596a002c 3285 return ret_set_errno(-1, EINVAL);
b670016a 3286
3287 return 0;
3288}
3289
6509154d 3290static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3291 unsigned int lo_ifindex = 0;
3292 unsigned int errCount = 0;
6509154d 3293 struct lxc_list *cur, *next;
3294 struct lxc_inetdev *inet4dev;
3295 struct lxc_inet6dev *inet6dev;
6509154d 3296
b670016a 3297 /* Perform IPVLAN specific checks. */
3298 if (netdev->type == LXC_NET_IPVLAN) {
3299 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3300 lo_ifindex = if_nametoindex(loop_device);
b670016a 3301 if (lo_ifindex == 0) {
3302 errCount++;
3ebffb98 3303 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3304 }
b670016a 3305 }
6509154d 3306
b670016a 3307 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3308 inet4dev = cur->elem;
3309 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3310 errCount++;
6509154d 3311 }
3312
3313 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3314 inet6dev = cur->elem;
b670016a 3315 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3316 errCount++;
6509154d 3317 }
3318
b670016a 3319 if (errCount > 0)
596a002c 3320 return ret_set_errno(-1, EINVAL);
6509154d 3321
3322 return 0;
3323}
3324
e389f2af 3325static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3326{
811ef482
CB
3327 struct lxc_list *iterator;
3328 struct lxc_list *network = &handler->conf->network;
3329
811ef482
CB
3330 lxc_list_for_each(iterator, network) {
3331 struct lxc_netdev *netdev = iterator->elem;
3332
3333 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3334 ERROR("Invalid network configuration type %d", netdev->type);
3335 return -1;
3336 }
3337
6509154d 3338 /* Setup l2proxy entries if enabled and used with a link property */
3339 if (netdev->l2proxy && netdev->link[0] != '\0') {
3340 if (lxc_setup_l2proxy(netdev)) {
3341 ERROR("Failed to setup l2proxy");
3342 return -1;
3343 }
3344 }
3345
811ef482
CB
3346 if (netdev_conf[netdev->type](handler, netdev)) {
3347 ERROR("Failed to create network device");
3348 return -1;
3349 }
811ef482
CB
3350 }
3351
3352 return 0;
3353}
3354
e389f2af 3355int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3356{
e389f2af
CB
3357 pid_t pid = handler->pid;
3358 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3359 struct lxc_list *iterator;
3360
e0010464 3361 if (am_guest_unpriv())
74c6e2b0 3362 return 0;
811ef482
CB
3363
3364 lxc_list_for_each(iterator, network) {
3dd78294 3365 __do_free char *physname = NULL;
e389f2af 3366 int ret;
811ef482
CB
3367 struct lxc_netdev *netdev = iterator->elem;
3368
811ef482
CB
3369 if (!netdev->ifindex)
3370 continue;
3371
3dd78294
CB
3372 if (netdev->type == LXC_NET_PHYS)
3373 physname = is_wlan(netdev->link);
3374
3375 if (physname)
3376 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, NULL);
3377 else
8bf64b77 3378 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->name);
535e8859 3379 if (ret) {
6d1400b5 3380 errno = -ret;
24190194
CB
3381 SYSERROR("Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3382 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3383 return -1;
3384 }
3385
24190194
CB
3386 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3387 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3388 }
3389
3390 return 0;
3391}
3392
3c09b97c
CB
3393static int network_requires_advanced_setup(int type)
3394{
3395 if (type == LXC_NET_EMPTY)
3396 return false;
3397
3398 if (type == LXC_NET_NONE)
3399 return false;
3400
3401 return true;
3402}
3403
e389f2af 3404static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3405{
e389f2af
CB
3406 int hooks_version = handler->conf->hooks_version;
3407 const char *lxcname = handler->name;
3408 const char *lxcpath = handler->lxcpath;
3409 struct lxc_list *network = &handler->conf->network;
3410 pid_t pid = handler->pid;
74c6e2b0
CB
3411 struct lxc_list *iterator;
3412
74c6e2b0
CB
3413 lxc_list_for_each(iterator, network) {
3414 struct lxc_netdev *netdev = iterator->elem;
3415
3c09b97c 3416 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3417 continue;
3418
3419 if (netdev->type != LXC_NET_VETH) {
e389f2af 3420 ERROR("Networks of type %s are not supported by unprivileged containers",
74c6e2b0
CB
3421 lxc_net_type_to_str(netdev->type));
3422 return -1;
3423 }
3424
3425 if (netdev->mtu)
3426 INFO("mtu ignored due to insufficient privilege");
3427
e389f2af
CB
3428 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3429 pid, hooks_version))
74c6e2b0
CB
3430 return -1;
3431 }
3432
3433 return 0;
3434}
3435
1bd8d726 3436bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3437{
3438 int ret;
3439 struct lxc_list *iterator;
3440 struct lxc_list *network = &handler->conf->network;
1bd8d726 3441
811ef482
CB
3442 lxc_list_for_each(iterator, network) {
3443 char *hostveth = NULL;
3444 struct lxc_netdev *netdev = iterator->elem;
3445
3446 /* We can only delete devices whose ifindex we have. If we don't
3447 * have the index it means that we didn't create it.
3448 */
3449 if (!netdev->ifindex)
3450 continue;
3451
0104c121
CB
3452 /*
3453 * If the network device has been moved back from the
3454 * containers network namespace, update the ifindex.
3455 */
3456 netdev->ifindex = if_nametoindex(netdev->name);
3457
6509154d 3458 /* Delete l2proxy entries if enabled and used with a link property */
3459 if (netdev->l2proxy && netdev->link[0] != '\0') {
3460 if (lxc_delete_l2proxy(netdev))
3461 WARN("Failed to delete all l2proxy config");
3462 /* Don't return, let the network be cleaned up as normal. */
3463 }
3464
811ef482
CB
3465 if (netdev->type == LXC_NET_PHYS) {
3466 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3467 if (ret < 0)
3468 WARN("Failed to rename interface with index %d "
b809f232
CB
3469 "from \"%s\" to its initial name \"%s\"",
3470 netdev->ifindex, netdev->name, netdev->link);
0b154989 3471 else {
29589196
CB
3472 TRACE("Renamed interface with index %d from "
3473 "\"%s\" to its initial name \"%s\"",
3474 netdev->ifindex, netdev->name,
3475 netdev->link);
0b154989
TP
3476
3477 /* Restore original MTU */
3478 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3479 if (ret < 0) {
3480 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3481 netdev->link, netdev->priv.phys_attr.mtu);
3482 } else {
3483 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3484 netdev->link, netdev->priv.phys_attr.mtu);
3485 }
3486 }
b3259dc6
TP
3487
3488 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3489 goto clear_ifindices;
811ef482
CB
3490 }
3491
3492 ret = netdev_deconf[netdev->type](handler, netdev);
3493 if (ret < 0)
3494 WARN("Failed to deconfigure network device");
3495
811ef482 3496 if (netdev->type != LXC_NET_VETH)
66a7c406 3497 goto clear_ifindices;
811ef482 3498
811ef482
CB
3499 /* Explicitly delete host veth device to prevent lingering
3500 * devices. We had issues in LXD around this.
3501 */
de4855a8 3502 if (netdev->priv.veth_attr.pair[0] != '\0')
811ef482
CB
3503 hostveth = netdev->priv.veth_attr.pair;
3504 else
3505 hostveth = netdev->priv.veth_attr.veth1;
de4855a8 3506 if (hostveth[0] == '\0')
66a7c406 3507 goto clear_ifindices;
811ef482
CB
3508
3509 ret = lxc_netdev_delete_by_name(hostveth);
3510 if (ret < 0) {
24548539
CB
3511 WARN("Failed to remove interface \"%s\" from \"%s\"",
3512 hostveth, netdev->link);
66a7c406 3513 goto clear_ifindices;
811ef482
CB
3514 }
3515 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3516
c869be20 3517 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
811ef482 3518 netdev->priv.veth_attr.veth1[0] = '\0';
66a7c406
CB
3519 netdev->ifindex = 0;
3520 netdev->priv.veth_attr.ifindex = 0;
3521 goto clear_ifindices;
811ef482
CB
3522 }
3523
3524 /* Delete the openvswitch port. */
3525 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3526 if (ret < 0)
3527 WARN("Failed to remove port \"%s\" from openvswitch "
3528 "bridge \"%s\"", hostveth, netdev->link);
3529 else
3530 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3531 hostveth, netdev->link);
3532
66a7c406 3533clear_ifindices:
ad2ddfcd 3534 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3535 * have cached stale data which would cause it to fail on reboot
3536 * we're we don't re-read the on-disk config file.
3537 */
3538 netdev->ifindex = 0;
3539 if (netdev->type == LXC_NET_PHYS) {
3540 netdev->priv.phys_attr.ifindex = 0;
3541 } else if (netdev->type == LXC_NET_VETH) {
3542 netdev->priv.veth_attr.veth1[0] = '\0';
3543 netdev->priv.veth_attr.ifindex = 0;
3544 }
811ef482
CB
3545 }
3546
bb84beda 3547 return true;
811ef482
CB
3548}
3549
3550int lxc_requests_empty_network(struct lxc_handler *handler)
3551{
3552 struct lxc_list *network = &handler->conf->network;
3553 struct lxc_list *iterator;
3554 bool found_none = false, found_nic = false;
3555
3556 if (lxc_list_empty(network))
3557 return 0;
3558
3559 lxc_list_for_each(iterator, network) {
3560 struct lxc_netdev *netdev = iterator->elem;
3561
3562 if (netdev->type == LXC_NET_NONE)
3563 found_none = true;
3564 else
3565 found_nic = true;
3566 }
3567 if (found_none && !found_nic)
3568 return 1;
3569 return 0;
3570}
3571
3572/* try to move physical nics to the init netns */
b809f232 3573int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482
CB
3574{
3575 int ret;
b809f232 3576 int oldfd;
811ef482 3577 char ifname[IFNAMSIZ];
b809f232 3578 struct lxc_list *iterator;
28d9e29e 3579 int netnsfd = handler->nsfd[LXC_NS_NET];
b809f232 3580 struct lxc_conf *conf = handler->conf;
811ef482 3581
b809f232
CB
3582 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3583 * the parent network namespace. We won't have this capability if we are
3584 * unprivileged.
3585 */
d0fbc7ba 3586 if (!handler->am_root)
b809f232 3587 return 0;
811ef482 3588
b809f232 3589 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3590
0037ab49 3591 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
811ef482
CB
3592 if (oldfd < 0) {
3593 SYSERROR("Failed to preserve network namespace");
b809f232 3594 return -1;
811ef482
CB
3595 }
3596
b809f232 3597 ret = setns(netnsfd, CLONE_NEWNET);
811ef482
CB
3598 if (ret < 0) {
3599 SYSERROR("Failed to enter network namespace");
3600 close(oldfd);
b809f232 3601 return -1;
811ef482
CB
3602 }
3603
b809f232
CB
3604 lxc_list_for_each(iterator, &conf->network) {
3605 struct lxc_netdev *netdev = iterator->elem;
811ef482 3606
b809f232
CB
3607 if (netdev->type != LXC_NET_PHYS)
3608 continue;
3609
3610 /* Retrieve the name of the interface in the container's network
3611 * namespace.
3612 */
3613 if (!if_indextoname(netdev->ifindex, ifname)) {
811ef482 3614 WARN("No interface corresponding to ifindex %d",
b809f232 3615 netdev->ifindex);
811ef482
CB
3616 continue;
3617 }
b809f232 3618
0037ab49 3619 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3620 if (ret < 0)
811ef482
CB
3621 WARN("Error moving network device \"%s\" back to "
3622 "network namespace", ifname);
b809f232
CB
3623 else
3624 TRACE("Moved network device \"%s\" back to network "
3625 "namespace", ifname);
811ef482 3626 }
811ef482 3627
b809f232 3628 ret = setns(oldfd, CLONE_NEWNET);
811ef482 3629 close(oldfd);
b809f232
CB
3630 if (ret < 0) {
3631 SYSERROR("Failed to enter network namespace");
3632 return -1;
3633 }
3634
3635 return 0;
811ef482
CB
3636}
3637
3638static int setup_hw_addr(char *hwaddr, const char *ifname)
3639{
3640 struct sockaddr sockaddr;
3641 struct ifreq ifr;
6d1400b5 3642 int ret, fd;
811ef482
CB
3643
3644 ret = lxc_convert_mac(hwaddr, &sockaddr);
3645 if (ret) {
6d1400b5 3646 errno = -ret;
3647 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3648 return -1;
3649 }
3650
3651 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3652 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3653 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3654
ad9429e5 3655 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3656 if (fd < 0)
3657 return -1;
3658
3659 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3660 if (ret)
6d1400b5 3661 SYSERROR("Failed to perform ioctl");
3662
3663 close(fd);
811ef482
CB
3664
3665 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3666 ifr.ifr_name);
3667
3668 return ret;
3669}
3670
3671static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3672{
3673 struct lxc_list *iterator;
3674 int err;
3675
3676 lxc_list_for_each(iterator, ip) {
3677 struct lxc_inetdev *inetdev = iterator->elem;
3678
3679 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3680 &inetdev->bcast, inetdev->prefix);
3681 if (err) {
6d1400b5 3682 errno = -err;
3683 SYSERROR("Failed to setup ipv4 address for network device "
d4a7da46 3684 "with ifindex %d", ifindex);
811ef482
CB
3685 return -1;
3686 }
3687 }
3688
3689 return 0;
3690}
3691
3692static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3693{
3694 struct lxc_list *iterator;
3695 int err;
3696
3697 lxc_list_for_each(iterator, ip) {
3698 struct lxc_inet6dev *inet6dev = iterator->elem;
3699
3700 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3701 &inet6dev->mcast, &inet6dev->acast,
3702 inet6dev->prefix);
3703 if (err) {
6d1400b5 3704 errno = -err;
3705 SYSERROR("Failed to setup ipv6 address for network device "
d4a7da46 3706 "with ifindex %d", ifindex);
811ef482
CB
3707 return -1;
3708 }
3709 }
3710
3711 return 0;
3712}
3713
8bf64b77 3714static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3715{
811ef482 3716 int err;
009d6127 3717 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3718
3719 /* empty network namespace */
8bf64b77
CB
3720 if (!netdev->ifindex && netdev->flags & IFF_UP) {
3721 err = lxc_netdev_up("lo");
811ef482 3722 if (err) {
6d1400b5 3723 errno = -err;
8bf64b77 3724 SYSERROR( "Failed to set the loopback network device up");
811ef482
CB
3725 return -1;
3726 }
3727 }
3728
811ef482
CB
3729 /* set a mac address */
3730 if (netdev->hwaddr) {
8bf64b77 3731 if (setup_hw_addr(netdev->hwaddr, netdev->name)) {
811ef482 3732 ERROR("Failed to setup hw address for network device \"%s\"",
8bf64b77 3733 netdev->name);
811ef482
CB
3734 return -1;
3735 }
3736 }
3737
3738 /* setup ipv4 addresses on the interface */
3739 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3740 ERROR("Failed to setup ip addresses for network device \"%s\"",
8bf64b77 3741 netdev->name);
811ef482
CB
3742 return -1;
3743 }
3744
3745 /* setup ipv6 addresses on the interface */
3746 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3747 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
8bf64b77 3748 netdev->name);
811ef482
CB
3749 return -1;
3750 }
3751
3752 /* set the network device up */
3753 if (netdev->flags & IFF_UP) {
8bf64b77 3754 err = lxc_netdev_up(netdev->name);
811ef482 3755 if (err) {
6d1400b5 3756 errno = -err;
3757 SYSERROR("Failed to set network device \"%s\" up",
8bf64b77 3758 netdev->name);
811ef482
CB
3759 return -1;
3760 }
3761
3762 /* the network is up, make the loopback up too */
3763 err = lxc_netdev_up("lo");
3764 if (err) {
6d1400b5 3765 errno = -err;
3766 SYSERROR("Failed to set the loopback network device up");
811ef482
CB
3767 return -1;
3768 }
3769 }
3770
811ef482 3771 /* setup ipv4 gateway on the interface */
a2f9a670 3772 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
811ef482
CB
3773 if (!(netdev->flags & IFF_UP)) {
3774 ERROR("Cannot add ipv4 gateway for network device "
8bf64b77 3775 "\"%s\" when not bringing up the interface", netdev->name);
811ef482
CB
3776 return -1;
3777 }
3778
3779 if (lxc_list_empty(&netdev->ipv4)) {
3780 ERROR("Cannot add ipv4 gateway for network device "
8bf64b77 3781 "\"%s\" when not assigning an address", netdev->name);
811ef482
CB
3782 return -1;
3783 }
3784
a2f9a670 3785 /* Setup device route if ipv4_gateway_dev is enabled */
3786 if (netdev->ipv4_gateway_dev) {
3787 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3788 if (err < 0) {
3789 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
8bf64b77 3790 netdev->name);
596a002c 3791 return ret_set_errno(-1, -err);
811ef482 3792 }
a2f9a670 3793 } else {
009d6127 3794 /* Check the gateway address is valid */
3795 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3796 return ret_set_errno(-1, errno);
009d6127 3797
3798 /* Try adding a default route to the gateway address */
811ef482 3799 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3800 if (err < 0) {
3801 /* If adding the default route fails, this could be because the
3802 * gateway address is in a different subnet to the container's address.
3803 * To work around this, we try adding a static device route to the
3804 * gateway address first, and then try again.
3805 */
a2f9a670 3806 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
009d6127 3807 if (err < 0) {
a2f9a670 3808 errno = -err;
009d6127 3809 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
8bf64b77 3810 bufinet4, netdev->name);
009d6127 3811 return -1;
a2f9a670 3812 }
6d1400b5 3813
a2f9a670 3814 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3815 if (err < 0) {
a2f9a670 3816 errno = -err;
009d6127 3817 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
8bf64b77 3818 bufinet4, netdev->name);
a2f9a670 3819 return -1;
811ef482 3820 }
811ef482
CB
3821 }
3822 }
3823 }
3824
3825 /* setup ipv6 gateway on the interface */
a2f9a670 3826 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
811ef482 3827 if (!(netdev->flags & IFF_UP)) {
e389f2af 3828 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
8bf64b77 3829 netdev->name);
811ef482
CB
3830 return -1;
3831 }
3832
3833 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
e389f2af 3834 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
8bf64b77 3835 netdev->name);
811ef482
CB
3836 return -1;
3837 }
3838
a2f9a670 3839 /* Setup device route if ipv6_gateway_dev is enabled */
3840 if (netdev->ipv6_gateway_dev) {
3841 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3842 if (err < 0) {
3843 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
8bf64b77 3844 netdev->name);
596a002c 3845 return ret_set_errno(-1, -err);
811ef482 3846 }
a2f9a670 3847 } else {
009d6127 3848 /* Check the gateway address is valid */
3849 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3850 return ret_set_errno(-1, errno);
009d6127 3851
3852 /* Try adding a default route to the gateway address */
811ef482 3853 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3854 if (err < 0) {
3855 /* If adding the default route fails, this could be because the
3856 * gateway address is in a different subnet to the container's address.
3857 * To work around this, we try adding a static device route to the
3858 * gateway address first, and then try again.
3859 */
a2f9a670 3860 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
009d6127 3861 if (err < 0) {
a2f9a670 3862 errno = -err;
009d6127 3863 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
8bf64b77 3864 bufinet6, netdev->name);
009d6127 3865 return -1;
a2f9a670 3866 }
6d1400b5 3867
a2f9a670 3868 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3869 if (err < 0) {
a2f9a670 3870 errno = -err;
009d6127 3871 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
8bf64b77 3872 bufinet6, netdev->name);
a2f9a670 3873 return -1;
811ef482 3874 }
811ef482
CB
3875 }
3876 }
3877 }
3878
8bf64b77 3879 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3880
3881 return 0;
3882}
3883
3884int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3885 struct lxc_list *network)
3886{
3887 struct lxc_list *iterator;
811ef482 3888
8bf64b77 3889 lxc_list_for_each (iterator, network) {
e389f2af 3890 struct lxc_netdev *netdev = iterator->elem;
8bf64b77 3891 int ret;
811ef482 3892
8bf64b77
CB
3893 ret = netdev_ns_conf[netdev->type](netdev);
3894 if (!ret)
3895 ret = lxc_network_setup_in_child_namespaces_common(netdev);
3896 if (ret) {
e389f2af 3897 ERROR("Failed to setup netdev");
811ef482
CB
3898 return -1;
3899 }
3900 }
3901
3902 if (!lxc_list_empty(network))
e389f2af 3903 INFO("Network has been setup");
811ef482
CB
3904
3905 return 0;
3906}
7ab1ba02 3907
3c09b97c 3908int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
3909{
3910 struct lxc_list *iterator;
3911 struct lxc_list *network = &handler->conf->network;
3912 int data_sock = handler->data_sock[0];
3913
7ab1ba02
CB
3914 lxc_list_for_each(iterator, network) {
3915 int ret;
3916 struct lxc_netdev *netdev = iterator->elem;
3917
3c09b97c 3918 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3919 continue;
3920
7fbb15ec 3921 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3922 if (ret < 0)
7ab1ba02 3923 return -1;
e389f2af
CB
3924
3925 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3926 if (ret < 0)
3927 return -1;
3928
3929 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3930 }
3931
3932 return 0;
3933}
3934
3c09b97c 3935int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
3936{
3937 struct lxc_list *iterator;
3938 struct lxc_list *network = &handler->conf->network;
3939 int data_sock = handler->data_sock[1];
3940
7ab1ba02
CB
3941 lxc_list_for_each(iterator, network) {
3942 int ret;
3943 struct lxc_netdev *netdev = iterator->elem;
3944
3c09b97c 3945 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3946 continue;
3947
e3233f26 3948 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3949 if (ret < 0)
7ab1ba02 3950 return -1;
e389f2af
CB
3951
3952 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3953 if (ret < 0)
3954 return -1;
54256301 3955
e389f2af 3956 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3957 }
3958
3959 return 0;
3960}
a1ae535a
CB
3961
3962int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3963{
3964 struct lxc_list *iterator, *network;
3965 int data_sock = handler->data_sock[0];
3966
3967 if (!handler->am_root)
3968 return 0;
3969
3970 network = &handler->conf->network;
3971 lxc_list_for_each(iterator, network) {
3972 int ret;
3973 struct lxc_netdev *netdev = iterator->elem;
3974
3975 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3976 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3977 if (ret < 0)
7729f8e5 3978 return -1;
a1ae535a
CB
3979
3980 /* Send network device ifindex in the child's namespace to
3981 * parent.
3982 */
7fbb15ec 3983 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3984 if (ret < 0)
7729f8e5 3985 return -1;
a1ae535a
CB
3986 }
3987
e389f2af
CB
3988 if (!lxc_list_empty(network))
3989 TRACE("Sent network device names and ifindices to parent");
3990
a1ae535a 3991 return 0;
a1ae535a
CB
3992}
3993
3994int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3995{
3996 struct lxc_list *iterator, *network;
3997 int data_sock = handler->data_sock[1];
3998
3999 if (!handler->am_root)
4000 return 0;
4001
4002 network = &handler->conf->network;
4003 lxc_list_for_each(iterator, network) {
4004 int ret;
4005 struct lxc_netdev *netdev = iterator->elem;
4006
4007 /* Receive network device name in the child's namespace to
4008 * parent.
4009 */
e3233f26 4010 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 4011 if (ret < 0)
7729f8e5 4012 return -1;
a1ae535a
CB
4013
4014 /* Receive network device ifindex in the child's namespace to
4015 * parent.
4016 */
e3233f26 4017 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 4018 if (ret < 0)
7729f8e5 4019 return -1;
a1ae535a
CB
4020 }
4021
4022 return 0;
a1ae535a 4023}
bb84beda
CB
4024
4025void lxc_delete_network(struct lxc_handler *handler)
4026{
4027 bool bret;
4028
4029 if (handler->am_root)
4030 bret = lxc_delete_network_priv(handler);
4031 else
4032 bret = lxc_delete_network_unpriv(handler);
4033 if (!bret)
4034 DEBUG("Failed to delete network devices");
4035 else
4036 DEBUG("Deleted network devices");
4037}
1cd95214 4038
1cd95214
CB
4039int lxc_netns_set_nsid(int fd)
4040{
41a3300d 4041 int ret;
0ce60f0d
CB
4042 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
4043 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4044 NLMSG_ALIGN(1024)];
1cd95214 4045 struct nl_handler nlh;
0ce60f0d
CB
4046 struct nlmsghdr *hdr;
4047 struct rtgenmsg *msg;
bfcedc7e 4048 int saved_errno;
9d036caa
CB
4049 const __s32 ns_id = -1;
4050 const __u32 netns_fd = fd;
1cd95214
CB
4051
4052 ret = netlink_open(&nlh, NETLINK_ROUTE);
4053 if (ret < 0)
41a3300d 4054 return -1;
1cd95214 4055
0ce60f0d 4056 memset(buf, 0, sizeof(buf));
6ce39620
CB
4057
4058#pragma GCC diagnostic push
4059#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
4060 hdr = (struct nlmsghdr *)buf;
4061 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4062#pragma GCC diagnostic pop
1cd95214 4063
0ce60f0d
CB
4064 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4065 hdr->nlmsg_type = RTM_NEWNSID;
4066 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4067 hdr->nlmsg_pid = 0;
4068 hdr->nlmsg_seq = RTM_NEWNSID;
4069 msg->rtgen_family = AF_UNSPEC;
1cd95214 4070
9d036caa
CB
4071 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4072 if (ret < 0)
4073 goto on_error;
4074
4075 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4076 if (ret < 0)
4077 goto on_error;
1cd95214 4078
9fbbc427 4079 ret = __netlink_transaction(&nlh, hdr, hdr);
9d036caa
CB
4080
4081on_error:
bfcedc7e 4082 saved_errno = errno;
1cd95214 4083 netlink_close(&nlh);
bfcedc7e 4084 errno = saved_errno;
1cd95214 4085
9d036caa 4086 return ret;
1cd95214 4087}
938980ba
CB
4088
/*
 * Index a sequence of route attributes by type.
 *
 * @tb:  output table with room for @max + 1 entries; cleared first
 * @max: highest attribute type to record
 * @rta: first attribute
 * @len: number of bytes available starting at @rta
 *
 * For each type only the first attribute encountered is stored; attributes
 * with a type above @max are ignored. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	int slot;

	for (slot = 0; slot <= max; slot++)
		tb[slot] = NULL;

	for (;;) {
		unsigned short attr_type;

		if (!RTA_OK(rta, len))
			break;

		attr_type = rta->rta_type;
		if (attr_type <= max && tb[attr_type] == NULL)
			tb[attr_type] = rta;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
		rta = RTA_NEXT(rta, len);
#pragma GCC diagnostic pop
	}

	return 0;
}
4108
/*
 * Read the payload of @rta as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() so the read neither depends on the
 * payload's alignment nor dereferences through a cast pointer. The caller is
 * responsible for @rta actually carrying at least 4 bytes of payload.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
4113
4114#ifndef NETNS_RTA
4115#define NETNS_RTA(r) \
4116 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
4117#endif
4118
4119int lxc_netns_get_nsid(int fd)
4120{
4121 int ret;
4122 ssize_t len;
4123 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
4124 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4125 NLMSG_ALIGN(1024)];
938980ba
CB
4126 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
4127 struct nl_handler nlh;
4128 struct nlmsghdr *hdr;
4129 struct rtgenmsg *msg;
4130 int saved_errno;
4131 __u32 netns_fd = fd;
4132
4133 ret = netlink_open(&nlh, NETLINK_ROUTE);
4134 if (ret < 0)
4135 return -1;
4136
4137 memset(buf, 0, sizeof(buf));
6ce39620
CB
4138
4139#pragma GCC diagnostic push
4140#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4141 hdr = (struct nlmsghdr *)buf;
4142 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4143#pragma GCC diagnostic pop
938980ba
CB
4144
4145 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4146 hdr->nlmsg_type = RTM_GETNSID;
4147 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4148 hdr->nlmsg_pid = 0;
4149 hdr->nlmsg_seq = RTM_GETNSID;
4150 msg->rtgen_family = AF_UNSPEC;
4151
9d036caa
CB
4152 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4153 if (ret == 0)
4154 ret = __netlink_transaction(&nlh, hdr, hdr);
938980ba 4155
938980ba
CB
4156 saved_errno = errno;
4157 netlink_close(&nlh);
4158 errno = saved_errno;
4159 if (ret < 0)
4160 return -1;
4161
9d036caa 4162 errno = EINVAL;
938980ba
CB
4163 msg = NLMSG_DATA(hdr);
4164 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4165 if (len < 0)
4166 return -1;
4167
6ce39620
CB
4168#pragma GCC diagnostic push
4169#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4170 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4171 if (tb[__LXC_NETNSA_NSID])
4172 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4173#pragma GCC diagnostic pop
938980ba
CB
4174
4175 return -1;
4176}
e389f2af
CB
4177
4178int lxc_create_network(struct lxc_handler *handler)
4179{
4180 int ret;
4181
e389f2af
CB
4182 if (handler->am_root) {
4183 ret = lxc_create_network_priv(handler);
4184 if (ret)
4185 return -1;
4186
4187 return lxc_network_move_created_netdev_priv(handler);
4188 }
4189
4190 return lxc_create_network_unpriv(handler);
4191}