]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
Merge pull request #3310 from brauner/2020-03-19/fixes
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
d7b58715 39#include "raw_syscalls.h"
59524108 40#include "syscall_wrappers.h"
0d204771 41#include "utils.h"
0ad19a3f 42
9de31d5a
CB
43#ifndef HAVE_STRLCPY
44#include "include/strlcpy.h"
45#endif
46
ac2cecc4 47lxc_log_define(network, lxc);
f8fee0e2 48
811ef482 49typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
8bf64b77 50typedef int (*instantiate_ns_cb)(struct lxc_netdev *);
3ebffb98 51static const char loop_device[] = "lo";
811ef482 52
b670016a 53static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 54{
d16bda44 55 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
8f82874c 56 struct nl_handler nlh;
d16bda44
CB
57 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
58 int addrlen, err;
8f82874c 59 struct rtmsg *rt;
8f82874c 60
61 addrlen = family == AF_INET ? sizeof(struct in_addr)
62 : sizeof(struct in6_addr);
63
d16bda44 64 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 65 if (err)
66 return err;
67
8f82874c 68 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
69 if (!nlmsg)
d16bda44 70 return -ENOMEM;
8f82874c 71
72 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
73 if (!answer)
d16bda44 74 err = -ENOMEM;
8f82874c 75
76 nlmsg->nlmsghdr->nlmsg_flags =
77 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 78 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 79
80 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
81 if (!rt)
d16bda44
CB
82 err = -ENOMEM;
83
8f82874c 84 rt->rtm_family = family;
85 rt->rtm_table = RT_TABLE_MAIN;
86 rt->rtm_scope = RT_SCOPE_LINK;
87 rt->rtm_protocol = RTPROT_BOOT;
88 rt->rtm_type = RTN_UNICAST;
89 rt->rtm_dst_len = netmask;
90
8f82874c 91 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
92 return -EINVAL;
93
8f82874c 94 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
95 return -EINVAL;
96
97 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 98}
99
/* Request a new IPv4 route to @dest/@netmask via interface @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
104
/* Request a new IPv6 route to @dest/@netmask via interface @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
109
/* Request removal of the IPv4 route to @dest/@netmask via interface @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
114
/* Request removal of the IPv6 route to @dest/@netmask via interface @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
119
d4a7da46 120static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
121{
122 struct lxc_list *iterator;
123 int err;
124
125 lxc_list_for_each(iterator, ip) {
126 struct lxc_inetdev *inetdev = iterator->elem;
127
128 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
129 if (err)
130 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 131 }
132
133 return 0;
134}
135
136static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
137{
138 struct lxc_list *iterator;
139 int err;
140
141 lxc_list_for_each(iterator, ip) {
142 struct lxc_inet6dev *inet6dev = iterator->elem;
143
144 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
145 if (err)
146 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 147 }
148
149 return 0;
150}
151
6dfa9581
TP
152static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
153{
154 struct lxc_list *iterator;
155 int err;
156
157 lxc_list_for_each(iterator, ip) {
158 struct lxc_inetdev *inetdev = iterator->elem;
159
160 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
161
162 if (err)
9c66dc4f 163 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
164 }
165
166 return 0;
167}
168
169static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
170{
171 struct lxc_list *iterator;
172 int err;
173
174 lxc_list_for_each(iterator, ip) {
175 struct lxc_inet6dev *inet6dev = iterator->elem;
176
177 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
178 if (err)
9c66dc4f 179 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
180 }
181
182 return 0;
183}
184
/*
 * Pair of strings: an IP address and a device.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * the meaning of the fields is inferred from their names only - confirm.
 */
struct ip_proxy_args {
	const char *ip;
	const char *dev;
};
189
5fe147e9 190static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 191{
d16bda44 192 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
5fe147e9 193 struct nl_handler nlh;
d16bda44
CB
194 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
195 int addrlen, err;
5fe147e9 196 struct ndmsg *rt;
6dfa9581 197
5fe147e9 198 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 199
d16bda44 200 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
201 if (err)
202 return err;
6dfa9581 203
5fe147e9
TP
204 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
205 if (!nlmsg)
d16bda44 206 return -ENOMEM;
6dfa9581 207
5fe147e9
TP
208 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
209 if (!answer)
d16bda44 210 return -ENOMEM;
6dfa9581 211
5fe147e9
TP
212 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
213 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 214
5fe147e9
TP
215 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
216 if (!rt)
d16bda44
CB
217 return -ENOMEM;
218
5fe147e9
TP
219 rt->ndm_ifindex = ifindex;
220 rt->ndm_flags = NTF_PROXY;
221 rt->ndm_type = NDA_DST;
222 rt->ndm_family = family;
6dfa9581 223
5fe147e9 224 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 225 return -EINVAL;
6dfa9581 226
d16bda44 227 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
228}
229
230static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
231{
232 int ret;
233 char path[PATH_MAX];
234 char buf[1] = "";
235
236 if (family != AF_INET && family != AF_INET6)
596a002c 237 return ret_set_errno(-1, EINVAL);
6dfa9581 238
9c66dc4f 239 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581
TP
240 family == AF_INET ? "ipv4" : "ipv6", ifname,
241 "forwarding");
9c66dc4f 242 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 243 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
244
245 return lxc_read_file_expect(path, buf, 1, "1");
246}
247
811ef482
CB
248static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
249{
54256301 250 int err;
a00fbab5 251 unsigned int mtu = 1500;
811ef482
CB
252 char *veth1, *veth2;
253 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 254
f2711167 255 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
256 veth1 = netdev->priv.veth_attr.pair;
257 if (handler->conf->reboot)
258 lxc_netdev_delete_by_name(veth1);
259 } else {
260 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
261 if (err < 0 || (size_t)err >= sizeof(veth1buf))
262 return -1;
263
264 veth1 = lxc_mkifname(veth1buf);
265 if (!veth1)
266 return -1;
267
268 /* store away for deconf */
269 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
270 }
271
d34212ad
CB
272 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
273 if (err < 0 || (size_t)err >= sizeof(veth2buf))
274 return -1;
275
811ef482
CB
276 veth2 = lxc_mkifname(veth2buf);
277 if (!veth2)
54256301
CB
278 return -1;
279
a00fbab5
TP
280 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
281 if (netdev->mtu) {
282 if (lxc_safe_uint(netdev->mtu, &mtu))
283 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 284 } else if (!is_empty_string(netdev->link)) {
54256301 285 int ifindex_mtu;
811ef482 286
54256301
CB
287 ifindex_mtu = if_nametoindex(netdev->link);
288 if (ifindex_mtu) {
289 mtu = netdev_get_mtu(ifindex_mtu);
290 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
291 }
292 }
293
294 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
9c66dc4f
CB
295 if (err)
296 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 297
24190194
CB
298 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
299
811ef482
CB
300 /* changing the high byte of the mac address to 0xfe, the bridge interface
301 * will always keep the host's mac address and not take the mac address
302 * of a container */
303 err = setup_private_host_hw_addr(veth1);
304 if (err) {
6d1400b5 305 errno = -err;
306 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
307 goto out_delete;
308 }
309
8da62485
CB
310 /* Retrieve ifindex of the host's veth device. */
311 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
312 if (!netdev->priv.veth_attr.ifindex) {
313 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
314 goto out_delete;
315 }
316
811ef482
CB
317 if (mtu) {
318 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 319 if (err) {
6d1400b5 320 errno = -err;
54256301 321 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
322 goto out_delete;
323 }
324 }
325
f2711167 326 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
811ef482
CB
327 err = lxc_bridge_attach(netdev->link, veth1);
328 if (err) {
6d1400b5 329 errno = -err;
330 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
331 veth1, netdev->link);
811ef482
CB
332 goto out_delete;
333 }
334 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
335 }
336
337 err = lxc_netdev_up(veth1);
338 if (err) {
6d1400b5 339 errno = -err;
340 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
341 goto out_delete;
342 }
343
d4a7da46 344 /* setup ipv4 routes on the host interface */
345 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
346 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
347 goto out_delete;
348 }
349
350 /* setup ipv6 routes on the host interface */
351 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
352 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
353 goto out_delete;
354 }
355
6dfa9581 356 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
357 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
358 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
359 appear to be added successfully but then do not appear in the proxy list. The length of time
360 slept doesn't appear to be important, only that the process sleeps for a short period of time.
361 */
362 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
363
6dfa9581
TP
364 if (netdev->ipv4_gateway) {
365 char bufinet4[INET_ADDRSTRLEN];
366 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 367 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
368 goto out_delete;
369 }
370
371 err = lxc_ip_forwarding_on(veth1, AF_INET);
372 if (err) {
9c66dc4f 373 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
374 goto out_delete;
375 }
376
5fe147e9 377 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 378 if (err) {
9c66dc4f 379 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
380 goto out_delete;
381 }
382 }
383
384 if (netdev->ipv6_gateway) {
385 char bufinet6[INET6_ADDRSTRLEN];
386
387 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 388 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
389 goto out_delete;
390 }
391
392 /* Check for sysctl net.ipv6.conf.all.forwarding=1
393 Kernel requires this to route any packets for IPv6.
394 */
395 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
396 if (err) {
9c66dc4f 397 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
398 goto out_delete;
399 }
400
401 err = lxc_ip_forwarding_on(veth1, AF_INET6);
402 if (err) {
9c66dc4f 403 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
404 goto out_delete;
405 }
406
407 err = lxc_neigh_proxy_on(veth1, AF_INET6);
408 if (err) {
9c66dc4f 409 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
410 goto out_delete;
411 }
412
5fe147e9 413 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 414 if (err) {
9c66dc4f 415 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
416 goto out_delete;
417 }
418 }
419
420 /* setup ipv4 address routes on the host interface */
421 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
422 if (err) {
9c66dc4f 423 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
424 goto out_delete;
425 }
426
427 /* setup ipv6 address routes on the host interface */
428 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
429 if (err) {
9c66dc4f 430 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
431 goto out_delete;
432 }
433 }
434
811ef482 435 if (netdev->upscript) {
14a7b0f9
CB
436 char *argv[] = {
437 "veth",
438 netdev->link,
990b9ac3 439 veth1,
14a7b0f9
CB
440 NULL,
441 };
442
443 err = run_script_argv(handler->name,
444 handler->conf->hooks_version, "net",
445 netdev->upscript, "up", argv);
446 if (err < 0)
811ef482
CB
447 goto out_delete;
448 }
449
54256301 450 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
451
452 return 0;
453
454out_delete:
54256301 455 lxc_netdev_delete_by_name(veth1);
811ef482
CB
456 return -1;
457}
458
459static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
460{
8021de25 461 char peer[IFNAMSIZ];
811ef482
CB
462 int err;
463
f2711167 464 if (is_empty_string(netdev->link)) {
811ef482
CB
465 ERROR("No link for macvlan network device specified");
466 return -1;
467 }
468
8021de25
CB
469 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
470 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
471 return -1;
472
8021de25 473 if (!lxc_mkifname(peer))
811ef482
CB
474 return -1;
475
476 err = lxc_macvlan_create(netdev->link, peer,
477 netdev->priv.macvlan_attr.mode);
478 if (err) {
6d1400b5 479 errno = -err;
480 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
481 peer, netdev->link);
966e9f1f 482 goto on_error;
811ef482
CB
483 }
484
a9704f05 485 strlcpy(netdev->created_name, peer, IFNAMSIZ);
3473ca76 486 if (is_empty_string(netdev->name))
8bf64b77 487 (void)strlcpy(netdev->name, peer, IFNAMSIZ);
a9704f05 488
811ef482
CB
489 netdev->ifindex = if_nametoindex(peer);
490 if (!netdev->ifindex) {
491 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 492 goto on_error;
811ef482
CB
493 }
494
3bef7b7b 495 if (netdev->mtu) {
54256301
CB
496 unsigned int mtu;
497
3bef7b7b
TP
498 err = lxc_safe_uint(netdev->mtu, &mtu);
499 if (err < 0) {
500 errno = -err;
501 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
502 goto on_error;
503 }
504
505 err = lxc_netdev_set_mtu(peer, mtu);
506 if (err < 0) {
507 errno = -err;
508 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
509 goto on_error;
510 }
511 }
512
811ef482 513 if (netdev->upscript) {
14a7b0f9
CB
514 char *argv[] = {
515 "macvlan",
516 netdev->link,
517 NULL,
518 };
519
520 err = run_script_argv(handler->name,
521 handler->conf->hooks_version, "net",
522 netdev->upscript, "up", argv);
523 if (err < 0)
966e9f1f 524 goto on_error;
811ef482
CB
525 }
526
527 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
528 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
529
530 return 0;
966e9f1f
CB
531
532on_error:
811ef482 533 lxc_netdev_delete_by_name(peer);
811ef482
CB
534 return -1;
535}
536
c9f52382 537static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
538{
d16bda44
CB
539 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
540 struct nl_handler nlh;
541 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 542 int err, index, len;
543 struct ifinfomsg *ifi;
c9f52382 544 struct rtattr *nest, *nest2;
c9f52382 545
546 len = strlen(master);
547 if (len == 1 || len >= IFNAMSIZ)
d16bda44 548 return ret_errno(EINVAL);
c9f52382 549
550 len = strlen(name);
551 if (len == 1 || len >= IFNAMSIZ)
d16bda44 552 return ret_errno(EINVAL);
c9f52382 553
554 index = if_nametoindex(master);
555 if (!index)
d16bda44 556 return ret_errno(EINVAL);
c9f52382 557
d16bda44 558 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 559 if (err)
d16bda44 560 return ret_errno(-err);
c9f52382 561
c9f52382 562 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
563 if (!nlmsg)
d16bda44 564 return ret_errno(ENOMEM);
c9f52382 565
566 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
567 if (!answer)
d16bda44 568 return ret_errno(ENOMEM);
c9f52382 569
570 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
571 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
572
573 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
574 if (!ifi)
575 return ret_errno(ENOMEM);
c9f52382 576 ifi->ifi_family = AF_UNSPEC;
577
c9f52382 578 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
579 if (!nest)
d16bda44 580 return ret_errno(EPROTO);
c9f52382 581
582 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 583 return ret_errno(EPROTO);
c9f52382 584
585 if (mode) {
586 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
587 if (!nest2)
d16bda44 588 return ret_errno(EPROTO);
c9f52382 589
590 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
d16bda44 591 return ret_errno(EPROTO);
c9f52382 592
593 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
594 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
595 */
d16bda44
CB
596 if (isolation > 0 &&
597 nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
598 return ret_errno(EPROTO);
c9f52382 599
600 nla_end_nested(nlmsg, nest2);
601 }
602
603 nla_end_nested(nlmsg, nest);
604
605 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 606 return ret_errno(EPROTO);
c9f52382 607
608 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
609 return ret_errno(EPROTO);
610
611 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 612}
613
614static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
615{
dd119206 616 char peer[IFNAMSIZ];
c9f52382 617 int err;
618
f2711167 619 if (is_empty_string(netdev->link)) {
c9f52382 620 ERROR("No link for ipvlan network device specified");
621 return -1;
622 }
623
dd119206
CB
624 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
625 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 626 return -1;
627
dd119206 628 if (!lxc_mkifname(peer))
c9f52382 629 return -1;
630
dd119206
CB
631 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
632 netdev->priv.ipvlan_attr.isolation);
c9f52382 633 if (err) {
dd119206
CB
634 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
635 peer, netdev->link);
c9f52382 636 goto on_error;
637 }
638
e7fdd504 639 strlcpy(netdev->created_name, peer, IFNAMSIZ);
3473ca76 640 if (is_empty_string(netdev->name))
8bf64b77 641 (void)strlcpy(netdev->name, peer, IFNAMSIZ);
e7fdd504 642
c9f52382 643 netdev->ifindex = if_nametoindex(peer);
644 if (!netdev->ifindex) {
645 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
646 goto on_error;
647 }
648
006e135e 649 if (netdev->mtu) {
54256301
CB
650 unsigned int mtu;
651
006e135e 652 err = lxc_safe_uint(netdev->mtu, &mtu);
653 if (err < 0) {
654 errno = -err;
54256301 655 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 656 goto on_error;
657 }
658
659 err = lxc_netdev_set_mtu(peer, mtu);
660 if (err < 0) {
661 errno = -err;
54256301 662 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 663 goto on_error;
664 }
665 }
666
c9f52382 667 if (netdev->upscript) {
668 char *argv[] = {
669 "ipvlan",
670 netdev->link,
671 NULL,
672 };
673
dd119206
CB
674 err = run_script_argv(handler->name, handler->conf->hooks_version,
675 "net", netdev->upscript, "up", argv);
c9f52382 676 if (err < 0)
677 goto on_error;
678 }
679
dd119206
CB
680 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
681 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 682
683 return 0;
684
685on_error:
686 lxc_netdev_delete_by_name(peer);
687 return -1;
688}
689
811ef482
CB
690static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
691{
692 char peer[IFNAMSIZ];
693 int err;
694 static uint16_t vlan_cntr = 0;
811ef482 695
f2711167 696 if (is_empty_string(netdev->link)) {
811ef482
CB
697 ERROR("No link for vlan network device specified");
698 return -1;
699 }
700
d4d68410
CB
701 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
702 netdev->priv.vlan_attr.vid, vlan_cntr++);
811ef482
CB
703 if (err < 0 || (size_t)err >= sizeof(peer))
704 return -1;
705
706 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
707 if (err) {
6d1400b5 708 errno = -err;
709 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
710 peer, netdev->link);
811ef482
CB
711 return -1;
712 }
713
83530dba 714 strlcpy(netdev->created_name, peer, IFNAMSIZ);
3473ca76 715 if (is_empty_string(netdev->name))
8bf64b77 716 (void)strlcpy(netdev->name, peer, IFNAMSIZ);
83530dba 717
811ef482
CB
718 netdev->ifindex = if_nametoindex(peer);
719 if (!netdev->ifindex) {
720 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 721 goto on_error;
722 }
723
724 if (netdev->mtu) {
54256301
CB
725 unsigned int mtu;
726
3e2a7b08 727 err = lxc_safe_uint(netdev->mtu, &mtu);
728 if (err < 0) {
729 errno = -err;
54256301 730 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 731 goto on_error;
732 }
733
734 err = lxc_netdev_set_mtu(peer, mtu);
54256301 735 if (err < 0) {
3e2a7b08 736 errno = -err;
54256301 737 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 738 goto on_error;
739 }
811ef482
CB
740 }
741
3a73d9f1 742 if (netdev->upscript) {
743 char *argv[] = {
744 "vlan",
745 netdev->link,
746 NULL,
747 };
748
d4d68410
CB
749 err = run_script_argv(handler->name, handler->conf->hooks_version,
750 "net", netdev->upscript, "up", argv);
19abca58 751 if (err < 0) {
3e2a7b08 752 goto on_error;
19abca58 753 }
3a73d9f1 754 }
755
d4d68410
CB
756 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
757 netdev->ifindex);
811ef482
CB
758
759 return 0;
3e2a7b08 760
761on_error:
762 lxc_netdev_delete_by_name(peer);
763 return -1;
811ef482
CB
764}
765
766static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
767{
0b154989 768 int err, mtu_orig = 0;
14a7b0f9 769
9c66dc4f
CB
770 if (is_empty_string(netdev->link))
771 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 772
75b074ee
CB
773 /*
774 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
775 * network namespace because we need it to move the device from the
776 * host's network namespace to the container's network namespace later
777 * on.
778 * Note that netdev->link will contain the name of the physical network
779 * device in the host's namespace.
780 */
811ef482 781 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
782 if (!netdev->ifindex)
783 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 784
61302ef7 785 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 786 if (is_empty_string(netdev->name))
8bf64b77 787 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 788
75b074ee
CB
789 /*
790 * Store the ifindex of the host's network device in the host's
790255cf
CB
791 * namespace.
792 */
793 netdev->priv.phys_attr.ifindex = netdev->ifindex;
794
75b074ee
CB
795 /*
796 * Get original device MTU setting and store for restoration after
797 * container shutdown.
798 */
0b154989 799 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
800 if (mtu_orig < 0)
801 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
802
803 netdev->priv.phys_attr.mtu = mtu_orig;
804
3bef7b7b 805 if (netdev->mtu) {
54256301
CB
806 unsigned int mtu;
807
3bef7b7b 808 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
809 if (err < 0)
810 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 811
3bef7b7b 812 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
813 if (err < 0)
814 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
815 }
816
817 if (netdev->upscript) {
818 char *argv[] = {
819 "phys",
820 netdev->link,
821 NULL,
822 };
823
75b074ee
CB
824 err = run_script_argv(handler->name, handler->conf->hooks_version,
825 "net", netdev->upscript, "up", argv);
9c66dc4f 826 if (err < 0)
3bef7b7b 827 return -1;
3bef7b7b
TP
828 }
829
75b074ee
CB
830 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
831 netdev->ifindex);
811ef482
CB
832
833 return 0;
834}
835
836static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
837{
14a7b0f9
CB
838 int ret;
839 char *argv[] = {
840 "empty",
841 NULL,
842 };
843
811ef482 844 netdev->ifindex = 0;
14a7b0f9
CB
845 if (!netdev->upscript)
846 return 0;
847
848 ret = run_script_argv(handler->name, handler->conf->hooks_version,
849 "net", netdev->upscript, "up", argv);
850 if (ret < 0)
851 return -1;
852
811ef482
CB
853 return 0;
854}
855
856static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
857{
858 netdev->ifindex = 0;
859 return 0;
860}
861
862static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
863 [LXC_NET_VETH] = instantiate_veth,
864 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 865 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
866 [LXC_NET_VLAN] = instantiate_vlan,
867 [LXC_NET_PHYS] = instantiate_phys,
868 [LXC_NET_EMPTY] = instantiate_empty,
869 [LXC_NET_NONE] = instantiate_none,
870};
871
8bf64b77
CB
872static int instantiate_ns_veth(struct lxc_netdev *netdev)
873{
874 char current_ifname[IFNAMSIZ];
875
876 netdev->ifindex = if_nametoindex(netdev->created_name);
877 if (!netdev->ifindex)
878 return log_error_errno(-1,
879 errno, "Failed to retrieve ifindex for network device with name %s",
880 netdev->created_name);
881
3473ca76 882 if (is_empty_string(netdev->name))
8bf64b77
CB
883 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
884
885 if (strcmp(netdev->created_name, netdev->name) != 0) {
886 int ret;
887
888 ret = lxc_netdev_rename_by_name(netdev->created_name, netdev->name);
889 if (ret)
9c66dc4f 890 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
8bf64b77
CB
891 netdev->created_name,
892 netdev->name);
893
894 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->created_name, netdev->name);
895 }
896
897 /*
898 * Re-read the name of the interface because its name has changed and
899 * would be automatically allocated by the system
900 */
901 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 902 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
903
904 /*
905 * Now update the recorded name of the network device to reflect the
906 * name of the network device in the child's network namespace. We will
907 * later on send this information back to the parent.
908 */
909 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
910
911 return 0;
912}
913
914static int __instantiate_common(struct lxc_netdev *netdev)
915{
916 netdev->ifindex = if_nametoindex(netdev->name);
917 if (!netdev->ifindex)
9c66dc4f 918 return log_error_errno(-1, errno, "Failed to retrieve ifindex for network device with name %s", netdev->name);
8bf64b77
CB
919
920 return 0;
921}
922
/* macvlan: only the ifindex lookup by name is needed. */
static int instantiate_ns_macvlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
927
/* ipvlan: only the ifindex lookup by name is needed. */
static int instantiate_ns_ipvlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
932
/* vlan: only the ifindex lookup by name is needed. */
static int instantiate_ns_vlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
937
/* phys: only the ifindex lookup by name is needed. */
static int instantiate_ns_phys(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
942
/* empty: nothing to do; always returns 0. */
static int instantiate_ns_empty(struct lxc_netdev *netdev)
{
	(void)netdev;

	return 0;
}
947
/* none: nothing to do; always returns 0. */
static int instantiate_ns_none(struct lxc_netdev *netdev)
{
	(void)netdev;

	return 0;
}
952
953static instantiate_ns_cb netdev_ns_conf[LXC_NET_MAXCONFTYPE + 1] = {
954 [LXC_NET_VETH] = instantiate_ns_veth,
955 [LXC_NET_MACVLAN] = instantiate_ns_macvlan,
956 [LXC_NET_IPVLAN] = instantiate_ns_ipvlan,
957 [LXC_NET_VLAN] = instantiate_ns_vlan,
958 [LXC_NET_PHYS] = instantiate_ns_phys,
959 [LXC_NET_EMPTY] = instantiate_ns_empty,
960 [LXC_NET_NONE] = instantiate_ns_none,
961};
962
811ef482
CB
963static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
964{
14a7b0f9
CB
965 int ret;
966 char *argv[] = {
967 "veth",
968 netdev->link,
969 NULL,
970 NULL,
971 };
972
973 if (!netdev->downscript)
974 return 0;
811ef482 975
f2711167 976 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 977 argv[2] = netdev->priv.veth_attr.pair;
811ef482 978 else
14a7b0f9
CB
979 argv[2] = netdev->priv.veth_attr.veth1;
980
981 ret = run_script_argv(handler->name,
982 handler->conf->hooks_version, "net",
983 netdev->downscript, "down", argv);
984 if (ret < 0)
985 return -1;
811ef482 986
811ef482
CB
987 return 0;
988}
989
990static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
991{
14a7b0f9
CB
992 int ret;
993 char *argv[] = {
994 "macvlan",
995 netdev->link,
996 NULL,
997 };
998
999 if (!netdev->downscript)
1000 return 0;
1001
1002 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1003 "net", netdev->downscript, "down", argv);
1004 if (ret < 0)
1005 return -1;
811ef482 1006
811ef482
CB
1007 return 0;
1008}
1009
c9f52382 1010static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1011{
1012 int ret;
1013 char *argv[] = {
1014 "ipvlan",
1015 netdev->link,
1016 NULL,
1017 };
1018
1019 if (!netdev->downscript)
1020 return 0;
1021
1022 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1023 "net", netdev->downscript, "down", argv);
1024 if (ret < 0)
1025 return -1;
1026
1027 return 0;
1028}
1029
811ef482
CB
1030static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1031{
3a73d9f1 1032 int ret;
1033 char *argv[] = {
1034 "vlan",
1035 netdev->link,
1036 NULL,
1037 };
1038
1039 if (!netdev->downscript)
1040 return 0;
1041
1042 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1043 "net", netdev->downscript, "down", argv);
1044 if (ret < 0)
1045 return -1;
1046
811ef482
CB
1047 return 0;
1048}
1049
1050static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1051{
14a7b0f9
CB
1052 int ret;
1053 char *argv[] = {
1054 "phys",
1055 netdev->link,
1056 NULL,
1057 };
1058
1059 if (!netdev->downscript)
1060 return 0;
1061
1062 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1063 "net", netdev->downscript, "down", argv);
1064 if (ret < 0)
1065 return -1;
811ef482 1066
811ef482
CB
1067 return 0;
1068}
1069
1070static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1071{
14a7b0f9
CB
1072 int ret;
1073 char *argv[] = {
1074 "empty",
1075 NULL,
1076 };
1077
1078 if (!netdev->downscript)
1079 return 0;
1080
1081 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1082 "net", netdev->downscript, "down", argv);
1083 if (ret < 0)
1084 return -1;
811ef482 1085
811ef482
CB
1086 return 0;
1087}
1088
/* A "none" network has nothing to tear down; always succeeds. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
1093
1094static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1095 [LXC_NET_VETH] = shutdown_veth,
1096 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 1097 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
1098 [LXC_NET_VLAN] = shutdown_vlan,
1099 [LXC_NET_PHYS] = shutdown_phys,
1100 [LXC_NET_EMPTY] = shutdown_empty,
1101 [LXC_NET_NONE] = shutdown_none,
1102};
1103
0037ab49
TP
1104static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1105{
d16bda44 1106 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0037ab49 1107 struct nl_handler nlh;
d16bda44
CB
1108 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1109 int err;
0037ab49 1110 struct ifinfomsg *ifi;
0037ab49 1111
d16bda44 1112 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1113 if (err)
1114 return err;
1115
0037ab49
TP
1116 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1117 if (!nlmsg)
d16bda44 1118 return ret_errno(ENOMEM);
0037ab49
TP
1119
1120 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1121 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1122
1123 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1124 if (!ifi)
d16bda44
CB
1125 return ret_errno(ENOMEM);
1126
0037ab49
TP
1127 ifi->ifi_family = AF_UNSPEC;
1128 ifi->ifi_index = ifindex;
1129
1130 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1131 return ret_errno(ENOMEM);
0037ab49 1132
3473ca76 1133 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1134 return ret_errno(ENOMEM);
0037ab49 1135
d16bda44 1136 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1137}
1138
ebc73a67 1139int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1140{
d16bda44 1141 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0ad19a3f 1142 struct nl_handler nlh;
d16bda44
CB
1143 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1144 int err;
06f976ca 1145 struct ifinfomsg *ifi;
0ad19a3f 1146
d16bda44 1147 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1148 if (err)
1149 return err;
0ad19a3f 1150
0ad19a3f 1151 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1152 if (!nlmsg)
d16bda44 1153 return ret_errno(ENOMEM);
0ad19a3f 1154
ebc73a67 1155 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1156 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1157
1158 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1159 if (!ifi)
d16bda44
CB
1160 return ret_errno(ENOMEM);
1161
06f976ca
SZ
1162 ifi->ifi_family = AF_UNSPEC;
1163 ifi->ifi_index = ifindex;
0ad19a3f 1164
1165 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1166 return ret_errno(ENOMEM);
0ad19a3f 1167
3473ca76 1168 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1169 return ret_errno(ENOMEM);
8d357196 1170
d16bda44 1171 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1172}
1173
ebc73a67
CB
/* If we are asked to move a wireless interface, then we must actually move its
 * phyN device. Detect that condition and return the physname here. The returned
 * string is heap-allocated and owned by the caller, which must free it;
 * lxc_netdev_move_wlan() only reads the name and does not free it.
 */
1178#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1179char *is_wlan(const char *ifname)
e5848d39 1180{
4110345b
CB
1181 __do_fclose FILE *f = NULL;
1182 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1183 int i, ret;
e5848d39 1184 long physlen;
ebc73a67 1185 size_t len;
e5848d39 1186
ebc73a67 1187 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1188 path = must_realloc(NULL, len + 1);
e5848d39 1189 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 1190 if (ret < 0 || (size_t)ret >= len)
4110345b 1191 return NULL;
ebc73a67 1192
4110345b 1193 f = fopen(path, "re");
ebc73a67 1194 if (!f)
4110345b 1195 return NULL;
ebc73a67 1196
1a0e70ac 1197 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1198 fseek(f, 0, SEEK_END);
1199 physlen = ftell(f);
1200 fseek(f, 0, SEEK_SET);
4110345b
CB
1201 if (physlen < 0)
1202 return NULL;
ebc73a67
CB
1203
1204 physname = malloc(physlen + 1);
4110345b
CB
1205 if (!physname)
1206 return NULL;
ebc73a67
CB
1207
1208 memset(physname, 0, physlen + 1);
e5848d39 1209 ret = fread(physname, 1, physlen, f);
e5848d39 1210 if (ret < 0)
4110345b 1211 return NULL;
e5848d39 1212
ebc73a67 1213 for (i = 0; i < physlen; i++) {
e5848d39
SH
1214 if (physname[i] == '\n')
1215 physname[i] = '\0';
ebc73a67 1216
e5848d39
SH
1217 if (physname[i] == '\0')
1218 break;
1219 }
1220
4110345b 1221 return move_ptr(physname);
e5848d39
SH
1222}
1223
ebc73a67
CB
/*
 * Rename interface old to new from within the network namespace of pid.
 *
 * A child process is forked; it enters the "net" namespace of pid via
 * switch_to_ns() and calls lxc_netdev_rename_by_name() there. The parent
 * waits for the child and returns the result of wait_for_pid(), or -1 when
 * fork() failed.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on. It must never return into the caller: doing so
	 * would leave a duplicate of the parent running its code. Every path
	 * therefore ends in _exit().
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	/* Map the (negative) error code to a well-defined exit status. */
	_exit(lxc_netdev_rename_by_name(old, new) ? EXIT_FAILURE : EXIT_SUCCESS);
}
1241
e4103cf6 1242int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1243 const char *newname)
e5848d39 1244{
3dd78294 1245 __do_free char *cmd = NULL;
ebc73a67 1246 pid_t fpid;
e5848d39
SH
1247
1248 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1249 * However, IIUC this involves a bit more complicated work to talk to
1250 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1251 */
1252 cmd = on_path("iw", NULL);
9c66dc4f 1253 if (!cmd)
3dd78294 1254 return -1;
e5848d39
SH
1255
1256 fpid = fork();
1257 if (fpid < 0)
3dd78294 1258 return -1;
ebc73a67 1259
e5848d39
SH
1260 if (fpid == 0) {
1261 char pidstr[30];
1262 sprintf(pidstr, "%d", pid);
9c66dc4f 1263 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1264 _exit(EXIT_FAILURE);
e5848d39 1265 }
ebc73a67 1266
e5848d39 1267 if (wait_for_pid(fpid))
3dd78294 1268 return -1;
e5848d39 1269
e5848d39 1270 if (newname)
3dd78294 1271 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1272
3dd78294 1273 return 0;
e5848d39
SH
1274}
1275
8d357196 1276int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1277{
3dd78294 1278 __do_free char *physname = NULL;
8befa924
SH
1279 int index;
1280
8befa924
SH
1281 if (!ifname)
1282 return -EINVAL;
1283
32571606 1284 index = if_nametoindex(ifname);
49428bf3
DY
1285 if (!index)
1286 return -EINVAL;
32571606 1287
ebc73a67
CB
1288 physname = is_wlan(ifname);
1289 if (physname)
e5848d39
SH
1290 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1291
8d357196 1292 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1293}
1294
b84f58b9 1295int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1296{
d16bda44
CB
1297 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1298 struct nl_handler nlh;
1299 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1300 int err;
ebc73a67 1301 struct ifinfomsg *ifi;
0ad19a3f 1302
d16bda44 1303 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1304 if (err)
1305 return err;
0ad19a3f 1306
0ad19a3f 1307 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1308 if (!nlmsg)
d16bda44 1309 return ret_errno(ENOMEM);
0ad19a3f 1310
06f976ca 1311 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1312 if (!answer)
d16bda44 1313 return ret_errno(ENOMEM);
0ad19a3f 1314
ebc73a67 1315 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1316 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1317
1318 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1319 if (!ifi)
d16bda44
CB
1320 return ret_errno(ENOMEM);
1321
06f976ca
SZ
1322 ifi->ifi_family = AF_UNSPEC;
1323 ifi->ifi_index = ifindex;
0ad19a3f 1324
d16bda44 1325 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1326}
1327
b84f58b9
DL
/*
 * Delete the interface called name.
 *
 * Returns -EINVAL when if_nametoindex() finds no such interface, otherwise
 * the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1338
1339int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1340{
d16bda44
CB
1341 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1342 struct nl_handler nlh;
1343 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1344 int err, len;
06f976ca 1345 struct ifinfomsg *ifi;
b9a5bb58 1346
d16bda44 1347 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1348 if (err)
1349 return err;
b9a5bb58 1350
b84f58b9 1351 len = strlen(newname);
d16bda44
CB
1352 if (len == 1 || len >= IFNAMSIZ)
1353 return ret_errno(EINVAL);
b84f58b9 1354
b9a5bb58
DL
1355 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1356 if (!nlmsg)
d16bda44 1357 return ret_errno(ENOMEM);
b9a5bb58 1358
06f976ca 1359 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1360 if (!answer)
d16bda44 1361 return ret_errno(ENOMEM);
b9a5bb58 1362
ebc73a67 1363 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1364 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1365
1366 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1367 if (!ifi)
d16bda44
CB
1368 return ret_errno(ENOMEM);
1369
06f976ca
SZ
1370 ifi->ifi_family = AF_UNSPEC;
1371 ifi->ifi_index = ifindex;
b84f58b9
DL
1372
1373 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1374 return ret_errno(ENOMEM);
b9a5bb58 1375
d16bda44 1376 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1377}
1378
b84f58b9
DL
1379int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1380{
1381 int len, index;
1382
1383 len = strlen(oldname);
dae3fdf6 1384 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1385 return -EINVAL;
1386
1387 index = if_nametoindex(oldname);
1388 if (!index)
1389 return -EINVAL;
1390
1391 return lxc_netdev_rename_by_index(index, newname);
1392}
1393
8befa924 1394int netdev_set_flag(const char *name, int flag)
0ad19a3f 1395{
d16bda44
CB
1396 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1397 struct nl_handler nlh;
1398 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1399 int err, index, len;
06f976ca 1400 struct ifinfomsg *ifi;
0ad19a3f 1401
d16bda44 1402 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1403 if (err)
1404 return err;
0ad19a3f 1405
1406 len = strlen(name);
dae3fdf6 1407 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1408 return ret_errno(EINVAL);
0ad19a3f 1409
1410 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1411 if (!nlmsg)
d16bda44 1412 return ret_errno(ENOMEM);
0ad19a3f 1413
06f976ca 1414 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1415 if (!answer)
d16bda44 1416 return ret_errno(ENOMEM);
0ad19a3f 1417
1418 index = if_nametoindex(name);
1419 if (!index)
d16bda44 1420 return ret_errno(EINVAL);
0ad19a3f 1421
ebc73a67 1422 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1423 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1424
1425 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1426 if (!ifi)
1427 return ret_errno(ENOMEM);
1428
06f976ca
SZ
1429 ifi->ifi_family = AF_UNSPEC;
1430 ifi->ifi_index = index;
1431 ifi->ifi_change |= IFF_UP;
1432 ifi->ifi_flags |= flag;
0ad19a3f 1433
d16bda44 1434 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1435}
1436
ebc73a67 1437int netdev_get_flag(const char *name, int *flag)
efa1cf45 1438{
d16bda44
CB
1439 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1440 struct nl_handler nlh;
1441 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1442 int err, index, len;
a4318300 1443 struct ifinfomsg *ifi;
efa1cf45
DY
1444
1445 if (!name)
d16bda44 1446 return ret_errno(EINVAL);
efa1cf45 1447
d16bda44 1448 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1449 if (err)
1450 return err;
1451
efa1cf45
DY
1452 len = strlen(name);
1453 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1454 return ret_errno(EINVAL);
efa1cf45 1455
efa1cf45
DY
1456 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1457 if (!nlmsg)
d16bda44 1458 return ret_errno(ENOMEM);
efa1cf45 1459
06f976ca 1460 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1461 if (!answer)
d16bda44 1462 return ret_errno(ENOMEM);
efa1cf45 1463
efa1cf45
DY
1464 index = if_nametoindex(name);
1465 if (!index)
d16bda44 1466 return ret_errno(EINVAL);
efa1cf45 1467
06f976ca
SZ
1468 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1469 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1470
1471 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1472 if (!ifi)
1473 return ret_errno(ENOMEM);
1474
06f976ca
SZ
1475 ifi->ifi_family = AF_UNSPEC;
1476 ifi->ifi_index = index;
efa1cf45 1477
d16bda44 1478 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1479 if (err)
d16bda44 1480 return ret_set_errno(-1, errno);
efa1cf45 1481
06f976ca 1482 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1483
1484 *flag = ifi->ifi_flags;
efa1cf45
DY
1485 return err;
1486}
1487
/*
 * \brief Check whether an interface is up.
 *
 * \param name: name of the interface.
 *
 * \return int.
 * 0 means the interface is down.
 * 1 means the interface is up.
 * Any other value means an error occurred; it is the non-zero value
 * returned by netdev_get_flag().
 */
ebc73a67 1498int lxc_netdev_isup(const char *name)
efa1cf45 1499{
ebc73a67 1500 int err, flag;
efa1cf45
DY
1501
1502 err = netdev_get_flag(name, &flag);
1503 if (err)
ebc73a67
CB
1504 return err;
1505
efa1cf45
DY
1506 if (flag & IFF_UP)
1507 return 1;
ebc73a67 1508
efa1cf45 1509 return 0;
efa1cf45
DY
1510}
1511
0130df54
SH
1512int netdev_get_mtu(int ifindex)
1513{
ebc73a67 1514 int answer_len, err, res;
0130df54 1515 struct nl_handler nlh;
06f976ca 1516 struct ifinfomsg *ifi;
0130df54 1517 struct nlmsghdr *msg;
ebc73a67
CB
1518 int readmore = 0, recv_len = 0;
1519 struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54
SH
1520
1521 err = netlink_open(&nlh, NETLINK_ROUTE);
1522 if (err)
1523 return err;
1524
1525 err = -ENOMEM;
1526 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1527 if (!nlmsg)
1528 goto out;
1529
06f976ca 1530 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54
SH
1531 if (!answer)
1532 goto out;
1533
1534 /* Save the answer buffer length, since it will be overwritten
1535 * on the first receive (and we might need to receive more than
ebc73a67
CB
1536 * once.
1537 */
06f976ca
SZ
1538 answer_len = answer->nlmsghdr->nlmsg_len;
1539
ebc73a67 1540 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1541 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1542
06f976ca 1543 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1544 if (!ifi)
1545 goto out;
06f976ca 1546 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1547
1548 /* Send the request for addresses, which returns all addresses
1549 * on all interfaces. */
1550 err = netlink_send(&nlh, nlmsg);
1551 if (err < 0)
1552 goto out;
1553
6ce39620
CB
1554#pragma GCC diagnostic push
1555#pragma GCC diagnostic ignored "-Wcast-align"
1556
0130df54
SH
1557 do {
1558 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1559 * overwritten by a previous receive.
1560 */
06f976ca 1561 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1562
1563 /* Get the (next) batch of reply messages */
1564 err = netlink_rcv(&nlh, answer);
1565 if (err < 0)
1566 goto out;
1567
1568 recv_len = err;
0130df54
SH
1569
1570 /* Satisfy the typing for the netlink macros */
06f976ca 1571 msg = answer->nlmsghdr;
0130df54
SH
1572
1573 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1574 /* Stop reading if we see an error message */
1575 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
1576 struct nlmsgerr *errmsg =
1577 (struct nlmsgerr *)NLMSG_DATA(msg);
0130df54
SH
1578 err = errmsg->error;
1579 goto out;
1580 }
1581
1582 /* Stop reading if we see a NLMSG_DONE message */
1583 if (msg->nlmsg_type == NLMSG_DONE) {
1584 readmore = 0;
1585 break;
1586 }
1587
06f976ca 1588 ifi = NLMSG_DATA(msg);
0130df54
SH
1589 if (ifi->ifi_index == ifindex) {
1590 struct rtattr *rta = IFLA_RTA(ifi);
ebc73a67
CB
1591 int attr_len =
1592 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
0130df54 1593 res = 0;
ebc73a67 1594 while (RTA_OK(rta, attr_len)) {
9c66dc4f
CB
1595 /*
1596 Found a local address for the
ebc73a67
CB
1597 * requested interface, return it.
1598 */
0130df54 1599 if (rta->rta_type == IFLA_MTU) {
ebc73a67
CB
1600 memcpy(&res, RTA_DATA(rta),
1601 sizeof(int));
0130df54
SH
1602 err = res;
1603 goto out;
1604 }
1605 rta = RTA_NEXT(rta, attr_len);
1606 }
0130df54
SH
1607 }
1608
ebc73a67
CB
1609 /* Keep reading more data from the socket if the last
1610 * message had the NLF_F_MULTI flag set.
1611 */
0130df54
SH
1612 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1613
ebc73a67 1614 /* Look at the next message received in this buffer. */
0130df54
SH
1615 msg = NLMSG_NEXT(msg, recv_len);
1616 }
1617 } while (readmore);
1618
6ce39620
CB
1619#pragma GCC diagnostic pop
1620
ebc73a67 1621 /* If we end up here, we didn't find any result, so signal an error. */
0130df54
SH
1622 err = -1;
1623
1624out:
1625 netlink_close(&nlh);
1626 nlmsg_free(answer);
1627 nlmsg_free(nlmsg);
1628 return err;
1629}
1630
d472214b 1631int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1632{
54256301 1633 int err, len;
06f976ca 1634 struct ifinfomsg *ifi;
ebc73a67
CB
1635 struct nl_handler nlh;
1636 struct nlmsg *answer = NULL, *nlmsg = NULL;
75d09f83 1637
3cfc0f3a
MN
1638 err = netlink_open(&nlh, NETLINK_ROUTE);
1639 if (err)
1640 return err;
75d09f83 1641
3cfc0f3a 1642 err = -EINVAL;
75d09f83 1643 len = strlen(name);
dae3fdf6 1644 if (len == 1 || len >= IFNAMSIZ)
75d09f83
DL
1645 goto out;
1646
3cfc0f3a 1647 err = -ENOMEM;
75d09f83
DL
1648 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1649 if (!nlmsg)
1650 goto out;
1651
06f976ca 1652 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83
DL
1653 if (!answer)
1654 goto out;
1655
ebc73a67 1656 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1657 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1658
1659 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1660 if (!ifi) {
1661 err = -ENOMEM;
1662 goto out;
1663 }
06f976ca 1664 ifi->ifi_family = AF_UNSPEC;
54256301
CB
1665
1666 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1667 goto out;
75d09f83
DL
1668
1669 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1670 goto out;
1671
1672 err = netlink_transaction(&nlh, nlmsg, answer);
75d09f83
DL
1673out:
1674 netlink_close(&nlh);
1675 nlmsg_free(nlmsg);
1676 nlmsg_free(answer);
1677 return err;
1678}
1679
d472214b 1680int lxc_netdev_up(const char *name)
0ad19a3f 1681{
d472214b 1682 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1683}
1684
/* Request the interface called name to be down: netdev_set_flag() with no flags. */
int lxc_netdev_down(const char *name)
{
	const int flags = 0;

	return netdev_set_flag(name, flags);
}
1689
54256301 1690int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 1691{
ebc73a67 1692 int err, len;
06f976ca 1693 struct ifinfomsg *ifi;
ebc73a67 1694 struct nl_handler nlh;
0ad19a3f 1695 struct rtattr *nest1, *nest2, *nest3;
ebc73a67 1696 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1697
3cfc0f3a
MN
1698 err = netlink_open(&nlh, NETLINK_ROUTE);
1699 if (err)
1700 return err;
0ad19a3f 1701
3cfc0f3a 1702 err = -EINVAL;
0ad19a3f 1703 len = strlen(name1);
dae3fdf6 1704 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1705 goto out;
1706
1707 len = strlen(name2);
dae3fdf6 1708 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1709 goto out;
1710
3cfc0f3a 1711 err = -ENOMEM;
0ad19a3f 1712 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1713 if (!nlmsg)
1714 goto out;
1715
06f976ca 1716 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1717 if (!answer)
1718 goto out;
1719
06f976ca 1720 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1721 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1722 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1723
1724 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1725 if (!ifi)
1726 goto out;
06f976ca 1727 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1728
3cfc0f3a 1729 err = -EINVAL;
79e68309 1730 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1731 if (!nest1)
1732 goto out;
1733
1734 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1735 goto out;
1736
1737 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1738 if (!nest2)
1739 goto out;
1740
1741 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1742 if (!nest3)
1743 goto out;
1744
06f976ca 1745 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1746 if (!ifi) {
1747 err = -ENOMEM;
06f976ca 1748 goto out;
25a9939b 1749 }
0ad19a3f 1750
1751 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1752 goto out;
1753
54256301
CB
1754 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
1755 goto out;
1756
1757 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
1758 goto out;
1759
0ad19a3f 1760 nla_end_nested(nlmsg, nest3);
0ad19a3f 1761 nla_end_nested(nlmsg, nest2);
0ad19a3f 1762 nla_end_nested(nlmsg, nest1);
1763
1764 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1765 goto out;
1766
3cfc0f3a 1767 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1768out:
1769 netlink_close(&nlh);
1770 nlmsg_free(answer);
1771 nlmsg_free(nlmsg);
1772 return err;
1773}
1774
ebc73a67 1775/* TODO: merge with lxc_macvlan_create */
7c11d57a 1776int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1777{
ebc73a67 1778 int err, len, lindex;
06f976ca 1779 struct ifinfomsg *ifi;
ebc73a67 1780 struct nl_handler nlh;
26c39028 1781 struct rtattr *nest, *nest2;
ebc73a67 1782 struct nlmsg *answer = NULL, *nlmsg = NULL;
26c39028 1783
3cfc0f3a
MN
1784 err = netlink_open(&nlh, NETLINK_ROUTE);
1785 if (err)
1786 return err;
26c39028 1787
3cfc0f3a 1788 err = -EINVAL;
26c39028 1789 len = strlen(master);
dae3fdf6 1790 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1791 goto err3;
1792
1793 len = strlen(name);
dae3fdf6 1794 if (len == 1 || len >= IFNAMSIZ)
26c39028
JHS
1795 goto err3;
1796
3cfc0f3a 1797 err = -ENOMEM;
26c39028
JHS
1798 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1799 if (!nlmsg)
1800 goto err3;
1801
06f976ca 1802 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028
JHS
1803 if (!answer)
1804 goto err2;
1805
3cfc0f3a 1806 err = -EINVAL;
26c39028
JHS
1807 lindex = if_nametoindex(master);
1808 if (!lindex)
1809 goto err1;
1810
06f976ca 1811 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1812 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1813 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1814
1815 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1816 if (!ifi) {
1817 err = -ENOMEM;
1818 goto err1;
1819 }
06f976ca 1820 ifi->ifi_family = AF_UNSPEC;
26c39028 1821
79e68309 1822 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028
JHS
1823 if (!nest)
1824 goto err1;
1825
1826 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1827 goto err1;
1828
1829 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1830 if (!nest2)
1831 goto err1;
e892973e 1832
26c39028
JHS
1833 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1834 goto err1;
e892973e 1835
26c39028 1836 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1837 nla_end_nested(nlmsg, nest);
1838
1839 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1840 goto err1;
1841
1842 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1843 goto err1;
1844
3cfc0f3a 1845 err = netlink_transaction(&nlh, nlmsg, answer);
26c39028
JHS
1846err1:
1847 nlmsg_free(answer);
1848err2:
1849 nlmsg_free(nlmsg);
1850err3:
1851 netlink_close(&nlh);
1852 return err;
1853}
1854
e892973e 1855int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1856{
ebc73a67 1857 int err, index, len;
06f976ca 1858 struct ifinfomsg *ifi;
ebc73a67 1859 struct nl_handler nlh;
e892973e 1860 struct rtattr *nest, *nest2;
ebc73a67 1861 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 1862
3cfc0f3a
MN
1863 err = netlink_open(&nlh, NETLINK_ROUTE);
1864 if (err)
1865 return err;
0ad19a3f 1866
3cfc0f3a 1867 err = -EINVAL;
0ad19a3f 1868 len = strlen(master);
dae3fdf6 1869 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1870 goto out;
1871
1872 len = strlen(name);
dae3fdf6 1873 if (len == 1 || len >= IFNAMSIZ)
0ad19a3f 1874 goto out;
1875
3cfc0f3a 1876 err = -ENOMEM;
0ad19a3f 1877 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1878 if (!nlmsg)
1879 goto out;
1880
06f976ca 1881 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1882 if (!answer)
1883 goto out;
1884
3cfc0f3a 1885 err = -EINVAL;
0ad19a3f 1886 index = if_nametoindex(master);
1887 if (!index)
1888 goto out;
1889
06f976ca 1890 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 1891 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1892 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1893
1894 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b
WC
1895 if (!ifi) {
1896 err = -ENOMEM;
1897 goto out;
1898 }
06f976ca 1899 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1900
79e68309 1901 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1902 if (!nest)
1903 goto out;
1904
1905 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1906 goto out;
1907
e892973e
DL
1908 if (mode) {
1909 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1910 if (!nest2)
1911 goto out;
1912
1913 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1914 goto out;
1915
1916 nla_end_nested(nlmsg, nest2);
1917 }
1918
0ad19a3f 1919 nla_end_nested(nlmsg, nest);
1920
1921 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1922 goto out;
1923
1924 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1925 goto out;
1926
3cfc0f3a 1927 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 1928out:
1929 netlink_close(&nlh);
1930 nlmsg_free(answer);
1931 nlmsg_free(nlmsg);
1932 return err;
1933}
1934
1935static int proc_sys_net_write(const char *path, const char *value)
1936{
ebc73a67
CB
1937 int fd;
1938 int err = 0;
0ad19a3f 1939
1940 fd = open(path, O_WRONLY);
1941 if (fd < 0)
1942 return -errno;
1943
f640cf46 1944 if (lxc_write_nointr(fd, value, strlen(value)) < 0)
0ad19a3f 1945 err = -errno;
1946
1947 close(fd);
1948 return err;
1949}
1950
6dfa9581 1951static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 1952{
1953 int ret;
1954 char path[PATH_MAX];
6509154d 1955
1956 if (family != AF_INET && family != AF_INET6)
6dfa9581 1957 return -EINVAL;
6509154d 1958
9c66dc4f 1959 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581 1960 family == AF_INET ? "ipv4" : "ipv6", ifname, "forwarding");
9c66dc4f 1961 if (ret < 0 || (size_t)ret >= sizeof(path))
6dfa9581 1962 return -E2BIG;
6509154d 1963
6dfa9581
TP
1964 return proc_sys_net_write(path, flag ? "1" : "0");
1965}
1966
/* Enable forwarding for interface name and address family: ip_forwarding_set() with flag 1. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
1971
/* Disable forwarding for interface name and address family: ip_forwarding_set() with flag 0. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int enable = 0;

	return ip_forwarding_set(name, family, enable);
}
1976
0ad19a3f 1977static int neigh_proxy_set(const char *ifname, int family, int flag)
1978{
9ba8130c 1979 int ret;
419590da 1980 char path[PATH_MAX];
0ad19a3f 1981
1982 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 1983 return -EINVAL;
0ad19a3f 1984
9c66dc4f 1985 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
1986 family == AF_INET ? "ipv4" : "ipv6", ifname,
1987 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 1988 if (ret < 0 || (size_t)ret >= sizeof(path))
9ba8130c 1989 return -E2BIG;
0ad19a3f 1990
ebc73a67 1991 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 1992}
1993
6509154d 1994static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1995{
1996 int ret;
1997 char path[PATH_MAX];
1998 char buf[1] = "";
1999
2000 if (family != AF_INET && family != AF_INET6)
596a002c 2001 return ret_set_errno(-1, EINVAL);
6509154d 2002
9c66dc4f 2003 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6509154d 2004 family == AF_INET ? "ipv4" : "ipv6", ifname,
2005 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 2006 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 2007 return ret_set_errno(-1, E2BIG);
6509154d 2008
2009 return lxc_read_file_expect(path, buf, 1, "1");
2010}
2011
/* Enable neighbour proxying for interface name: neigh_proxy_set() with flag 1. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2016
/* Disable neighbour proxying for interface name: neigh_proxy_set() with flag 0. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
2021
/* Return the value of the hexadecimal digit c, or -1 if c is not one. */
static int mac_hex_value(char c)
{
	/* <ctype.h> functions require a value representable as unsigned char. */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address macaddr into sockaddr.
 *
 * sockaddr->sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are
 * stored in sockaddr->sa_data. Each octet consists of one or two hexadecimal
 * digits; octets are separated by ':'.
 *
 * Parsing stops at the end of the string or after ETH_ALEN octets, whichever
 * comes first. A shorter address is therefore accepted and leaves the
 * remaining bytes of sa_data untouched, and anything following the last
 * octet is ignored.
 *
 * Returns 0 on success, -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	for (i = 0; *macaddr != '\0' && i < ETH_ALEN; i++) {
		unsigned int val;
		int hi, lo;

		/* The first digit of an octet is mandatory. */
		hi = mac_hex_value(*macaddr++);
		if (hi < 0)
			return -EINVAL;

		/* The second digit is optional: "a:b" means 0a:0b. */
		lo = mac_hex_value(*macaddr);
		if (lo >= 0)
			val = ((unsigned int)hi << 4) | (unsigned int)lo;
		else if (*macaddr == ':' || *macaddr == '\0')
			val = (unsigned int)hi;
		else
			return -EINVAL;

		/* Step over the second digit or the separator just examined. */
		if (*macaddr != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);

		/* Step over the separator following a two-digit octet. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2066
ebc73a67
CB
2067static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2068 void *acast, int prefix)
0ad19a3f 2069{
ebc73a67 2070 int addrlen, err;
06f976ca 2071 struct ifaddrmsg *ifa;
ebc73a67
CB
2072 struct nl_handler nlh;
2073 struct nlmsg *answer = NULL, *nlmsg = NULL;
0ad19a3f 2074
ebc73a67
CB
2075 addrlen = family == AF_INET ? sizeof(struct in_addr)
2076 : sizeof(struct in6_addr);
4bf1968d 2077
3cfc0f3a
MN
2078 err = netlink_open(&nlh, NETLINK_ROUTE);
2079 if (err)
2080 return err;
0ad19a3f 2081
3cfc0f3a 2082 err = -ENOMEM;
0ad19a3f 2083 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2084 if (!nlmsg)
2085 goto out;
2086
06f976ca 2087 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2088 if (!answer)
2089 goto out;
2090
06f976ca 2091 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2092 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2093 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2094
2095 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2096 if (!ifa)
25a9939b 2097 goto out;
06f976ca
SZ
2098 ifa->ifa_prefixlen = prefix;
2099 ifa->ifa_index = ifindex;
2100 ifa->ifa_family = family;
2101 ifa->ifa_scope = 0;
acf47e1b 2102
3cfc0f3a 2103 err = -EINVAL;
4bf1968d 2104 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
0ad19a3f 2105 goto out;
2106
4bf1968d 2107 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
0ad19a3f 2108 goto out;
2109
d8948a52 2110 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1f1b18e7
DL
2111 goto out;
2112
ebc73a67 2113 /* TODO: multicast, anycast with ipv6 */
7ddc8f24 2114 err = -EPROTONOSUPPORT;
79881dc6
DL
2115 if (family == AF_INET6 &&
2116 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2117 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1f1b18e7 2118 goto out;
0ad19a3f 2119
3cfc0f3a 2120 err = netlink_transaction(&nlh, nlmsg, answer);
0ad19a3f 2121out:
2122 netlink_close(&nlh);
2123 nlmsg_free(answer);
2124 nlmsg_free(nlmsg);
2125 return err;
2126}
2127
/* IPv6 front end for ip_addr_add(); @mcast is passed in the broadcast slot. */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	void *bcast_slot = mcast;

	return ip_addr_add(AF_INET6, ifindex, addr, bcast_slot, acast, prefix);
}
2134
ebc73a67
CB
/* IPv4 front end for ip_addr_add(); no anycast address is supplied. */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	void *no_anycast = NULL;

	return ip_addr_add(AF_INET, ifindex, addr, bcast, no_anycast, prefix);
}
2140
ebc73a67
CB
/* Extract an address from the RTM_NEWADDR message @msg.
 *
 * An IFA_LOCAL attribute is preferred; an IFA_ADDRESS attribute is used when
 * no IFA_LOCAL is present. On the first match the address buffer is
 * malloc()ed and stored in *res (so res should be an in_addr** or
 * in6_addr**); a buffer already present in *res is reused and overwritten.
 *
 * Returns 0 on success, including when the message belongs to another
 * address family or carries no address (then *res is left untouched), and -1
 * on a malformed attribute or allocation failure.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *attr;
	int addrlen;

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk all rtattr's carried by this message. */
	for (attr = IFA_RTA(ifa); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		int is_local = (attr->rta_type == IFA_LOCAL);

		if (!is_local && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * length; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* An earlier IFA_ADDRESS may already have allocated the
		 * buffer; a later IFA_LOCAL simply overwrites it.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is final, stop looking. */
		if (is_local)
			break;
	}

	return 0;
}
2192
6ce39620
CB
2193#pragma GCC diagnostic pop
2194
19a26f82
MK
2195static int ip_addr_get(int family, int ifindex, void **res)
2196{
ebc73a67 2197 int answer_len, err;
06f976ca 2198 struct ifaddrmsg *ifa;
ebc73a67 2199 struct nl_handler nlh;
19a26f82 2200 struct nlmsghdr *msg;
ebc73a67
CB
2201 int readmore = 0, recv_len = 0;
2202 struct nlmsg *answer = NULL, *nlmsg = NULL;
19a26f82
MK
2203
2204 err = netlink_open(&nlh, NETLINK_ROUTE);
2205 if (err)
2206 return err;
2207
2208 err = -ENOMEM;
2209 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2210 if (!nlmsg)
2211 goto out;
2212
06f976ca 2213 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82
MK
2214 if (!answer)
2215 goto out;
2216
ebc73a67
CB
2217 /* Save the answer buffer length, since it will be overwritten on the
2218 * first receive (and we might need to receive more than once).
2219 */
06f976ca
SZ
2220 answer_len = answer->nlmsghdr->nlmsg_len;
2221
ebc73a67 2222 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2223 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2224
06f976ca 2225 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b
WC
2226 if (!ifa)
2227 goto out;
06f976ca 2228 ifa->ifa_family = family;
19a26f82 2229
ebc73a67
CB
2230 /* Send the request for addresses, which returns all addresses on all
2231 * interfaces.
2232 */
19a26f82
MK
2233 err = netlink_send(&nlh, nlmsg);
2234 if (err < 0)
2235 goto out;
19a26f82 2236
6ce39620
CB
2237#pragma GCC diagnostic push
2238#pragma GCC diagnostic ignored "-Wcast-align"
2239
19a26f82
MK
2240 do {
2241 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2242 * overwritten by a previous receive.
2243 */
06f976ca 2244 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2245
ebc73a67 2246 /* Get the (next) batch of reply messages. */
19a26f82
MK
2247 err = netlink_rcv(&nlh, answer);
2248 if (err < 0)
2249 goto out;
2250
2251 recv_len = err;
2252 err = 0;
2253
ebc73a67 2254 /* Satisfy the typing for the netlink macros. */
06f976ca 2255 msg = answer->nlmsghdr;
19a26f82
MK
2256
2257 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2258 /* Stop reading if we see an error message. */
19a26f82 2259 if (msg->nlmsg_type == NLMSG_ERROR) {
ebc73a67
CB
2260 struct nlmsgerr *errmsg =
2261 (struct nlmsgerr *)NLMSG_DATA(msg);
19a26f82
MK
2262 err = errmsg->error;
2263 goto out;
2264 }
2265
ebc73a67 2266 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2267 if (msg->nlmsg_type == NLMSG_DONE) {
2268 readmore = 0;
2269 break;
2270 }
2271
2272 if (msg->nlmsg_type != RTM_NEWADDR) {
2273 err = -1;
2274 goto out;
2275 }
2276
06f976ca
SZ
2277 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2278 if (ifa->ifa_index == ifindex) {
2279 if (ifa_get_local_ip(family, msg, res) < 0) {
51e7a874
SG
2280 err = -1;
2281 goto out;
2282 }
2283
ebc73a67 2284 /* Found a result, stop searching. */
19a26f82
MK
2285 if (*res)
2286 goto out;
2287 }
2288
ebc73a67
CB
2289 /* Keep reading more data from the socket if the last
2290 * message had the NLF_F_MULTI flag set.
2291 */
19a26f82
MK
2292 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2293
ebc73a67 2294 /* Look at the next message received in this buffer. */
19a26f82
MK
2295 msg = NLMSG_NEXT(msg, recv_len);
2296 }
2297 } while (readmore);
2298
6ce39620
CB
2299#pragma GCC diagnostic pop
2300
19a26f82 2301 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2302 * error.
2303 */
19a26f82
MK
2304 err = -1;
2305
2306out:
2307 netlink_close(&nlh);
2308 nlmsg_free(answer);
2309 nlmsg_free(nlmsg);
2310 return err;
2311}
2312
/* IPv6 front end for ip_addr_get(); the result is stored in *res. */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2317
/* IPv4 front end for ip_addr_get(); the result is stored in *res. */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2322
f8fee0e2
MK
2323static int ip_gateway_add(int family, int ifindex, void *gw)
2324{
ebc73a67 2325 int addrlen, err;
f8fee0e2 2326 struct nl_handler nlh;
06f976ca 2327 struct rtmsg *rt;
ebc73a67 2328 struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2329
ebc73a67
CB
2330 addrlen = family == AF_INET ? sizeof(struct in_addr)
2331 : sizeof(struct in6_addr);
f8fee0e2
MK
2332
2333 err = netlink_open(&nlh, NETLINK_ROUTE);
2334 if (err)
2335 return err;
2336
2337 err = -ENOMEM;
2338 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2339 if (!nlmsg)
2340 goto out;
2341
06f976ca 2342 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2
MK
2343 if (!answer)
2344 goto out;
2345
06f976ca 2346 nlmsg->nlmsghdr->nlmsg_flags =
ebc73a67 2347 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2348 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2349
2350 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b
WC
2351 if (!rt)
2352 goto out;
06f976ca
SZ
2353 rt->rtm_family = family;
2354 rt->rtm_table = RT_TABLE_MAIN;
2355 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2356 rt->rtm_protocol = RTPROT_BOOT;
2357 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2358 /* "default" destination */
06f976ca 2359 rt->rtm_dst_len = 0;
f8fee0e2
MK
2360
2361 err = -EINVAL;
a2f9a670 2362
2363 /* If gateway address not supplied, then a device route will be created instead */
2364 if (gw != NULL) {
2365 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2366 goto out;
2367 }
f8fee0e2
MK
2368
2369 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2370 * addresses for the gateway.
2371 */
f8fee0e2
MK
2372 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2373 goto out;
2374
2375 err = netlink_transaction(&nlh, nlmsg, answer);
2376out:
2377 netlink_close(&nlh);
2378 nlmsg_free(answer);
2379 nlmsg_free(nlmsg);
2380 return err;
2381}
2382
/* IPv4 front end for ip_gateway_add(). */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET, ifindex, gateway);
}
2387
/* IPv6 front end for ip_gateway_add(). */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET6, ifindex, gateway);
}

/* Heuristic test for an openvswitch bridge.
 *
 * Returns true only when stat() on /sys/class/net/<bridge>/bridge fails with
 * ENOENT. Returns false when the path exists, when stat() fails for any other
 * reason, or when the path does not fit into the local buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	char path[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge", bridge);
	if (len < 0 || (size_t)len >= sizeof(path))
		return false;

	/* The path exists, so this is not treated as an ovs bridge. */
	if (stat(path, &st) >= 0)
		return false;

	return errno == ENOENT;
}
2409
581c75e7
CB
/* Argument bundle handed through run_command() to the ovs-vsctl exec
 * callbacks.
 */
struct ovs_veth_args {
	/* Bridge name, passed to ovs-vsctl as the bridge argument. */
	const char *bridge;
	/* Port/interface name, passed to ovs-vsctl as the port argument. */
	const char *nic;
};
2414
cb0dc11b
CB
2415/* Called from a background thread - when nic goes away, remove it from the
2416 * bridge.
c43cbc04 2417 */
581c75e7 2418static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2419{
581c75e7 2420 struct ovs_veth_args *args = data;
cb0dc11b 2421
9c66dc4f 2422 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2423 return -1;
c43cbc04
SH
2424}
2425
581c75e7 2426int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2427{
c43cbc04 2428 int ret;
419590da 2429 char cmd_output[PATH_MAX];
581c75e7 2430 struct ovs_veth_args args;
6ad22d06 2431
581c75e7
CB
2432 args.bridge = bridge;
2433 args.nic = nic;
2434 ret = run_command(cmd_output, sizeof(cmd_output),
2435 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2436 if (ret < 0)
2437 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2438
581c75e7
CB
2439 return 0;
2440}
ebc73a67 2441
581c75e7
CB
2442static int lxc_ovs_attach_bridge_exec(void *data)
2443{
2444 struct ovs_veth_args *args = data;
ebc73a67 2445
9c66dc4f 2446 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2447 return -1;
2448}
ebc73a67 2449
581c75e7
CB
2450static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2451{
2452 int ret;
419590da 2453 char cmd_output[PATH_MAX];
581c75e7 2454 struct ovs_veth_args args;
ebc73a67 2455
581c75e7
CB
2456 args.bridge = bridge;
2457 args.nic = nic;
2458 ret = run_command(cmd_output, sizeof(cmd_output),
2459 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2460 if (ret < 0)
2461 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2462
581c75e7 2463 return 0;
0d204771 2464}
0d204771 2465
/* Attach interface @ifname to bridge @bridge.
 *
 * If is_ovs_bridge() reports an openvswitch bridge the work is delegated to
 * lxc_ovs_attach_bridge(); otherwise the SIOCBRADDIF ioctl is used.
 *
 * Returns 0 on success. Returns -EINVAL if @ifname is too long or unknown,
 * -E2BIG if @bridge does not fit into an interface name, and otherwise the
 * negated errno of the failing socket()/ioctl() call (or the result of
 * lxc_ovs_attach_bridge()).
 */
int lxc_bridge_attach(const char *bridge, const char *ifname)
{
	int err, fd, index;
	size_t retlen;
	struct ifreq ifr;

	if (strlen(ifname) >= IFNAMSIZ)
		return -EINVAL;

	index = if_nametoindex(ifname);
	if (!index)
		return -EINVAL;

	if (is_ovs_bridge(bridge))
		return lxc_ovs_attach_bridge(bridge, ifname);

	fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
	if (fd < 0)
		return -errno;

	retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
	if (retlen >= IFNAMSIZ) {
		close(fd);
		return -E2BIG;
	}

	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
	ifr.ifr_ifindex = index;
	err = ioctl(fd, SIOCBRADDIF, &ifr);
	/* Capture errno before close(), which may overwrite it. */
	if (err)
		err = -errno;
	close(fd);

	return err;
}
72d0e1cb 2501
ebc73a67 2502static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2503 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2504 [LXC_NET_VETH] = "veth",
2505 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2506 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2507 [LXC_NET_PHYS] = "phys",
b343592b
BP
2508 [LXC_NET_VLAN] = "vlan",
2509 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2510};
2511
2512const char *lxc_net_type_to_str(int type)
2513{
2514 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2515 return NULL;
ebc73a67 2516
72d0e1cb
SG
2517 return lxc_network_types[type];
2518}
8befa924 2519
ebc73a67 2520static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2521
966e9f1f 2522char *lxc_mkifname(char *template)
a0265685 2523{
2d7bf744 2524 int ret;
b1e44ed1 2525 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2526 char name[IFNAMSIZ];
2527 bool exists = false;
2528 size_t i = 0;
280cc35f 2529#ifdef HAVE_RAND_R
2530 unsigned int seed;
2531
2532 seed = randseed(false);
2533#else
2534
2535 (void)randseed(true);
2536#endif
a0265685 2537
535e8859
CB
2538 if (strlen(template) >= IFNAMSIZ)
2539 return NULL;
2540
ebc73a67 2541 /* Get all the network interfaces. */
b1e44ed1 2542 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
9c66dc4f
CB
2543 if (ret < 0)
2544 return log_error_errno(NULL, errno, "Failed to get network interfaces");
a0265685 2545
ebc73a67 2546 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2547 for (;;) {
966e9f1f 2548 name[0] = '\0';
94b1cade 2549 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2550
966e9f1f 2551 exists = false;
280cc35f 2552
a0265685
SG
2553 for (i = 0; i < strlen(name); i++) {
2554 if (name[i] == 'X') {
2555#ifdef HAVE_RAND_R
8523344a 2556 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2557#else
8523344a 2558 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2559#endif
2560 }
2561 }
2562
2563 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2564 if (!strcmp(ifa->ifa_name, name)) {
2565 exists = true;
a0265685
SG
2566 break;
2567 }
2568 }
2569
966e9f1f 2570 if (!exists)
a0265685 2571 break;
a0265685
SG
2572 }
2573
b1e44ed1 2574 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2575 (void)strlcpy(template, name, strlen(template) + 1);
2576
2577 return template;
a0265685
SG
2578}
2579
8befa924
SH
/* Force the first byte of the hardware address of interface @veth1 to 0xfe.
 *
 * The current address is read with SIOCGIFHWADDR, its first byte is
 * overwritten, and the result is written back with SIOCSIFHWADDR.
 *
 * Returns 0 on success, -E2BIG if @veth1 does not fit into an interface
 * name, and otherwise the negated errno of the failing socket()/ioctl() call.
 */
int setup_private_host_hw_addr(char *veth1)
{
	int err, sockfd;
	struct ifreq ifr;

	sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sockfd < 0)
		return -errno;

	err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (err < 0 || (size_t)err >= IFNAMSIZ) {
		close(sockfd);
		return -E2BIG;
	}

	err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (err < 0) {
		/* Capture errno before close(), which may overwrite it. */
		err = -errno;
		close(sockfd);
		return err;
	}

	ifr.ifr_hwaddr.sa_data[0] = (char)0xfe;
	err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	/* Same as above: read errno before the descriptor is closed. */
	err = (err < 0) ? -errno : 0;
	close(sockfd);

	return err;
}
811ef482
CB
2609
2610int lxc_find_gateway_addresses(struct lxc_handler *handler)
2611{
2612 struct lxc_list *network = &handler->conf->network;
2613 struct lxc_list *iterator;
2614 struct lxc_netdev *netdev;
2615 int link_index;
2616
2617 lxc_list_for_each(iterator, network) {
2618 netdev = iterator->elem;
2619
2620 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2621 continue;
2622
9c66dc4f
CB
2623 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2624 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2625
f2711167 2626 if (is_empty_string(netdev->link)) {
9c66dc4f 2627 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2628 }
2629
2630 link_index = if_nametoindex(netdev->link);
2631 if (!link_index)
2632 return -EINVAL;
2633
2634 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2635 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2636 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2637 }
2638
2639 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2640 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2641 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2642 }
2643 }
2644
2645 return 0;
2646}
2647
2648#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2649static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2650 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2651{
2652 int ret;
2653 pid_t child;
2654 int bytes, pipefd[2];
2655 char *token, *saveptr = NULL;
095ead80 2656 char netdev_link[IFNAMSIZ];
419590da 2657 char buffer[PATH_MAX] = {0};
94b1cade 2658 size_t retlen;
811ef482 2659
9c66dc4f
CB
2660 if (netdev->type != LXC_NET_VETH)
2661 return log_error_errno(-1, errno, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
2662
2663 ret = pipe(pipefd);
9c66dc4f
CB
2664 if (ret < 0)
2665 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2666
2667 child = fork();
2668 if (child < 0) {
811ef482
CB
2669 close(pipefd[0]);
2670 close(pipefd[1]);
9c66dc4f 2671 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2672 }
2673
2674 if (child == 0) {
8335fd40 2675 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2676
2677 close(pipefd[0]);
2678
2679 ret = dup2(pipefd[1], STDOUT_FILENO);
2680 if (ret >= 0)
2681 ret = dup2(pipefd[1], STDERR_FILENO);
2682 close(pipefd[1]);
2683 if (ret < 0) {
2684 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2685 _exit(EXIT_FAILURE);
811ef482
CB
2686 }
2687
f2711167 2688 if (!is_empty_string(netdev->link))
9de31d5a 2689 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2690 else
9de31d5a
CB
2691 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2692 if (retlen >= IFNAMSIZ) {
2693 SYSERROR("Invalid network device name");
2694 _exit(EXIT_FAILURE);
2695 }
811ef482 2696
8335fd40
CB
2697 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2698 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2699 _exit(EXIT_FAILURE);
8335fd40 2700 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2701
2702 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2703 lxcname, pidstr, netdev_link,
3473ca76
CB
2704 !is_empty_string(netdev->name) ? netdev->name : "(null)");
2705 if (!is_empty_string(netdev->name))
811ef482
CB
2706 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2707 lxcpath, lxcname, pidstr, "veth", netdev_link,
2708 netdev->name, (char *)NULL);
2709 else
2710 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2711 lxcpath, lxcname, pidstr, "veth", netdev_link,
2712 (char *)NULL);
2713 SYSERROR("Failed to execute lxc-user-nic");
78070056 2714 _exit(EXIT_FAILURE);
811ef482
CB
2715 }
2716
2717 /* close the write-end of the pipe */
2718 close(pipefd[1]);
2719
9c66dc4f 2720 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2721 if (bytes < 0) {
74c6e2b0 2722 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2723 close(pipefd[0]);
6b9f82a9
CB
2724 } else {
2725 buffer[bytes - 1] = '\0';
811ef482 2726 }
811ef482
CB
2727
2728 ret = wait_for_pid(child);
2729 close(pipefd[0]);
9c66dc4f
CB
2730 if (ret != 0 || bytes < 0)
2731 return log_error(-1, "lxc-user-nic failed to configure requested network: %s", buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2732 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2733
2734 /* netdev->name */
2735 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2736 if (!token)
2737 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2738
e389f2af
CB
2739 /*
2740 * lxc-user-nic will take care of proper network device naming. So
2741 * netdev->name and netdev->created_name need to be identical to not
2742 * trigger another rename later on.
2743 */
2744 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2745 if (retlen < IFNAMSIZ)
2746 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
9c66dc4f
CB
2747 if (retlen >= IFNAMSIZ)
2748 return log_error_errno(-1, E2BIG, "Container side veth device name returned by lxc-user-nic is too long");
811ef482 2749
74c6e2b0 2750 /* netdev->ifindex */
811ef482 2751 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2752 if (!token)
2753 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2754
74c6e2b0 2755 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f
CB
2756 if (ret < 0)
2757 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2758
74c6e2b0 2759 /* netdev->priv.veth_attr.veth1 */
811ef482 2760 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2761 if (!token)
2762 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2763
94b1cade 2764 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f
CB
2765 if (retlen >= IFNAMSIZ)
2766 return log_error_errno(-1, E2BIG, "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
2767
2768 /* netdev->priv.veth_attr.ifindex */
2769 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2770 if (!token)
2771 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
2772
2773 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f
CB
2774 if (ret < 0)
2775 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2776
4d781681 2777 if (netdev->upscript) {
2778 char *argv[] = {
2779 "veth",
2780 netdev->link,
2781 netdev->priv.veth_attr.veth1,
2782 NULL,
2783 };
2784
e389f2af
CB
2785 ret = run_script_argv(lxcname, hooks_version, "net",
2786 netdev->upscript, "up", argv);
4d781681 2787 if (ret < 0)
2788 return -1;
2789 }
2790
811ef482
CB
2791 return 0;
2792}
2793
f0ecc19d 2794static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2795 struct lxc_netdev *netdev,
2796 const char *netns_path)
811ef482
CB
2797{
2798 int bytes, ret;
2799 pid_t child;
2800 int pipefd[2];
419590da 2801 char buffer[PATH_MAX] = {0};
811ef482 2802
9c66dc4f
CB
2803 if (netdev->type != LXC_NET_VETH)
2804 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
2805
2806 ret = pipe(pipefd);
9c66dc4f
CB
2807 if (ret < 0)
2808 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2809
2810 child = fork();
2811 if (child < 0) {
811ef482
CB
2812 close(pipefd[0]);
2813 close(pipefd[1]);
9c66dc4f 2814 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2815 }
2816
2817 if (child == 0) {
8843fde4 2818 char *hostveth;
811ef482
CB
2819
2820 close(pipefd[0]);
2821
2822 ret = dup2(pipefd[1], STDOUT_FILENO);
2823 if (ret >= 0)
2824 ret = dup2(pipefd[1], STDERR_FILENO);
2825 close(pipefd[1]);
2826 if (ret < 0) {
2827 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2828 _exit(EXIT_FAILURE);
811ef482
CB
2829 }
2830
f2711167 2831 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
2832 hostveth = netdev->priv.veth_attr.pair;
2833 else
2834 hostveth = netdev->priv.veth_attr.veth1;
f2711167 2835 if (is_empty_string(hostveth)) {
74c6e2b0 2836 SYSERROR("Host side veth device name is missing");
a30b9023 2837 _exit(EXIT_FAILURE);
74c6e2b0
CB
2838 }
2839
f2711167
CB
2840 if (is_empty_string(netdev->link)) {
2841 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 2842 _exit(EXIT_FAILURE);
74c6e2b0 2843 }
811ef482 2844
811ef482 2845 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2846 lxcname, netns_path, netdev->link, hostveth);
811ef482 2847 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2848 lxcname, netns_path, "veth", netdev->link, hostveth,
2849 (char *)NULL);
811ef482 2850 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2851 _exit(EXIT_FAILURE);
811ef482
CB
2852 }
2853
2854 close(pipefd[1]);
2855
9c66dc4f 2856 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
2857 if (bytes < 0) {
2858 SYSERROR("Failed to read from pipe file descriptor.");
2859 close(pipefd[0]);
6b9f82a9
CB
2860 } else {
2861 buffer[bytes - 1] = '\0';
811ef482 2862 }
811ef482 2863
6b9f82a9 2864 ret = wait_for_pid(child);
9c66dc4f
CB
2865 close_prot_errno_disarm(pipefd[0]);
2866 if (ret != 0 || bytes < 0)
2867 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
2868 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 2869
811ef482
CB
2870 return 0;
2871}
2872
1bd8d726
CB
2873bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2874{
2875 int ret;
2876 struct lxc_list *iterator;
2877 struct lxc_list *network = &handler->conf->network;
2878 /* strlen("/proc/") = 6
2879 * +
8335fd40 2880 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2881 * +
2882 * strlen("/fd/") = 4
2883 * +
8335fd40 2884 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2885 * +
2886 * \0
2887 */
8335fd40 2888 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2889
2890 *netns_path = '\0';
2891
9c66dc4f
CB
2892 if (handler->nsfd[LXC_NS_NET] < 0)
2893 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726
CB
2894
2895 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 2896 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
2897 if (ret < 0 || ret >= sizeof(netns_path))
2898 return false;
2899
2900 lxc_list_for_each(iterator, network) {
2901 char *hostveth = NULL;
2902 struct lxc_netdev *netdev = iterator->elem;
2903
2904 /* We can only delete devices whose ifindex we have. If we don't
2905 * have the index it means that we didn't create it.
2906 */
2907 if (!netdev->ifindex)
2908 continue;
2909
2910 if (netdev->type == LXC_NET_PHYS) {
2911 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2912 netdev->link);
2913 if (ret < 0)
9c66dc4f 2914 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
2915 netdev->ifindex, netdev->link);
2916 else
9c66dc4f 2917 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 2918 netdev->ifindex, netdev->link);
b3259dc6
TP
2919
2920 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 2921 goto clear_ifindices;
1bd8d726
CB
2922 }
2923
2924 ret = netdev_deconf[netdev->type](handler, netdev);
2925 if (ret < 0)
2926 WARN("Failed to deconfigure network device");
2927
2928 if (netdev->type != LXC_NET_VETH)
66a7c406 2929 goto clear_ifindices;
1bd8d726 2930
f2711167 2931 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 2932 goto clear_ifindices;
1bd8d726 2933
f2711167 2934 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
2935 hostveth = netdev->priv.veth_attr.pair;
2936 else
2937 hostveth = netdev->priv.veth_attr.veth1;
f2711167 2938 if (is_empty_string(hostveth))
66a7c406 2939 goto clear_ifindices;
8843fde4 2940
1bd8d726
CB
2941 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2942 handler->name, netdev,
2943 netns_path);
2944 if (ret < 0) {
9c66dc4f 2945 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 2946 goto clear_ifindices;
1bd8d726 2947 }
9c66dc4f 2948 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
2949
2950clear_ifindices:
0858c829
CB
2951 /*
2952 * We need to clear any ifindices we recorded so liblxc won't
2953 * have cached stale data which would cause it to fail on
2954 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
2955 */
2956 netdev->ifindex = 0;
2957 if (netdev->type == LXC_NET_PHYS) {
2958 netdev->priv.phys_attr.ifindex = 0;
2959 } else if (netdev->type == LXC_NET_VETH) {
2960 netdev->priv.veth_attr.veth1[0] = '\0';
2961 netdev->priv.veth_attr.ifindex = 0;
2962 }
1bd8d726
CB
2963 }
2964
bb84beda 2965 return true;
1bd8d726
CB
2966}
2967
6509154d 2968static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2969 struct lxc_list *cur, *next;
2970 struct lxc_inetdev *inet4dev;
2971 struct lxc_inet6dev *inet6dev;
2972 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 2973 int err = 0;
5fe147e9
TP
2974 unsigned int lo_ifindex = 0, link_ifindex = 0;
2975
2976 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
2977 if (link_ifindex == 0)
2978 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 2979
6509154d 2980
2981 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2982 if (!lxc_list_empty(&netdev->ipv4)) {
2983 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
2984 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
2985 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 2986 }
2987
2988 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2989 if (!lxc_list_empty(&netdev->ipv6)) {
2990 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
2991 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
2992 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 2993
2994 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
2995 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
2996 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 2997 }
2998
b670016a 2999 /* Perform IPVLAN specific checks. */
3000 if (netdev->type == LXC_NET_IPVLAN) {
3001 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
3002 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3003 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 3004
3005 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3006 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
3007 if (lo_ifindex == 0)
3008 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 3009 }
3010
6509154d 3011 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3012 inet4dev = cur->elem;
3013 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3014 return ret_set_errno(-1, -errno);
6509154d 3015
5fe147e9 3016 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3017 return ret_set_errno(-1, EINVAL);
b670016a 3018
3019 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3020 if (netdev->type == LXC_NET_IPVLAN) {
3021 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
3022 if (err < 0)
3023 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 3024 }
6509154d 3025 }
3026
3027 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3028 inet6dev = cur->elem;
3029 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3030 return ret_set_errno(-1, -errno);
6509154d 3031
5fe147e9 3032 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3033 return ret_set_errno(-1, EINVAL);
b670016a 3034
3035 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3036 if (netdev->type == LXC_NET_IPVLAN) {
3037 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
3038 if (err < 0)
3039 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3040 }
6509154d 3041 }
3042
3043 return 0;
3044}
3045
9c66dc4f
CB
3046static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
3047{
b670016a 3048 char bufinet4[INET_ADDRSTRLEN];
9c66dc4f
CB
3049 bool had_error = false;
3050 unsigned int link_ifindex = 0;
b670016a 3051
9c66dc4f
CB
3052 if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4)))
3053 return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
b670016a 3054
3055 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3056 if (lo_ifindex > 0) {
3057 if (lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
9c66dc4f 3058 had_error = true;
b670016a 3059 ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
3060 }
3061 }
3062
3063 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
f2711167 3064 if (!is_empty_string(link)) {
5fe147e9 3065 link_ifindex = if_nametoindex(link);
9c66dc4f
CB
3066 if (link_ifindex == 0)
3067 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
5fe147e9
TP
3068
3069 if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, link_ifindex, ip) < 0)
9c66dc4f 3070 had_error = true;
b670016a 3071 }
3072
9c66dc4f 3073 if (had_error)
596a002c 3074 return ret_set_errno(-1, EINVAL);
b670016a 3075
3076 return 0;
3077}
3078
9c66dc4f
CB
3079static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
3080{
b670016a 3081 char bufinet6[INET6_ADDRSTRLEN];
9c66dc4f
CB
3082 bool had_error = false;
3083 unsigned int link_ifindex = 0;
b670016a 3084
9c66dc4f
CB
3085 if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6)))
3086 return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
b670016a 3087
3088 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3089 if (lo_ifindex > 0) {
3090 if (lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
9c66dc4f 3091 had_error = true;
b670016a 3092 ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
3093 }
3094 }
3095
3096 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
f2711167 3097 if (!is_empty_string(link)) {
5fe147e9
TP
3098 link_ifindex = if_nametoindex(link);
3099 if (link_ifindex == 0) {
3100 ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
3101 return ret_set_errno(-1, EINVAL);
3102 }
3103
3104 if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, link_ifindex, ip) < 0)
9c66dc4f 3105 had_error = true;
b670016a 3106 }
3107
9c66dc4f 3108 if (had_error)
596a002c 3109 return ret_set_errno(-1, EINVAL);
b670016a 3110
3111 return 0;
3112}
3113
6509154d 3114static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3115 unsigned int lo_ifindex = 0;
3116 unsigned int errCount = 0;
6509154d 3117 struct lxc_list *cur, *next;
3118 struct lxc_inetdev *inet4dev;
3119 struct lxc_inet6dev *inet6dev;
6509154d 3120
b670016a 3121 /* Perform IPVLAN specific checks. */
3122 if (netdev->type == LXC_NET_IPVLAN) {
3123 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3124 lo_ifindex = if_nametoindex(loop_device);
b670016a 3125 if (lo_ifindex == 0) {
3126 errCount++;
3ebffb98 3127 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3128 }
b670016a 3129 }
6509154d 3130
b670016a 3131 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3132 inet4dev = cur->elem;
3133 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3134 errCount++;
6509154d 3135 }
3136
3137 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3138 inet6dev = cur->elem;
b670016a 3139 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3140 errCount++;
6509154d 3141 }
3142
b670016a 3143 if (errCount > 0)
596a002c 3144 return ret_set_errno(-1, EINVAL);
6509154d 3145
3146 return 0;
3147}
3148
e389f2af 3149static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3150{
811ef482
CB
3151 struct lxc_list *iterator;
3152 struct lxc_list *network = &handler->conf->network;
3153
811ef482
CB
3154 lxc_list_for_each(iterator, network) {
3155 struct lxc_netdev *netdev = iterator->elem;
3156
9c66dc4f
CB
3157 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3158 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3159
6509154d 3160 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3161 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3162 if (lxc_setup_l2proxy(netdev))
3163 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3164 }
3165
9c66dc4f
CB
3166 if (netdev_conf[netdev->type](handler, netdev))
3167 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3168 }
3169
3170 return 0;
3171}
3172
e389f2af 3173int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3174{
e389f2af
CB
3175 pid_t pid = handler->pid;
3176 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3177 struct lxc_list *iterator;
3178
e0010464 3179 if (am_guest_unpriv())
74c6e2b0 3180 return 0;
811ef482
CB
3181
3182 lxc_list_for_each(iterator, network) {
3dd78294 3183 __do_free char *physname = NULL;
e389f2af 3184 int ret;
811ef482
CB
3185 struct lxc_netdev *netdev = iterator->elem;
3186
811ef482
CB
3187 if (!netdev->ifindex)
3188 continue;
3189
3dd78294
CB
3190 if (netdev->type == LXC_NET_PHYS)
3191 physname = is_wlan(netdev->link);
3192
3193 if (physname)
3c9fdb32 3194 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->name);
3dd78294 3195 else
8bf64b77 3196 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->name);
9c66dc4f
CB
3197 if (ret)
3198 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3199 netdev->created_name,
3200 netdev->ifindex, pid);
811ef482 3201
24190194
CB
3202 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3203 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3204 }
3205
3206 return 0;
3207}
3208
3c09b97c
CB
3209static int network_requires_advanced_setup(int type)
3210{
3211 if (type == LXC_NET_EMPTY)
3212 return false;
3213
3214 if (type == LXC_NET_NONE)
3215 return false;
3216
3217 return true;
3218}
3219
e389f2af 3220static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3221{
e389f2af
CB
3222 int hooks_version = handler->conf->hooks_version;
3223 const char *lxcname = handler->name;
3224 const char *lxcpath = handler->lxcpath;
3225 struct lxc_list *network = &handler->conf->network;
3226 pid_t pid = handler->pid;
74c6e2b0
CB
3227 struct lxc_list *iterator;
3228
74c6e2b0
CB
3229 lxc_list_for_each(iterator, network) {
3230 struct lxc_netdev *netdev = iterator->elem;
3231
3c09b97c 3232 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3233 continue;
3234
9c66dc4f
CB
3235 if (netdev->type != LXC_NET_VETH)
3236 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3237 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3238
3239 if (netdev->mtu)
3240 INFO("mtu ignored due to insufficient privilege");
3241
e389f2af
CB
3242 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3243 pid, hooks_version))
74c6e2b0
CB
3244 return -1;
3245 }
3246
3247 return 0;
3248}
3249
1bd8d726 3250bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3251{
3252 int ret;
3253 struct lxc_list *iterator;
3254 struct lxc_list *network = &handler->conf->network;
1bd8d726 3255
811ef482
CB
3256 lxc_list_for_each(iterator, network) {
3257 char *hostveth = NULL;
3258 struct lxc_netdev *netdev = iterator->elem;
3259
3260 /* We can only delete devices whose ifindex we have. If we don't
3261 * have the index it means that we didn't create it.
3262 */
3263 if (!netdev->ifindex)
3264 continue;
3265
0104c121
CB
3266 /*
3267 * If the network device has been moved back from the
3268 * containers network namespace, update the ifindex.
3269 */
3270 netdev->ifindex = if_nametoindex(netdev->name);
3271
6509154d 3272 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3273 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3274 if (lxc_delete_l2proxy(netdev))
3275 WARN("Failed to delete all l2proxy config");
3276 /* Don't return, let the network be cleaned up as normal. */
3277 }
3278
811ef482
CB
3279 if (netdev->type == LXC_NET_PHYS) {
3280 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3281 if (ret < 0)
3282 WARN("Failed to rename interface with index %d "
b809f232
CB
3283 "from \"%s\" to its initial name \"%s\"",
3284 netdev->ifindex, netdev->name, netdev->link);
0b154989 3285 else {
29589196
CB
3286 TRACE("Renamed interface with index %d from "
3287 "\"%s\" to its initial name \"%s\"",
3288 netdev->ifindex, netdev->name,
3289 netdev->link);
0b154989
TP
3290
3291 /* Restore original MTU */
3292 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3293 if (ret < 0) {
3294 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3295 netdev->link, netdev->priv.phys_attr.mtu);
3296 } else {
3297 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3298 netdev->link, netdev->priv.phys_attr.mtu);
3299 }
3300 }
b3259dc6
TP
3301
3302 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3303 goto clear_ifindices;
811ef482
CB
3304 }
3305
3306 ret = netdev_deconf[netdev->type](handler, netdev);
3307 if (ret < 0)
3308 WARN("Failed to deconfigure network device");
3309
811ef482 3310 if (netdev->type != LXC_NET_VETH)
66a7c406 3311 goto clear_ifindices;
811ef482 3312
811ef482
CB
3313 /* Explicitly delete host veth device to prevent lingering
3314 * devices. We had issues in LXD around this.
3315 */
f2711167 3316 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3317 hostveth = netdev->priv.veth_attr.pair;
3318 else
3319 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3320 if (is_empty_string(hostveth))
66a7c406 3321 goto clear_ifindices;
811ef482 3322
1ee56cff
CB
3323 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3324 ret = lxc_netdev_delete_by_name(hostveth);
3325 if (ret < 0)
3326 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3327
1ee56cff
CB
3328 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3329 } else if (!is_empty_string(netdev->link)) {
3330 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3331 if (ret < 0)
3332 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3333
1ee56cff
CB
3334 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3335 }
811ef482 3336
66a7c406 3337clear_ifindices:
ad2ddfcd 3338 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3339 * have cached stale data which would cause it to fail on reboot
3340 * we're we don't re-read the on-disk config file.
3341 */
3342 netdev->ifindex = 0;
3343 if (netdev->type == LXC_NET_PHYS) {
3344 netdev->priv.phys_attr.ifindex = 0;
3345 } else if (netdev->type == LXC_NET_VETH) {
3346 netdev->priv.veth_attr.veth1[0] = '\0';
3347 netdev->priv.veth_attr.ifindex = 0;
3348 }
811ef482
CB
3349 }
3350
bb84beda 3351 return true;
811ef482
CB
3352}
3353
3354int lxc_requests_empty_network(struct lxc_handler *handler)
3355{
3356 struct lxc_list *network = &handler->conf->network;
3357 struct lxc_list *iterator;
3358 bool found_none = false, found_nic = false;
3359
3360 if (lxc_list_empty(network))
3361 return 0;
3362
9c66dc4f 3363 lxc_list_for_each (iterator, network) {
811ef482
CB
3364 struct lxc_netdev *netdev = iterator->elem;
3365
3366 if (netdev->type == LXC_NET_NONE)
3367 found_none = true;
3368 else
3369 found_nic = true;
3370 }
9c66dc4f 3371
811ef482
CB
3372 if (found_none && !found_nic)
3373 return 1;
9c66dc4f 3374
811ef482
CB
3375 return 0;
3376}
3377
3378/* try to move physical nics to the init netns */
b809f232 3379int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3380{
9c66dc4f
CB
3381 __do_close int oldfd = -EBADF;
3382 int netnsfd = handler->nsfd[LXC_NS_NET];
3383 struct lxc_conf *conf = handler->conf;
811ef482 3384 int ret;
811ef482 3385 char ifname[IFNAMSIZ];
b809f232 3386 struct lxc_list *iterator;
811ef482 3387
b809f232
CB
3388 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3389 * the parent network namespace. We won't have this capability if we are
3390 * unprivileged.
3391 */
d0fbc7ba 3392 if (!handler->am_root)
b809f232 3393 return 0;
811ef482 3394
b809f232 3395 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3396
0037ab49 3397 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3398 if (oldfd < 0)
3399 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3400
b809f232 3401 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3402 if (ret < 0)
3403 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3404
b809f232
CB
3405 lxc_list_for_each(iterator, &conf->network) {
3406 struct lxc_netdev *netdev = iterator->elem;
811ef482 3407
b809f232
CB
3408 if (netdev->type != LXC_NET_PHYS)
3409 continue;
3410
3411 /* Retrieve the name of the interface in the container's network
3412 * namespace.
3413 */
3414 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3415 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3416 continue;
3417 }
b809f232 3418
0037ab49 3419 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3420 if (ret < 0)
9c66dc4f 3421 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3422 else
9c66dc4f 3423 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3424 }
811ef482 3425
b809f232 3426 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3427 if (ret < 0)
3428 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3429
3430 return 0;
811ef482
CB
3431}
3432
3433static int setup_hw_addr(char *hwaddr, const char *ifname)
3434{
9c66dc4f 3435 __do_close int fd = -EBADF;
811ef482
CB
3436 struct sockaddr sockaddr;
3437 struct ifreq ifr;
9c66dc4f 3438 int ret;
811ef482
CB
3439
3440 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3441 if (ret)
3442 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3443
3444 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3445 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3446 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3447
ad9429e5 3448 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3449 if (fd < 0)
3450 return -1;
3451
3452 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3453 if (ret)
6d1400b5 3454 SYSERROR("Failed to perform ioctl");
3455
9c66dc4f 3456 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3457
3458 return ret;
3459}
3460
3461static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3462{
3463 struct lxc_list *iterator;
3464 int err;
3465
3466 lxc_list_for_each(iterator, ip) {
3467 struct lxc_inetdev *inetdev = iterator->elem;
3468
3469 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3470 &inetdev->bcast, inetdev->prefix);
9c66dc4f
CB
3471 if (err)
3472 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3473 }
3474
3475 return 0;
3476}
3477
3478static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3479{
3480 struct lxc_list *iterator;
3481 int err;
3482
3483 lxc_list_for_each(iterator, ip) {
3484 struct lxc_inet6dev *inet6dev = iterator->elem;
3485
3486 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3487 &inet6dev->mcast, &inet6dev->acast,
3488 inet6dev->prefix);
9c66dc4f
CB
3489 if (err)
3490 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3491 }
3492
3493 return 0;
3494}
3495
8bf64b77 3496static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3497{
811ef482 3498 int err;
009d6127 3499 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3500
3501 /* empty network namespace */
8bf64b77
CB
3502 if (!netdev->ifindex && netdev->flags & IFF_UP) {
3503 err = lxc_netdev_up("lo");
9c66dc4f
CB
3504 if (err)
3505 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3506 }
3507
811ef482 3508 /* set a mac address */
9c66dc4f
CB
3509 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3510 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3511
3512 /* setup ipv4 addresses on the interface */
9c66dc4f
CB
3513 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex))
3514 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3515
3516 /* setup ipv6 addresses on the interface */
9c66dc4f
CB
3517 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex))
3518 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3519
3520 /* set the network device up */
3521 if (netdev->flags & IFF_UP) {
8bf64b77 3522 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3523 if (err)
3524 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3525
3526 /* the network is up, make the loopback up too */
3527 err = lxc_netdev_up("lo");
9c66dc4f
CB
3528 if (err)
3529 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3530 }
3531
811ef482 3532 /* setup ipv4 gateway on the interface */
a2f9a670 3533 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3534 if (!(netdev->flags & IFF_UP))
3535 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3536
9c66dc4f
CB
3537 if (lxc_list_empty(&netdev->ipv4))
3538 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3539
a2f9a670 3540 /* Setup device route if ipv4_gateway_dev is enabled */
3541 if (netdev->ipv4_gateway_dev) {
3542 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3543 if (err < 0)
3544 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3545 } else {
009d6127 3546 /* Check the gateway address is valid */
3547 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3548 return ret_set_errno(-1, errno);
009d6127 3549
3550 /* Try adding a default route to the gateway address */
811ef482 3551 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3552 if (err < 0) {
3553 /* If adding the default route fails, this could be because the
3554 * gateway address is in a different subnet to the container's address.
3555 * To work around this, we try adding a static device route to the
3556 * gateway address first, and then try again.
3557 */
a2f9a670 3558 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3559 if (err < 0)
3560 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3561
a2f9a670 3562 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3563 if (err < 0)
3564 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3565 }
3566 }
3567 }
3568
3569 /* setup ipv6 gateway on the interface */
a2f9a670 3570 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3571 if (!(netdev->flags & IFF_UP))
3572 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3573
9c66dc4f
CB
3574 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
3575 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3576
a2f9a670 3577 /* Setup device route if ipv6_gateway_dev is enabled */
3578 if (netdev->ipv6_gateway_dev) {
3579 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3580 if (err < 0)
3581 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3582 } else {
009d6127 3583 /* Check the gateway address is valid */
3584 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3585 return ret_set_errno(-1, errno);
009d6127 3586
3587 /* Try adding a default route to the gateway address */
811ef482 3588 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3589 if (err < 0) {
3590 /* If adding the default route fails, this could be because the
3591 * gateway address is in a different subnet to the container's address.
3592 * To work around this, we try adding a static device route to the
3593 * gateway address first, and then try again.
3594 */
a2f9a670 3595 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3596 if (err < 0)
3597 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3598
a2f9a670 3599 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3600 if (err < 0)
3601 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3602 }
3603 }
3604 }
3605
8bf64b77 3606 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3607
3608 return 0;
3609}
3610
3611int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3612 struct lxc_list *network)
3613{
3614 struct lxc_list *iterator;
811ef482 3615
8bf64b77 3616 lxc_list_for_each (iterator, network) {
e389f2af 3617 struct lxc_netdev *netdev = iterator->elem;
8bf64b77 3618 int ret;
811ef482 3619
8bf64b77
CB
3620 ret = netdev_ns_conf[netdev->type](netdev);
3621 if (!ret)
3622 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
3623 if (ret)
3624 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482
CB
3625 }
3626
3627 if (!lxc_list_empty(network))
e389f2af 3628 INFO("Network has been setup");
811ef482
CB
3629
3630 return 0;
3631}
7ab1ba02 3632
3c09b97c 3633int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
3634{
3635 struct lxc_list *iterator;
3636 struct lxc_list *network = &handler->conf->network;
3637 int data_sock = handler->data_sock[0];
3638
7ab1ba02
CB
3639 lxc_list_for_each(iterator, network) {
3640 int ret;
3641 struct lxc_netdev *netdev = iterator->elem;
3642
3c09b97c 3643 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3644 continue;
3645
7fbb15ec 3646 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3647 if (ret < 0)
7ab1ba02 3648 return -1;
e389f2af
CB
3649
3650 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3651 if (ret < 0)
3652 return -1;
3653
3654 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3655 }
3656
3657 return 0;
3658}
3659
3c09b97c 3660int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
3661{
3662 struct lxc_list *iterator;
3663 struct lxc_list *network = &handler->conf->network;
3664 int data_sock = handler->data_sock[1];
3665
7ab1ba02
CB
3666 lxc_list_for_each(iterator, network) {
3667 int ret;
3668 struct lxc_netdev *netdev = iterator->elem;
3669
3c09b97c 3670 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3671 continue;
3672
e3233f26 3673 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3674 if (ret < 0)
7ab1ba02 3675 return -1;
e389f2af
CB
3676
3677 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3678 if (ret < 0)
3679 return -1;
54256301 3680
e389f2af 3681 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3682 }
3683
3684 return 0;
3685}
a1ae535a
CB
3686
3687int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3688{
3689 struct lxc_list *iterator, *network;
3690 int data_sock = handler->data_sock[0];
3691
3692 if (!handler->am_root)
3693 return 0;
3694
3695 network = &handler->conf->network;
3696 lxc_list_for_each(iterator, network) {
3697 int ret;
3698 struct lxc_netdev *netdev = iterator->elem;
3699
3700 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3701 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3702 if (ret < 0)
7729f8e5 3703 return -1;
a1ae535a
CB
3704
3705 /* Send network device ifindex in the child's namespace to
3706 * parent.
3707 */
7fbb15ec 3708 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3709 if (ret < 0)
7729f8e5 3710 return -1;
a1ae535a
CB
3711 }
3712
e389f2af
CB
3713 if (!lxc_list_empty(network))
3714 TRACE("Sent network device names and ifindices to parent");
3715
a1ae535a 3716 return 0;
a1ae535a
CB
3717}
3718
3719int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3720{
3721 struct lxc_list *iterator, *network;
3722 int data_sock = handler->data_sock[1];
3723
3724 if (!handler->am_root)
3725 return 0;
3726
3727 network = &handler->conf->network;
3728 lxc_list_for_each(iterator, network) {
3729 int ret;
3730 struct lxc_netdev *netdev = iterator->elem;
3731
3732 /* Receive network device name in the child's namespace to
3733 * parent.
3734 */
e3233f26 3735 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3736 if (ret < 0)
7729f8e5 3737 return -1;
a1ae535a
CB
3738
3739 /* Receive network device ifindex in the child's namespace to
3740 * parent.
3741 */
e3233f26 3742 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3743 if (ret < 0)
7729f8e5 3744 return -1;
a1ae535a
CB
3745 }
3746
3747 return 0;
a1ae535a 3748}
bb84beda
CB
3749
3750void lxc_delete_network(struct lxc_handler *handler)
3751{
3752 bool bret;
3753
3754 if (handler->am_root)
3755 bret = lxc_delete_network_priv(handler);
3756 else
3757 bret = lxc_delete_network_unpriv(handler);
3758 if (!bret)
3759 DEBUG("Failed to delete network devices");
3760 else
3761 DEBUG("Deleted network devices");
3762}
1cd95214 3763
1cd95214
CB
3764int lxc_netns_set_nsid(int fd)
3765{
41a3300d 3766 int ret;
0ce60f0d
CB
3767 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3768 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3769 NLMSG_ALIGN(1024)];
1cd95214 3770 struct nl_handler nlh;
0ce60f0d
CB
3771 struct nlmsghdr *hdr;
3772 struct rtgenmsg *msg;
bfcedc7e 3773 int saved_errno;
9d036caa
CB
3774 const __s32 ns_id = -1;
3775 const __u32 netns_fd = fd;
1cd95214
CB
3776
3777 ret = netlink_open(&nlh, NETLINK_ROUTE);
3778 if (ret < 0)
41a3300d 3779 return -1;
1cd95214 3780
0ce60f0d 3781 memset(buf, 0, sizeof(buf));
6ce39620
CB
3782
3783#pragma GCC diagnostic push
3784#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
3785 hdr = (struct nlmsghdr *)buf;
3786 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3787#pragma GCC diagnostic pop
1cd95214 3788
0ce60f0d
CB
3789 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3790 hdr->nlmsg_type = RTM_NEWNSID;
3791 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3792 hdr->nlmsg_pid = 0;
3793 hdr->nlmsg_seq = RTM_NEWNSID;
3794 msg->rtgen_family = AF_UNSPEC;
1cd95214 3795
9d036caa
CB
3796 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3797 if (ret < 0)
3798 goto on_error;
3799
3800 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3801 if (ret < 0)
3802 goto on_error;
1cd95214 3803
9fbbc427 3804 ret = __netlink_transaction(&nlh, hdr, hdr);
9d036caa
CB
3805
3806on_error:
bfcedc7e 3807 saved_errno = errno;
1cd95214 3808 netlink_close(&nlh);
bfcedc7e 3809 errno = saved_errno;
1cd95214 3810
9d036caa 3811 return ret;
1cd95214 3812}
938980ba
CB
3813
/*
 * Index a sequence of routing attributes by type.
 *
 * @tb:  output table with room for @max + 1 entries; cleared first
 * @max: highest attribute type to record
 * @rta: first attribute of the sequence
 * @len: number of bytes available starting at @rta
 *
 * For every attribute type <= @max the first occurrence is stored in
 * tb[type]; later duplicates and larger types are ignored. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	struct rtattr *attr = rta;
	int remaining = len;

	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(attr, remaining); attr = RTA_NEXT(attr, remaining)) {
		unsigned short type = attr->rta_type;

		if (type > max || tb[type])
			continue;

		tb[type] = attr;
	}
#pragma GCC diagnostic pop

	return 0;
}
3833
/*
 * Return the payload of @rta interpreted as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() rather than read through a cast
 * pointer, so the access does not depend on the payload's alignment or on the
 * declared type of the underlying buffer.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
3838
#ifndef NETNS_RTA
/* Pointer to the first routing attribute following the struct rtgenmsg at r. */
#define NETNS_RTA(r) ((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3843
3844int lxc_netns_get_nsid(int fd)
3845{
3846 int ret;
3847 ssize_t len;
3848 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
3849 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3850 NLMSG_ALIGN(1024)];
938980ba
CB
3851 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3852 struct nl_handler nlh;
3853 struct nlmsghdr *hdr;
3854 struct rtgenmsg *msg;
3855 int saved_errno;
3856 __u32 netns_fd = fd;
3857
3858 ret = netlink_open(&nlh, NETLINK_ROUTE);
3859 if (ret < 0)
3860 return -1;
3861
3862 memset(buf, 0, sizeof(buf));
6ce39620
CB
3863
3864#pragma GCC diagnostic push
3865#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3866 hdr = (struct nlmsghdr *)buf;
3867 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3868#pragma GCC diagnostic pop
938980ba
CB
3869
3870 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3871 hdr->nlmsg_type = RTM_GETNSID;
3872 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3873 hdr->nlmsg_pid = 0;
3874 hdr->nlmsg_seq = RTM_GETNSID;
3875 msg->rtgen_family = AF_UNSPEC;
3876
9d036caa
CB
3877 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3878 if (ret == 0)
3879 ret = __netlink_transaction(&nlh, hdr, hdr);
938980ba 3880
938980ba
CB
3881 saved_errno = errno;
3882 netlink_close(&nlh);
3883 errno = saved_errno;
3884 if (ret < 0)
3885 return -1;
3886
9d036caa 3887 errno = EINVAL;
938980ba
CB
3888 msg = NLMSG_DATA(hdr);
3889 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
3890 if (len < 0)
3891 return -1;
3892
6ce39620
CB
3893#pragma GCC diagnostic push
3894#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3895 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
3896 if (tb[__LXC_NETNSA_NSID])
3897 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 3898#pragma GCC diagnostic pop
938980ba
CB
3899
3900 return -1;
3901}
e389f2af
CB
3902
3903int lxc_create_network(struct lxc_handler *handler)
3904{
3905 int ret;
3906
e389f2af
CB
3907 if (handler->am_root) {
3908 ret = lxc_create_network_priv(handler);
3909 if (ret)
3910 return -1;
3911
3912 return lxc_network_move_created_netdev_priv(handler);
3913 }
3914
3915 return lxc_create_network_unpriv(handler);
3916}