]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
Merge pull request #3365 from albatross0/ipvlan_l2
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
d7b58715 39#include "raw_syscalls.h"
59524108 40#include "syscall_wrappers.h"
0d204771 41#include "utils.h"
0ad19a3f 42
9de31d5a
CB
43#ifndef HAVE_STRLCPY
44#include "include/strlcpy.h"
45#endif
46
/* Set up logging for this file through lxc_log_define(). */
lxc_log_define(network, lxc);

/* Callback taking the container handler and the network device. */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);

/* Callback taking only the network device. */
typedef int (*instantiate_ns_cb)(struct lxc_netdev *);

/* Name of the loopback interface. */
static const char loop_device[] = "lo";
811ef482 52
b670016a 53static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 54{
d16bda44 55 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
8f82874c 56 struct nl_handler nlh;
d16bda44
CB
57 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
58 int addrlen, err;
8f82874c 59 struct rtmsg *rt;
8f82874c 60
61 addrlen = family == AF_INET ? sizeof(struct in_addr)
62 : sizeof(struct in6_addr);
63
d16bda44 64 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 65 if (err)
66 return err;
67
8f82874c 68 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
69 if (!nlmsg)
d16bda44 70 return -ENOMEM;
8f82874c 71
72 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
73 if (!answer)
a5f5cb41 74 return -ENOMEM;
8f82874c 75
76 nlmsg->nlmsghdr->nlmsg_flags =
77 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 78 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 79
80 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
81 if (!rt)
a5f5cb41 82 return -ENOMEM;
d16bda44 83
8f82874c 84 rt->rtm_family = family;
85 rt->rtm_table = RT_TABLE_MAIN;
86 rt->rtm_scope = RT_SCOPE_LINK;
87 rt->rtm_protocol = RTPROT_BOOT;
88 rt->rtm_type = RTN_UNICAST;
89 rt->rtm_dst_len = netmask;
90
8f82874c 91 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
92 return -EINVAL;
93
8f82874c 94 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
95 return -EINVAL;
96
97 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 98}
99
/* Send an RTM_NEWROUTE request for the IPv4 destination @dest/@netmask on @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET, ifindex, dest, netmask);

	return ret;
}
104
/* Send an RTM_NEWROUTE request for the IPv6 destination @dest/@netmask on @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET6, ifindex, dest, netmask);

	return ret;
}
109
/* Send an RTM_DELROUTE request for the IPv4 destination @dest/@netmask on @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET, ifindex, dest, netmask);

	return ret;
}
114
/* Send an RTM_DELROUTE request for the IPv6 destination @dest/@netmask on @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET6, ifindex, dest, netmask);

	return ret;
}
119
d4a7da46 120static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
121{
122 struct lxc_list *iterator;
123 int err;
124
125 lxc_list_for_each(iterator, ip) {
126 struct lxc_inetdev *inetdev = iterator->elem;
127
128 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
129 if (err)
130 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 131 }
132
133 return 0;
134}
135
136static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
137{
138 struct lxc_list *iterator;
139 int err;
140
141 lxc_list_for_each(iterator, ip) {
142 struct lxc_inet6dev *inet6dev = iterator->elem;
143
144 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
145 if (err)
146 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 147 }
148
149 return 0;
150}
151
6dfa9581
TP
152static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
153{
154 struct lxc_list *iterator;
155 int err;
156
157 lxc_list_for_each(iterator, ip) {
158 struct lxc_inetdev *inetdev = iterator->elem;
159
160 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
161
162 if (err)
9c66dc4f 163 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
164 }
165
166 return 0;
167}
168
169static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
170{
171 struct lxc_list *iterator;
172 int err;
173
174 lxc_list_for_each(iterator, ip) {
175 struct lxc_inet6dev *inet6dev = iterator->elem;
176
177 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
178 if (err)
9c66dc4f 179 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
180 }
181
182 return 0;
183}
184
/* Pair of strings: an IP address and a device name. */
struct ip_proxy_args {
	const char *ip;  /* IP address string */
	const char *dev; /* device name string */
};
189
5fe147e9 190static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 191{
d16bda44 192 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
5fe147e9 193 struct nl_handler nlh;
d16bda44
CB
194 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
195 int addrlen, err;
5fe147e9 196 struct ndmsg *rt;
6dfa9581 197
5fe147e9 198 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 199
d16bda44 200 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
201 if (err)
202 return err;
6dfa9581 203
5fe147e9
TP
204 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
205 if (!nlmsg)
d16bda44 206 return -ENOMEM;
6dfa9581 207
5fe147e9
TP
208 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
209 if (!answer)
d16bda44 210 return -ENOMEM;
6dfa9581 211
5fe147e9
TP
212 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
213 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 214
5fe147e9
TP
215 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
216 if (!rt)
d16bda44
CB
217 return -ENOMEM;
218
5fe147e9
TP
219 rt->ndm_ifindex = ifindex;
220 rt->ndm_flags = NTF_PROXY;
221 rt->ndm_type = NDA_DST;
222 rt->ndm_family = family;
6dfa9581 223
5fe147e9 224 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 225 return -EINVAL;
6dfa9581 226
d16bda44 227 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
228}
229
230static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
231{
232 int ret;
233 char path[PATH_MAX];
234 char buf[1] = "";
235
236 if (family != AF_INET && family != AF_INET6)
596a002c 237 return ret_set_errno(-1, EINVAL);
6dfa9581 238
9c66dc4f 239 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581
TP
240 family == AF_INET ? "ipv4" : "ipv6", ifname,
241 "forwarding");
9c66dc4f 242 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 243 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
244
245 return lxc_read_file_expect(path, buf, 1, "1");
246}
247
811ef482
CB
248static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
249{
54256301 250 int err;
a00fbab5 251 unsigned int mtu = 1500;
811ef482
CB
252 char *veth1, *veth2;
253 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 254
f2711167 255 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
256 veth1 = netdev->priv.veth_attr.pair;
257 if (handler->conf->reboot)
258 lxc_netdev_delete_by_name(veth1);
259 } else {
260 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
261 if (err < 0 || (size_t)err >= sizeof(veth1buf))
262 return -1;
263
3646ffd9 264 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
811ef482
CB
265 if (!veth1)
266 return -1;
267
268 /* store away for deconf */
269 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
270 }
271
d34212ad
CB
272 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
273 if (err < 0 || (size_t)err >= sizeof(veth2buf))
274 return -1;
275
3646ffd9 276 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
811ef482 277 if (!veth2)
54256301
CB
278 return -1;
279
a00fbab5
TP
280 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
281 if (netdev->mtu) {
282 if (lxc_safe_uint(netdev->mtu, &mtu))
283 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 284 } else if (!is_empty_string(netdev->link)) {
54256301 285 int ifindex_mtu;
811ef482 286
54256301
CB
287 ifindex_mtu = if_nametoindex(netdev->link);
288 if (ifindex_mtu) {
289 mtu = netdev_get_mtu(ifindex_mtu);
290 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
291 }
292 }
293
294 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
9c66dc4f
CB
295 if (err)
296 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 297
24190194
CB
298 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
299
811ef482
CB
300 /* changing the high byte of the mac address to 0xfe, the bridge interface
301 * will always keep the host's mac address and not take the mac address
302 * of a container */
303 err = setup_private_host_hw_addr(veth1);
304 if (err) {
6d1400b5 305 errno = -err;
306 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
307 goto out_delete;
308 }
309
8da62485
CB
310 /* Retrieve ifindex of the host's veth device. */
311 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
312 if (!netdev->priv.veth_attr.ifindex) {
313 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
314 goto out_delete;
315 }
316
811ef482
CB
317 if (mtu) {
318 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 319 if (err) {
6d1400b5 320 errno = -err;
54256301 321 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
322 goto out_delete;
323 }
324 }
325
f2711167 326 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
811ef482
CB
327 err = lxc_bridge_attach(netdev->link, veth1);
328 if (err) {
6d1400b5 329 errno = -err;
330 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
331 veth1, netdev->link);
811ef482
CB
332 goto out_delete;
333 }
334 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
335 }
336
337 err = lxc_netdev_up(veth1);
338 if (err) {
6d1400b5 339 errno = -err;
340 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
341 goto out_delete;
342 }
343
d4a7da46 344 /* setup ipv4 routes on the host interface */
345 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
346 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
347 goto out_delete;
348 }
349
350 /* setup ipv6 routes on the host interface */
351 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
352 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
353 goto out_delete;
354 }
355
6dfa9581 356 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
357 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
358 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
359 appear to be added successfully but then do not appear in the proxy list. The length of time
360 slept doesn't appear to be important, only that the process sleeps for a short period of time.
361 */
362 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
363
6dfa9581
TP
364 if (netdev->ipv4_gateway) {
365 char bufinet4[INET_ADDRSTRLEN];
366 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 367 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
368 goto out_delete;
369 }
370
371 err = lxc_ip_forwarding_on(veth1, AF_INET);
372 if (err) {
9c66dc4f 373 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
374 goto out_delete;
375 }
376
5fe147e9 377 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 378 if (err) {
9c66dc4f 379 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
380 goto out_delete;
381 }
382 }
383
384 if (netdev->ipv6_gateway) {
385 char bufinet6[INET6_ADDRSTRLEN];
386
387 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 388 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
389 goto out_delete;
390 }
391
392 /* Check for sysctl net.ipv6.conf.all.forwarding=1
393 Kernel requires this to route any packets for IPv6.
394 */
395 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
396 if (err) {
9c66dc4f 397 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
398 goto out_delete;
399 }
400
401 err = lxc_ip_forwarding_on(veth1, AF_INET6);
402 if (err) {
9c66dc4f 403 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
404 goto out_delete;
405 }
406
407 err = lxc_neigh_proxy_on(veth1, AF_INET6);
408 if (err) {
9c66dc4f 409 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
410 goto out_delete;
411 }
412
5fe147e9 413 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 414 if (err) {
9c66dc4f 415 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
416 goto out_delete;
417 }
418 }
419
420 /* setup ipv4 address routes on the host interface */
421 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
422 if (err) {
9c66dc4f 423 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
424 goto out_delete;
425 }
426
427 /* setup ipv6 address routes on the host interface */
428 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
429 if (err) {
9c66dc4f 430 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
431 goto out_delete;
432 }
433 }
434
811ef482 435 if (netdev->upscript) {
14a7b0f9
CB
436 char *argv[] = {
437 "veth",
438 netdev->link,
990b9ac3 439 veth1,
14a7b0f9
CB
440 NULL,
441 };
442
443 err = run_script_argv(handler->name,
444 handler->conf->hooks_version, "net",
445 netdev->upscript, "up", argv);
446 if (err < 0)
811ef482
CB
447 goto out_delete;
448 }
449
54256301 450 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
451
452 return 0;
453
454out_delete:
54256301 455 lxc_netdev_delete_by_name(veth1);
811ef482
CB
456 return -1;
457}
458
459static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
460{
8021de25 461 char peer[IFNAMSIZ];
811ef482
CB
462 int err;
463
f2711167 464 if (is_empty_string(netdev->link)) {
811ef482
CB
465 ERROR("No link for macvlan network device specified");
466 return -1;
467 }
468
8021de25
CB
469 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
470 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
471 return -1;
472
3646ffd9 473 if (!lxc_ifname_alnum_case_sensitive(peer))
811ef482
CB
474 return -1;
475
476 err = lxc_macvlan_create(netdev->link, peer,
477 netdev->priv.macvlan_attr.mode);
478 if (err) {
6d1400b5 479 errno = -err;
480 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
481 peer, netdev->link);
966e9f1f 482 goto on_error;
811ef482
CB
483 }
484
a9704f05 485 strlcpy(netdev->created_name, peer, IFNAMSIZ);
3473ca76 486 if (is_empty_string(netdev->name))
8bf64b77 487 (void)strlcpy(netdev->name, peer, IFNAMSIZ);
a9704f05 488
811ef482
CB
489 netdev->ifindex = if_nametoindex(peer);
490 if (!netdev->ifindex) {
491 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 492 goto on_error;
811ef482
CB
493 }
494
3bef7b7b 495 if (netdev->mtu) {
54256301
CB
496 unsigned int mtu;
497
3bef7b7b
TP
498 err = lxc_safe_uint(netdev->mtu, &mtu);
499 if (err < 0) {
500 errno = -err;
501 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
502 goto on_error;
503 }
504
505 err = lxc_netdev_set_mtu(peer, mtu);
506 if (err < 0) {
507 errno = -err;
508 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
509 goto on_error;
510 }
511 }
512
811ef482 513 if (netdev->upscript) {
14a7b0f9
CB
514 char *argv[] = {
515 "macvlan",
516 netdev->link,
517 NULL,
518 };
519
520 err = run_script_argv(handler->name,
521 handler->conf->hooks_version, "net",
522 netdev->upscript, "up", argv);
523 if (err < 0)
966e9f1f 524 goto on_error;
811ef482
CB
525 }
526
527 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
528 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
529
530 return 0;
966e9f1f
CB
531
532on_error:
811ef482 533 lxc_netdev_delete_by_name(peer);
811ef482
CB
534 return -1;
535}
536
c9f52382 537static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
538{
d16bda44
CB
539 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
540 struct nl_handler nlh;
541 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 542 int err, index, len;
543 struct ifinfomsg *ifi;
c9f52382 544 struct rtattr *nest, *nest2;
c9f52382 545
546 len = strlen(master);
547 if (len == 1 || len >= IFNAMSIZ)
d16bda44 548 return ret_errno(EINVAL);
c9f52382 549
550 len = strlen(name);
551 if (len == 1 || len >= IFNAMSIZ)
d16bda44 552 return ret_errno(EINVAL);
c9f52382 553
554 index = if_nametoindex(master);
555 if (!index)
d16bda44 556 return ret_errno(EINVAL);
c9f52382 557
d16bda44 558 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 559 if (err)
d16bda44 560 return ret_errno(-err);
c9f52382 561
c9f52382 562 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
563 if (!nlmsg)
d16bda44 564 return ret_errno(ENOMEM);
c9f52382 565
566 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
567 if (!answer)
d16bda44 568 return ret_errno(ENOMEM);
c9f52382 569
570 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
571 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
572
573 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
574 if (!ifi)
575 return ret_errno(ENOMEM);
c9f52382 576 ifi->ifi_family = AF_UNSPEC;
577
c9f52382 578 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
579 if (!nest)
d16bda44 580 return ret_errno(EPROTO);
c9f52382 581
582 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 583 return ret_errno(EPROTO);
c9f52382 584
5755765e
KT
585 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
586 if (!nest2)
587 return ret_errno(EPROTO);
588
589 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
590 return ret_errno(EPROTO);
591
592 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
593 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
594 */
595 if (isolation > 0 &&
596 nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
597 return ret_errno(EPROTO);
c9f52382 598
5755765e 599 nla_end_nested(nlmsg, nest2);
c9f52382 600 nla_end_nested(nlmsg, nest);
601
602 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 603 return ret_errno(EPROTO);
c9f52382 604
605 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
606 return ret_errno(EPROTO);
607
608 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 609}
610
611static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
612{
dd119206 613 char peer[IFNAMSIZ];
c9f52382 614 int err;
615
f2711167 616 if (is_empty_string(netdev->link)) {
c9f52382 617 ERROR("No link for ipvlan network device specified");
618 return -1;
619 }
620
dd119206
CB
621 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
622 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 623 return -1;
624
3646ffd9 625 if (!lxc_ifname_alnum_case_sensitive(peer))
c9f52382 626 return -1;
627
dd119206
CB
628 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
629 netdev->priv.ipvlan_attr.isolation);
c9f52382 630 if (err) {
dd119206
CB
631 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
632 peer, netdev->link);
c9f52382 633 goto on_error;
634 }
635
e7fdd504 636 strlcpy(netdev->created_name, peer, IFNAMSIZ);
3473ca76 637 if (is_empty_string(netdev->name))
8bf64b77 638 (void)strlcpy(netdev->name, peer, IFNAMSIZ);
e7fdd504 639
c9f52382 640 netdev->ifindex = if_nametoindex(peer);
641 if (!netdev->ifindex) {
642 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
643 goto on_error;
644 }
645
006e135e 646 if (netdev->mtu) {
54256301
CB
647 unsigned int mtu;
648
006e135e 649 err = lxc_safe_uint(netdev->mtu, &mtu);
650 if (err < 0) {
651 errno = -err;
54256301 652 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 653 goto on_error;
654 }
655
656 err = lxc_netdev_set_mtu(peer, mtu);
657 if (err < 0) {
658 errno = -err;
54256301 659 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 660 goto on_error;
661 }
662 }
663
c9f52382 664 if (netdev->upscript) {
665 char *argv[] = {
666 "ipvlan",
667 netdev->link,
668 NULL,
669 };
670
dd119206
CB
671 err = run_script_argv(handler->name, handler->conf->hooks_version,
672 "net", netdev->upscript, "up", argv);
c9f52382 673 if (err < 0)
674 goto on_error;
675 }
676
dd119206
CB
677 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
678 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 679
680 return 0;
681
682on_error:
683 lxc_netdev_delete_by_name(peer);
684 return -1;
685}
686
811ef482
CB
687static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
688{
689 char peer[IFNAMSIZ];
690 int err;
691 static uint16_t vlan_cntr = 0;
811ef482 692
f2711167 693 if (is_empty_string(netdev->link)) {
811ef482
CB
694 ERROR("No link for vlan network device specified");
695 return -1;
696 }
697
d4d68410
CB
698 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
699 netdev->priv.vlan_attr.vid, vlan_cntr++);
811ef482
CB
700 if (err < 0 || (size_t)err >= sizeof(peer))
701 return -1;
702
703 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
704 if (err) {
6d1400b5 705 errno = -err;
706 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
707 peer, netdev->link);
811ef482
CB
708 return -1;
709 }
710
83530dba 711 strlcpy(netdev->created_name, peer, IFNAMSIZ);
3473ca76 712 if (is_empty_string(netdev->name))
8bf64b77 713 (void)strlcpy(netdev->name, peer, IFNAMSIZ);
83530dba 714
811ef482
CB
715 netdev->ifindex = if_nametoindex(peer);
716 if (!netdev->ifindex) {
717 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 718 goto on_error;
719 }
720
721 if (netdev->mtu) {
54256301
CB
722 unsigned int mtu;
723
3e2a7b08 724 err = lxc_safe_uint(netdev->mtu, &mtu);
725 if (err < 0) {
726 errno = -err;
54256301 727 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 728 goto on_error;
729 }
730
731 err = lxc_netdev_set_mtu(peer, mtu);
54256301 732 if (err < 0) {
3e2a7b08 733 errno = -err;
54256301 734 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 735 goto on_error;
736 }
811ef482
CB
737 }
738
3a73d9f1 739 if (netdev->upscript) {
740 char *argv[] = {
741 "vlan",
742 netdev->link,
743 NULL,
744 };
745
d4d68410
CB
746 err = run_script_argv(handler->name, handler->conf->hooks_version,
747 "net", netdev->upscript, "up", argv);
19abca58 748 if (err < 0) {
3e2a7b08 749 goto on_error;
19abca58 750 }
3a73d9f1 751 }
752
d4d68410
CB
753 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
754 netdev->ifindex);
811ef482
CB
755
756 return 0;
3e2a7b08 757
758on_error:
759 lxc_netdev_delete_by_name(peer);
760 return -1;
811ef482
CB
761}
762
763static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
764{
0b154989 765 int err, mtu_orig = 0;
14a7b0f9 766
9c66dc4f
CB
767 if (is_empty_string(netdev->link))
768 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 769
75b074ee
CB
770 /*
771 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
772 * network namespace because we need it to move the device from the
773 * host's network namespace to the container's network namespace later
774 * on.
775 * Note that netdev->link will contain the name of the physical network
776 * device in the host's namespace.
777 */
811ef482 778 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
779 if (!netdev->ifindex)
780 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 781
61302ef7 782 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 783 if (is_empty_string(netdev->name))
8bf64b77 784 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 785
75b074ee
CB
786 /*
787 * Store the ifindex of the host's network device in the host's
790255cf
CB
788 * namespace.
789 */
790 netdev->priv.phys_attr.ifindex = netdev->ifindex;
791
75b074ee
CB
792 /*
793 * Get original device MTU setting and store for restoration after
794 * container shutdown.
795 */
0b154989 796 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
797 if (mtu_orig < 0)
798 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
799
800 netdev->priv.phys_attr.mtu = mtu_orig;
801
3bef7b7b 802 if (netdev->mtu) {
54256301
CB
803 unsigned int mtu;
804
3bef7b7b 805 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
806 if (err < 0)
807 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 808
3bef7b7b 809 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
810 if (err < 0)
811 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
812 }
813
814 if (netdev->upscript) {
815 char *argv[] = {
816 "phys",
817 netdev->link,
818 NULL,
819 };
820
75b074ee
CB
821 err = run_script_argv(handler->name, handler->conf->hooks_version,
822 "net", netdev->upscript, "up", argv);
9c66dc4f 823 if (err < 0)
3bef7b7b 824 return -1;
3bef7b7b
TP
825 }
826
75b074ee
CB
827 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
828 netdev->ifindex);
811ef482
CB
829
830 return 0;
831}
832
833static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
834{
14a7b0f9
CB
835 int ret;
836 char *argv[] = {
837 "empty",
838 NULL,
839 };
840
811ef482 841 netdev->ifindex = 0;
14a7b0f9
CB
842 if (!netdev->upscript)
843 return 0;
844
845 ret = run_script_argv(handler->name, handler->conf->hooks_version,
846 "net", netdev->upscript, "up", argv);
847 if (ret < 0)
848 return -1;
849
811ef482
CB
850 return 0;
851}
852
853static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
854{
855 netdev->ifindex = 0;
856 return 0;
857}
858
859static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
860 [LXC_NET_VETH] = instantiate_veth,
861 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 862 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
863 [LXC_NET_VLAN] = instantiate_vlan,
864 [LXC_NET_PHYS] = instantiate_phys,
865 [LXC_NET_EMPTY] = instantiate_empty,
866 [LXC_NET_NONE] = instantiate_none,
867};
868
8bf64b77
CB
869static int instantiate_ns_veth(struct lxc_netdev *netdev)
870{
871 char current_ifname[IFNAMSIZ];
872
873 netdev->ifindex = if_nametoindex(netdev->created_name);
874 if (!netdev->ifindex)
875 return log_error_errno(-1,
876 errno, "Failed to retrieve ifindex for network device with name %s",
877 netdev->created_name);
878
3473ca76 879 if (is_empty_string(netdev->name))
8bf64b77
CB
880 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
881
882 if (strcmp(netdev->created_name, netdev->name) != 0) {
883 int ret;
884
885 ret = lxc_netdev_rename_by_name(netdev->created_name, netdev->name);
886 if (ret)
9c66dc4f 887 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
8bf64b77
CB
888 netdev->created_name,
889 netdev->name);
890
891 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->created_name, netdev->name);
892 }
893
894 /*
895 * Re-read the name of the interface because its name has changed and
896 * would be automatically allocated by the system
897 */
898 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 899 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
900
901 /*
902 * Now update the recorded name of the network device to reflect the
903 * name of the network device in the child's network namespace. We will
904 * later on send this information back to the parent.
905 */
906 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
907
908 return 0;
909}
910
911static int __instantiate_common(struct lxc_netdev *netdev)
912{
913 netdev->ifindex = if_nametoindex(netdev->name);
914 if (!netdev->ifindex)
9c66dc4f 915 return log_error_errno(-1, errno, "Failed to retrieve ifindex for network device with name %s", netdev->name);
8bf64b77
CB
916
917 return 0;
918}
919
/* macvlan: only the common ifindex lookup is needed. */
static int instantiate_ns_macvlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
924
/* ipvlan: only the common ifindex lookup is needed. */
static int instantiate_ns_ipvlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
929
/* vlan: only the common ifindex lookup is needed. */
static int instantiate_ns_vlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
934
/* phys: only the common ifindex lookup is needed. */
static int instantiate_ns_phys(struct lxc_netdev *netdev)
{
	int ret = __instantiate_common(netdev);

	return ret;
}
939
/* empty: nothing to do; @netdev is not touched. Always returns 0. */
static int instantiate_ns_empty(struct lxc_netdev *netdev)
{
	const int ok = 0;

	return ok;
}
944
/* none: nothing to do; @netdev is not touched. Always returns 0. */
static int instantiate_ns_none(struct lxc_netdev *netdev)
{
	const int ok = 0;

	return ok;
}
949
950static instantiate_ns_cb netdev_ns_conf[LXC_NET_MAXCONFTYPE + 1] = {
951 [LXC_NET_VETH] = instantiate_ns_veth,
952 [LXC_NET_MACVLAN] = instantiate_ns_macvlan,
953 [LXC_NET_IPVLAN] = instantiate_ns_ipvlan,
954 [LXC_NET_VLAN] = instantiate_ns_vlan,
955 [LXC_NET_PHYS] = instantiate_ns_phys,
956 [LXC_NET_EMPTY] = instantiate_ns_empty,
957 [LXC_NET_NONE] = instantiate_ns_none,
958};
959
811ef482
CB
960static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
961{
14a7b0f9
CB
962 int ret;
963 char *argv[] = {
964 "veth",
965 netdev->link,
966 NULL,
967 NULL,
968 };
969
970 if (!netdev->downscript)
971 return 0;
811ef482 972
f2711167 973 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 974 argv[2] = netdev->priv.veth_attr.pair;
811ef482 975 else
14a7b0f9
CB
976 argv[2] = netdev->priv.veth_attr.veth1;
977
978 ret = run_script_argv(handler->name,
979 handler->conf->hooks_version, "net",
980 netdev->downscript, "down", argv);
981 if (ret < 0)
982 return -1;
811ef482 983
811ef482
CB
984 return 0;
985}
986
987static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
988{
14a7b0f9
CB
989 int ret;
990 char *argv[] = {
991 "macvlan",
992 netdev->link,
993 NULL,
994 };
995
996 if (!netdev->downscript)
997 return 0;
998
999 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1000 "net", netdev->downscript, "down", argv);
1001 if (ret < 0)
1002 return -1;
811ef482 1003
811ef482
CB
1004 return 0;
1005}
1006
c9f52382 1007static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1008{
1009 int ret;
1010 char *argv[] = {
1011 "ipvlan",
1012 netdev->link,
1013 NULL,
1014 };
1015
1016 if (!netdev->downscript)
1017 return 0;
1018
1019 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1020 "net", netdev->downscript, "down", argv);
1021 if (ret < 0)
1022 return -1;
1023
1024 return 0;
1025}
1026
811ef482
CB
1027static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1028{
3a73d9f1 1029 int ret;
1030 char *argv[] = {
1031 "vlan",
1032 netdev->link,
1033 NULL,
1034 };
1035
1036 if (!netdev->downscript)
1037 return 0;
1038
1039 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1040 "net", netdev->downscript, "down", argv);
1041 if (ret < 0)
1042 return -1;
1043
811ef482
CB
1044 return 0;
1045}
1046
1047static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1048{
14a7b0f9
CB
1049 int ret;
1050 char *argv[] = {
1051 "phys",
1052 netdev->link,
1053 NULL,
1054 };
1055
1056 if (!netdev->downscript)
1057 return 0;
1058
1059 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1060 "net", netdev->downscript, "down", argv);
1061 if (ret < 0)
1062 return -1;
811ef482 1063
811ef482
CB
1064 return 0;
1065}
1066
1067static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1068{
14a7b0f9
CB
1069 int ret;
1070 char *argv[] = {
1071 "empty",
1072 NULL,
1073 };
1074
1075 if (!netdev->downscript)
1076 return 0;
1077
1078 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1079 "net", netdev->downscript, "down", argv);
1080 if (ret < 0)
1081 return -1;
811ef482 1082
811ef482
CB
1083 return 0;
1084}
1085
/* Shutdown hook for a "none" network: nothing to do, always 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
1090
1091static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1092 [LXC_NET_VETH] = shutdown_veth,
1093 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 1094 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
1095 [LXC_NET_VLAN] = shutdown_vlan,
1096 [LXC_NET_PHYS] = shutdown_phys,
1097 [LXC_NET_EMPTY] = shutdown_empty,
1098 [LXC_NET_NONE] = shutdown_none,
1099};
1100
0037ab49
TP
1101static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1102{
d16bda44 1103 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0037ab49 1104 struct nl_handler nlh;
d16bda44
CB
1105 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1106 int err;
0037ab49 1107 struct ifinfomsg *ifi;
0037ab49 1108
d16bda44 1109 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1110 if (err)
1111 return err;
1112
0037ab49
TP
1113 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1114 if (!nlmsg)
d16bda44 1115 return ret_errno(ENOMEM);
0037ab49
TP
1116
1117 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1118 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1119
1120 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1121 if (!ifi)
d16bda44
CB
1122 return ret_errno(ENOMEM);
1123
0037ab49
TP
1124 ifi->ifi_family = AF_UNSPEC;
1125 ifi->ifi_index = ifindex;
1126
1127 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1128 return ret_errno(ENOMEM);
0037ab49 1129
3473ca76 1130 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1131 return ret_errno(ENOMEM);
0037ab49 1132
d16bda44 1133 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1134}
1135
ebc73a67 1136int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1137{
d16bda44 1138 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0ad19a3f 1139 struct nl_handler nlh;
d16bda44
CB
1140 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1141 int err;
06f976ca 1142 struct ifinfomsg *ifi;
0ad19a3f 1143
d16bda44 1144 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1145 if (err)
1146 return err;
0ad19a3f 1147
0ad19a3f 1148 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1149 if (!nlmsg)
d16bda44 1150 return ret_errno(ENOMEM);
0ad19a3f 1151
ebc73a67 1152 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1153 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1154
1155 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1156 if (!ifi)
d16bda44
CB
1157 return ret_errno(ENOMEM);
1158
06f976ca
SZ
1159 ifi->ifi_family = AF_UNSPEC;
1160 ifi->ifi_index = ifindex;
0ad19a3f 1161
1162 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1163 return ret_errno(ENOMEM);
0ad19a3f 1164
3473ca76 1165 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1166 return ret_errno(ENOMEM);
8d357196 1167
d16bda44 1168 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1169}
1170
ebc73a67
CB
1171/* If we are asked to move a wireless interface, then we must actually move its
1172 * phyN device. Detect that condition and return the physname here. The physname
1173 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1174 */
1175#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1176char *is_wlan(const char *ifname)
e5848d39 1177{
4110345b
CB
1178 __do_fclose FILE *f = NULL;
1179 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1180 int i, ret;
e5848d39 1181 long physlen;
ebc73a67 1182 size_t len;
e5848d39 1183
ebc73a67 1184 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1185 path = must_realloc(NULL, len + 1);
e5848d39 1186 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 1187 if (ret < 0 || (size_t)ret >= len)
4110345b 1188 return NULL;
ebc73a67 1189
4110345b 1190 f = fopen(path, "re");
ebc73a67 1191 if (!f)
4110345b 1192 return NULL;
ebc73a67 1193
1a0e70ac 1194 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1195 fseek(f, 0, SEEK_END);
1196 physlen = ftell(f);
1197 fseek(f, 0, SEEK_SET);
4110345b
CB
1198 if (physlen < 0)
1199 return NULL;
ebc73a67
CB
1200
1201 physname = malloc(physlen + 1);
4110345b
CB
1202 if (!physname)
1203 return NULL;
ebc73a67
CB
1204
1205 memset(physname, 0, physlen + 1);
e5848d39 1206 ret = fread(physname, 1, physlen, f);
e5848d39 1207 if (ret < 0)
4110345b 1208 return NULL;
e5848d39 1209
ebc73a67 1210 for (i = 0; i < physlen; i++) {
e5848d39
SH
1211 if (physname[i] == '\n')
1212 physname[i] = '\0';
ebc73a67 1213
e5848d39
SH
1214 if (physname[i] == '\0')
1215 break;
1216 }
1217
4110345b 1218 return move_ptr(physname);
e5848d39
SH
1219}
1220
ebc73a67
CB
/*
 * Rename interface @old to @new inside the network namespace of process
 * @pid.
 *
 * A child process is forked which switches to the "net" namespace of @pid
 * and calls lxc_netdev_rename_by_name() there; the parent waits for it.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child: it must never return into the caller's code, otherwise two
	 * processes would continue running the parent's logic. Report every
	 * outcome through the exit status instead.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new) ? EXIT_FAILURE : EXIT_SUCCESS);
}
1238
e4103cf6 1239int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1240 const char *newname)
e5848d39 1241{
3dd78294 1242 __do_free char *cmd = NULL;
ebc73a67 1243 pid_t fpid;
e5848d39
SH
1244
1245 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1246 * However, IIUC this involves a bit more complicated work to talk to
1247 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1248 */
1249 cmd = on_path("iw", NULL);
9c66dc4f 1250 if (!cmd)
3dd78294 1251 return -1;
e5848d39
SH
1252
1253 fpid = fork();
1254 if (fpid < 0)
3dd78294 1255 return -1;
ebc73a67 1256
e5848d39
SH
1257 if (fpid == 0) {
1258 char pidstr[30];
1259 sprintf(pidstr, "%d", pid);
9c66dc4f 1260 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1261 _exit(EXIT_FAILURE);
e5848d39 1262 }
ebc73a67 1263
e5848d39 1264 if (wait_for_pid(fpid))
3dd78294 1265 return -1;
e5848d39 1266
e5848d39 1267 if (newname)
3dd78294 1268 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1269
3dd78294 1270 return 0;
e5848d39
SH
1271}
1272
8d357196 1273int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1274{
3dd78294 1275 __do_free char *physname = NULL;
8befa924
SH
1276 int index;
1277
8befa924
SH
1278 if (!ifname)
1279 return -EINVAL;
1280
32571606 1281 index = if_nametoindex(ifname);
49428bf3
DY
1282 if (!index)
1283 return -EINVAL;
32571606 1284
ebc73a67
CB
1285 physname = is_wlan(ifname);
1286 if (physname)
e5848d39
SH
1287 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1288
8d357196 1289 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1290}
1291
b84f58b9 1292int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1293{
d16bda44
CB
1294 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1295 struct nl_handler nlh;
1296 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1297 int err;
ebc73a67 1298 struct ifinfomsg *ifi;
0ad19a3f 1299
d16bda44 1300 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1301 if (err)
1302 return err;
0ad19a3f 1303
0ad19a3f 1304 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1305 if (!nlmsg)
d16bda44 1306 return ret_errno(ENOMEM);
0ad19a3f 1307
06f976ca 1308 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1309 if (!answer)
d16bda44 1310 return ret_errno(ENOMEM);
0ad19a3f 1311
ebc73a67 1312 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1313 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1314
1315 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1316 if (!ifi)
d16bda44
CB
1317 return ret_errno(ENOMEM);
1318
06f976ca
SZ
1319 ifi->ifi_family = AF_UNSPEC;
1320 ifi->ifi_index = ifindex;
0ad19a3f 1321
d16bda44 1322 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1323}
1324
b84f58b9
DL
/*
 * Delete the interface called @name.
 *
 * Returns -EINVAL if if_nametoindex() finds no index for @name, otherwise
 * the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1335
1336int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1337{
d16bda44
CB
1338 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1339 struct nl_handler nlh;
1340 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1341 int err, len;
06f976ca 1342 struct ifinfomsg *ifi;
b9a5bb58 1343
d16bda44 1344 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1345 if (err)
1346 return err;
b9a5bb58 1347
b84f58b9 1348 len = strlen(newname);
d16bda44
CB
1349 if (len == 1 || len >= IFNAMSIZ)
1350 return ret_errno(EINVAL);
b84f58b9 1351
b9a5bb58
DL
1352 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1353 if (!nlmsg)
d16bda44 1354 return ret_errno(ENOMEM);
b9a5bb58 1355
06f976ca 1356 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1357 if (!answer)
d16bda44 1358 return ret_errno(ENOMEM);
b9a5bb58 1359
ebc73a67 1360 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1361 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1362
1363 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1364 if (!ifi)
d16bda44
CB
1365 return ret_errno(ENOMEM);
1366
06f976ca
SZ
1367 ifi->ifi_family = AF_UNSPEC;
1368 ifi->ifi_index = ifindex;
b84f58b9
DL
1369
1370 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1371 return ret_errno(ENOMEM);
b9a5bb58 1372
d16bda44 1373 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1374}
1375
b84f58b9
DL
1376int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1377{
1378 int len, index;
1379
1380 len = strlen(oldname);
dae3fdf6 1381 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1382 return -EINVAL;
1383
1384 index = if_nametoindex(oldname);
1385 if (!index)
1386 return -EINVAL;
1387
1388 return lxc_netdev_rename_by_index(index, newname);
1389}
1390
8befa924 1391int netdev_set_flag(const char *name, int flag)
0ad19a3f 1392{
d16bda44
CB
1393 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1394 struct nl_handler nlh;
1395 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1396 int err, index, len;
06f976ca 1397 struct ifinfomsg *ifi;
0ad19a3f 1398
d16bda44 1399 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1400 if (err)
1401 return err;
0ad19a3f 1402
1403 len = strlen(name);
dae3fdf6 1404 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1405 return ret_errno(EINVAL);
0ad19a3f 1406
1407 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1408 if (!nlmsg)
d16bda44 1409 return ret_errno(ENOMEM);
0ad19a3f 1410
06f976ca 1411 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1412 if (!answer)
d16bda44 1413 return ret_errno(ENOMEM);
0ad19a3f 1414
1415 index = if_nametoindex(name);
1416 if (!index)
d16bda44 1417 return ret_errno(EINVAL);
0ad19a3f 1418
ebc73a67 1419 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1420 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1421
1422 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1423 if (!ifi)
1424 return ret_errno(ENOMEM);
1425
06f976ca
SZ
1426 ifi->ifi_family = AF_UNSPEC;
1427 ifi->ifi_index = index;
1428 ifi->ifi_change |= IFF_UP;
1429 ifi->ifi_flags |= flag;
0ad19a3f 1430
d16bda44 1431 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1432}
1433
ebc73a67 1434int netdev_get_flag(const char *name, int *flag)
efa1cf45 1435{
d16bda44
CB
1436 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1437 struct nl_handler nlh;
1438 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1439 int err, index, len;
a4318300 1440 struct ifinfomsg *ifi;
efa1cf45
DY
1441
1442 if (!name)
d16bda44 1443 return ret_errno(EINVAL);
efa1cf45 1444
d16bda44 1445 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1446 if (err)
1447 return err;
1448
efa1cf45
DY
1449 len = strlen(name);
1450 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1451 return ret_errno(EINVAL);
efa1cf45 1452
efa1cf45
DY
1453 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1454 if (!nlmsg)
d16bda44 1455 return ret_errno(ENOMEM);
efa1cf45 1456
06f976ca 1457 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1458 if (!answer)
d16bda44 1459 return ret_errno(ENOMEM);
efa1cf45 1460
efa1cf45
DY
1461 index = if_nametoindex(name);
1462 if (!index)
d16bda44 1463 return ret_errno(EINVAL);
efa1cf45 1464
06f976ca
SZ
1465 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1466 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1467
1468 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1469 if (!ifi)
1470 return ret_errno(ENOMEM);
1471
06f976ca
SZ
1472 ifi->ifi_family = AF_UNSPEC;
1473 ifi->ifi_index = index;
efa1cf45 1474
d16bda44 1475 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1476 if (err)
d16bda44 1477 return ret_set_errno(-1, errno);
efa1cf45 1478
06f976ca 1479 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1480
1481 *flag = ifi->ifi_flags;
efa1cf45
DY
1482 return err;
1483}
1484
1485/*
1486 * \brief Check a interface is up or not.
1487 *
1488 * \param name: name for the interface.
1489 *
1490 * \return int.
1491 * 0 means interface is down.
1492 * 1 means interface is up.
1493 * Others means error happened, and ret-value is the error number.
1494 */
ebc73a67 1495int lxc_netdev_isup(const char *name)
efa1cf45 1496{
ebc73a67 1497 int err, flag;
efa1cf45
DY
1498
1499 err = netdev_get_flag(name, &flag);
1500 if (err)
ebc73a67
CB
1501 return err;
1502
efa1cf45
DY
1503 if (flag & IFF_UP)
1504 return 1;
ebc73a67 1505
efa1cf45 1506 return 0;
efa1cf45
DY
1507}
1508
0130df54
SH
1509int netdev_get_mtu(int ifindex)
1510{
a5f5cb41 1511 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54 1512 struct nl_handler nlh;
a5f5cb41
CB
1513 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1514 int readmore = 0, recv_len = 0;
1515 int answer_len, err, res;
06f976ca 1516 struct ifinfomsg *ifi;
0130df54 1517 struct nlmsghdr *msg;
0130df54 1518
a5f5cb41 1519 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0130df54
SH
1520 if (err)
1521 return err;
1522
0130df54
SH
1523 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1524 if (!nlmsg)
a5f5cb41 1525 return ret_errno(ENOMEM);
0130df54 1526
06f976ca 1527 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54 1528 if (!answer)
a5f5cb41 1529 return ret_errno(ENOMEM);
0130df54
SH
1530
1531 /* Save the answer buffer length, since it will be overwritten
1532 * on the first receive (and we might need to receive more than
ebc73a67
CB
1533 * once.
1534 */
06f976ca
SZ
1535 answer_len = answer->nlmsghdr->nlmsg_len;
1536
ebc73a67 1537 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1538 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1539
06f976ca 1540 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1541 if (!ifi)
a5f5cb41
CB
1542 return ret_errno(ENOMEM);
1543
06f976ca 1544 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1545
1546 /* Send the request for addresses, which returns all addresses
1547 * on all interfaces. */
a5f5cb41 1548 err = netlink_send(nlh_ptr, nlmsg);
0130df54 1549 if (err < 0)
a5f5cb41 1550 return ret_set_errno(-1, errno);
0130df54 1551
6ce39620
CB
1552#pragma GCC diagnostic push
1553#pragma GCC diagnostic ignored "-Wcast-align"
1554
0130df54
SH
1555 do {
1556 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1557 * overwritten by a previous receive.
1558 */
06f976ca 1559 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1560
1561 /* Get the (next) batch of reply messages */
a5f5cb41 1562 err = netlink_rcv(nlh_ptr, answer);
0130df54 1563 if (err < 0)
a5f5cb41 1564 return ret_set_errno(-1, errno);
0130df54
SH
1565
1566 recv_len = err;
0130df54
SH
1567
1568 /* Satisfy the typing for the netlink macros */
06f976ca 1569 msg = answer->nlmsghdr;
0130df54
SH
1570
1571 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1572 /* Stop reading if we see an error message */
1573 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
1574 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1575 return ret_set_errno(errmsg->error, errno);
0130df54
SH
1576 }
1577
1578 /* Stop reading if we see a NLMSG_DONE message */
1579 if (msg->nlmsg_type == NLMSG_DONE) {
1580 readmore = 0;
1581 break;
1582 }
1583
06f976ca 1584 ifi = NLMSG_DATA(msg);
0130df54
SH
1585 if (ifi->ifi_index == ifindex) {
1586 struct rtattr *rta = IFLA_RTA(ifi);
a5f5cb41
CB
1587 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1588
0130df54 1589 res = 0;
ebc73a67 1590 while (RTA_OK(rta, attr_len)) {
9c66dc4f 1591 /*
a5f5cb41 1592 * Found a local address for the
ebc73a67
CB
1593 * requested interface, return it.
1594 */
0130df54 1595 if (rta->rta_type == IFLA_MTU) {
a5f5cb41
CB
1596 memcpy(&res, RTA_DATA(rta), sizeof(int));
1597 return res;
0130df54 1598 }
a5f5cb41 1599
0130df54
SH
1600 rta = RTA_NEXT(rta, attr_len);
1601 }
0130df54
SH
1602 }
1603
ebc73a67
CB
1604 /* Keep reading more data from the socket if the last
1605 * message had the NLF_F_MULTI flag set.
1606 */
0130df54
SH
1607 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1608
ebc73a67 1609 /* Look at the next message received in this buffer. */
0130df54
SH
1610 msg = NLMSG_NEXT(msg, recv_len);
1611 }
1612 } while (readmore);
1613
6ce39620
CB
1614#pragma GCC diagnostic pop
1615
ebc73a67 1616 /* If we end up here, we didn't find any result, so signal an error. */
a5f5cb41 1617 return -1;
0130df54
SH
1618}
1619
d472214b 1620int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1621{
a5f5cb41
CB
1622 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1623 struct nl_handler nlh;
1624 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
54256301 1625 int err, len;
06f976ca 1626 struct ifinfomsg *ifi;
75d09f83 1627
a5f5cb41 1628 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1629 if (err)
1630 return err;
75d09f83
DL
1631
1632 len = strlen(name);
dae3fdf6 1633 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1634 return ret_errno(EINVAL);
75d09f83
DL
1635
1636 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1637 if (!nlmsg)
a5f5cb41 1638 return ret_errno(ENOMEM);
75d09f83 1639
06f976ca 1640 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83 1641 if (!answer)
a5f5cb41 1642 return ret_errno(ENOMEM);
75d09f83 1643
ebc73a67 1644 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1645 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1646
1647 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
1648 if (!ifi)
1649 return ret_errno(ENOMEM);
1650
06f976ca 1651 ifi->ifi_family = AF_UNSPEC;
54256301
CB
1652
1653 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 1654 return ret_errno(ENOMEM);
75d09f83
DL
1655
1656 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 1657 return ret_errno(ENOMEM);
75d09f83 1658
a5f5cb41 1659 return netlink_transaction(nlh_ptr, nlmsg, answer);
75d09f83
DL
1660}
1661
d472214b 1662int lxc_netdev_up(const char *name)
0ad19a3f 1663{
d472214b 1664 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1665}
1666
/* Call netdev_set_flag() for @name with no flag bits set. */
int lxc_netdev_down(const char *name)
{
	int ret = netdev_set_flag(name, 0);

	return ret;
}
1671
54256301 1672int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 1673{
a5f5cb41
CB
1674 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1675 struct nl_handler nlh;
1676 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1677 int err, len;
06f976ca 1678 struct ifinfomsg *ifi;
0ad19a3f 1679 struct rtattr *nest1, *nest2, *nest3;
0ad19a3f 1680
a5f5cb41 1681 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1682 if (err)
1683 return err;
0ad19a3f 1684
1685 len = strlen(name1);
dae3fdf6 1686 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1687 return ret_errno(EINVAL);
0ad19a3f 1688
1689 len = strlen(name2);
dae3fdf6 1690 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1691 return ret_errno(EINVAL);
0ad19a3f 1692
1693 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1694 if (!nlmsg)
a5f5cb41 1695 return ret_errno(ENOMEM);
0ad19a3f 1696
06f976ca 1697 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1698 if (!answer)
a5f5cb41 1699 return ret_errno(ENOMEM);
0ad19a3f 1700
a5f5cb41 1701 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1702 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1703
1704 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1705 if (!ifi)
a5f5cb41
CB
1706 return ret_errno(ENOMEM);
1707
06f976ca 1708 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1709
79e68309 1710 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1711 if (!nest1)
a5f5cb41 1712 return ret_errno(EINVAL);
0ad19a3f 1713
1714 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
a5f5cb41 1715 return ret_errno(ENOMEM);
0ad19a3f 1716
1717 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1718 if (!nest2)
a5f5cb41 1719 return ret_errno(ENOMEM);
0ad19a3f 1720
1721 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1722 if (!nest3)
a5f5cb41 1723 return ret_errno(ENOMEM);
0ad19a3f 1724
06f976ca 1725 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
1726 if (!ifi)
1727 return ret_errno(ENOMEM);
0ad19a3f 1728
1729 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
a5f5cb41 1730 return ret_errno(ENOMEM);
0ad19a3f 1731
54256301 1732 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 1733 return ret_errno(ENOMEM);
54256301
CB
1734
1735 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
a5f5cb41 1736 return ret_errno(ENOMEM);
54256301 1737
0ad19a3f 1738 nla_end_nested(nlmsg, nest3);
0ad19a3f 1739 nla_end_nested(nlmsg, nest2);
0ad19a3f 1740 nla_end_nested(nlmsg, nest1);
1741
1742 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
a5f5cb41 1743 return ret_errno(ENOMEM);
0ad19a3f 1744
a5f5cb41 1745 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1746}
1747
ebc73a67 1748/* TODO: merge with lxc_macvlan_create */
7c11d57a 1749int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1750{
a5f5cb41
CB
1751 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1752 struct nl_handler nlh;
1753 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1754 int err, len, lindex;
06f976ca 1755 struct ifinfomsg *ifi;
26c39028 1756 struct rtattr *nest, *nest2;
26c39028 1757
a5f5cb41 1758 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1759 if (err)
1760 return err;
26c39028
JHS
1761
1762 len = strlen(master);
dae3fdf6 1763 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1764 return ret_errno(EINVAL);
26c39028
JHS
1765
1766 len = strlen(name);
dae3fdf6 1767 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1768 return ret_errno(EINVAL);
26c39028
JHS
1769
1770 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1771 if (!nlmsg)
a5f5cb41 1772 return ret_errno(ENOMEM);
26c39028 1773
06f976ca 1774 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028 1775 if (!answer)
a5f5cb41 1776 return ret_errno(ENOMEM);
26c39028
JHS
1777
1778 lindex = if_nametoindex(master);
1779 if (!lindex)
a5f5cb41 1780 return ret_errno(EINVAL);
26c39028 1781
a5f5cb41 1782 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1783 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1784
1785 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
1786 if (!ifi)
1787 return ret_errno(ENOMEM);
1788
06f976ca 1789 ifi->ifi_family = AF_UNSPEC;
26c39028 1790
79e68309 1791 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028 1792 if (!nest)
a5f5cb41 1793 return ret_errno(ENOMEM);
26c39028
JHS
1794
1795 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
a5f5cb41 1796 return ret_errno(ENOMEM);
26c39028
JHS
1797
1798 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1799 if (!nest2)
a5f5cb41 1800 return ret_errno(ENOMEM);
e892973e 1801
26c39028 1802 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
a5f5cb41 1803 return ret_errno(ENOMEM);
e892973e 1804
26c39028 1805 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1806 nla_end_nested(nlmsg, nest);
1807
1808 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
a5f5cb41 1809 return ret_errno(ENOMEM);
26c39028
JHS
1810
1811 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41
CB
1812 return ret_errno(ENOMEM);
1813
1814 return netlink_transaction(nlh_ptr, nlmsg, answer);
26c39028
JHS
1815}
1816
e892973e 1817int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 1818{
a5f5cb41
CB
1819 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1820 struct nl_handler nlh;
1821 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1822 int err, index, len;
06f976ca 1823 struct ifinfomsg *ifi;
e892973e 1824 struct rtattr *nest, *nest2;
0ad19a3f 1825
a5f5cb41 1826 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1827 if (err)
1828 return err;
0ad19a3f 1829
1830 len = strlen(master);
dae3fdf6 1831 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1832 return ret_errno(EINVAL);
0ad19a3f 1833
1834 len = strlen(name);
dae3fdf6 1835 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1836 return ret_errno(EINVAL);
0ad19a3f 1837
1838 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1839 if (!nlmsg)
a5f5cb41 1840 return ret_errno(ENOMEM);
0ad19a3f 1841
06f976ca 1842 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1843 if (!answer)
a5f5cb41 1844 return ret_errno(ENOMEM);
0ad19a3f 1845
1846 index = if_nametoindex(master);
1847 if (!index)
a5f5cb41 1848 return ret_errno(EINVAL);
0ad19a3f 1849
a5f5cb41 1850 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1851 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1852
1853 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
1854 if (!ifi)
1855 return ret_errno(ENOMEM);
1856
06f976ca 1857 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1858
79e68309 1859 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1860 if (!nest)
a5f5cb41 1861 return ret_errno(ENOMEM);
0ad19a3f 1862
1863 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
a5f5cb41 1864 return ret_errno(ENOMEM);
0ad19a3f 1865
e892973e
DL
1866 if (mode) {
1867 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1868 if (!nest2)
a5f5cb41 1869 return ret_errno(ENOMEM);
e892973e
DL
1870
1871 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
a5f5cb41 1872 return ret_errno(ENOMEM);
e892973e
DL
1873
1874 nla_end_nested(nlmsg, nest2);
1875 }
1876
0ad19a3f 1877 nla_end_nested(nlmsg, nest);
1878
1879 if (nla_put_u32(nlmsg, IFLA_LINK, index))
a5f5cb41 1880 return ret_errno(ENOMEM);
0ad19a3f 1881
1882 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 1883 return ret_errno(ENOMEM);
0ad19a3f 1884
a5f5cb41 1885 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1886}
1887
1888static int proc_sys_net_write(const char *path, const char *value)
1889{
ebc73a67
CB
1890 int fd;
1891 int err = 0;
0ad19a3f 1892
1893 fd = open(path, O_WRONLY);
1894 if (fd < 0)
1895 return -errno;
1896
f640cf46 1897 if (lxc_write_nointr(fd, value, strlen(value)) < 0)
0ad19a3f 1898 err = -errno;
1899
1900 close(fd);
1901 return err;
1902}
1903
6dfa9581 1904static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 1905{
1906 int ret;
1907 char path[PATH_MAX];
6509154d 1908
1909 if (family != AF_INET && family != AF_INET6)
6dfa9581 1910 return -EINVAL;
6509154d 1911
9c66dc4f 1912 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581 1913 family == AF_INET ? "ipv4" : "ipv6", ifname, "forwarding");
9c66dc4f 1914 if (ret < 0 || (size_t)ret >= sizeof(path))
6dfa9581 1915 return -E2BIG;
6509154d 1916
6dfa9581
TP
1917 return proc_sys_net_write(path, flag ? "1" : "0");
1918}
1919
/* Call ip_forwarding_set() for @name and @family with the flag set to 1. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	int ret = ip_forwarding_set(name, family, 1);

	return ret;
}
1924
/* Call ip_forwarding_set() for @name and @family with the flag set to 0. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	int ret = ip_forwarding_set(name, family, 0);

	return ret;
}
1929
0ad19a3f 1930static int neigh_proxy_set(const char *ifname, int family, int flag)
1931{
9ba8130c 1932 int ret;
419590da 1933 char path[PATH_MAX];
0ad19a3f 1934
1935 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 1936 return -EINVAL;
0ad19a3f 1937
9c66dc4f 1938 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
1939 family == AF_INET ? "ipv4" : "ipv6", ifname,
1940 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 1941 if (ret < 0 || (size_t)ret >= sizeof(path))
9ba8130c 1942 return -E2BIG;
0ad19a3f 1943
ebc73a67 1944 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 1945}
1946
6509154d 1947static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1948{
1949 int ret;
1950 char path[PATH_MAX];
1951 char buf[1] = "";
1952
1953 if (family != AF_INET && family != AF_INET6)
596a002c 1954 return ret_set_errno(-1, EINVAL);
6509154d 1955
9c66dc4f 1956 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6509154d 1957 family == AF_INET ? "ipv4" : "ipv6", ifname,
1958 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 1959 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 1960 return ret_set_errno(-1, E2BIG);
6509154d 1961
1962 return lxc_read_file_expect(path, buf, 1, "1");
1963}
1964
/* Call neigh_proxy_set() for @name and @family with the flag set to 1. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	int ret = neigh_proxy_set(name, family, 1);

	return ret;
}
1969
/* Call neigh_proxy_set() for @name and @family with the flag set to 0. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	int ret = neigh_proxy_set(name, family, 0);

	return ret;
}
1974
/*
 * Map one hexadecimal digit to its value.
 *
 * Plain range comparisons are used instead of isdigit(): passing a negative
 * char (any byte >= 0x80 where char is signed) to a <ctype.h> function is
 * undefined behaviour.
 *
 * Returns 0-15 for a hex digit and -1 for any other character.
 */
static int mac_hex_digit(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are written to
 * sa_data. Each octet is one or two hex digits; octets may be separated by
 * ':'. Parsing stops at the end of the string or after ETH_ALEN octets,
 * whichever comes first, so shorter inputs fill only the leading octets.
 *
 * Returns 0 on success and -EINVAL when a character that is neither a hex
 * digit nor an allowed separator is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		unsigned int val;
		int hi, lo;
		char c;

		/* The first digit of an octet is mandatory. */
		hi = mac_hex_digit(*macaddr++);
		if (hi < 0)
			return -EINVAL;
		val = (unsigned int)hi;

		/* The second digit is optional: "a:" means octet 0x0a. */
		c = *macaddr;
		lo = mac_hex_digit(c);
		if (lo >= 0)
			val = (val << 4) | (unsigned int)lo;
		else if (c != ':' && c != '\0')
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2019
ebc73a67
CB
2020static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2021 void *acast, int prefix)
0ad19a3f 2022{
a5f5cb41
CB
2023 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2024 struct nl_handler nlh;
2025 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2026 int addrlen, err;
06f976ca 2027 struct ifaddrmsg *ifa;
0ad19a3f 2028
ebc73a67
CB
2029 addrlen = family == AF_INET ? sizeof(struct in_addr)
2030 : sizeof(struct in6_addr);
4bf1968d 2031
a5f5cb41 2032 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2033 if (err)
2034 return err;
0ad19a3f 2035
0ad19a3f 2036 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2037 if (!nlmsg)
a5f5cb41 2038 return ret_errno(ENOMEM);
0ad19a3f 2039
06f976ca 2040 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2041 if (!answer)
a5f5cb41 2042 return ret_errno(ENOMEM);
0ad19a3f 2043
a5f5cb41 2044 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2045 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2046
2047 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2048 if (!ifa)
a5f5cb41
CB
2049 return ret_errno(ENOMEM);
2050
06f976ca
SZ
2051 ifa->ifa_prefixlen = prefix;
2052 ifa->ifa_index = ifindex;
2053 ifa->ifa_family = family;
2054 ifa->ifa_scope = 0;
acf47e1b 2055
4bf1968d 2056 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
a5f5cb41 2057 return ret_errno(EINVAL);
0ad19a3f 2058
4bf1968d 2059 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
a5f5cb41 2060 return ret_errno(EINVAL);
0ad19a3f 2061
d8948a52 2062 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
a5f5cb41 2063 return ret_errno(EINVAL);
1f1b18e7 2064
ebc73a67 2065 /* TODO: multicast, anycast with ipv6 */
79881dc6
DL
2066 if (family == AF_INET6 &&
2067 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2068 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
a5f5cb41 2069 return ret_errno(EPROTONOSUPPORT);
0ad19a3f 2070
a5f5cb41 2071 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2072}
2073
/*
 * IPv6 front end for ip_addr_add(): @mcast is passed in the broadcast slot
 * and @acast in the anycast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
2080
/* IPv4 front end for ip_addr_add(); no anycast address is passed (NULL). */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
2086
/*
 * Extract an address from one RTM_NEWADDR message.
 *
 * Walks the rtattr's of @msg and copies the payload of an IFA_LOCAL or
 * IFA_ADDRESS attribute into *res. If *res is NULL a buffer of the address
 * size is malloc()ed first, so res should be an in_addr** or in6_addr**.
 * An IFA_LOCAL attribute ends the walk; an IFA_ADDRESS seen earlier is
 * overwritten by a later attribute.
 *
 * Returns 0 (with *res untouched) when the message is for a different address
 * family or carries no matching attribute, 0 after a successful copy, and -1
 * on a payload of unexpected length or allocation failure.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *attr;
	int addrlen;

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Visit every rtattr contained in this message. */
	for (attr = IFA_RTA(ifa); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * length, but verify it before copying anyway.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* Reuse the buffer if an earlier attribute allocated it. */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is the preferred answer, nothing can beat it. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2140
19a26f82
MK
2141static int ip_addr_get(int family, int ifindex, void **res)
2142{
a5f5cb41
CB
2143 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2144 struct nl_handler nlh;
2145 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2146 int answer_len, err;
06f976ca 2147 struct ifaddrmsg *ifa;
19a26f82 2148 struct nlmsghdr *msg;
ebc73a67 2149 int readmore = 0, recv_len = 0;
19a26f82 2150
a5f5cb41 2151 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
19a26f82
MK
2152 if (err)
2153 return err;
2154
19a26f82
MK
2155 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2156 if (!nlmsg)
a5f5cb41 2157 return ret_errno(ENOMEM);
19a26f82 2158
06f976ca 2159 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82 2160 if (!answer)
a5f5cb41 2161 return ret_errno(ENOMEM);
19a26f82 2162
ebc73a67
CB
2163 /* Save the answer buffer length, since it will be overwritten on the
2164 * first receive (and we might need to receive more than once).
2165 */
06f976ca
SZ
2166 answer_len = answer->nlmsghdr->nlmsg_len;
2167
ebc73a67 2168 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2169 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2170
06f976ca 2171 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b 2172 if (!ifa)
a5f5cb41
CB
2173 return ret_errno(ENOMEM);
2174
06f976ca 2175 ifa->ifa_family = family;
19a26f82 2176
ebc73a67
CB
2177 /* Send the request for addresses, which returns all addresses on all
2178 * interfaces.
2179 */
a5f5cb41 2180 err = netlink_send(nlh_ptr, nlmsg);
19a26f82 2181 if (err < 0)
a5f5cb41 2182 return ret_set_errno(err, errno);
19a26f82 2183
6ce39620
CB
2184#pragma GCC diagnostic push
2185#pragma GCC diagnostic ignored "-Wcast-align"
2186
19a26f82
MK
2187 do {
2188 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2189 * overwritten by a previous receive.
2190 */
06f976ca 2191 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2192
ebc73a67 2193 /* Get the (next) batch of reply messages. */
a5f5cb41 2194 err = netlink_rcv(nlh_ptr, answer);
19a26f82 2195 if (err < 0)
a5f5cb41 2196 return ret_set_errno(err, errno);
19a26f82
MK
2197
2198 recv_len = err;
2199 err = 0;
2200
ebc73a67 2201 /* Satisfy the typing for the netlink macros. */
06f976ca 2202 msg = answer->nlmsghdr;
19a26f82
MK
2203
2204 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2205 /* Stop reading if we see an error message. */
19a26f82 2206 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
2207 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2208 return ret_set_errno(errmsg->error, errno);
19a26f82
MK
2209 }
2210
ebc73a67 2211 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2212 if (msg->nlmsg_type == NLMSG_DONE) {
2213 readmore = 0;
2214 break;
2215 }
2216
a5f5cb41
CB
2217 if (msg->nlmsg_type != RTM_NEWADDR)
2218 return ret_errno(EINVAL);
19a26f82 2219
06f976ca
SZ
2220 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2221 if (ifa->ifa_index == ifindex) {
a5f5cb41
CB
2222 if (ifa_get_local_ip(family, msg, res) < 0)
2223 return ret_errno(EINVAL);
51e7a874 2224
ebc73a67 2225 /* Found a result, stop searching. */
19a26f82 2226 if (*res)
a5f5cb41 2227 return 0;
19a26f82
MK
2228 }
2229
ebc73a67
CB
2230 /* Keep reading more data from the socket if the last
2231 * message had the NLF_F_MULTI flag set.
2232 */
19a26f82
MK
2233 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2234
ebc73a67 2235 /* Look at the next message received in this buffer. */
19a26f82
MK
2236 msg = NLMSG_NEXT(msg, recv_len);
2237 }
2238 } while (readmore);
2239
6ce39620
CB
2240#pragma GCC diagnostic pop
2241
19a26f82 2242 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2243 * error.
2244 */
a5f5cb41 2245 return -1;
19a26f82
MK
2246}
2247
/* IPv6 front end for ip_addr_get(); the found address is stored in *res. */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2252
/* IPv4 front end for ip_addr_get(); the found address is stored in *res. */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2257
f8fee0e2
MK
2258static int ip_gateway_add(int family, int ifindex, void *gw)
2259{
a5f5cb41 2260 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2261 struct nl_handler nlh;
a5f5cb41
CB
2262 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2263 int addrlen, err;
06f976ca 2264 struct rtmsg *rt;
f8fee0e2 2265
ebc73a67
CB
2266 addrlen = family == AF_INET ? sizeof(struct in_addr)
2267 : sizeof(struct in6_addr);
f8fee0e2 2268
a5f5cb41 2269 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
f8fee0e2
MK
2270 if (err)
2271 return err;
2272
f8fee0e2
MK
2273 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2274 if (!nlmsg)
a5f5cb41 2275 return ret_errno(ENOMEM);
f8fee0e2 2276
06f976ca 2277 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2 2278 if (!answer)
a5f5cb41 2279 return ret_errno(ENOMEM);
f8fee0e2 2280
a5f5cb41 2281 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2282 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2283
2284 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b 2285 if (!rt)
a5f5cb41
CB
2286 return ret_errno(ENOMEM);
2287
06f976ca
SZ
2288 rt->rtm_family = family;
2289 rt->rtm_table = RT_TABLE_MAIN;
2290 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2291 rt->rtm_protocol = RTPROT_BOOT;
2292 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2293 /* "default" destination */
06f976ca 2294 rt->rtm_dst_len = 0;
f8fee0e2 2295
a2f9a670 2296 /* If gateway address not supplied, then a device route will be created instead */
a5f5cb41
CB
2297 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2298 return ret_errno(ENOMEM);
f8fee0e2
MK
2299
2300 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2301 * addresses for the gateway.
2302 */
f8fee0e2 2303 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
a5f5cb41 2304 return ret_errno(EINVAL);
f8fee0e2 2305
a5f5cb41 2306 return netlink_transaction(nlh_ptr, nlmsg, answer);
f8fee0e2
MK
2307}
2308
/* IPv4 front end for ip_gateway_add(). */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2313
/* IPv6 front end for ip_gateway_add(). */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
/*
 * Guess whether @bridge is an openvswitch bridge.
 *
 * The test is indirect: stat() is run on /sys/class/net/<bridge>/bridge and
 * true is returned only when it fails with ENOENT. A name that does not fit
 * the path buffer, a successful stat(), or any other stat() error yields
 * false. Note that a name for which no interface exists at all therefore
 * also yields true.
 */
bool is_ovs_bridge(const char *bridge)
{
	char sysfs_dir[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(sysfs_dir, sizeof(sysfs_dir), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(sysfs_dir))
		return false;

	if (stat(sysfs_dir, &st) >= 0)
		return false;

	return errno == ENOENT;
}
2335
/* Argument bundle handed to the ovs-vsctl exec callbacks via run_command(). */
struct ovs_veth_args {
	/* Name of the bridge passed to ovs-vsctl. */
	const char *bridge;

	/* Name of the port (network interface) to add or delete. */
	const char *nic;
};
2340
cb0dc11b
CB
2341/* Called from a background thread - when nic goes away, remove it from the
2342 * bridge.
c43cbc04 2343 */
581c75e7 2344static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2345{
581c75e7 2346 struct ovs_veth_args *args = data;
cb0dc11b 2347
9c66dc4f 2348 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2349 return -1;
c43cbc04
SH
2350}
2351
581c75e7 2352int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2353{
c43cbc04 2354 int ret;
419590da 2355 char cmd_output[PATH_MAX];
581c75e7 2356 struct ovs_veth_args args;
6ad22d06 2357
581c75e7
CB
2358 args.bridge = bridge;
2359 args.nic = nic;
2360 ret = run_command(cmd_output, sizeof(cmd_output),
2361 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2362 if (ret < 0)
2363 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2364
581c75e7
CB
2365 return 0;
2366}
ebc73a67 2367
581c75e7
CB
2368static int lxc_ovs_attach_bridge_exec(void *data)
2369{
2370 struct ovs_veth_args *args = data;
ebc73a67 2371
9c66dc4f 2372 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2373 return -1;
2374}
ebc73a67 2375
581c75e7
CB
2376static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2377{
2378 int ret;
419590da 2379 char cmd_output[PATH_MAX];
581c75e7 2380 struct ovs_veth_args args;
ebc73a67 2381
581c75e7
CB
2382 args.bridge = bridge;
2383 args.nic = nic;
2384 ret = run_command(cmd_output, sizeof(cmd_output),
2385 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2386 if (ret < 0)
2387 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2388
581c75e7 2389 return 0;
0d204771 2390}
0d204771 2391
581c75e7 2392int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2393{
ebc73a67 2394 int err, fd, index;
9de31d5a 2395 size_t retlen;
0ad19a3f 2396 struct ifreq ifr;
2397
dae3fdf6 2398 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2399 return -EINVAL;
0ad19a3f 2400
2401 index = if_nametoindex(ifname);
2402 if (!index)
3cfc0f3a 2403 return -EINVAL;
0ad19a3f 2404
0d204771 2405 if (is_ovs_bridge(bridge))
581c75e7 2406 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2407
ad9429e5 2408 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2409 if (fd < 0)
3cfc0f3a 2410 return -errno;
0ad19a3f 2411
9de31d5a 2412 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2413 if (retlen >= IFNAMSIZ) {
2414 close(fd);
9de31d5a 2415 return -E2BIG;
42cc4083 2416 }
9de31d5a 2417
ebc73a67 2418 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2419 ifr.ifr_ifindex = index;
7d163508 2420 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2421 close(fd);
3cfc0f3a
MN
2422 if (err)
2423 err = -errno;
0ad19a3f 2424
2425 return err;
2426}
72d0e1cb 2427
ebc73a67 2428static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2429 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2430 [LXC_NET_VETH] = "veth",
2431 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2432 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2433 [LXC_NET_PHYS] = "phys",
b343592b
BP
2434 [LXC_NET_VLAN] = "vlan",
2435 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2436};
2437
2438const char *lxc_net_type_to_str(int type)
2439{
2440 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2441 return NULL;
ebc73a67 2442
72d0e1cb
SG
2443 return lxc_network_types[type];
2444}
8befa924 2445
3646ffd9 2446static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2447
3646ffd9 2448char *lxc_ifname_alnum_case_sensitive(char *template)
a0265685 2449{
2d7bf744 2450 int ret;
b1e44ed1 2451 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2452 char name[IFNAMSIZ];
2453 bool exists = false;
2454 size_t i = 0;
280cc35f 2455#ifdef HAVE_RAND_R
2456 unsigned int seed;
2457
2458 seed = randseed(false);
2459#else
2460
2461 (void)randseed(true);
2462#endif
a0265685 2463
535e8859
CB
2464 if (strlen(template) >= IFNAMSIZ)
2465 return NULL;
2466
ebc73a67 2467 /* Get all the network interfaces. */
b1e44ed1 2468 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
9c66dc4f
CB
2469 if (ret < 0)
2470 return log_error_errno(NULL, errno, "Failed to get network interfaces");
a0265685 2471
ebc73a67 2472 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2473 for (;;) {
966e9f1f 2474 name[0] = '\0';
94b1cade 2475 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2476
966e9f1f 2477 exists = false;
280cc35f 2478
a0265685
SG
2479 for (i = 0; i < strlen(name); i++) {
2480 if (name[i] == 'X') {
2481#ifdef HAVE_RAND_R
8523344a 2482 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2483#else
8523344a 2484 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2485#endif
2486 }
2487 }
2488
2489 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2490 if (!strcmp(ifa->ifa_name, name)) {
2491 exists = true;
a0265685
SG
2492 break;
2493 }
2494 }
2495
966e9f1f 2496 if (!exists)
a0265685 2497 break;
a0265685
SG
2498 }
2499
b1e44ed1 2500 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2501 (void)strlcpy(template, name, strlen(template) + 1);
2502
2503 return template;
a0265685
SG
2504}
2505
/*
 * Rewrite the first byte of the hardware address of interface @veth1 to 0xfe.
 *
 * The current address is fetched with SIOCGIFHWADDR, its first byte is
 * replaced and the result is written back with SIOCSIFHWADDR.
 *
 * Returns 0 on success, -E2BIG if @veth1 does not fit into an interface name
 * and otherwise the negated errno of the failing socket()/ioctl() call.
 */
int setup_private_host_hw_addr(char *veth1)
{
	int err, saved_errno, sockfd;
	struct ifreq ifr;

	/* Validate the name first so no socket is opened for bad input. */
	err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (err < 0 || (size_t)err >= IFNAMSIZ)
		return -E2BIG;

	sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sockfd < 0)
		return -errno;

	err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (err < 0) {
		/* close() may overwrite errno, so capture it first. */
		saved_errno = errno;
		close(sockfd);
		return -saved_errno;
	}

	ifr.ifr_hwaddr.sa_data[0] = 0xfe;
	err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	saved_errno = errno;
	close(sockfd);
	if (err < 0)
		return -saved_errno;

	return 0;
}
811ef482
CB
2535
2536int lxc_find_gateway_addresses(struct lxc_handler *handler)
2537{
2538 struct lxc_list *network = &handler->conf->network;
2539 struct lxc_list *iterator;
2540 struct lxc_netdev *netdev;
2541 int link_index;
2542
2543 lxc_list_for_each(iterator, network) {
2544 netdev = iterator->elem;
2545
2546 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2547 continue;
2548
9c66dc4f
CB
2549 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2550 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2551
f2711167 2552 if (is_empty_string(netdev->link)) {
9c66dc4f 2553 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2554 }
2555
2556 link_index = if_nametoindex(netdev->link);
2557 if (!link_index)
2558 return -EINVAL;
2559
2560 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2561 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2562 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2563 }
2564
2565 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2566 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2567 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2568 }
2569 }
2570
2571 return 0;
2572}
2573
2574#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2575static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2576 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2577{
2578 int ret;
2579 pid_t child;
2580 int bytes, pipefd[2];
2581 char *token, *saveptr = NULL;
095ead80 2582 char netdev_link[IFNAMSIZ];
419590da 2583 char buffer[PATH_MAX] = {0};
94b1cade 2584 size_t retlen;
811ef482 2585
9c66dc4f
CB
2586 if (netdev->type != LXC_NET_VETH)
2587 return log_error_errno(-1, errno, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
2588
2589 ret = pipe(pipefd);
9c66dc4f
CB
2590 if (ret < 0)
2591 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2592
2593 child = fork();
2594 if (child < 0) {
811ef482
CB
2595 close(pipefd[0]);
2596 close(pipefd[1]);
9c66dc4f 2597 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2598 }
2599
2600 if (child == 0) {
8335fd40 2601 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2602
2603 close(pipefd[0]);
2604
2605 ret = dup2(pipefd[1], STDOUT_FILENO);
2606 if (ret >= 0)
2607 ret = dup2(pipefd[1], STDERR_FILENO);
2608 close(pipefd[1]);
2609 if (ret < 0) {
2610 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2611 _exit(EXIT_FAILURE);
811ef482
CB
2612 }
2613
f2711167 2614 if (!is_empty_string(netdev->link))
9de31d5a 2615 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2616 else
9de31d5a
CB
2617 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2618 if (retlen >= IFNAMSIZ) {
2619 SYSERROR("Invalid network device name");
2620 _exit(EXIT_FAILURE);
2621 }
811ef482 2622
8335fd40
CB
2623 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2624 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2625 _exit(EXIT_FAILURE);
8335fd40 2626 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2627
2628 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2629 lxcname, pidstr, netdev_link,
3473ca76
CB
2630 !is_empty_string(netdev->name) ? netdev->name : "(null)");
2631 if (!is_empty_string(netdev->name))
811ef482
CB
2632 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2633 lxcpath, lxcname, pidstr, "veth", netdev_link,
2634 netdev->name, (char *)NULL);
2635 else
2636 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2637 lxcpath, lxcname, pidstr, "veth", netdev_link,
2638 (char *)NULL);
2639 SYSERROR("Failed to execute lxc-user-nic");
78070056 2640 _exit(EXIT_FAILURE);
811ef482
CB
2641 }
2642
2643 /* close the write-end of the pipe */
2644 close(pipefd[1]);
2645
9c66dc4f 2646 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2647 if (bytes < 0) {
74c6e2b0 2648 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2649 close(pipefd[0]);
6b9f82a9
CB
2650 } else {
2651 buffer[bytes - 1] = '\0';
811ef482 2652 }
811ef482
CB
2653
2654 ret = wait_for_pid(child);
2655 close(pipefd[0]);
9c66dc4f
CB
2656 if (ret != 0 || bytes < 0)
2657 return log_error(-1, "lxc-user-nic failed to configure requested network: %s", buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2658 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2659
2660 /* netdev->name */
2661 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2662 if (!token)
2663 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2664
e389f2af
CB
2665 /*
2666 * lxc-user-nic will take care of proper network device naming. So
2667 * netdev->name and netdev->created_name need to be identical to not
2668 * trigger another rename later on.
2669 */
2670 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2671 if (retlen < IFNAMSIZ)
2672 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
9c66dc4f
CB
2673 if (retlen >= IFNAMSIZ)
2674 return log_error_errno(-1, E2BIG, "Container side veth device name returned by lxc-user-nic is too long");
811ef482 2675
74c6e2b0 2676 /* netdev->ifindex */
811ef482 2677 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2678 if (!token)
2679 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2680
74c6e2b0 2681 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f
CB
2682 if (ret < 0)
2683 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2684
74c6e2b0 2685 /* netdev->priv.veth_attr.veth1 */
811ef482 2686 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2687 if (!token)
2688 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2689
94b1cade 2690 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f
CB
2691 if (retlen >= IFNAMSIZ)
2692 return log_error_errno(-1, E2BIG, "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
2693
2694 /* netdev->priv.veth_attr.ifindex */
2695 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2696 if (!token)
2697 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
2698
2699 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f
CB
2700 if (ret < 0)
2701 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2702
4d781681 2703 if (netdev->upscript) {
2704 char *argv[] = {
2705 "veth",
2706 netdev->link,
2707 netdev->priv.veth_attr.veth1,
2708 NULL,
2709 };
2710
e389f2af
CB
2711 ret = run_script_argv(lxcname, hooks_version, "net",
2712 netdev->upscript, "up", argv);
4d781681 2713 if (ret < 0)
2714 return -1;
2715 }
2716
811ef482
CB
2717 return 0;
2718}
2719
f0ecc19d 2720static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2721 struct lxc_netdev *netdev,
2722 const char *netns_path)
811ef482
CB
2723{
2724 int bytes, ret;
2725 pid_t child;
2726 int pipefd[2];
419590da 2727 char buffer[PATH_MAX] = {0};
811ef482 2728
9c66dc4f
CB
2729 if (netdev->type != LXC_NET_VETH)
2730 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
2731
2732 ret = pipe(pipefd);
9c66dc4f
CB
2733 if (ret < 0)
2734 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2735
2736 child = fork();
2737 if (child < 0) {
811ef482
CB
2738 close(pipefd[0]);
2739 close(pipefd[1]);
9c66dc4f 2740 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2741 }
2742
2743 if (child == 0) {
8843fde4 2744 char *hostveth;
811ef482
CB
2745
2746 close(pipefd[0]);
2747
2748 ret = dup2(pipefd[1], STDOUT_FILENO);
2749 if (ret >= 0)
2750 ret = dup2(pipefd[1], STDERR_FILENO);
2751 close(pipefd[1]);
2752 if (ret < 0) {
2753 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2754 _exit(EXIT_FAILURE);
811ef482
CB
2755 }
2756
f2711167 2757 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
2758 hostveth = netdev->priv.veth_attr.pair;
2759 else
2760 hostveth = netdev->priv.veth_attr.veth1;
f2711167 2761 if (is_empty_string(hostveth)) {
74c6e2b0 2762 SYSERROR("Host side veth device name is missing");
a30b9023 2763 _exit(EXIT_FAILURE);
74c6e2b0
CB
2764 }
2765
f2711167
CB
2766 if (is_empty_string(netdev->link)) {
2767 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 2768 _exit(EXIT_FAILURE);
74c6e2b0 2769 }
811ef482 2770
811ef482 2771 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2772 lxcname, netns_path, netdev->link, hostveth);
811ef482 2773 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2774 lxcname, netns_path, "veth", netdev->link, hostveth,
2775 (char *)NULL);
811ef482 2776 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2777 _exit(EXIT_FAILURE);
811ef482
CB
2778 }
2779
2780 close(pipefd[1]);
2781
9c66dc4f 2782 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
2783 if (bytes < 0) {
2784 SYSERROR("Failed to read from pipe file descriptor.");
2785 close(pipefd[0]);
6b9f82a9
CB
2786 } else {
2787 buffer[bytes - 1] = '\0';
811ef482 2788 }
811ef482 2789
6b9f82a9 2790 ret = wait_for_pid(child);
9c66dc4f
CB
2791 close_prot_errno_disarm(pipefd[0]);
2792 if (ret != 0 || bytes < 0)
2793 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
2794 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 2795
811ef482
CB
2796 return 0;
2797}
2798
1bd8d726
CB
2799bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2800{
2801 int ret;
2802 struct lxc_list *iterator;
2803 struct lxc_list *network = &handler->conf->network;
2804 /* strlen("/proc/") = 6
2805 * +
8335fd40 2806 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2807 * +
2808 * strlen("/fd/") = 4
2809 * +
8335fd40 2810 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2811 * +
2812 * \0
2813 */
8335fd40 2814 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2815
2816 *netns_path = '\0';
2817
9c66dc4f
CB
2818 if (handler->nsfd[LXC_NS_NET] < 0)
2819 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726
CB
2820
2821 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 2822 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
2823 if (ret < 0 || ret >= sizeof(netns_path))
2824 return false;
2825
2826 lxc_list_for_each(iterator, network) {
2827 char *hostveth = NULL;
2828 struct lxc_netdev *netdev = iterator->elem;
2829
2830 /* We can only delete devices whose ifindex we have. If we don't
2831 * have the index it means that we didn't create it.
2832 */
2833 if (!netdev->ifindex)
2834 continue;
2835
2836 if (netdev->type == LXC_NET_PHYS) {
2837 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2838 netdev->link);
2839 if (ret < 0)
9c66dc4f 2840 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
2841 netdev->ifindex, netdev->link);
2842 else
9c66dc4f 2843 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 2844 netdev->ifindex, netdev->link);
b3259dc6
TP
2845
2846 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 2847 goto clear_ifindices;
1bd8d726
CB
2848 }
2849
2850 ret = netdev_deconf[netdev->type](handler, netdev);
2851 if (ret < 0)
2852 WARN("Failed to deconfigure network device");
2853
2854 if (netdev->type != LXC_NET_VETH)
66a7c406 2855 goto clear_ifindices;
1bd8d726 2856
f2711167 2857 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 2858 goto clear_ifindices;
1bd8d726 2859
f2711167 2860 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
2861 hostveth = netdev->priv.veth_attr.pair;
2862 else
2863 hostveth = netdev->priv.veth_attr.veth1;
f2711167 2864 if (is_empty_string(hostveth))
66a7c406 2865 goto clear_ifindices;
8843fde4 2866
1bd8d726
CB
2867 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2868 handler->name, netdev,
2869 netns_path);
2870 if (ret < 0) {
9c66dc4f 2871 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 2872 goto clear_ifindices;
1bd8d726 2873 }
9c66dc4f 2874 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
2875
2876clear_ifindices:
0858c829
CB
2877 /*
2878 * We need to clear any ifindices we recorded so liblxc won't
2879 * have cached stale data which would cause it to fail on
2880 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
2881 */
2882 netdev->ifindex = 0;
2883 if (netdev->type == LXC_NET_PHYS) {
2884 netdev->priv.phys_attr.ifindex = 0;
2885 } else if (netdev->type == LXC_NET_VETH) {
2886 netdev->priv.veth_attr.veth1[0] = '\0';
2887 netdev->priv.veth_attr.ifindex = 0;
2888 }
1bd8d726
CB
2889 }
2890
bb84beda 2891 return true;
1bd8d726
CB
2892}
2893
6509154d 2894static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2895 struct lxc_list *cur, *next;
2896 struct lxc_inetdev *inet4dev;
2897 struct lxc_inet6dev *inet6dev;
2898 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 2899 int err = 0;
5fe147e9
TP
2900 unsigned int lo_ifindex = 0, link_ifindex = 0;
2901
2902 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
2903 if (link_ifindex == 0)
2904 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 2905
6509154d 2906
2907 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2908 if (!lxc_list_empty(&netdev->ipv4)) {
2909 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
2910 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
2911 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 2912 }
2913
2914 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2915 if (!lxc_list_empty(&netdev->ipv6)) {
2916 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
2917 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
2918 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 2919
2920 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
2921 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
2922 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 2923 }
2924
b670016a 2925 /* Perform IPVLAN specific checks. */
2926 if (netdev->type == LXC_NET_IPVLAN) {
2927 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
2928 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
2929 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 2930
2931 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 2932 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
2933 if (lo_ifindex == 0)
2934 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 2935 }
2936
6509154d 2937 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2938 inet4dev = cur->elem;
2939 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 2940 return ret_set_errno(-1, -errno);
6509154d 2941
5fe147e9 2942 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 2943 return ret_set_errno(-1, EINVAL);
b670016a 2944
2945 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2946 if (netdev->type == LXC_NET_IPVLAN) {
2947 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
2948 if (err < 0)
2949 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 2950 }
6509154d 2951 }
2952
2953 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2954 inet6dev = cur->elem;
2955 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 2956 return ret_set_errno(-1, -errno);
6509154d 2957
5fe147e9 2958 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 2959 return ret_set_errno(-1, EINVAL);
b670016a 2960
2961 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2962 if (netdev->type == LXC_NET_IPVLAN) {
2963 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
2964 if (err < 0)
2965 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 2966 }
6509154d 2967 }
2968
2969 return 0;
2970}
2971
9c66dc4f
CB
2972static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
2973{
b670016a 2974 char bufinet4[INET_ADDRSTRLEN];
9c66dc4f
CB
2975 bool had_error = false;
2976 unsigned int link_ifindex = 0;
b670016a 2977
9c66dc4f
CB
2978 if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4)))
2979 return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
b670016a 2980
2981 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
2982 if (lo_ifindex > 0) {
2983 if (lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
9c66dc4f 2984 had_error = true;
b670016a 2985 ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
2986 }
2987 }
2988
2989 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
f2711167 2990 if (!is_empty_string(link)) {
5fe147e9 2991 link_ifindex = if_nametoindex(link);
9c66dc4f
CB
2992 if (link_ifindex == 0)
2993 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
5fe147e9
TP
2994
2995 if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, link_ifindex, ip) < 0)
9c66dc4f 2996 had_error = true;
b670016a 2997 }
2998
9c66dc4f 2999 if (had_error)
596a002c 3000 return ret_set_errno(-1, EINVAL);
b670016a 3001
3002 return 0;
3003}
3004
9c66dc4f
CB
3005static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
3006{
b670016a 3007 char bufinet6[INET6_ADDRSTRLEN];
9c66dc4f
CB
3008 bool had_error = false;
3009 unsigned int link_ifindex = 0;
b670016a 3010
9c66dc4f
CB
3011 if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6)))
3012 return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
b670016a 3013
3014 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3015 if (lo_ifindex > 0) {
3016 if (lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
9c66dc4f 3017 had_error = true;
b670016a 3018 ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
3019 }
3020 }
3021
3022 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
f2711167 3023 if (!is_empty_string(link)) {
5fe147e9
TP
3024 link_ifindex = if_nametoindex(link);
3025 if (link_ifindex == 0) {
3026 ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
3027 return ret_set_errno(-1, EINVAL);
3028 }
3029
3030 if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, link_ifindex, ip) < 0)
9c66dc4f 3031 had_error = true;
b670016a 3032 }
3033
9c66dc4f 3034 if (had_error)
596a002c 3035 return ret_set_errno(-1, EINVAL);
b670016a 3036
3037 return 0;
3038}
3039
6509154d 3040static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3041 unsigned int lo_ifindex = 0;
3042 unsigned int errCount = 0;
6509154d 3043 struct lxc_list *cur, *next;
3044 struct lxc_inetdev *inet4dev;
3045 struct lxc_inet6dev *inet6dev;
6509154d 3046
b670016a 3047 /* Perform IPVLAN specific checks. */
3048 if (netdev->type == LXC_NET_IPVLAN) {
3049 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3050 lo_ifindex = if_nametoindex(loop_device);
b670016a 3051 if (lo_ifindex == 0) {
3052 errCount++;
3ebffb98 3053 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3054 }
b670016a 3055 }
6509154d 3056
b670016a 3057 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3058 inet4dev = cur->elem;
3059 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3060 errCount++;
6509154d 3061 }
3062
3063 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3064 inet6dev = cur->elem;
b670016a 3065 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3066 errCount++;
6509154d 3067 }
3068
b670016a 3069 if (errCount > 0)
596a002c 3070 return ret_set_errno(-1, EINVAL);
6509154d 3071
3072 return 0;
3073}
3074
e389f2af 3075static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3076{
811ef482
CB
3077 struct lxc_list *iterator;
3078 struct lxc_list *network = &handler->conf->network;
3079
811ef482
CB
3080 lxc_list_for_each(iterator, network) {
3081 struct lxc_netdev *netdev = iterator->elem;
3082
9c66dc4f
CB
3083 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3084 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3085
6509154d 3086 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3087 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3088 if (lxc_setup_l2proxy(netdev))
3089 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3090 }
3091
9c66dc4f
CB
3092 if (netdev_conf[netdev->type](handler, netdev))
3093 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3094 }
3095
3096 return 0;
3097}
3098
e389f2af 3099int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3100{
e389f2af
CB
3101 pid_t pid = handler->pid;
3102 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3103 struct lxc_list *iterator;
3104
e0010464 3105 if (am_guest_unpriv())
74c6e2b0 3106 return 0;
811ef482
CB
3107
3108 lxc_list_for_each(iterator, network) {
3dd78294 3109 __do_free char *physname = NULL;
e389f2af 3110 int ret;
811ef482
CB
3111 struct lxc_netdev *netdev = iterator->elem;
3112
811ef482
CB
3113 if (!netdev->ifindex)
3114 continue;
3115
3dd78294
CB
3116 if (netdev->type == LXC_NET_PHYS)
3117 physname = is_wlan(netdev->link);
3118
3119 if (physname)
3c9fdb32 3120 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->name);
3dd78294 3121 else
8bf64b77 3122 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->name);
9c66dc4f
CB
3123 if (ret)
3124 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3125 netdev->created_name,
3126 netdev->ifindex, pid);
811ef482 3127
24190194
CB
3128 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3129 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3130 }
3131
3132 return 0;
3133}
3134
3c09b97c
CB
3135static int network_requires_advanced_setup(int type)
3136{
3137 if (type == LXC_NET_EMPTY)
3138 return false;
3139
3140 if (type == LXC_NET_NONE)
3141 return false;
3142
3143 return true;
3144}
3145
e389f2af 3146static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3147{
e389f2af
CB
3148 int hooks_version = handler->conf->hooks_version;
3149 const char *lxcname = handler->name;
3150 const char *lxcpath = handler->lxcpath;
3151 struct lxc_list *network = &handler->conf->network;
3152 pid_t pid = handler->pid;
74c6e2b0
CB
3153 struct lxc_list *iterator;
3154
74c6e2b0
CB
3155 lxc_list_for_each(iterator, network) {
3156 struct lxc_netdev *netdev = iterator->elem;
3157
3c09b97c 3158 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3159 continue;
3160
9c66dc4f
CB
3161 if (netdev->type != LXC_NET_VETH)
3162 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3163 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3164
3165 if (netdev->mtu)
3166 INFO("mtu ignored due to insufficient privilege");
3167
e389f2af
CB
3168 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3169 pid, hooks_version))
74c6e2b0
CB
3170 return -1;
3171 }
3172
3173 return 0;
3174}
3175
1bd8d726 3176bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3177{
3178 int ret;
3179 struct lxc_list *iterator;
3180 struct lxc_list *network = &handler->conf->network;
1bd8d726 3181
811ef482
CB
3182 lxc_list_for_each(iterator, network) {
3183 char *hostveth = NULL;
3184 struct lxc_netdev *netdev = iterator->elem;
3185
3186 /* We can only delete devices whose ifindex we have. If we don't
3187 * have the index it means that we didn't create it.
3188 */
3189 if (!netdev->ifindex)
3190 continue;
3191
0104c121
CB
3192 /*
3193 * If the network device has been moved back from the
3194 * containers network namespace, update the ifindex.
3195 */
3196 netdev->ifindex = if_nametoindex(netdev->name);
3197
6509154d 3198 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3199 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3200 if (lxc_delete_l2proxy(netdev))
3201 WARN("Failed to delete all l2proxy config");
3202 /* Don't return, let the network be cleaned up as normal. */
3203 }
3204
811ef482
CB
3205 if (netdev->type == LXC_NET_PHYS) {
3206 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3207 if (ret < 0)
3208 WARN("Failed to rename interface with index %d "
b809f232
CB
3209 "from \"%s\" to its initial name \"%s\"",
3210 netdev->ifindex, netdev->name, netdev->link);
0b154989 3211 else {
29589196
CB
3212 TRACE("Renamed interface with index %d from "
3213 "\"%s\" to its initial name \"%s\"",
3214 netdev->ifindex, netdev->name,
3215 netdev->link);
0b154989
TP
3216
3217 /* Restore original MTU */
3218 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3219 if (ret < 0) {
3220 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3221 netdev->link, netdev->priv.phys_attr.mtu);
3222 } else {
3223 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3224 netdev->link, netdev->priv.phys_attr.mtu);
3225 }
3226 }
b3259dc6
TP
3227
3228 ret = netdev_deconf[netdev->type](handler, netdev);
66a7c406 3229 goto clear_ifindices;
811ef482
CB
3230 }
3231
3232 ret = netdev_deconf[netdev->type](handler, netdev);
3233 if (ret < 0)
3234 WARN("Failed to deconfigure network device");
3235
811ef482 3236 if (netdev->type != LXC_NET_VETH)
66a7c406 3237 goto clear_ifindices;
811ef482 3238
811ef482
CB
3239 /* Explicitly delete host veth device to prevent lingering
3240 * devices. We had issues in LXD around this.
3241 */
f2711167 3242 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3243 hostveth = netdev->priv.veth_attr.pair;
3244 else
3245 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3246 if (is_empty_string(hostveth))
66a7c406 3247 goto clear_ifindices;
811ef482 3248
1ee56cff
CB
3249 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3250 ret = lxc_netdev_delete_by_name(hostveth);
3251 if (ret < 0)
3252 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3253
1ee56cff
CB
3254 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3255 } else if (!is_empty_string(netdev->link)) {
3256 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3257 if (ret < 0)
3258 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3259
1ee56cff
CB
3260 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3261 }
811ef482 3262
66a7c406 3263clear_ifindices:
ad2ddfcd 3264 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3265 * have cached stale data which would cause it to fail on reboot
3266 * we're we don't re-read the on-disk config file.
3267 */
3268 netdev->ifindex = 0;
3269 if (netdev->type == LXC_NET_PHYS) {
3270 netdev->priv.phys_attr.ifindex = 0;
3271 } else if (netdev->type == LXC_NET_VETH) {
3272 netdev->priv.veth_attr.veth1[0] = '\0';
3273 netdev->priv.veth_attr.ifindex = 0;
3274 }
811ef482
CB
3275 }
3276
bb84beda 3277 return true;
811ef482
CB
3278}
3279
3280int lxc_requests_empty_network(struct lxc_handler *handler)
3281{
3282 struct lxc_list *network = &handler->conf->network;
3283 struct lxc_list *iterator;
3284 bool found_none = false, found_nic = false;
3285
3286 if (lxc_list_empty(network))
3287 return 0;
3288
9c66dc4f 3289 lxc_list_for_each (iterator, network) {
811ef482
CB
3290 struct lxc_netdev *netdev = iterator->elem;
3291
3292 if (netdev->type == LXC_NET_NONE)
3293 found_none = true;
3294 else
3295 found_nic = true;
3296 }
9c66dc4f 3297
811ef482
CB
3298 if (found_none && !found_nic)
3299 return 1;
9c66dc4f 3300
811ef482
CB
3301 return 0;
3302}
3303
3304/* try to move physical nics to the init netns */
b809f232 3305int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3306{
9c66dc4f
CB
3307 __do_close int oldfd = -EBADF;
3308 int netnsfd = handler->nsfd[LXC_NS_NET];
3309 struct lxc_conf *conf = handler->conf;
811ef482 3310 int ret;
811ef482 3311 char ifname[IFNAMSIZ];
b809f232 3312 struct lxc_list *iterator;
811ef482 3313
b809f232
CB
3314 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3315 * the parent network namespace. We won't have this capability if we are
3316 * unprivileged.
3317 */
d0fbc7ba 3318 if (!handler->am_root)
b809f232 3319 return 0;
811ef482 3320
b809f232 3321 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3322
0037ab49 3323 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3324 if (oldfd < 0)
3325 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3326
b809f232 3327 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3328 if (ret < 0)
3329 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3330
b809f232
CB
3331 lxc_list_for_each(iterator, &conf->network) {
3332 struct lxc_netdev *netdev = iterator->elem;
811ef482 3333
b809f232
CB
3334 if (netdev->type != LXC_NET_PHYS)
3335 continue;
3336
3337 /* Retrieve the name of the interface in the container's network
3338 * namespace.
3339 */
3340 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3341 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3342 continue;
3343 }
b809f232 3344
0037ab49 3345 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3346 if (ret < 0)
9c66dc4f 3347 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3348 else
9c66dc4f 3349 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3350 }
811ef482 3351
b809f232 3352 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3353 if (ret < 0)
3354 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3355
3356 return 0;
811ef482
CB
3357}
3358
3359static int setup_hw_addr(char *hwaddr, const char *ifname)
3360{
9c66dc4f 3361 __do_close int fd = -EBADF;
811ef482
CB
3362 struct sockaddr sockaddr;
3363 struct ifreq ifr;
9c66dc4f 3364 int ret;
811ef482
CB
3365
3366 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3367 if (ret)
3368 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3369
3370 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3371 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3372 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3373
ad9429e5 3374 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3375 if (fd < 0)
3376 return -1;
3377
3378 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3379 if (ret)
6d1400b5 3380 SYSERROR("Failed to perform ioctl");
3381
9c66dc4f 3382 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3383
3384 return ret;
3385}
3386
3387static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3388{
3389 struct lxc_list *iterator;
3390 int err;
3391
3392 lxc_list_for_each(iterator, ip) {
3393 struct lxc_inetdev *inetdev = iterator->elem;
3394
3395 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3396 &inetdev->bcast, inetdev->prefix);
9c66dc4f
CB
3397 if (err)
3398 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3399 }
3400
3401 return 0;
3402}
3403
3404static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3405{
3406 struct lxc_list *iterator;
3407 int err;
3408
3409 lxc_list_for_each(iterator, ip) {
3410 struct lxc_inet6dev *inet6dev = iterator->elem;
3411
3412 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3413 &inet6dev->mcast, &inet6dev->acast,
3414 inet6dev->prefix);
9c66dc4f
CB
3415 if (err)
3416 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3417 }
3418
3419 return 0;
3420}
3421
8bf64b77 3422static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3423{
811ef482 3424 int err;
009d6127 3425 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3426
3427 /* empty network namespace */
8bf64b77
CB
3428 if (!netdev->ifindex && netdev->flags & IFF_UP) {
3429 err = lxc_netdev_up("lo");
9c66dc4f
CB
3430 if (err)
3431 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3432 }
3433
811ef482 3434 /* set a mac address */
9c66dc4f
CB
3435 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3436 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3437
3438 /* setup ipv4 addresses on the interface */
9c66dc4f
CB
3439 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex))
3440 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3441
3442 /* setup ipv6 addresses on the interface */
9c66dc4f
CB
3443 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex))
3444 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3445
3446 /* set the network device up */
3447 if (netdev->flags & IFF_UP) {
8bf64b77 3448 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3449 if (err)
3450 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3451
3452 /* the network is up, make the loopback up too */
3453 err = lxc_netdev_up("lo");
9c66dc4f
CB
3454 if (err)
3455 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3456 }
3457
811ef482 3458 /* setup ipv4 gateway on the interface */
a2f9a670 3459 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3460 if (!(netdev->flags & IFF_UP))
3461 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3462
9c66dc4f
CB
3463 if (lxc_list_empty(&netdev->ipv4))
3464 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3465
a2f9a670 3466 /* Setup device route if ipv4_gateway_dev is enabled */
3467 if (netdev->ipv4_gateway_dev) {
3468 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3469 if (err < 0)
3470 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3471 } else {
009d6127 3472 /* Check the gateway address is valid */
3473 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3474 return ret_set_errno(-1, errno);
009d6127 3475
3476 /* Try adding a default route to the gateway address */
811ef482 3477 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3478 if (err < 0) {
3479 /* If adding the default route fails, this could be because the
3480 * gateway address is in a different subnet to the container's address.
3481 * To work around this, we try adding a static device route to the
3482 * gateway address first, and then try again.
3483 */
a2f9a670 3484 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3485 if (err < 0)
3486 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3487
a2f9a670 3488 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3489 if (err < 0)
3490 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3491 }
3492 }
3493 }
3494
3495 /* setup ipv6 gateway on the interface */
a2f9a670 3496 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3497 if (!(netdev->flags & IFF_UP))
3498 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3499
9c66dc4f
CB
3500 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
3501 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3502
a2f9a670 3503 /* Setup device route if ipv6_gateway_dev is enabled */
3504 if (netdev->ipv6_gateway_dev) {
3505 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3506 if (err < 0)
3507 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3508 } else {
009d6127 3509 /* Check the gateway address is valid */
3510 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3511 return ret_set_errno(-1, errno);
009d6127 3512
3513 /* Try adding a default route to the gateway address */
811ef482 3514 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3515 if (err < 0) {
3516 /* If adding the default route fails, this could be because the
3517 * gateway address is in a different subnet to the container's address.
3518 * To work around this, we try adding a static device route to the
3519 * gateway address first, and then try again.
3520 */
a2f9a670 3521 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3522 if (err < 0)
3523 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3524
a2f9a670 3525 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3526 if (err < 0)
3527 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3528 }
3529 }
3530 }
3531
8bf64b77 3532 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3533
3534 return 0;
3535}
3536
3537int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3538 struct lxc_list *network)
3539{
3540 struct lxc_list *iterator;
811ef482 3541
8bf64b77 3542 lxc_list_for_each (iterator, network) {
e389f2af 3543 struct lxc_netdev *netdev = iterator->elem;
8bf64b77 3544 int ret;
811ef482 3545
8bf64b77
CB
3546 ret = netdev_ns_conf[netdev->type](netdev);
3547 if (!ret)
3548 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
3549 if (ret)
3550 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482
CB
3551 }
3552
3553 if (!lxc_list_empty(network))
e389f2af 3554 INFO("Network has been setup");
811ef482
CB
3555
3556 return 0;
3557}
7ab1ba02 3558
3c09b97c 3559int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
3560{
3561 struct lxc_list *iterator;
3562 struct lxc_list *network = &handler->conf->network;
3563 int data_sock = handler->data_sock[0];
3564
7ab1ba02
CB
3565 lxc_list_for_each(iterator, network) {
3566 int ret;
3567 struct lxc_netdev *netdev = iterator->elem;
3568
3c09b97c 3569 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3570 continue;
3571
7fbb15ec 3572 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3573 if (ret < 0)
7ab1ba02 3574 return -1;
e389f2af
CB
3575
3576 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3577 if (ret < 0)
3578 return -1;
3579
3580 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3581 }
3582
3583 return 0;
3584}
3585
3c09b97c 3586int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
3587{
3588 struct lxc_list *iterator;
3589 struct lxc_list *network = &handler->conf->network;
3590 int data_sock = handler->data_sock[1];
3591
7ab1ba02
CB
3592 lxc_list_for_each(iterator, network) {
3593 int ret;
3594 struct lxc_netdev *netdev = iterator->elem;
3595
3c09b97c 3596 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3597 continue;
3598
e3233f26 3599 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3600 if (ret < 0)
7ab1ba02 3601 return -1;
e389f2af
CB
3602
3603 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3604 if (ret < 0)
3605 return -1;
54256301 3606
e389f2af 3607 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3608 }
3609
3610 return 0;
3611}
a1ae535a
CB
3612
3613int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3614{
3615 struct lxc_list *iterator, *network;
3616 int data_sock = handler->data_sock[0];
3617
3618 if (!handler->am_root)
3619 return 0;
3620
3621 network = &handler->conf->network;
3622 lxc_list_for_each(iterator, network) {
3623 int ret;
3624 struct lxc_netdev *netdev = iterator->elem;
3625
3626 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3627 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3628 if (ret < 0)
7729f8e5 3629 return -1;
a1ae535a
CB
3630
3631 /* Send network device ifindex in the child's namespace to
3632 * parent.
3633 */
7fbb15ec 3634 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3635 if (ret < 0)
7729f8e5 3636 return -1;
a1ae535a
CB
3637 }
3638
e389f2af
CB
3639 if (!lxc_list_empty(network))
3640 TRACE("Sent network device names and ifindices to parent");
3641
a1ae535a 3642 return 0;
a1ae535a
CB
3643}
3644
3645int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3646{
3647 struct lxc_list *iterator, *network;
3648 int data_sock = handler->data_sock[1];
3649
3650 if (!handler->am_root)
3651 return 0;
3652
3653 network = &handler->conf->network;
3654 lxc_list_for_each(iterator, network) {
3655 int ret;
3656 struct lxc_netdev *netdev = iterator->elem;
3657
3658 /* Receive network device name in the child's namespace to
3659 * parent.
3660 */
e3233f26 3661 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3662 if (ret < 0)
7729f8e5 3663 return -1;
a1ae535a
CB
3664
3665 /* Receive network device ifindex in the child's namespace to
3666 * parent.
3667 */
e3233f26 3668 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3669 if (ret < 0)
7729f8e5 3670 return -1;
a1ae535a
CB
3671 }
3672
3673 return 0;
a1ae535a 3674}
bb84beda
CB
3675
3676void lxc_delete_network(struct lxc_handler *handler)
3677{
3678 bool bret;
3679
3680 if (handler->am_root)
3681 bret = lxc_delete_network_priv(handler);
3682 else
3683 bret = lxc_delete_network_unpriv(handler);
3684 if (!bret)
3685 DEBUG("Failed to delete network devices");
3686 else
3687 DEBUG("Deleted network devices");
3688}
1cd95214 3689
1cd95214
CB
3690int lxc_netns_set_nsid(int fd)
3691{
41a3300d 3692 int ret;
0ce60f0d
CB
3693 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3694 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3695 NLMSG_ALIGN(1024)];
1cd95214 3696 struct nl_handler nlh;
a5f5cb41 3697 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
0ce60f0d
CB
3698 struct nlmsghdr *hdr;
3699 struct rtgenmsg *msg;
9d036caa
CB
3700 const __s32 ns_id = -1;
3701 const __u32 netns_fd = fd;
1cd95214 3702
a5f5cb41 3703 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
1cd95214 3704 if (ret < 0)
41a3300d 3705 return -1;
1cd95214 3706
0ce60f0d 3707 memset(buf, 0, sizeof(buf));
6ce39620
CB
3708
3709#pragma GCC diagnostic push
3710#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
3711 hdr = (struct nlmsghdr *)buf;
3712 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3713#pragma GCC diagnostic pop
1cd95214 3714
0ce60f0d
CB
3715 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3716 hdr->nlmsg_type = RTM_NEWNSID;
3717 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3718 hdr->nlmsg_pid = 0;
3719 hdr->nlmsg_seq = RTM_NEWNSID;
3720 msg->rtgen_family = AF_UNSPEC;
1cd95214 3721
9d036caa
CB
3722 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3723 if (ret < 0)
a5f5cb41 3724 return ret_errno(ENOMEM);
9d036caa
CB
3725
3726 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3727 if (ret < 0)
a5f5cb41 3728 return ret_errno(ENOMEM);
1cd95214 3729
a5f5cb41 3730 return __netlink_transaction(nlh_ptr, hdr, hdr);
1cd95214 3731}
938980ba
CB
3732
/*
 * Index the attributes of a netlink message by type.
 *
 * @tb:  output table with room for max + 1 entries; it is cleared first
 * @max: highest attribute type that is recorded
 * @rta: first attribute
 * @len: number of bytes available starting at @rta
 *
 * For each type only the first occurrence is stored; attributes whose type
 * exceeds @max are ignored. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short attr_type = rta->rta_type;

		if (attr_type > max || tb[attr_type])
			continue;

		tb[attr_type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
3752
/* Read the payload of @rta as a signed 32-bit integer. */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 *payload = (__s32 *)RTA_DATA(rta);

	return *payload;
}
3757
/*
 * Pointer to the first attribute that follows a struct rtgenmsg located at
 * @r. Only defined here if no header provided it already.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3762
3763int lxc_netns_get_nsid(int fd)
3764{
a5f5cb41
CB
3765 struct nl_handler nlh;
3766 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
938980ba
CB
3767 int ret;
3768 ssize_t len;
3769 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
3770 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3771 NLMSG_ALIGN(1024)];
938980ba 3772 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
938980ba
CB
3773 struct nlmsghdr *hdr;
3774 struct rtgenmsg *msg;
938980ba
CB
3775 __u32 netns_fd = fd;
3776
a5f5cb41 3777 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
938980ba
CB
3778 if (ret < 0)
3779 return -1;
3780
3781 memset(buf, 0, sizeof(buf));
6ce39620
CB
3782
3783#pragma GCC diagnostic push
3784#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3785 hdr = (struct nlmsghdr *)buf;
3786 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3787#pragma GCC diagnostic pop
938980ba
CB
3788
3789 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3790 hdr->nlmsg_type = RTM_GETNSID;
3791 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3792 hdr->nlmsg_pid = 0;
3793 hdr->nlmsg_seq = RTM_GETNSID;
3794 msg->rtgen_family = AF_UNSPEC;
3795
9d036caa 3796 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
a5f5cb41
CB
3797 if (ret < 0)
3798 return ret_errno(ENOMEM);
938980ba 3799
a5f5cb41 3800 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
938980ba
CB
3801 if (ret < 0)
3802 return -1;
3803
3804 msg = NLMSG_DATA(hdr);
3805 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
3806 if (len < 0)
a5f5cb41 3807 return ret_errno(EINVAL);
938980ba 3808
6ce39620
CB
3809#pragma GCC diagnostic push
3810#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3811 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
3812 if (tb[__LXC_NETNSA_NSID])
3813 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 3814#pragma GCC diagnostic pop
938980ba
CB
3815
3816 return -1;
3817}
e389f2af
CB
3818
3819int lxc_create_network(struct lxc_handler *handler)
3820{
3821 int ret;
3822
e389f2af
CB
3823 if (handler->am_root) {
3824 ret = lxc_create_network_priv(handler);
3825 if (ret)
3826 return -1;
3827
3828 return lxc_network_move_created_netdev_priv(handler);
3829 }
3830
3831 return lxc_create_network_unpriv(handler);
3832}