]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
Merge pull request #3921 from brauner/2021-08-03.fixes
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
f40988c7 39#include "process_utils.h"
fdd6be55 40#include "string_utils.h"
59524108 41#include "syscall_wrappers.h"
0d204771 42#include "utils.h"
0ad19a3f 43
9de31d5a
CB
44#ifndef HAVE_STRLCPY
45#include "include/strlcpy.h"
46#endif
47
ac2cecc4 48lxc_log_define(network, lxc);
f8fee0e2 49
bad2f913
CB
/*
 * Function pointer types for per-network-type hooks. All of them return an
 * int status. As the names suggest (not verifiable from this part of the
 * file), they are meant for host-side setup, container-side setup and
 * host-side teardown respectively.
 */
/* Takes the handler and the network device being configured. */
typedef int (*netdev_configure_server_cb)(struct lxc_handler *, struct lxc_netdev *);
/* Takes only the network device being configured. */
typedef int (*netdev_configure_container_cb)(struct lxc_netdev *);
/* Takes the handler and the network device being shut down. */
typedef int (*netdev_shutdown_server_cb)(struct lxc_handler *, struct lxc_netdev *);
53
3392d379
CB
54const struct lxc_network_info {
55 const char *name;
fdd6be55
CB
56 const char template[IFNAMSIZ];
57 size_t template_len;
3392d379 58} lxc_network_info[LXC_NET_MAXCONFTYPE + 1] = {
fdd6be55
CB
59 [LXC_NET_EMPTY] = { "empty", "emptXXXXXX", STRLITERALLEN("emptXXXXXX") },
60 [LXC_NET_VETH] = { "veth", "vethXXXXXX", STRLITERALLEN("vethXXXXXX") },
61 [LXC_NET_MACVLAN] = { "macvlan", "macvXXXXXX", STRLITERALLEN("macvXXXXXX") },
62 [LXC_NET_IPVLAN] = { "ipvlan", "ipvlXXXXXX", STRLITERALLEN("ipvlXXXXXX") },
63 [LXC_NET_PHYS] = { "phys", "physXXXXXX", STRLITERALLEN("physXXXXXX") },
64 [LXC_NET_VLAN] = { "vlan", "vlanXXXXXX", STRLITERALLEN("vlanXXXXXX") },
65 [LXC_NET_NONE] = { "none", "noneXXXXXX", STRLITERALLEN("noneXXXXXX") },
66 [LXC_NET_MAXCONFTYPE] = { NULL, "", 0 }
3392d379
CB
67};
68
69const char *lxc_net_type_to_str(int type)
70{
71 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
72 return NULL;
73
74 return lxc_network_info[type].name;
75}
76
/* Alphabet (digits, lower case, upper case) used to fill in the 'X' placeholders of interface name templates. */
static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
78
79char *lxc_ifname_alnum_case_sensitive(char *template)
80{
81 char name[IFNAMSIZ];
82 size_t i = 0;
83#ifdef HAVE_RAND_R
84 unsigned int seed;
85
86 seed = randseed(false);
87#else
88
89 (void)randseed(true);
90#endif
91
92 if (strlen(template) >= IFNAMSIZ)
93 return NULL;
94
95 /* Generate random names until we find one that doesn't exist. */
96 for (;;) {
97 name[0] = '\0';
98 (void)strlcpy(name, template, IFNAMSIZ);
99
100 for (i = 0; i < strlen(name); i++) {
101 if (name[i] == 'X') {
102#ifdef HAVE_RAND_R
103 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
104#else
105 name[i] = padchar[rand() % strlen(padchar)];
106#endif
107 }
108 }
109
110 if (if_nametoindex(name) == 0)
111 break;
112 }
113
114 (void)strlcpy(template, name, strlen(template) + 1);
115
116 return template;
117}
/* Interface name "lo"; judging by the identifier this is the loopback device name. */
static const char loop_device[] = "lo";
811ef482 119
b670016a 120static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 121{
d16bda44 122 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
8f82874c 123 struct nl_handler nlh;
d16bda44
CB
124 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
125 int addrlen, err;
8f82874c 126 struct rtmsg *rt;
8f82874c 127
128 addrlen = family == AF_INET ? sizeof(struct in_addr)
129 : sizeof(struct in6_addr);
130
d16bda44 131 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 132 if (err)
133 return err;
134
8f82874c 135 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
136 if (!nlmsg)
d16bda44 137 return -ENOMEM;
8f82874c 138
139 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
140 if (!answer)
a5f5cb41 141 return -ENOMEM;
8f82874c 142
143 nlmsg->nlmsghdr->nlmsg_flags =
144 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 145 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 146
147 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
148 if (!rt)
a5f5cb41 149 return -ENOMEM;
d16bda44 150
8f82874c 151 rt->rtm_family = family;
152 rt->rtm_table = RT_TABLE_MAIN;
153 rt->rtm_scope = RT_SCOPE_LINK;
154 rt->rtm_protocol = RTPROT_BOOT;
155 rt->rtm_type = RTN_UNICAST;
156 rt->rtm_dst_len = netmask;
157
8f82874c 158 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
159 return -EINVAL;
160
8f82874c 161 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
162 return -EINVAL;
163
164 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 165}
166
/* Add an IPv4 route for @dest/@netmask on interface @ifindex (RTM_NEWROUTE). */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
171
/* Add an IPv6 route for @dest/@netmask on interface @ifindex (RTM_NEWROUTE). */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
176
/* Remove the IPv4 route for @dest/@netmask on interface @ifindex (RTM_DELROUTE). */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
181
/* Remove the IPv6 route for @dest/@netmask on interface @ifindex (RTM_DELROUTE). */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
186
d4a7da46 187static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
188{
189 struct lxc_list *iterator;
190 int err;
191
192 lxc_list_for_each(iterator, ip) {
193 struct lxc_inetdev *inetdev = iterator->elem;
194
195 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
196 if (err)
197 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 198 }
199
200 return 0;
201}
202
203static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
204{
205 struct lxc_list *iterator;
206 int err;
207
208 lxc_list_for_each(iterator, ip) {
209 struct lxc_inet6dev *inet6dev = iterator->elem;
210
211 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
212 if (err)
213 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 214 }
215
216 return 0;
217}
218
6dfa9581
TP
219static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
220{
221 struct lxc_list *iterator;
222 int err;
223
224 lxc_list_for_each(iterator, ip) {
225 struct lxc_inetdev *inetdev = iterator->elem;
226
227 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
228
229 if (err)
9c66dc4f 230 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
231 }
232
233 return 0;
234}
235
236static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
237{
238 struct lxc_list *iterator;
239 int err;
240
241 lxc_list_for_each(iterator, ip) {
242 struct lxc_inet6dev *inet6dev = iterator->elem;
243
244 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
245 if (err)
9c66dc4f 246 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
247 }
248
249 return 0;
250}
251
5fe147e9 252static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 253{
d16bda44 254 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
5fe147e9 255 struct nl_handler nlh;
d16bda44
CB
256 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
257 int addrlen, err;
5fe147e9 258 struct ndmsg *rt;
6dfa9581 259
5fe147e9 260 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 261
d16bda44 262 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
263 if (err)
264 return err;
6dfa9581 265
5fe147e9
TP
266 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
267 if (!nlmsg)
d16bda44 268 return -ENOMEM;
6dfa9581 269
5fe147e9
TP
270 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
271 if (!answer)
d16bda44 272 return -ENOMEM;
6dfa9581 273
5fe147e9
TP
274 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
275 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 276
5fe147e9
TP
277 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
278 if (!rt)
d16bda44
CB
279 return -ENOMEM;
280
5fe147e9
TP
281 rt->ndm_ifindex = ifindex;
282 rt->ndm_flags = NTF_PROXY;
283 rt->ndm_type = NDA_DST;
284 rt->ndm_family = family;
6dfa9581 285
5fe147e9 286 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 287 return -EINVAL;
6dfa9581 288
d16bda44 289 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
290}
291
292static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
293{
294 int ret;
295 char path[PATH_MAX];
296 char buf[1] = "";
297
298 if (family != AF_INET && family != AF_INET6)
596a002c 299 return ret_set_errno(-1, EINVAL);
6dfa9581 300
387c1c70
CB
301 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
302 family == AF_INET ? "ipv4" : "ipv6", ifname,
303 "forwarding");
304 if (ret < 0)
596a002c 305 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
306
307 return lxc_read_file_expect(path, buf, 1, "1");
308}
309
622f05c7
TP
/*
 * Payload sent as the IFLA_BRIDGE_VLAN_INFO attribute by lxc_bridge_vlan().
 */
struct bridge_vlan_info {
	__u16 flags;	/* BRIDGE_VLAN_INFO_* bits; 0 is used for tagged membership */
	__u16 vid;	/* VLAN ID the entry refers to */
};
314
315static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
316{
317 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
318 struct nl_handler nlh;
319 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
320 int err;
321 struct ifinfomsg *ifi;
322 struct rtattr *nest;
323 unsigned short bridge_flags = 0;
324 struct bridge_vlan_info vlan_info;
325
326 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
327 if (err)
328 return err;
329
330 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
331 if (!nlmsg)
332 return ret_errno(ENOMEM);
333
334 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
335 if (!answer)
336 return ret_errno(ENOMEM);
337
338 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
339 nlmsg->nlmsghdr->nlmsg_type = operation;
340
341 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
342 if (!ifi)
343 return ret_errno(ENOMEM);
344 ifi->ifi_family = AF_BRIDGE;
345 ifi->ifi_index = ifindex;
346
347 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
348 if (!nest)
349 return ret_errno(ENOMEM);
350
351 bridge_flags |= BRIDGE_FLAGS_MASTER;
352 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
353 return ret_errno(ENOMEM);
354
355 vlan_info.vid = vlan_id;
356 vlan_info.flags = 0;
357 if (!tagged)
358 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
359
360 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
361 return ret_errno(ENOMEM);
362
363 nla_end_nested(nlmsg, nest);
364
365 return netlink_transaction(nlh_ptr, nlmsg, answer);
366}
367
/* Add VLAN @vlan_id to bridge port @ifindex via RTM_SETLINK; @tagged is passed through to lxc_bridge_vlan(). */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	const unsigned short operation = RTM_SETLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, tagged);
}
372
/* Remove VLAN @vlan_id from bridge port @ifindex via RTM_DELLINK (sent with tagged == false). */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	const unsigned short operation = RTM_DELLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, false);
}
377
378static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
379{
380 struct lxc_list *iterator;
381 int err;
382
383 lxc_list_for_each(iterator, vlan_ids) {
384 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
385
386 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
387 if (err)
388 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
389 }
390
391 return 0;
392}
393
33320936
TP
394static int validate_veth(struct lxc_netdev *netdev)
395{
396 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
397 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
398 if (netdev->priv.veth_attr.vlan_id_set)
399 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
400
401 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
402 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
403 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
404 }
405
406 if (netdev->priv.veth_attr.vlan_id_set) {
407 struct lxc_list *it;
408 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
409 unsigned short i = PTR_TO_USHORT(it->elem);
410 if (i == netdev->priv.veth_attr.vlan_id)
411 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
412 }
413 }
414
415 return 0;
416}
417
418static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
419{
420 int err, rc, veth1index;
421 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
422 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
423
424 /* Skip setup if no VLAN options are specified. */
425 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
426 return 0;
427
428 /* Check vlan filtering is enabled on parent bridge. */
387c1c70
CB
429 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
430 if (rc < 0)
33320936
TP
431 return -1;
432
433 rc = lxc_read_from_file(path, buf, sizeof(buf));
434 if (rc < 0)
435 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
436
437 buf[rc - 1] = '\0';
438
6ee997a7 439 if (!strequal(buf, "1"))
33320936
TP
440 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
441
442 /* Get veth1 ifindex for use with netlink. */
443 veth1index = if_nametoindex(veth1);
444 if (!veth1index)
445 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
446
447 /* Configure untagged VLAN settings on bridge port if specified. */
448 if (netdev->priv.veth_attr.vlan_id_set) {
449 unsigned short default_pvid;
450
451 /* Get the bridge's default VLAN PVID. */
387c1c70
CB
452 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
453 if (rc < 0)
33320936
TP
454 return -1;
455
456 rc = lxc_read_from_file(path, buf, sizeof(buf));
457 if (rc < 0)
458 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
459
460 buf[rc - 1] = '\0';
461 err = get_u16(&default_pvid, buf, 0);
462 if (err)
463 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
464
465 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
466 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
467 err = lxc_bridge_vlan_del(veth1index, default_pvid);
468 if (err)
469 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
470 }
471
472 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
473 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
474 if (err)
475 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
476 }
477 }
478
479 /* Configure tagged VLAN settings on bridge port if specified. */
480 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
481 if (err)
482 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
483
484 return 0;
485}
486
8f7c3358
TP
/*
 * Arguments handed to lxc_ovs_setup_bridge_vlan_exec(), which turns them
 * into an "ovs-vsctl set port" command line.
 */
struct ovs_veth_vlan_args {
	const char *nic;	/* Name of the port to configure. */
	const char *vlan_mode;	/* Port VLAN mode. */
	short vlan_id;		/* PVID VLAN ID. */
	char *trunks;		/* Comma delimited list of tagged VLAN IDs; released by free_ovs_veth_vlan_args(). */
};
493
d2f8b272
TP
/*
 * Release the trunks string of @args via free_disarm(). The other members
 * are not touched here.
 */
static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args *args)
{
	free_disarm(args->trunks);
}
8f7c3358
TP
498
499static int lxc_ovs_setup_bridge_vlan_exec(void *data)
500{
501 struct ovs_veth_vlan_args *args = data;
785e1540
TP
502 __do_free char *vlan_mode = NULL, *tag = NULL, *trunks = NULL;
503
504 if (!args->vlan_mode)
505 return ret_errno(EINVAL);
8f7c3358
TP
506
507 vlan_mode = must_concat(NULL, "vlan_mode=", args->vlan_mode, (char *)NULL);
508
785e1540 509 if (args->vlan_id > BRIDGE_VLAN_NONE) {
8f7c3358
TP
510 char buf[5];
511 int rc;
512
387c1c70
CB
513 rc = strnprintf(buf, sizeof(buf), "%u", args->vlan_id);
514 if (rc < 0)
72e8122b 515 return log_error_errno(-1, EINVAL, "Failed to parse ovs bridge vlan \"%d\"", args->vlan_id);
8f7c3358
TP
516
517 tag = must_concat(NULL, "tag=", buf, (char *)NULL);
518 }
519
785e1540 520 if (args->trunks)
8f7c3358
TP
521 trunks = must_concat(NULL, "trunks=", args->trunks, (char *)NULL);
522
523 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
785e1540 524 if (tag && trunks)
8f7c3358 525 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, trunks, (char *)NULL);
785e1540 526 else if (tag)
8f7c3358 527 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, (char *)NULL);
785e1540 528 else if (trunks)
8f7c3358
TP
529 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, trunks, (char *)NULL);
530 else
531 return -EINVAL;
532
533 return -errno;
534}
535
536static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
537{
538 int taggedLength = lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids);
539 struct ovs_veth_vlan_args args;
540 args.nic = veth1;
1ee07848
TP
541 args.vlan_mode = NULL;
542 args.vlan_id = BRIDGE_VLAN_NONE;
543 args.trunks = NULL;
8f7c3358
TP
544
545 /* Skip setup if no VLAN options are specified. */
546 if (!netdev->priv.veth_attr.vlan_id_set && taggedLength <= 0)
547 return 0;
548
549 /* Configure untagged VLAN settings on bridge port if specified. */
550 if (netdev->priv.veth_attr.vlan_id_set) {
551 if (netdev->priv.veth_attr.vlan_id == BRIDGE_VLAN_NONE && taggedLength <= 0)
552 return log_error_errno(-1, EINVAL, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
553
554 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
555 * Also set the vlan_mode=access, which will drop any tagged frames.
556 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
557 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
558 */
559 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
560 args.vlan_mode = "access";
561 args.vlan_id = netdev->priv.veth_attr.vlan_id;
562 }
563 }
564
565 if (taggedLength > 0) {
566 args.vlan_mode = "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
567
568 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
569 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
570 args.vlan_mode = "native-untagged";
571 }
572
573 struct lxc_list *iterator;
574 lxc_list_for_each(iterator, &netdev->priv.veth_attr.vlan_tagged_ids) {
575 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
576 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
577 int rc;
578
387c1c70
CB
579 rc = strnprintf(buf, sizeof(buf), "%u", vlan_id);
580 if (rc < 0) {
3fe6b5cf 581 free_ovs_veth_vlan_args(&args);
8f7c3358 582 return log_error_errno(-1, EINVAL, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id, veth1);
3fe6b5cf 583 }
8f7c3358 584
1ee07848
TP
585 if (args.trunks)
586 args.trunks = must_concat(NULL, args.trunks, buf, ",", (char *)NULL);
587 else
588 args.trunks = must_concat(NULL, buf, ",", (char *)NULL);
8f7c3358
TP
589 }
590 }
591
1ee07848 592 if (args.vlan_mode) {
8f7c3358
TP
593 int ret;
594 char cmd_output[PATH_MAX];
595
596 ret = run_command(cmd_output, sizeof(cmd_output), lxc_ovs_setup_bridge_vlan_exec, (void *)&args);
3fe6b5cf
TP
597 if (ret < 0) {
598 free_ovs_veth_vlan_args(&args);
8f7c3358 599 return log_error_errno(-1, ret, "Failed to setup openvswitch vlan on port \"%s\": %s", args.nic, cmd_output);
3fe6b5cf 600 }
8f7c3358
TP
601 }
602
3fe6b5cf 603 free_ovs_veth_vlan_args(&args);
8f7c3358
TP
604 return 0;
605}
606
bad2f913 607static int netdev_configure_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 608{
54256301 609 int err;
a00fbab5 610 unsigned int mtu = 1500;
811ef482
CB
611 char *veth1, *veth2;
612 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 613
33320936
TP
614 err = validate_veth(netdev);
615 if (err)
616 return err;
617
f2711167 618 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
619 veth1 = netdev->priv.veth_attr.pair;
620 if (handler->conf->reboot)
621 lxc_netdev_delete_by_name(veth1);
622 } else {
387c1c70
CB
623 err = strnprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
624 if (err < 0)
811ef482
CB
625 return -1;
626
3646ffd9 627 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
811ef482
CB
628 if (!veth1)
629 return -1;
630
631 /* store away for deconf */
632 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
633 }
634
387c1c70
CB
635 err = strnprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
636 if (err < 0)
d34212ad
CB
637 return -1;
638
3646ffd9 639 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
811ef482 640 if (!veth2)
54256301
CB
641 return -1;
642
a00fbab5
TP
643 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
644 if (netdev->mtu) {
645 if (lxc_safe_uint(netdev->mtu, &mtu))
646 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 647 } else if (!is_empty_string(netdev->link)) {
54256301 648 int ifindex_mtu;
811ef482 649
54256301
CB
650 ifindex_mtu = if_nametoindex(netdev->link);
651 if (ifindex_mtu) {
652 mtu = netdev_get_mtu(ifindex_mtu);
653 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
654 }
655 }
656
657 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
9c66dc4f
CB
658 if (err)
659 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 660
fdd6be55
CB
661 /*
662 * Veth devices are directly created in the container's network
663 * namespace so the device doesn't need to be moved into the
664 * container's network namespace. Make this explicit by setting the
665 * devices ifindex to 0.
666 */
667 netdev->ifindex = 0;
668
24190194
CB
669 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
670
fdd6be55
CB
671 /*
672 * Since the device won't be moved transient name generation won't
673 * happen. But the transient name is needed for the container to
674 * retrieve the ifindex for the device.
675 */
676 strlcpy(netdev->transient_name, veth2, IFNAMSIZ);
677
678 /*
679 * Changing the high byte of the mac address to 0xfe, the bridge interface
811ef482 680 * will always keep the host's mac address and not take the mac address
fdd6be55
CB
681 * of a container.
682 */
811ef482
CB
683 err = setup_private_host_hw_addr(veth1);
684 if (err) {
6d1400b5 685 errno = -err;
686 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
687 goto out_delete;
688 }
689
8da62485
CB
690 /* Retrieve ifindex of the host's veth device. */
691 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
692 if (!netdev->priv.veth_attr.ifindex) {
693 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
694 goto out_delete;
695 }
696
811ef482
CB
697 if (mtu) {
698 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 699 if (err) {
6d1400b5 700 errno = -err;
54256301 701 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
702 goto out_delete;
703 }
704 }
705
f2711167 706 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
26da53c3
TP
707 if (!lxc_nic_exists(netdev->link)) {
708 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
709 goto out_delete;
710 }
711
811ef482
CB
712 err = lxc_bridge_attach(netdev->link, veth1);
713 if (err) {
6d1400b5 714 errno = -err;
26da53c3 715 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
811ef482
CB
716 goto out_delete;
717 }
718 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
33320936 719
38790036
TP
720 if (is_ovs_bridge(netdev->link)) {
721 err = setup_veth_ovs_bridge_vlan(veth1, netdev);
722 if (err) {
723 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1);
724 lxc_ovs_delete_port(netdev->link, veth1);
725 goto out_delete;
726 }
727 } else {
33320936
TP
728 err = setup_veth_native_bridge_vlan(veth1, netdev);
729 if (err) {
730 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
731 goto out_delete;
732 }
733 }
811ef482
CB
734 }
735
736 err = lxc_netdev_up(veth1);
737 if (err) {
6d1400b5 738 errno = -err;
739 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
740 goto out_delete;
741 }
742
d4a7da46 743 /* setup ipv4 routes on the host interface */
744 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
745 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
746 goto out_delete;
747 }
748
749 /* setup ipv6 routes on the host interface */
750 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
751 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
752 goto out_delete;
753 }
754
6dfa9581 755 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
756 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
757 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
758 appear to be added successfully but then do not appear in the proxy list. The length of time
759 slept doesn't appear to be important, only that the process sleeps for a short period of time.
760 */
761 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
762
6dfa9581
TP
763 if (netdev->ipv4_gateway) {
764 char bufinet4[INET_ADDRSTRLEN];
765 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 766 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
767 goto out_delete;
768 }
769
770 err = lxc_ip_forwarding_on(veth1, AF_INET);
771 if (err) {
9c66dc4f 772 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
773 goto out_delete;
774 }
775
5fe147e9 776 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 777 if (err) {
9c66dc4f 778 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
779 goto out_delete;
780 }
781 }
782
783 if (netdev->ipv6_gateway) {
784 char bufinet6[INET6_ADDRSTRLEN];
785
786 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 787 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
788 goto out_delete;
789 }
790
791 /* Check for sysctl net.ipv6.conf.all.forwarding=1
792 Kernel requires this to route any packets for IPv6.
793 */
794 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
795 if (err) {
9c66dc4f 796 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
797 goto out_delete;
798 }
799
800 err = lxc_ip_forwarding_on(veth1, AF_INET6);
801 if (err) {
9c66dc4f 802 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
803 goto out_delete;
804 }
805
806 err = lxc_neigh_proxy_on(veth1, AF_INET6);
807 if (err) {
9c66dc4f 808 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
809 goto out_delete;
810 }
811
5fe147e9 812 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 813 if (err) {
9c66dc4f 814 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
815 goto out_delete;
816 }
817 }
818
819 /* setup ipv4 address routes on the host interface */
820 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
821 if (err) {
9c66dc4f 822 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
823 goto out_delete;
824 }
825
826 /* setup ipv6 address routes on the host interface */
827 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
828 if (err) {
9c66dc4f 829 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
830 goto out_delete;
831 }
832 }
833
811ef482 834 if (netdev->upscript) {
14a7b0f9
CB
835 char *argv[] = {
836 "veth",
837 netdev->link,
990b9ac3 838 veth1,
14a7b0f9
CB
839 NULL,
840 };
841
842 err = run_script_argv(handler->name,
843 handler->conf->hooks_version, "net",
844 netdev->upscript, "up", argv);
845 if (err < 0)
811ef482
CB
846 goto out_delete;
847 }
848
54256301 849 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
850
851 return 0;
852
853out_delete:
54256301 854 lxc_netdev_delete_by_name(veth1);
811ef482
CB
855 return -1;
856}
857
bad2f913 858static int netdev_configure_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 859{
8021de25 860 char peer[IFNAMSIZ];
811ef482
CB
861 int err;
862
f2711167 863 if (is_empty_string(netdev->link)) {
811ef482
CB
864 ERROR("No link for macvlan network device specified");
865 return -1;
866 }
867
387c1c70
CB
868 err = strnprintf(peer, sizeof(peer), "mcXXXXXX");
869 if (err < 0)
811ef482
CB
870 return -1;
871
3646ffd9 872 if (!lxc_ifname_alnum_case_sensitive(peer))
811ef482
CB
873 return -1;
874
875 err = lxc_macvlan_create(netdev->link, peer,
876 netdev->priv.macvlan_attr.mode);
877 if (err) {
6d1400b5 878 errno = -err;
879 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
880 peer, netdev->link);
966e9f1f 881 goto on_error;
811ef482
CB
882 }
883
9f8cf6e1
CB
884 strlcpy(netdev->created_name, peer, IFNAMSIZ);
885
811ef482
CB
886 netdev->ifindex = if_nametoindex(peer);
887 if (!netdev->ifindex) {
888 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 889 goto on_error;
811ef482
CB
890 }
891
3bef7b7b 892 if (netdev->mtu) {
54256301
CB
893 unsigned int mtu;
894
3bef7b7b
TP
895 err = lxc_safe_uint(netdev->mtu, &mtu);
896 if (err < 0) {
897 errno = -err;
898 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
899 goto on_error;
900 }
901
902 err = lxc_netdev_set_mtu(peer, mtu);
903 if (err < 0) {
904 errno = -err;
905 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
906 goto on_error;
907 }
908 }
909
811ef482 910 if (netdev->upscript) {
14a7b0f9
CB
911 char *argv[] = {
912 "macvlan",
913 netdev->link,
914 NULL,
915 };
916
917 err = run_script_argv(handler->name,
918 handler->conf->hooks_version, "net",
919 netdev->upscript, "up", argv);
920 if (err < 0)
966e9f1f 921 goto on_error;
811ef482
CB
922 }
923
4a037d61 924 DEBUG("Instantiated macvlan \"%s\" with ifindex %d and mode %d",
811ef482
CB
925 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
926
927 return 0;
966e9f1f
CB
928
929on_error:
811ef482 930 lxc_netdev_delete_by_name(peer);
811ef482
CB
931 return -1;
932}
933
0dc9a142 934static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int isolation)
c9f52382 935{
d16bda44
CB
936 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
937 struct nl_handler nlh;
938 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 939 int err, index, len;
940 struct ifinfomsg *ifi;
c9f52382 941 struct rtattr *nest, *nest2;
c9f52382 942
0dc9a142 943 len = strlen(parent);
c9f52382 944 if (len == 1 || len >= IFNAMSIZ)
d16bda44 945 return ret_errno(EINVAL);
c9f52382 946
947 len = strlen(name);
948 if (len == 1 || len >= IFNAMSIZ)
d16bda44 949 return ret_errno(EINVAL);
c9f52382 950
0dc9a142 951 index = if_nametoindex(parent);
c9f52382 952 if (!index)
d16bda44 953 return ret_errno(EINVAL);
c9f52382 954
d16bda44 955 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 956 if (err)
df62850d 957 return err;
c9f52382 958
c9f52382 959 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
960 if (!nlmsg)
d16bda44 961 return ret_errno(ENOMEM);
c9f52382 962
963 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
964 if (!answer)
d16bda44 965 return ret_errno(ENOMEM);
c9f52382 966
967 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
968 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
969
970 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
971 if (!ifi)
972 return ret_errno(ENOMEM);
c9f52382 973 ifi->ifi_family = AF_UNSPEC;
974
c9f52382 975 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
976 if (!nest)
d16bda44 977 return ret_errno(EPROTO);
c9f52382 978
979 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 980 return ret_errno(EPROTO);
c9f52382 981
5755765e
KT
982 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
983 if (!nest2)
984 return ret_errno(EPROTO);
985
3a934e2e 986 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
5755765e
KT
987 return ret_errno(EPROTO);
988
cf88a827
TP
989 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
990 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
991 * according to ipvlan docs.
5755765e 992 */
cf88a827 993 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
5755765e 994 return ret_errno(EPROTO);
c9f52382 995
5755765e 996 nla_end_nested(nlmsg, nest2);
c9f52382 997 nla_end_nested(nlmsg, nest);
998
999 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 1000 return ret_errno(EPROTO);
c9f52382 1001
1002 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
1003 return ret_errno(EPROTO);
1004
1005 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 1006}
1007
bad2f913 1008static int netdev_configure_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
c9f52382 1009{
dd119206 1010 char peer[IFNAMSIZ];
c9f52382 1011 int err;
1012
f2711167 1013 if (is_empty_string(netdev->link)) {
c9f52382 1014 ERROR("No link for ipvlan network device specified");
1015 return -1;
1016 }
1017
387c1c70
CB
1018 err = strnprintf(peer, sizeof(peer), "ipXXXXXX");
1019 if (err < 0)
c9f52382 1020 return -1;
1021
3646ffd9 1022 if (!lxc_ifname_alnum_case_sensitive(peer))
c9f52382 1023 return -1;
1024
dd119206
CB
1025 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
1026 netdev->priv.ipvlan_attr.isolation);
c9f52382 1027 if (err) {
dd119206
CB
1028 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
1029 peer, netdev->link);
c9f52382 1030 goto on_error;
1031 }
1032
e7fdd504
CB
1033 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1034
c9f52382 1035 netdev->ifindex = if_nametoindex(peer);
1036 if (!netdev->ifindex) {
1037 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
1038 goto on_error;
1039 }
1040
006e135e 1041 if (netdev->mtu) {
54256301
CB
1042 unsigned int mtu;
1043
006e135e 1044 err = lxc_safe_uint(netdev->mtu, &mtu);
1045 if (err < 0) {
1046 errno = -err;
54256301 1047 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 1048 goto on_error;
1049 }
1050
1051 err = lxc_netdev_set_mtu(peer, mtu);
1052 if (err < 0) {
1053 errno = -err;
54256301 1054 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 1055 goto on_error;
1056 }
1057 }
1058
c9f52382 1059 if (netdev->upscript) {
1060 char *argv[] = {
1061 "ipvlan",
1062 netdev->link,
1063 NULL,
1064 };
1065
dd119206
CB
1066 err = run_script_argv(handler->name, handler->conf->hooks_version,
1067 "net", netdev->upscript, "up", argv);
c9f52382 1068 if (err < 0)
1069 goto on_error;
1070 }
1071
4a037d61 1072 DEBUG("Instantiated ipvlan \"%s\" with ifindex %d and mode %d", peer,
dd119206 1073 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 1074
1075 return 0;
1076
1077on_error:
1078 lxc_netdev_delete_by_name(peer);
1079 return -1;
1080}
1081
bad2f913 1082static int netdev_configure_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482
CB
1083{
1084 char peer[IFNAMSIZ];
1085 int err;
1086 static uint16_t vlan_cntr = 0;
811ef482 1087
f2711167 1088 if (is_empty_string(netdev->link)) {
811ef482
CB
1089 ERROR("No link for vlan network device specified");
1090 return -1;
1091 }
1092
387c1c70
CB
1093 err = strnprintf(peer, sizeof(peer), "vlan%d-%d",
1094 netdev->priv.vlan_attr.vid, vlan_cntr++);
1095 if (err < 0)
811ef482
CB
1096 return -1;
1097
1098 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
1099 if (err) {
6d1400b5 1100 errno = -err;
1101 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1102 peer, netdev->link);
811ef482
CB
1103 return -1;
1104 }
1105
83530dba
CB
1106 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1107
811ef482
CB
1108 netdev->ifindex = if_nametoindex(peer);
1109 if (!netdev->ifindex) {
1110 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 1111 goto on_error;
1112 }
1113
1114 if (netdev->mtu) {
54256301
CB
1115 unsigned int mtu;
1116
3e2a7b08 1117 err = lxc_safe_uint(netdev->mtu, &mtu);
1118 if (err < 0) {
1119 errno = -err;
54256301 1120 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1121 goto on_error;
1122 }
1123
1124 err = lxc_netdev_set_mtu(peer, mtu);
54256301 1125 if (err < 0) {
3e2a7b08 1126 errno = -err;
54256301 1127 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1128 goto on_error;
1129 }
811ef482
CB
1130 }
1131
3a73d9f1 1132 if (netdev->upscript) {
1133 char *argv[] = {
1134 "vlan",
1135 netdev->link,
1136 NULL,
1137 };
1138
d4d68410
CB
1139 err = run_script_argv(handler->name, handler->conf->hooks_version,
1140 "net", netdev->upscript, "up", argv);
19abca58 1141 if (err < 0) {
3e2a7b08 1142 goto on_error;
19abca58 1143 }
3a73d9f1 1144 }
1145
4a037d61 1146 DEBUG("Instantiated vlan \"%s\" with ifindex \"%d\"", peer,
d4d68410 1147 netdev->ifindex);
811ef482
CB
1148
1149 return 0;
3e2a7b08 1150
1151on_error:
1152 lxc_netdev_delete_by_name(peer);
1153 return -1;
811ef482
CB
1154}
1155
bad2f913 1156static int netdev_configure_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1157{
0b154989 1158 int err, mtu_orig = 0;
14a7b0f9 1159
9c66dc4f
CB
1160 if (is_empty_string(netdev->link))
1161 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 1162
75b074ee
CB
1163 /*
1164 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
1165 * network namespace because we need it to move the device from the
1166 * host's network namespace to the container's network namespace later
1167 * on.
1168 * Note that netdev->link will contain the name of the physical network
1169 * device in the host's namespace.
1170 */
811ef482 1171 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
1172 if (!netdev->ifindex)
1173 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 1174
61302ef7 1175 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 1176 if (is_empty_string(netdev->name))
8bf64b77 1177 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 1178
75b074ee
CB
1179 /*
1180 * Store the ifindex of the host's network device in the host's
790255cf
CB
1181 * namespace.
1182 */
1183 netdev->priv.phys_attr.ifindex = netdev->ifindex;
1184
75b074ee
CB
1185 /*
1186 * Get original device MTU setting and store for restoration after
1187 * container shutdown.
1188 */
0b154989 1189 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
1190 if (mtu_orig < 0)
1191 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
1192
1193 netdev->priv.phys_attr.mtu = mtu_orig;
1194
3bef7b7b 1195 if (netdev->mtu) {
54256301
CB
1196 unsigned int mtu;
1197
3bef7b7b 1198 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
1199 if (err < 0)
1200 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 1201
3bef7b7b 1202 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
1203 if (err < 0)
1204 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
1205 }
1206
1207 if (netdev->upscript) {
1208 char *argv[] = {
1209 "phys",
1210 netdev->link,
1211 NULL,
1212 };
1213
75b074ee
CB
1214 err = run_script_argv(handler->name, handler->conf->hooks_version,
1215 "net", netdev->upscript, "up", argv);
9c66dc4f 1216 if (err < 0)
3bef7b7b 1217 return -1;
3bef7b7b
TP
1218 }
1219
4a037d61 1220 DEBUG("Instantiated phys \"%s\" with ifindex \"%d\"", netdev->link,
75b074ee 1221 netdev->ifindex);
811ef482
CB
1222
1223 return 0;
1224}
1225
bad2f913 1226static int netdev_configure_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1227{
14a7b0f9
CB
1228 int ret;
1229 char *argv[] = {
1230 "empty",
1231 NULL,
1232 };
1233
811ef482 1234 netdev->ifindex = 0;
14a7b0f9
CB
1235 if (!netdev->upscript)
1236 return 0;
1237
1238 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1239 "net", netdev->upscript, "up", argv);
1240 if (ret < 0)
1241 return -1;
1242
811ef482
CB
1243 return 0;
1244}
1245
bad2f913 1246static int netdev_configure_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482
CB
1247{
1248 netdev->ifindex = 0;
1249 return 0;
1250}
1251
bad2f913
CB
1252static netdev_configure_server_cb netdev_configure_server[LXC_NET_MAXCONFTYPE + 1] = {
1253 [LXC_NET_VETH] = netdev_configure_server_veth,
1254 [LXC_NET_MACVLAN] = netdev_configure_server_macvlan,
1255 [LXC_NET_IPVLAN] = netdev_configure_server_ipvlan,
1256 [LXC_NET_VLAN] = netdev_configure_server_vlan,
1257 [LXC_NET_PHYS] = netdev_configure_server_phys,
1258 [LXC_NET_EMPTY] = netdev_configure_server_empty,
1259 [LXC_NET_NONE] = netdev_configure_server_none,
811ef482
CB
1260};
1261
bad2f913 1262static int __netdev_configure_container_common(struct lxc_netdev *netdev)
8bf64b77
CB
1263{
1264 char current_ifname[IFNAMSIZ];
1265
fdd6be55 1266 netdev->ifindex = if_nametoindex(netdev->transient_name);
8bf64b77
CB
1267 if (!netdev->ifindex)
1268 return log_error_errno(-1,
1269 errno, "Failed to retrieve ifindex for network device with name %s",
fdd6be55 1270 netdev->transient_name);
8bf64b77 1271
3473ca76 1272 if (is_empty_string(netdev->name))
8bf64b77
CB
1273 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1274
fdd6be55 1275 if (!strequal(netdev->transient_name, netdev->name)) {
8bf64b77
CB
1276 int ret;
1277
fdd6be55 1278 ret = lxc_netdev_rename_by_name(netdev->transient_name, netdev->name);
8bf64b77 1279 if (ret)
9c66dc4f 1280 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
fdd6be55 1281 netdev->transient_name, netdev->name);
8bf64b77 1282
fdd6be55 1283 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->transient_name, netdev->name);
8bf64b77
CB
1284 }
1285
1286 /*
1287 * Re-read the name of the interface because its name has changed and
1288 * would be automatically allocated by the system
1289 */
1290 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 1291 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
1292
1293 /*
1294 * Now update the recorded name of the network device to reflect the
1295 * name of the network device in the child's network namespace. We will
1296 * later on send this information back to the parent.
1297 */
1298 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
fdd6be55 1299 netdev->transient_name[0] = '\0';
8bf64b77
CB
1300
1301 return 0;
1302}
1303
/* Container-side setup for veth devices: only the common steps are needed. */
static int netdev_configure_container_veth(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1309
/* Container-side setup for macvlan devices: only the common steps are needed. */
static int netdev_configure_container_macvlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1314
/* Container-side setup for ipvlan devices: only the common steps are needed. */
static int netdev_configure_container_ipvlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1319
/* Container-side setup for vlan devices: only the common steps are needed. */
static int netdev_configure_container_vlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1324
/* Container-side setup for physical devices: only the common steps are needed. */
static int netdev_configure_container_phys(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1329
/* Container-side setup for "empty" networks: nothing to do, always returns 0. */
static int netdev_configure_container_empty(struct lxc_netdev *netdev)
{
	return 0;
}
1334
/* Container-side setup for the "none" network type: nothing to do, always returns 0. */
static int netdev_configure_container_none(struct lxc_netdev *netdev)
{
	return 0;
}
1339
bad2f913
CB
1340static netdev_configure_container_cb netdev_configure_container[LXC_NET_MAXCONFTYPE + 1] = {
1341 [LXC_NET_VETH] = netdev_configure_container_veth,
1342 [LXC_NET_MACVLAN] = netdev_configure_container_macvlan,
1343 [LXC_NET_IPVLAN] = netdev_configure_container_ipvlan,
1344 [LXC_NET_VLAN] = netdev_configure_container_vlan,
1345 [LXC_NET_PHYS] = netdev_configure_container_phys,
1346 [LXC_NET_EMPTY] = netdev_configure_container_empty,
1347 [LXC_NET_NONE] = netdev_configure_container_none,
8bf64b77
CB
1348};
1349
bad2f913 1350static int netdev_shutdown_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1351{
14a7b0f9
CB
1352 int ret;
1353 char *argv[] = {
1354 "veth",
1355 netdev->link,
1356 NULL,
1357 NULL,
1358 };
1359
1360 if (!netdev->downscript)
1361 return 0;
811ef482 1362
f2711167 1363 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 1364 argv[2] = netdev->priv.veth_attr.pair;
811ef482 1365 else
14a7b0f9
CB
1366 argv[2] = netdev->priv.veth_attr.veth1;
1367
1368 ret = run_script_argv(handler->name,
1369 handler->conf->hooks_version, "net",
1370 netdev->downscript, "down", argv);
1371 if (ret < 0)
1372 return -1;
811ef482 1373
811ef482
CB
1374 return 0;
1375}
1376
bad2f913 1377static int netdev_shutdown_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1378{
14a7b0f9
CB
1379 int ret;
1380 char *argv[] = {
1381 "macvlan",
1382 netdev->link,
1383 NULL,
1384 };
1385
1386 if (!netdev->downscript)
1387 return 0;
1388
1389 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1390 "net", netdev->downscript, "down", argv);
1391 if (ret < 0)
1392 return -1;
811ef482 1393
811ef482
CB
1394 return 0;
1395}
1396
bad2f913 1397static int netdev_shutdown_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
c9f52382 1398{
1399 int ret;
1400 char *argv[] = {
1401 "ipvlan",
1402 netdev->link,
1403 NULL,
1404 };
1405
1406 if (!netdev->downscript)
1407 return 0;
1408
1409 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1410 "net", netdev->downscript, "down", argv);
1411 if (ret < 0)
1412 return -1;
1413
1414 return 0;
1415}
1416
bad2f913 1417static int netdev_shutdown_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1418{
3a73d9f1 1419 int ret;
1420 char *argv[] = {
1421 "vlan",
1422 netdev->link,
1423 NULL,
1424 };
1425
1426 if (!netdev->downscript)
1427 return 0;
1428
1429 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1430 "net", netdev->downscript, "down", argv);
1431 if (ret < 0)
1432 return -1;
1433
811ef482
CB
1434 return 0;
1435}
1436
bad2f913 1437static int netdev_shutdown_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1438{
14a7b0f9
CB
1439 int ret;
1440 char *argv[] = {
1441 "phys",
1442 netdev->link,
1443 NULL,
1444 };
1445
1446 if (!netdev->downscript)
1447 return 0;
1448
1449 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1450 "net", netdev->downscript, "down", argv);
1451 if (ret < 0)
1452 return -1;
811ef482 1453
811ef482
CB
1454 return 0;
1455}
1456
bad2f913 1457static int netdev_shutdown_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1458{
14a7b0f9
CB
1459 int ret;
1460 char *argv[] = {
1461 "empty",
1462 NULL,
1463 };
1464
1465 if (!netdev->downscript)
1466 return 0;
1467
1468 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1469 "net", netdev->downscript, "down", argv);
1470 if (ret < 0)
1471 return -1;
811ef482 1472
811ef482
CB
1473 return 0;
1474}
1475
/* Shutdown for the "none" network type: nothing to do, always returns 0. */
static int netdev_shutdown_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
1480
bad2f913
CB
1481static netdev_shutdown_server_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1482 [LXC_NET_VETH] = netdev_shutdown_server_veth,
1483 [LXC_NET_MACVLAN] = netdev_shutdown_server_macvlan,
1484 [LXC_NET_IPVLAN] = netdev_shutdown_server_ipvlan,
1485 [LXC_NET_VLAN] = netdev_shutdown_server_vlan,
1486 [LXC_NET_PHYS] = netdev_shutdown_server_phys,
1487 [LXC_NET_EMPTY] = netdev_shutdown_server_empty,
1488 [LXC_NET_NONE] = netdev_shutdown_server_none,
811ef482
CB
1489};
1490
0037ab49
TP
1491static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1492{
d16bda44 1493 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0037ab49 1494 struct nl_handler nlh;
d16bda44
CB
1495 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1496 int err;
0037ab49 1497 struct ifinfomsg *ifi;
0037ab49 1498
d16bda44 1499 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1500 if (err)
1501 return err;
1502
0037ab49
TP
1503 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1504 if (!nlmsg)
d16bda44 1505 return ret_errno(ENOMEM);
0037ab49
TP
1506
1507 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1508 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1509
1510 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1511 if (!ifi)
d16bda44
CB
1512 return ret_errno(ENOMEM);
1513
0037ab49
TP
1514 ifi->ifi_family = AF_UNSPEC;
1515 ifi->ifi_index = ifindex;
1516
1517 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1518 return ret_errno(ENOMEM);
0037ab49 1519
3473ca76 1520 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1521 return ret_errno(ENOMEM);
0037ab49 1522
d16bda44 1523 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1524}
1525
ebc73a67 1526int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1527{
d16bda44 1528 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0ad19a3f 1529 struct nl_handler nlh;
d16bda44
CB
1530 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1531 int err;
06f976ca 1532 struct ifinfomsg *ifi;
0ad19a3f 1533
d16bda44 1534 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1535 if (err)
1536 return err;
0ad19a3f 1537
0ad19a3f 1538 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1539 if (!nlmsg)
d16bda44 1540 return ret_errno(ENOMEM);
0ad19a3f 1541
ebc73a67 1542 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1543 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1544
1545 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1546 if (!ifi)
d16bda44
CB
1547 return ret_errno(ENOMEM);
1548
06f976ca
SZ
1549 ifi->ifi_family = AF_UNSPEC;
1550 ifi->ifi_index = ifindex;
0ad19a3f 1551
1552 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1553 return ret_errno(ENOMEM);
0ad19a3f 1554
3473ca76 1555 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1556 return ret_errno(ENOMEM);
8d357196 1557
d16bda44 1558 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1559}
1560
ebc73a67
CB
1561/* If we are asked to move a wireless interface, then we must actually move its
1562 * phyN device. Detect that condition and return the physname here. The physname
1563 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1564 */
1565#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1566char *is_wlan(const char *ifname)
e5848d39 1567{
4110345b
CB
1568 __do_fclose FILE *f = NULL;
1569 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1570 int i, ret;
e5848d39 1571 long physlen;
ebc73a67 1572 size_t len;
e5848d39 1573
ebc73a67 1574 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1575 path = must_realloc(NULL, len + 1);
387c1c70
CB
1576 ret = strnprintf(path, len, PHYSNAME, ifname);
1577 if (ret < 0)
4110345b 1578 return NULL;
ebc73a67 1579
4110345b 1580 f = fopen(path, "re");
ebc73a67 1581 if (!f)
4110345b 1582 return NULL;
ebc73a67 1583
1a0e70ac 1584 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1585 fseek(f, 0, SEEK_END);
1586 physlen = ftell(f);
1587 fseek(f, 0, SEEK_SET);
4110345b
CB
1588 if (physlen < 0)
1589 return NULL;
ebc73a67
CB
1590
1591 physname = malloc(physlen + 1);
4110345b
CB
1592 if (!physname)
1593 return NULL;
ebc73a67
CB
1594
1595 memset(physname, 0, physlen + 1);
e5848d39 1596 ret = fread(physname, 1, physlen, f);
e5848d39 1597 if (ret < 0)
4110345b 1598 return NULL;
e5848d39 1599
ebc73a67 1600 for (i = 0; i < physlen; i++) {
e5848d39
SH
1601 if (physname[i] == '\n')
1602 physname[i] = '\0';
ebc73a67 1603
e5848d39
SH
1604 if (physname[i] == '\0')
1605 break;
1606 }
1607
4110345b 1608 return move_ptr(physname);
e5848d39
SH
1609}
1610
ebc73a67
CB
/*
 * Rename network device @old to @new inside the network namespace of process
 * @pid.
 *
 * A child process is forked which switches into the target network namespace
 * and performs the rename; the parent waits for it and returns the result of
 * wait_for_pid(). Returns -1 if fork() fails.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child: it must never return into the caller's code, otherwise two
	 * processes would continue running the parent's logic. Report failure
	 * through the exit status instead.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1628
e4103cf6 1629int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1630 const char *newname)
e5848d39 1631{
3dd78294 1632 __do_free char *cmd = NULL;
ebc73a67 1633 pid_t fpid;
e5848d39
SH
1634
1635 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1636 * However, IIUC this involves a bit more complicated work to talk to
1637 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1638 */
1639 cmd = on_path("iw", NULL);
0ed79f45
M
1640 if (!cmd) {
1641 ERROR("Couldn't find the application iw in PATH");
3dd78294 1642 return -1;
0ed79f45 1643 }
e5848d39
SH
1644
1645 fpid = fork();
1646 if (fpid < 0)
3dd78294 1647 return -1;
ebc73a67 1648
e5848d39
SH
1649 if (fpid == 0) {
1650 char pidstr[30];
1651 sprintf(pidstr, "%d", pid);
9c66dc4f 1652 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1653 _exit(EXIT_FAILURE);
e5848d39 1654 }
ebc73a67 1655
e5848d39 1656 if (wait_for_pid(fpid))
3dd78294 1657 return -1;
e5848d39 1658
e5848d39 1659 if (newname)
3dd78294 1660 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1661
3dd78294 1662 return 0;
e5848d39
SH
1663}
1664
8d357196 1665int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1666{
3dd78294 1667 __do_free char *physname = NULL;
8befa924
SH
1668 int index;
1669
8befa924
SH
1670 if (!ifname)
1671 return -EINVAL;
1672
32571606 1673 index = if_nametoindex(ifname);
49428bf3
DY
1674 if (!index)
1675 return -EINVAL;
32571606 1676
ebc73a67
CB
1677 physname = is_wlan(ifname);
1678 if (physname)
e5848d39
SH
1679 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1680
8d357196 1681 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1682}
1683
b84f58b9 1684int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1685{
d16bda44
CB
1686 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1687 struct nl_handler nlh;
1688 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1689 int err;
ebc73a67 1690 struct ifinfomsg *ifi;
0ad19a3f 1691
d16bda44 1692 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1693 if (err)
1694 return err;
0ad19a3f 1695
0ad19a3f 1696 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1697 if (!nlmsg)
d16bda44 1698 return ret_errno(ENOMEM);
0ad19a3f 1699
06f976ca 1700 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1701 if (!answer)
d16bda44 1702 return ret_errno(ENOMEM);
0ad19a3f 1703
ebc73a67 1704 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1705 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1706
1707 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1708 if (!ifi)
d16bda44
CB
1709 return ret_errno(ENOMEM);
1710
06f976ca
SZ
1711 ifi->ifi_family = AF_UNSPEC;
1712 ifi->ifi_index = ifindex;
0ad19a3f 1713
d16bda44 1714 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1715}
1716
b84f58b9
DL
/*
 * Delete the network device called @name.
 * Returns -EINVAL if no such device exists, otherwise the result of
 * lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1727
1728int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1729{
d16bda44
CB
1730 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1731 struct nl_handler nlh;
1732 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1733 int err, len;
06f976ca 1734 struct ifinfomsg *ifi;
b9a5bb58 1735
d16bda44 1736 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1737 if (err)
1738 return err;
b9a5bb58 1739
b84f58b9 1740 len = strlen(newname);
d16bda44
CB
1741 if (len == 1 || len >= IFNAMSIZ)
1742 return ret_errno(EINVAL);
b84f58b9 1743
b9a5bb58
DL
1744 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1745 if (!nlmsg)
d16bda44 1746 return ret_errno(ENOMEM);
b9a5bb58 1747
06f976ca 1748 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1749 if (!answer)
d16bda44 1750 return ret_errno(ENOMEM);
b9a5bb58 1751
ebc73a67 1752 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1753 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1754
1755 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1756 if (!ifi)
d16bda44
CB
1757 return ret_errno(ENOMEM);
1758
06f976ca
SZ
1759 ifi->ifi_family = AF_UNSPEC;
1760 ifi->ifi_index = ifindex;
b84f58b9
DL
1761
1762 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1763 return ret_errno(ENOMEM);
b9a5bb58 1764
d16bda44 1765 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1766}
1767
b84f58b9
DL
1768int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1769{
1770 int len, index;
1771
1772 len = strlen(oldname);
dae3fdf6 1773 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1774 return -EINVAL;
1775
1776 index = if_nametoindex(oldname);
1777 if (!index)
1778 return -EINVAL;
1779
1780 return lxc_netdev_rename_by_index(index, newname);
1781}
1782
8befa924 1783int netdev_set_flag(const char *name, int flag)
0ad19a3f 1784{
d16bda44
CB
1785 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1786 struct nl_handler nlh;
1787 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1788 int err, index, len;
06f976ca 1789 struct ifinfomsg *ifi;
0ad19a3f 1790
d16bda44 1791 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1792 if (err)
1793 return err;
0ad19a3f 1794
1795 len = strlen(name);
dae3fdf6 1796 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1797 return ret_errno(EINVAL);
0ad19a3f 1798
1799 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1800 if (!nlmsg)
d16bda44 1801 return ret_errno(ENOMEM);
0ad19a3f 1802
06f976ca 1803 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1804 if (!answer)
d16bda44 1805 return ret_errno(ENOMEM);
0ad19a3f 1806
1807 index = if_nametoindex(name);
1808 if (!index)
d16bda44 1809 return ret_errno(EINVAL);
0ad19a3f 1810
ebc73a67 1811 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1812 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1813
1814 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1815 if (!ifi)
1816 return ret_errno(ENOMEM);
1817
06f976ca
SZ
1818 ifi->ifi_family = AF_UNSPEC;
1819 ifi->ifi_index = index;
1820 ifi->ifi_change |= IFF_UP;
1821 ifi->ifi_flags |= flag;
0ad19a3f 1822
d16bda44 1823 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1824}
1825
59eac805 1826static int netdev_get_flag(const char *name, int *flag)
efa1cf45 1827{
d16bda44
CB
1828 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1829 struct nl_handler nlh;
1830 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1831 int err, index, len;
a4318300 1832 struct ifinfomsg *ifi;
efa1cf45
DY
1833
1834 if (!name)
d16bda44 1835 return ret_errno(EINVAL);
efa1cf45 1836
d16bda44 1837 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1838 if (err)
1839 return err;
1840
efa1cf45
DY
1841 len = strlen(name);
1842 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1843 return ret_errno(EINVAL);
efa1cf45 1844
efa1cf45
DY
1845 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1846 if (!nlmsg)
d16bda44 1847 return ret_errno(ENOMEM);
efa1cf45 1848
06f976ca 1849 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1850 if (!answer)
d16bda44 1851 return ret_errno(ENOMEM);
efa1cf45 1852
efa1cf45
DY
1853 index = if_nametoindex(name);
1854 if (!index)
d16bda44 1855 return ret_errno(EINVAL);
efa1cf45 1856
06f976ca
SZ
1857 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1858 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1859
1860 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1861 if (!ifi)
1862 return ret_errno(ENOMEM);
1863
06f976ca
SZ
1864 ifi->ifi_family = AF_UNSPEC;
1865 ifi->ifi_index = index;
efa1cf45 1866
d16bda44 1867 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1868 if (err)
d16bda44 1869 return ret_set_errno(-1, errno);
efa1cf45 1870
06f976ca 1871 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1872
1873 *flag = ifi->ifi_flags;
efa1cf45
DY
1874 return err;
1875}
1876
1877/*
1878 * \brief Check a interface is up or not.
1879 *
1880 * \param name: name for the interface.
1881 *
1882 * \return int.
1883 * 0 means interface is down.
1884 * 1 means interface is up.
1885 * Others means error happened, and ret-value is the error number.
1886 */
ebc73a67 1887int lxc_netdev_isup(const char *name)
efa1cf45 1888{
4db0514d
CB
1889 int err;
1890 int flag = 0;
efa1cf45
DY
1891
1892 err = netdev_get_flag(name, &flag);
1893 if (err)
ebc73a67
CB
1894 return err;
1895
efa1cf45
DY
1896 if (flag & IFF_UP)
1897 return 1;
ebc73a67 1898
efa1cf45 1899 return 0;
efa1cf45
DY
1900}
1901
0130df54
SH
1902int netdev_get_mtu(int ifindex)
1903{
a5f5cb41 1904 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54 1905 struct nl_handler nlh;
a5f5cb41
CB
1906 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1907 int readmore = 0, recv_len = 0;
1908 int answer_len, err, res;
06f976ca 1909 struct ifinfomsg *ifi;
0130df54 1910 struct nlmsghdr *msg;
0130df54 1911
a5f5cb41 1912 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0130df54
SH
1913 if (err)
1914 return err;
1915
0130df54
SH
1916 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1917 if (!nlmsg)
a5f5cb41 1918 return ret_errno(ENOMEM);
0130df54 1919
06f976ca 1920 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54 1921 if (!answer)
a5f5cb41 1922 return ret_errno(ENOMEM);
0130df54
SH
1923
1924 /* Save the answer buffer length, since it will be overwritten
1925 * on the first receive (and we might need to receive more than
ebc73a67
CB
1926 * once.
1927 */
06f976ca
SZ
1928 answer_len = answer->nlmsghdr->nlmsg_len;
1929
ebc73a67 1930 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1931 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1932
06f976ca 1933 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1934 if (!ifi)
a5f5cb41
CB
1935 return ret_errno(ENOMEM);
1936
06f976ca 1937 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1938
1939 /* Send the request for addresses, which returns all addresses
1940 * on all interfaces. */
a5f5cb41 1941 err = netlink_send(nlh_ptr, nlmsg);
0130df54 1942 if (err < 0)
a5f5cb41 1943 return ret_set_errno(-1, errno);
0130df54 1944
6ce39620
CB
1945#pragma GCC diagnostic push
1946#pragma GCC diagnostic ignored "-Wcast-align"
1947
0130df54
SH
1948 do {
1949 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1950 * overwritten by a previous receive.
1951 */
06f976ca 1952 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1953
1954 /* Get the (next) batch of reply messages */
a5f5cb41 1955 err = netlink_rcv(nlh_ptr, answer);
0130df54 1956 if (err < 0)
a5f5cb41 1957 return ret_set_errno(-1, errno);
0130df54
SH
1958
1959 recv_len = err;
0130df54
SH
1960
1961 /* Satisfy the typing for the netlink macros */
06f976ca 1962 msg = answer->nlmsghdr;
0130df54
SH
1963
1964 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1965 /* Stop reading if we see an error message */
1966 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
1967 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1968 return ret_set_errno(errmsg->error, errno);
0130df54
SH
1969 }
1970
1971 /* Stop reading if we see a NLMSG_DONE message */
1972 if (msg->nlmsg_type == NLMSG_DONE) {
1973 readmore = 0;
1974 break;
1975 }
1976
06f976ca 1977 ifi = NLMSG_DATA(msg);
0130df54
SH
1978 if (ifi->ifi_index == ifindex) {
1979 struct rtattr *rta = IFLA_RTA(ifi);
a5f5cb41
CB
1980 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1981
0130df54 1982 res = 0;
ebc73a67 1983 while (RTA_OK(rta, attr_len)) {
9c66dc4f 1984 /*
a5f5cb41 1985 * Found a local address for the
ebc73a67
CB
1986 * requested interface, return it.
1987 */
0130df54 1988 if (rta->rta_type == IFLA_MTU) {
a5f5cb41
CB
1989 memcpy(&res, RTA_DATA(rta), sizeof(int));
1990 return res;
0130df54 1991 }
a5f5cb41 1992
0130df54
SH
1993 rta = RTA_NEXT(rta, attr_len);
1994 }
0130df54
SH
1995 }
1996
ebc73a67
CB
1997 /* Keep reading more data from the socket if the last
1998 * message had the NLF_F_MULTI flag set.
1999 */
0130df54
SH
2000 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2001
ebc73a67 2002 /* Look at the next message received in this buffer. */
0130df54
SH
2003 msg = NLMSG_NEXT(msg, recv_len);
2004 }
2005 } while (readmore);
2006
6ce39620
CB
2007#pragma GCC diagnostic pop
2008
ebc73a67 2009 /* If we end up here, we didn't find any result, so signal an error. */
a5f5cb41 2010 return -1;
0130df54
SH
2011}
2012
d472214b 2013int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 2014{
a5f5cb41
CB
2015 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2016 struct nl_handler nlh;
2017 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
54256301 2018 int err, len;
06f976ca 2019 struct ifinfomsg *ifi;
75d09f83 2020
a5f5cb41 2021 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2022 if (err)
2023 return err;
75d09f83
DL
2024
2025 len = strlen(name);
dae3fdf6 2026 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2027 return ret_errno(EINVAL);
75d09f83
DL
2028
2029 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2030 if (!nlmsg)
a5f5cb41 2031 return ret_errno(ENOMEM);
75d09f83 2032
06f976ca 2033 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83 2034 if (!answer)
a5f5cb41 2035 return ret_errno(ENOMEM);
75d09f83 2036
ebc73a67 2037 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
2038 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2039
2040 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2041 if (!ifi)
2042 return ret_errno(ENOMEM);
2043
06f976ca 2044 ifi->ifi_family = AF_UNSPEC;
54256301
CB
2045
2046 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2047 return ret_errno(ENOMEM);
75d09f83
DL
2048
2049 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2050 return ret_errno(ENOMEM);
75d09f83 2051
a5f5cb41 2052 return netlink_transaction(nlh_ptr, nlmsg, answer);
75d09f83
DL
2053}
2054
d472214b 2055int lxc_netdev_up(const char *name)
0ad19a3f 2056{
d472214b 2057 return netdev_set_flag(name, IFF_UP);
0ad19a3f 2058}
2059
/* Pass an empty flag set (0) to netdev_set_flag() for interface @name;
 * returns its result.
 */
int lxc_netdev_down(const char *name)
{
	const int flag = 0;

	return netdev_set_flag(name, flag);
}
2064
54256301 2065int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 2066{
a5f5cb41
CB
2067 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2068 struct nl_handler nlh;
2069 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2070 int err, len;
06f976ca 2071 struct ifinfomsg *ifi;
0ad19a3f 2072 struct rtattr *nest1, *nest2, *nest3;
0ad19a3f 2073
a5f5cb41 2074 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2075 if (err)
2076 return err;
0ad19a3f 2077
2078 len = strlen(name1);
dae3fdf6 2079 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2080 return ret_errno(EINVAL);
0ad19a3f 2081
2082 len = strlen(name2);
dae3fdf6 2083 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2084 return ret_errno(EINVAL);
0ad19a3f 2085
2086 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2087 if (!nlmsg)
a5f5cb41 2088 return ret_errno(ENOMEM);
0ad19a3f 2089
06f976ca 2090 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2091 if (!answer)
a5f5cb41 2092 return ret_errno(ENOMEM);
0ad19a3f 2093
a5f5cb41 2094 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2095 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2096
2097 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 2098 if (!ifi)
a5f5cb41
CB
2099 return ret_errno(ENOMEM);
2100
06f976ca 2101 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2102
79e68309 2103 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2104 if (!nest1)
a5f5cb41 2105 return ret_errno(EINVAL);
0ad19a3f 2106
2107 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
a5f5cb41 2108 return ret_errno(ENOMEM);
0ad19a3f 2109
2110 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2111 if (!nest2)
a5f5cb41 2112 return ret_errno(ENOMEM);
0ad19a3f 2113
2114 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
2115 if (!nest3)
a5f5cb41 2116 return ret_errno(ENOMEM);
0ad19a3f 2117
06f976ca 2118 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2119 if (!ifi)
2120 return ret_errno(ENOMEM);
0ad19a3f 2121
2122 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
a5f5cb41 2123 return ret_errno(ENOMEM);
0ad19a3f 2124
54256301 2125 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2126 return ret_errno(ENOMEM);
54256301
CB
2127
2128 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
a5f5cb41 2129 return ret_errno(ENOMEM);
54256301 2130
0ad19a3f 2131 nla_end_nested(nlmsg, nest3);
0ad19a3f 2132 nla_end_nested(nlmsg, nest2);
0ad19a3f 2133 nla_end_nested(nlmsg, nest1);
2134
2135 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
a5f5cb41 2136 return ret_errno(ENOMEM);
0ad19a3f 2137
a5f5cb41 2138 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2139}
2140
ebc73a67 2141/* TODO: merge with lxc_macvlan_create */
0dc9a142 2142int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid)
26c39028 2143{
a5f5cb41
CB
2144 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2145 struct nl_handler nlh;
2146 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2147 int err, len, lindex;
06f976ca 2148 struct ifinfomsg *ifi;
26c39028 2149 struct rtattr *nest, *nest2;
26c39028 2150
a5f5cb41 2151 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2152 if (err)
2153 return err;
26c39028 2154
0dc9a142 2155 len = strlen(parent);
dae3fdf6 2156 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2157 return ret_errno(EINVAL);
26c39028
JHS
2158
2159 len = strlen(name);
dae3fdf6 2160 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2161 return ret_errno(EINVAL);
26c39028
JHS
2162
2163 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2164 if (!nlmsg)
a5f5cb41 2165 return ret_errno(ENOMEM);
26c39028 2166
06f976ca 2167 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028 2168 if (!answer)
a5f5cb41 2169 return ret_errno(ENOMEM);
26c39028 2170
0dc9a142 2171 lindex = if_nametoindex(parent);
26c39028 2172 if (!lindex)
a5f5cb41 2173 return ret_errno(EINVAL);
26c39028 2174
a5f5cb41 2175 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2176 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2177
2178 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2179 if (!ifi)
2180 return ret_errno(ENOMEM);
2181
06f976ca 2182 ifi->ifi_family = AF_UNSPEC;
26c39028 2183
79e68309 2184 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028 2185 if (!nest)
a5f5cb41 2186 return ret_errno(ENOMEM);
26c39028
JHS
2187
2188 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
a5f5cb41 2189 return ret_errno(ENOMEM);
26c39028
JHS
2190
2191 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2192 if (!nest2)
a5f5cb41 2193 return ret_errno(ENOMEM);
e892973e 2194
26c39028 2195 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
a5f5cb41 2196 return ret_errno(ENOMEM);
e892973e 2197
26c39028 2198 nla_end_nested(nlmsg, nest2);
26c39028
JHS
2199 nla_end_nested(nlmsg, nest);
2200
2201 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
a5f5cb41 2202 return ret_errno(ENOMEM);
26c39028
JHS
2203
2204 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41
CB
2205 return ret_errno(ENOMEM);
2206
2207 return netlink_transaction(nlh_ptr, nlmsg, answer);
26c39028
JHS
2208}
2209
0dc9a142 2210int lxc_macvlan_create(const char *parent, const char *name, int mode)
0ad19a3f 2211{
a5f5cb41
CB
2212 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2213 struct nl_handler nlh;
2214 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2215 int err, index, len;
06f976ca 2216 struct ifinfomsg *ifi;
e892973e 2217 struct rtattr *nest, *nest2;
0ad19a3f 2218
a5f5cb41 2219 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2220 if (err)
2221 return err;
0ad19a3f 2222
0dc9a142 2223 len = strlen(parent);
dae3fdf6 2224 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2225 return ret_errno(EINVAL);
0ad19a3f 2226
2227 len = strlen(name);
dae3fdf6 2228 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2229 return ret_errno(EINVAL);
0ad19a3f 2230
2231 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2232 if (!nlmsg)
a5f5cb41 2233 return ret_errno(ENOMEM);
0ad19a3f 2234
06f976ca 2235 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2236 if (!answer)
a5f5cb41 2237 return ret_errno(ENOMEM);
0ad19a3f 2238
0dc9a142 2239 index = if_nametoindex(parent);
0ad19a3f 2240 if (!index)
a5f5cb41 2241 return ret_errno(EINVAL);
0ad19a3f 2242
a5f5cb41 2243 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2244 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2245
2246 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2247 if (!ifi)
2248 return ret_errno(ENOMEM);
2249
06f976ca 2250 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2251
79e68309 2252 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2253 if (!nest)
a5f5cb41 2254 return ret_errno(ENOMEM);
0ad19a3f 2255
2256 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
a5f5cb41 2257 return ret_errno(ENOMEM);
0ad19a3f 2258
e892973e
DL
2259 if (mode) {
2260 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2261 if (!nest2)
a5f5cb41 2262 return ret_errno(ENOMEM);
e892973e
DL
2263
2264 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
a5f5cb41 2265 return ret_errno(ENOMEM);
e892973e
DL
2266
2267 nla_end_nested(nlmsg, nest2);
2268 }
2269
0ad19a3f 2270 nla_end_nested(nlmsg, nest);
2271
2272 if (nla_put_u32(nlmsg, IFLA_LINK, index))
a5f5cb41 2273 return ret_errno(ENOMEM);
0ad19a3f 2274
2275 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2276 return ret_errno(ENOMEM);
0ad19a3f 2277
a5f5cb41 2278 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2279}
2280
/*
 * Open @path write-only and write the string @value to it.
 *
 * Returns 0 on success, or -errno if open() or lxc_write_nointr() failed.
 * The descriptor is closed in either case once it was opened.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	/* Capture errno before close() gets a chance to touch it. */
	ret = lxc_write_nointr(fd, value, strlen(value)) < 0 ? -errno : 0;

	close(fd);
	return ret;
}
2296
6dfa9581 2297static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 2298{
2299 int ret;
2300 char path[PATH_MAX];
6509154d 2301
2302 if (family != AF_INET && family != AF_INET6)
6dfa9581 2303 return -EINVAL;
6509154d 2304
387c1c70
CB
2305 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2306 family == AF_INET ? "ipv4" : "ipv6", ifname,
2307 "forwarding");
2308 if (ret < 0)
6dfa9581 2309 return -E2BIG;
6509154d 2310
6dfa9581
TP
2311 return proc_sys_net_write(path, flag ? "1" : "0");
2312}
2313
/* Enable forwarding on interface @name for @family via ip_forwarding_set(). */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
2318
/* Disable forwarding on interface @name for @family via ip_forwarding_set(). */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int enable = 0;

	return ip_forwarding_set(name, family, enable);
}
2323
0ad19a3f 2324static int neigh_proxy_set(const char *ifname, int family, int flag)
2325{
9ba8130c 2326 int ret;
419590da 2327 char path[PATH_MAX];
0ad19a3f 2328
2329 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 2330 return -EINVAL;
0ad19a3f 2331
387c1c70
CB
2332 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2333 family == AF_INET ? "ipv4" : "ipv6", ifname,
2334 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2335 if (ret < 0)
9ba8130c 2336 return -E2BIG;
0ad19a3f 2337
ebc73a67 2338 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 2339}
2340
6509154d 2341static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2342{
2343 int ret;
2344 char path[PATH_MAX];
2345 char buf[1] = "";
2346
2347 if (family != AF_INET && family != AF_INET6)
596a002c 2348 return ret_set_errno(-1, EINVAL);
6509154d 2349
387c1c70
CB
2350 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2351 family == AF_INET ? "ipv4" : "ipv6", ifname,
2352 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2353 if (ret < 0)
596a002c 2354 return ret_set_errno(-1, E2BIG);
6509154d 2355
2356 return lxc_read_file_expect(path, buf, 1, "1");
2357}
2358
/* Enable neighbour proxying on interface @name for @family via
 * neigh_proxy_set().
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2363
/* Disable neighbour proxying on interface @name for @family via
 * neigh_proxy_set().
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
2368
/* Return the numeric value of hex digit @c, or -1 if @c is not a hex digit.
 * Taking an unsigned char keeps the isdigit() argument in its defined range.
 */
static int mac_hex_value(unsigned char c)
{
	if (isdigit(c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are stored in
 * sa_data. Each byte is written as one or two hex digits; bytes may be
 * separated by ':'. Parsing stops at the end of the string or after ETH_ALEN
 * bytes, whichever comes first; anything after that is ignored.
 *
 * Returns 0 on success and -EINVAL if an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	int i = 0;
	unsigned char *data;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		char c;
		int hi, lo;
		unsigned val;

		/* The first character of a byte must be a hex digit. */
		hi = mac_hex_value((unsigned char)*macaddr++);
		if (hi < 0)
			return -EINVAL;

		/* The second one is a hex digit, a separator, or the end. */
		c = *macaddr;
		lo = mac_hex_value((unsigned char)c);
		if (lo >= 0)
			val = ((unsigned)hi << 4) | (unsigned)lo;
		else if (c == ':' || c == '\0')
			val = (unsigned)hi;
		else
			return -EINVAL;

		/* Never step past the terminating NUL. */
		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2413
ebc73a67
CB
2414static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2415 void *acast, int prefix)
0ad19a3f 2416{
a5f5cb41
CB
2417 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2418 struct nl_handler nlh;
2419 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2420 int addrlen, err;
06f976ca 2421 struct ifaddrmsg *ifa;
0ad19a3f 2422
ebc73a67
CB
2423 addrlen = family == AF_INET ? sizeof(struct in_addr)
2424 : sizeof(struct in6_addr);
4bf1968d 2425
a5f5cb41 2426 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2427 if (err)
2428 return err;
0ad19a3f 2429
0ad19a3f 2430 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2431 if (!nlmsg)
a5f5cb41 2432 return ret_errno(ENOMEM);
0ad19a3f 2433
06f976ca 2434 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2435 if (!answer)
a5f5cb41 2436 return ret_errno(ENOMEM);
0ad19a3f 2437
a5f5cb41 2438 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2439 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2440
2441 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2442 if (!ifa)
a5f5cb41
CB
2443 return ret_errno(ENOMEM);
2444
06f976ca
SZ
2445 ifa->ifa_prefixlen = prefix;
2446 ifa->ifa_index = ifindex;
2447 ifa->ifa_family = family;
2448 ifa->ifa_scope = 0;
acf47e1b 2449
4bf1968d 2450 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
a5f5cb41 2451 return ret_errno(EINVAL);
0ad19a3f 2452
4bf1968d 2453 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
a5f5cb41 2454 return ret_errno(EINVAL);
0ad19a3f 2455
d8948a52 2456 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
a5f5cb41 2457 return ret_errno(EINVAL);
1f1b18e7 2458
ebc73a67 2459 /* TODO: multicast, anycast with ipv6 */
79881dc6
DL
2460 if (family == AF_INET6 &&
2461 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2462 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
a5f5cb41 2463 return ret_errno(EPROTONOSUPPORT);
0ad19a3f 2464
a5f5cb41 2465 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2466}
2467
/* Add the IPv6 address @addr/@prefix to interface @ifindex by calling
 * ip_addr_add() with AF_INET6; @mcast is passed in its broadcast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
2474
ebc73a67
CB
/* Add the IPv4 address @addr/@prefix with broadcast @bcast to interface
 * @ifindex by calling ip_addr_add() with AF_INET and no anycast address.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
2480
ebc73a67
CB
/* Find an IFA_LOCAL (or IFA_ADDRESS if no IFA_LOCAL is present) address from
 * the given RTM_NEWADDR message. Allocates memory for the address and stores
 * that pointer in *res (so res should be an in_addr** or in6_addr**).
 */
6ce39620
CB
2485#pragma GCC diagnostic push
2486#pragma GCC diagnostic ignored "-Wcast-align"
2487
ebc73a67
CB
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addr_hdr = NLMSG_DATA(msg);
	struct rtattr *attr = IFA_RTA(addr_hdr);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	/* Messages for another address family are silently skipped. */
	if (addr_hdr->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk every rtattr of this message. */
	for (; RTA_OK(attr, remaining); attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * address length, but verify it before copying.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* Reuse the buffer if an earlier IFA_ADDRESS allocated it;
		 * an IFA_LOCAL found later overwrites that value.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is the preferred result, stop looking. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
2532
6ce39620
CB
2533#pragma GCC diagnostic pop
2534
19a26f82
MK
2535static int ip_addr_get(int family, int ifindex, void **res)
2536{
a5f5cb41
CB
2537 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2538 struct nl_handler nlh;
2539 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2540 int answer_len, err;
06f976ca 2541 struct ifaddrmsg *ifa;
19a26f82 2542 struct nlmsghdr *msg;
ebc73a67 2543 int readmore = 0, recv_len = 0;
19a26f82 2544
a5f5cb41 2545 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
19a26f82
MK
2546 if (err)
2547 return err;
2548
19a26f82
MK
2549 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2550 if (!nlmsg)
a5f5cb41 2551 return ret_errno(ENOMEM);
19a26f82 2552
06f976ca 2553 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82 2554 if (!answer)
a5f5cb41 2555 return ret_errno(ENOMEM);
19a26f82 2556
ebc73a67
CB
2557 /* Save the answer buffer length, since it will be overwritten on the
2558 * first receive (and we might need to receive more than once).
2559 */
06f976ca
SZ
2560 answer_len = answer->nlmsghdr->nlmsg_len;
2561
ebc73a67 2562 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2563 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2564
06f976ca 2565 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b 2566 if (!ifa)
a5f5cb41
CB
2567 return ret_errno(ENOMEM);
2568
06f976ca 2569 ifa->ifa_family = family;
19a26f82 2570
ebc73a67
CB
2571 /* Send the request for addresses, which returns all addresses on all
2572 * interfaces.
2573 */
a5f5cb41 2574 err = netlink_send(nlh_ptr, nlmsg);
19a26f82 2575 if (err < 0)
a5f5cb41 2576 return ret_set_errno(err, errno);
19a26f82 2577
6ce39620
CB
2578#pragma GCC diagnostic push
2579#pragma GCC diagnostic ignored "-Wcast-align"
2580
19a26f82
MK
2581 do {
2582 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2583 * overwritten by a previous receive.
2584 */
06f976ca 2585 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2586
ebc73a67 2587 /* Get the (next) batch of reply messages. */
a5f5cb41 2588 err = netlink_rcv(nlh_ptr, answer);
19a26f82 2589 if (err < 0)
a5f5cb41 2590 return ret_set_errno(err, errno);
19a26f82
MK
2591
2592 recv_len = err;
2593 err = 0;
2594
ebc73a67 2595 /* Satisfy the typing for the netlink macros. */
06f976ca 2596 msg = answer->nlmsghdr;
19a26f82
MK
2597
2598 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2599 /* Stop reading if we see an error message. */
19a26f82 2600 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
2601 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2602 return ret_set_errno(errmsg->error, errno);
19a26f82
MK
2603 }
2604
ebc73a67 2605 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2606 if (msg->nlmsg_type == NLMSG_DONE) {
2607 readmore = 0;
2608 break;
2609 }
2610
a5f5cb41
CB
2611 if (msg->nlmsg_type != RTM_NEWADDR)
2612 return ret_errno(EINVAL);
19a26f82 2613
06f976ca
SZ
2614 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2615 if (ifa->ifa_index == ifindex) {
a5f5cb41
CB
2616 if (ifa_get_local_ip(family, msg, res) < 0)
2617 return ret_errno(EINVAL);
51e7a874 2618
ebc73a67 2619 /* Found a result, stop searching. */
19a26f82 2620 if (*res)
a5f5cb41 2621 return 0;
19a26f82
MK
2622 }
2623
ebc73a67
CB
2624 /* Keep reading more data from the socket if the last
2625 * message had the NLF_F_MULTI flag set.
2626 */
19a26f82
MK
2627 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2628
ebc73a67 2629 /* Look at the next message received in this buffer. */
19a26f82
MK
2630 msg = NLMSG_NEXT(msg, recv_len);
2631 }
2632 } while (readmore);
2633
6ce39620
CB
2634#pragma GCC diagnostic pop
2635
19a26f82 2636 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2637 * error.
2638 */
a5f5cb41 2639 return -1;
19a26f82
MK
2640}
2641
/* Look up an IPv6 address of interface @ifindex via ip_addr_get(); the
 * result is stored through @res.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2646
/* Look up an IPv4 address of interface @ifindex via ip_addr_get(); the
 * result is stored through @res.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2651
f8fee0e2
MK
2652static int ip_gateway_add(int family, int ifindex, void *gw)
2653{
a5f5cb41 2654 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2655 struct nl_handler nlh;
a5f5cb41
CB
2656 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2657 int addrlen, err;
06f976ca 2658 struct rtmsg *rt;
f8fee0e2 2659
ebc73a67
CB
2660 addrlen = family == AF_INET ? sizeof(struct in_addr)
2661 : sizeof(struct in6_addr);
f8fee0e2 2662
a5f5cb41 2663 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
f8fee0e2
MK
2664 if (err)
2665 return err;
2666
f8fee0e2
MK
2667 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2668 if (!nlmsg)
a5f5cb41 2669 return ret_errno(ENOMEM);
f8fee0e2 2670
06f976ca 2671 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2 2672 if (!answer)
a5f5cb41 2673 return ret_errno(ENOMEM);
f8fee0e2 2674
a5f5cb41 2675 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2676 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2677
2678 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b 2679 if (!rt)
a5f5cb41
CB
2680 return ret_errno(ENOMEM);
2681
06f976ca
SZ
2682 rt->rtm_family = family;
2683 rt->rtm_table = RT_TABLE_MAIN;
2684 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2685 rt->rtm_protocol = RTPROT_BOOT;
2686 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2687 /* "default" destination */
06f976ca 2688 rt->rtm_dst_len = 0;
f8fee0e2 2689
a2f9a670 2690 /* If gateway address not supplied, then a device route will be created instead */
a5f5cb41
CB
2691 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2692 return ret_errno(ENOMEM);
f8fee0e2
MK
2693
2694 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2695 * addresses for the gateway.
2696 */
f8fee0e2 2697 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
a5f5cb41 2698 return ret_errno(EINVAL);
f8fee0e2 2699
a5f5cb41 2700 return netlink_transaction(nlh_ptr, nlmsg, answer);
f8fee0e2
MK
2701}
2702
/* Add an IPv4 default route through @gw on interface @ifindex via
 * ip_gateway_add().
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2707
/* Add an IPv6 default route through @gw on interface @ifindex via
 * ip_gateway_add().
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
581c75e7 2712bool is_ovs_bridge(const char *bridge)
0d204771 2713{
ebc73a67 2714 int ret;
0d204771 2715 struct stat sb;
ebc73a67 2716 char brdirname[22 + IFNAMSIZ + 1] = {0};
0d204771 2717
387c1c70
CB
2718 ret = strnprintf(brdirname, 22 + IFNAMSIZ + 1,
2719 "/sys/class/net/%s/bridge", bridge);
2720 if (ret < 0)
ebc73a67
CB
2721 return false;
2722
2723 ret = stat(brdirname, &sb);
2724 if (ret < 0 && errno == ENOENT)
0d204771 2725 return true;
ebc73a67 2726
0d204771
SH
2727 return false;
2728}
2729
581c75e7
CB
/* Arguments handed to the ovs-vsctl exec callbacks through their void *data. */
struct ovs_veth_args {
	/* Bridge name passed to ovs-vsctl add-port/del-port. */
	const char *bridge;
	/* Port (interface) name passed to ovs-vsctl add-port/del-port. */
	const char *nic;
};
2734
cb0dc11b
CB
2735/* Called from a background thread - when nic goes away, remove it from the
2736 * bridge.
c43cbc04 2737 */
581c75e7 2738static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2739{
581c75e7 2740 struct ovs_veth_args *args = data;
cb0dc11b 2741
9c66dc4f 2742 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2743 return -1;
c43cbc04
SH
2744}
2745
581c75e7 2746int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2747{
c43cbc04 2748 int ret;
419590da 2749 char cmd_output[PATH_MAX];
581c75e7 2750 struct ovs_veth_args args;
6ad22d06 2751
581c75e7
CB
2752 args.bridge = bridge;
2753 args.nic = nic;
2754 ret = run_command(cmd_output, sizeof(cmd_output),
2755 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2756 if (ret < 0)
2757 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2758
581c75e7
CB
2759 return 0;
2760}
ebc73a67 2761
581c75e7
CB
2762static int lxc_ovs_attach_bridge_exec(void *data)
2763{
2764 struct ovs_veth_args *args = data;
ebc73a67 2765
9c66dc4f 2766 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2767 return -1;
2768}
ebc73a67 2769
581c75e7
CB
2770static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2771{
2772 int ret;
419590da 2773 char cmd_output[PATH_MAX];
581c75e7 2774 struct ovs_veth_args args;
ebc73a67 2775
581c75e7
CB
2776 args.bridge = bridge;
2777 args.nic = nic;
2778 ret = run_command(cmd_output, sizeof(cmd_output),
2779 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2780 if (ret < 0)
2781 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2782
581c75e7 2783 return 0;
0d204771 2784}
0d204771 2785
581c75e7 2786int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2787{
ebc73a67 2788 int err, fd, index;
9de31d5a 2789 size_t retlen;
0ad19a3f 2790 struct ifreq ifr;
2791
dae3fdf6 2792 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2793 return -EINVAL;
0ad19a3f 2794
2795 index = if_nametoindex(ifname);
2796 if (!index)
3cfc0f3a 2797 return -EINVAL;
0ad19a3f 2798
0d204771 2799 if (is_ovs_bridge(bridge))
581c75e7 2800 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2801
ad9429e5 2802 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2803 if (fd < 0)
3cfc0f3a 2804 return -errno;
0ad19a3f 2805
9de31d5a 2806 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2807 if (retlen >= IFNAMSIZ) {
2808 close(fd);
9de31d5a 2809 return -E2BIG;
42cc4083 2810 }
9de31d5a 2811
ebc73a67 2812 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2813 ifr.ifr_ifindex = index;
7d163508 2814 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2815 close(fd);
3cfc0f3a
MN
2816 if (err)
2817 err = -errno;
0ad19a3f 2818
2819 return err;
2820}
72d0e1cb 2821
8befa924
SH
2822int setup_private_host_hw_addr(char *veth1)
2823{
387c1c70
CB
2824 __do_close int sockfd = -EBADF;
2825 int err;
8befa924 2826 struct ifreq ifr;
8befa924 2827
ad9429e5 2828 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2829 if (sockfd < 0)
2830 return -errno;
2831
387c1c70
CB
2832 err = strnprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2833 if (err < 0)
2834 return err;
ebc73a67 2835
8befa924 2836 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
387c1c70 2837 if (err < 0)
8befa924 2838 return -errno;
8befa924
SH
2839
2840 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2841 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924
SH
2842 if (err < 0)
2843 return -errno;
2844
2845 return 0;
2846}
811ef482
CB
2847
2848int lxc_find_gateway_addresses(struct lxc_handler *handler)
2849{
2850 struct lxc_list *network = &handler->conf->network;
2851 struct lxc_list *iterator;
2852 struct lxc_netdev *netdev;
2853 int link_index;
2854
2855 lxc_list_for_each(iterator, network) {
2856 netdev = iterator->elem;
2857
2858 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2859 continue;
2860
9c66dc4f
CB
2861 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2862 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2863
f2711167 2864 if (is_empty_string(netdev->link)) {
9c66dc4f 2865 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2866 }
2867
2868 link_index = if_nametoindex(netdev->link);
2869 if (!link_index)
2870 return -EINVAL;
2871
2872 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2873 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2874 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2875 }
2876
2877 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2878 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2879 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2880 }
2881 }
2882
2883 return 0;
2884}
2885
2886#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
071d0934
CB
2887static int lxc_create_network_unpriv_exec(const char *lxcpath,
2888 const char *lxcname,
2889 struct lxc_netdev *netdev, pid_t pid,
2890 unsigned int hooks_version)
811ef482
CB
2891{
2892 int ret;
2893 pid_t child;
2894 int bytes, pipefd[2];
2895 char *token, *saveptr = NULL;
095ead80 2896 char netdev_link[IFNAMSIZ];
419590da 2897 char buffer[PATH_MAX] = {0};
94b1cade 2898 size_t retlen;
811ef482 2899
9c66dc4f 2900 if (netdev->type != LXC_NET_VETH)
071d0934
CB
2901 return log_error_errno(-1, errno,
2902 "Network type %d not support for unprivileged use",
2903 netdev->type);
811ef482
CB
2904
2905 ret = pipe(pipefd);
9c66dc4f
CB
2906 if (ret < 0)
2907 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2908
2909 child = fork();
2910 if (child < 0) {
811ef482
CB
2911 close(pipefd[0]);
2912 close(pipefd[1]);
9c66dc4f 2913 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2914 }
2915
2916 if (child == 0) {
8335fd40 2917 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2918
2919 close(pipefd[0]);
2920
2921 ret = dup2(pipefd[1], STDOUT_FILENO);
2922 if (ret >= 0)
2923 ret = dup2(pipefd[1], STDERR_FILENO);
2924 close(pipefd[1]);
2925 if (ret < 0) {
2926 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2927 _exit(EXIT_FAILURE);
811ef482
CB
2928 }
2929
f2711167 2930 if (!is_empty_string(netdev->link))
9de31d5a 2931 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2932 else
9de31d5a
CB
2933 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2934 if (retlen >= IFNAMSIZ) {
2935 SYSERROR("Invalid network device name");
2936 _exit(EXIT_FAILURE);
2937 }
811ef482 2938
387c1c70
CB
2939 ret = strnprintf(pidstr, sizeof(pidstr), "%d", pid);
2940 if (ret < 0)
78070056 2941 _exit(EXIT_FAILURE);
8335fd40 2942 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2943
2944 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
071d0934 2945 lxcname, pidstr, netdev_link, !is_empty_string(netdev->name) ? netdev->name : "(null)");
3473ca76 2946 if (!is_empty_string(netdev->name))
811ef482
CB
2947 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2948 lxcpath, lxcname, pidstr, "veth", netdev_link,
2949 netdev->name, (char *)NULL);
2950 else
2951 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2952 lxcpath, lxcname, pidstr, "veth", netdev_link,
2953 (char *)NULL);
2954 SYSERROR("Failed to execute lxc-user-nic");
78070056 2955 _exit(EXIT_FAILURE);
811ef482
CB
2956 }
2957
2958 /* close the write-end of the pipe */
2959 close(pipefd[1]);
2960
9c66dc4f 2961 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2962 if (bytes < 0) {
74c6e2b0 2963 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2964 close(pipefd[0]);
6b9f82a9
CB
2965 } else {
2966 buffer[bytes - 1] = '\0';
811ef482 2967 }
811ef482
CB
2968
2969 ret = wait_for_pid(child);
2970 close(pipefd[0]);
9c66dc4f 2971 if (ret != 0 || bytes < 0)
071d0934
CB
2972 return log_error(-1, "lxc-user-nic failed to configure requested network: %s",
2973 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2974 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2975
2976 /* netdev->name */
2977 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2978 if (!token)
2979 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2980
e389f2af
CB
2981 /*
2982 * lxc-user-nic will take care of proper network device naming. So
fdd6be55 2983 * netdev->name and netdev->transient_name need to be identical to not
e389f2af
CB
2984 * trigger another rename later on.
2985 */
2986 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
fdd6be55
CB
2987 if (retlen < IFNAMSIZ) {
2988 retlen = strlcpy(netdev->transient_name, token, IFNAMSIZ);
2989 if (retlen < IFNAMSIZ)
2990 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2991 }
9c66dc4f 2992 if (retlen >= IFNAMSIZ)
071d0934
CB
2993 return log_error_errno(-1, E2BIG,
2994 "Container side veth device name returned by lxc-user-nic is too long");
811ef482 2995
74c6e2b0 2996 /* netdev->ifindex */
811ef482 2997 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2998 if (!token)
2999 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 3000
74c6e2b0 3001 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f 3002 if (ret < 0)
071d0934
CB
3003 return log_error_errno(-1, -ret,
3004 "Failed to convert string \"%s\" to integer", token);
811ef482 3005
74c6e2b0 3006 /* netdev->priv.veth_attr.veth1 */
811ef482 3007 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3008 if (!token)
3009 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 3010
94b1cade 3011 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f 3012 if (retlen >= IFNAMSIZ)
071d0934
CB
3013 return log_error_errno(-1, E2BIG,
3014 "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
3015
3016 /* netdev->priv.veth_attr.ifindex */
3017 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3018 if (!token)
3019 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
3020
3021 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f 3022 if (ret < 0)
071d0934
CB
3023 return log_error_errno(-1, -ret,
3024 "Failed to convert string \"%s\" to integer", token);
811ef482 3025
4d781681 3026 if (netdev->upscript) {
3027 char *argv[] = {
3028 "veth",
3029 netdev->link,
3030 netdev->priv.veth_attr.veth1,
3031 NULL,
3032 };
3033
e389f2af
CB
3034 ret = run_script_argv(lxcname, hooks_version, "net",
3035 netdev->upscript, "up", argv);
4d781681 3036 if (ret < 0)
3037 return -1;
071d0934 3038 }
4d781681 3039
811ef482
CB
3040 return 0;
3041}
3042
f0ecc19d 3043static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
3044 struct lxc_netdev *netdev,
3045 const char *netns_path)
811ef482
CB
3046{
3047 int bytes, ret;
3048 pid_t child;
3049 int pipefd[2];
25619b99 3050 char buffer[PATH_MAX] = {};
811ef482 3051
9c66dc4f
CB
3052 if (netdev->type != LXC_NET_VETH)
3053 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
3054
3055 ret = pipe(pipefd);
9c66dc4f
CB
3056 if (ret < 0)
3057 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
3058
3059 child = fork();
3060 if (child < 0) {
811ef482
CB
3061 close(pipefd[0]);
3062 close(pipefd[1]);
9c66dc4f 3063 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
3064 }
3065
3066 if (child == 0) {
8843fde4 3067 char *hostveth;
811ef482
CB
3068
3069 close(pipefd[0]);
3070
3071 ret = dup2(pipefd[1], STDOUT_FILENO);
3072 if (ret >= 0)
3073 ret = dup2(pipefd[1], STDERR_FILENO);
3074 close(pipefd[1]);
3075 if (ret < 0) {
3076 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 3077 _exit(EXIT_FAILURE);
811ef482
CB
3078 }
3079
f2711167 3080 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3081 hostveth = netdev->priv.veth_attr.pair;
3082 else
3083 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3084 if (is_empty_string(hostveth)) {
74c6e2b0 3085 SYSERROR("Host side veth device name is missing");
a30b9023 3086 _exit(EXIT_FAILURE);
74c6e2b0
CB
3087 }
3088
f2711167
CB
3089 if (is_empty_string(netdev->link)) {
3090 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 3091 _exit(EXIT_FAILURE);
74c6e2b0 3092 }
811ef482 3093
811ef482 3094 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 3095 lxcname, netns_path, netdev->link, hostveth);
811ef482 3096 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
3097 lxcname, netns_path, "veth", netdev->link, hostveth,
3098 (char *)NULL);
811ef482 3099 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 3100 _exit(EXIT_FAILURE);
811ef482
CB
3101 }
3102
3103 close(pipefd[1]);
3104
9c66dc4f 3105 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
3106 if (bytes < 0) {
3107 SYSERROR("Failed to read from pipe file descriptor.");
3108 close(pipefd[0]);
6b9f82a9
CB
3109 } else {
3110 buffer[bytes - 1] = '\0';
811ef482 3111 }
811ef482 3112
6b9f82a9 3113 ret = wait_for_pid(child);
9c66dc4f
CB
3114 close_prot_errno_disarm(pipefd[0]);
3115 if (ret != 0 || bytes < 0)
3116 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
3117 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 3118
811ef482
CB
3119 return 0;
3120}
3121
59eac805 3122static bool lxc_delete_network_unpriv(struct lxc_handler *handler)
1bd8d726
CB
3123{
3124 int ret;
3125 struct lxc_list *iterator;
3126 struct lxc_list *network = &handler->conf->network;
3127 /* strlen("/proc/") = 6
3128 * +
8335fd40 3129 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
3130 * +
3131 * strlen("/fd/") = 4
3132 * +
8335fd40 3133 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
3134 * +
3135 * \0
3136 */
8335fd40 3137 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
3138
3139 *netns_path = '\0';
3140
9c66dc4f
CB
3141 if (handler->nsfd[LXC_NS_NET] < 0)
3142 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726 3143
387c1c70
CB
3144 ret = strnprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
3145 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
3146 if (ret < 0)
1bd8d726
CB
3147 return false;
3148
3149 lxc_list_for_each(iterator, network) {
3150 char *hostveth = NULL;
3151 struct lxc_netdev *netdev = iterator->elem;
3152
3153 /* We can only delete devices whose ifindex we have. If we don't
3154 * have the index it means that we didn't create it.
3155 */
3156 if (!netdev->ifindex)
3157 continue;
3158
3159 if (netdev->type == LXC_NET_PHYS) {
3160 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3161 netdev->link);
3162 if (ret < 0)
9c66dc4f 3163 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
3164 netdev->ifindex, netdev->link);
3165 else
9c66dc4f 3166 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 3167 netdev->ifindex, netdev->link);
b3259dc6
TP
3168
3169 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3170 if (ret < 0)
3171 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3172 netdev->ifindex, netdev->link);
66a7c406 3173 goto clear_ifindices;
1bd8d726
CB
3174 }
3175
3176 ret = netdev_deconf[netdev->type](handler, netdev);
3177 if (ret < 0)
3178 WARN("Failed to deconfigure network device");
3179
3180 if (netdev->type != LXC_NET_VETH)
66a7c406 3181 goto clear_ifindices;
1bd8d726 3182
f2711167 3183 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 3184 goto clear_ifindices;
1bd8d726 3185
f2711167 3186 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3187 hostveth = netdev->priv.veth_attr.pair;
3188 else
3189 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3190 if (is_empty_string(hostveth))
66a7c406 3191 goto clear_ifindices;
8843fde4 3192
1bd8d726
CB
3193 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3194 handler->name, netdev,
3195 netns_path);
3196 if (ret < 0) {
9c66dc4f 3197 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 3198 goto clear_ifindices;
1bd8d726 3199 }
9c66dc4f 3200 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
3201
3202clear_ifindices:
0858c829
CB
3203 /*
3204 * We need to clear any ifindices we recorded so liblxc won't
3205 * have cached stale data which would cause it to fail on
3206 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3207 */
3208 netdev->ifindex = 0;
3209 if (netdev->type == LXC_NET_PHYS) {
3210 netdev->priv.phys_attr.ifindex = 0;
3211 } else if (netdev->type == LXC_NET_VETH) {
3212 netdev->priv.veth_attr.veth1[0] = '\0';
3213 netdev->priv.veth_attr.ifindex = 0;
3214 }
1bd8d726
CB
3215 }
3216
bb84beda 3217 return true;
1bd8d726
CB
3218}
3219
6509154d 3220static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
3221 struct lxc_list *cur, *next;
3222 struct lxc_inetdev *inet4dev;
3223 struct lxc_inet6dev *inet6dev;
3224 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3225 int err = 0;
5fe147e9
TP
3226 unsigned int lo_ifindex = 0, link_ifindex = 0;
3227
3228 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
3229 if (link_ifindex == 0)
3230 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 3231
6509154d 3232
3233 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3234 if (!lxc_list_empty(&netdev->ipv4)) {
3235 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
3236 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3237 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 3238 }
3239
3240 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3241 if (!lxc_list_empty(&netdev->ipv6)) {
3242 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
3243 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3244 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 3245
3246 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
3247 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3248 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 3249 }
3250
b670016a 3251 /* Perform IPVLAN specific checks. */
3252 if (netdev->type == LXC_NET_IPVLAN) {
3253 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
3254 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3255 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 3256
3257 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3258 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
3259 if (lo_ifindex == 0)
3260 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 3261 }
3262
6509154d 3263 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3264 inet4dev = cur->elem;
3265 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3266 return ret_set_errno(-1, -errno);
6509154d 3267
5fe147e9 3268 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3269 return ret_set_errno(-1, EINVAL);
b670016a 3270
3271 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3272 if (netdev->type == LXC_NET_IPVLAN) {
3273 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
3274 if (err < 0)
3275 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 3276 }
6509154d 3277 }
3278
3279 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3280 inet6dev = cur->elem;
3281 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3282 return ret_set_errno(-1, -errno);
6509154d 3283
5fe147e9 3284 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3285 return ret_set_errno(-1, EINVAL);
b670016a 3286
3287 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3288 if (netdev->type == LXC_NET_IPVLAN) {
3289 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
3290 if (err < 0)
3291 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3292 }
6509154d 3293 }
3294
3295 return 0;
3296}
3297
9c66dc4f
CB
/*
 * Remove the l2proxy state for a single IPv4 address.
 *
 * When @lo_ifindex is non-zero the /32 route to that interface is deleted.
 * When @link is non-empty the neighbour proxy entry on that device is deleted.
 * Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success and -1 with errno set to EINVAL on failure.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* A loopback ifindex means a static route to the lo device exists. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* A link means a neighbour proxy entry for this IP exists on it. */
	if (!is_empty_string(link)) {
		unsigned int ifindex = if_nametoindex(link);

		if (ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3330
9c66dc4f
CB
/*
 * Remove the l2proxy state for a single IPv6 address.
 *
 * When @lo_ifindex is non-zero the /128 route to that interface is deleted.
 * When @link is non-empty the neighbour proxy entry on that device is deleted.
 * Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success and -1 with errno set to EINVAL on failure.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET6_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* A loopback ifindex means a static route to the lo device exists. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* A link means a neighbour proxy entry for this IP exists on it. */
	if (!is_empty_string(link)) {
		unsigned int ifindex = if_nametoindex(link);

		if (ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3365
6509154d 3366static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3367 unsigned int lo_ifindex = 0;
3368 unsigned int errCount = 0;
6509154d 3369 struct lxc_list *cur, *next;
3370 struct lxc_inetdev *inet4dev;
3371 struct lxc_inet6dev *inet6dev;
6509154d 3372
b670016a 3373 /* Perform IPVLAN specific checks. */
3374 if (netdev->type == LXC_NET_IPVLAN) {
3375 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3376 lo_ifindex = if_nametoindex(loop_device);
b670016a 3377 if (lo_ifindex == 0) {
3378 errCount++;
3ebffb98 3379 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3380 }
b670016a 3381 }
6509154d 3382
b670016a 3383 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3384 inet4dev = cur->elem;
3385 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3386 errCount++;
6509154d 3387 }
3388
3389 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3390 inet6dev = cur->elem;
b670016a 3391 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3392 errCount++;
6509154d 3393 }
3394
b670016a 3395 if (errCount > 0)
596a002c 3396 return ret_set_errno(-1, EINVAL);
6509154d 3397
3398 return 0;
3399}
3400
e389f2af 3401static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3402{
811ef482
CB
3403 struct lxc_list *iterator;
3404 struct lxc_list *network = &handler->conf->network;
3405
811ef482
CB
3406 lxc_list_for_each(iterator, network) {
3407 struct lxc_netdev *netdev = iterator->elem;
3408
9c66dc4f
CB
3409 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3410 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3411
6509154d 3412 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3413 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3414 if (lxc_setup_l2proxy(netdev))
3415 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3416 }
3417
bad2f913 3418 if (netdev_configure_server[netdev->type](handler, netdev))
9c66dc4f 3419 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3420 }
3421
3422 return 0;
3423}
3424
fdd6be55
CB
3425/*
3426 * LXC moves network devices into the target namespace based on their created
3427 * name. The created name can either be randomly generated for e.g. veth
3428 * devices or it can be the name of the existing device in the server's
3429 * namespaces. This is e.g. the case when moving physical devices. However this
3430 * can lead to weird clashes. Consider we have a network namespace that has the
3431 * following devices:
3432
3433 * 4: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3434 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3435 * altname enp7s0
3436 * 5: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3437 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3438 * altname enp8s0
3439 *
3440 * and the user generates the following network config for their container:
3441 *
3442 * lxc.net.0.type = phys
3443 * lxc.net.0.name = eth1
3444 * lxc.net.0.link = eth2
3445 *
3446 * lxc.net.1.type = phys
3447 * lxc.net.1.name = eth2
3448 * lxc.net.1.link = eth1
3449 *
3450 * This would cause LXC to move the devices eth1 and eth2 from the server's
3451 * network namespace into the container's network namespace:
3452 *
3453 * 24: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3454 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3455 * altname enp7s0
3456 * 25: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3457 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3458 * altname enp8s0
3459 *
3460 * According to the network config above we now need to rename the network
3461 * devices in the container's network namespace. Let's say we start with
3462 * renaming eth2 to eth1. This would immediately lead to a clash since the
3463 * container's network namespace already contains a network device with that
3464 * name. Renaming the other device would have the same problem.
3465 *
3466 * There are multiple ways to fix this but I'm concerned with keeping the logic
3467 * somewhat reasonable which is why we simply start creating transient device
3468 * names that are unique which we'll use to move and rename the network device
3469 * in the container's network namespace at the same time. And then we rename
3470 * based on those random devices names to the target name.
3471 *
3472 * Note that the transient name is based on the type of network device as
3473 * specified in the LXC config. However, that doesn't mean it's correct. LXD
3474 * passes veth devices and a range of other network devices (e.g. Infiniband
3475 * VFs etc.) via LXC_NET_PHYS even though they're not really "physical" in the
3476 * sense we like to think about it so you might see a veth device being
3477 * assigned a "physXXXXXX" transient name. That's not a problem.
3478 */
3479static int create_transient_name(struct lxc_netdev *netdev)
3480{
3481 const struct lxc_network_info *info;
3482
3483 if (!is_empty_string(netdev->transient_name))
3484 return syserror_set(-EINVAL, "Network device already had a transient name %s",
3485 netdev->transient_name);
3486
3487 info = &lxc_network_info[netdev->type];
3488 strlcpy(netdev->transient_name, info->template, info->template_len + 1);
3489
3490 if (!lxc_ifname_alnum_case_sensitive(netdev->transient_name))
3491 return syserror_set(-EINVAL, "Failed to create transient name for network device %s", netdev->created_name);
3492
3493 TRACE("Created transient name %s for network device", netdev->transient_name);
3494 return 0;
3495}
3496
e389f2af 3497int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3498{
e389f2af
CB
3499 pid_t pid = handler->pid;
3500 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3501 struct lxc_list *iterator;
3502
e0010464 3503 if (am_guest_unpriv())
74c6e2b0 3504 return 0;
811ef482
CB
3505
3506 lxc_list_for_each(iterator, network) {
3dd78294 3507 __do_free char *physname = NULL;
e389f2af 3508 int ret;
811ef482
CB
3509 struct lxc_netdev *netdev = iterator->elem;
3510
fdd6be55
CB
3511 /*
3512 * Veth devices are directly created in the container's network
3513 * namespace so the device doesn't need to be moved into the
3514 * container's network namespace. The transient name will
3515 * already have been set above when we created the veth tunnel.
3516 *
3517 * Other than this special case this also catches all
3518 * LXC_NET_EMPTY and LXC_NET_NONE devices.
3519 */
811ef482
CB
3520 if (!netdev->ifindex)
3521 continue;
3522
fdd6be55
CB
3523 ret = create_transient_name(netdev);
3524 if (ret < 0)
3525 return ret;
3526
3dd78294
CB
3527 if (netdev->type == LXC_NET_PHYS)
3528 physname = is_wlan(netdev->link);
3529
3530 if (physname)
fdd6be55 3531 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->transient_name);
3dd78294 3532 else
fdd6be55 3533 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->transient_name);
9c66dc4f 3534 if (ret)
fdd6be55
CB
3535 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d and rename to %s",
3536 netdev->created_name, netdev->ifindex, pid, netdev->transient_name);
811ef482 3537
fdd6be55
CB
3538 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d and renamed to %s",
3539 maybe_empty(netdev->created_name), netdev->ifindex, pid, netdev->transient_name);
811ef482
CB
3540 }
3541
3542 return 0;
3543}
3544
3c09b97c
CB
3545static int network_requires_advanced_setup(int type)
3546{
3547 if (type == LXC_NET_EMPTY)
3548 return false;
3549
3550 if (type == LXC_NET_NONE)
3551 return false;
3552
3553 return true;
3554}
3555
e389f2af 3556static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3557{
e389f2af
CB
3558 int hooks_version = handler->conf->hooks_version;
3559 const char *lxcname = handler->name;
3560 const char *lxcpath = handler->lxcpath;
3561 struct lxc_list *network = &handler->conf->network;
3562 pid_t pid = handler->pid;
74c6e2b0
CB
3563 struct lxc_list *iterator;
3564
74c6e2b0
CB
3565 lxc_list_for_each(iterator, network) {
3566 struct lxc_netdev *netdev = iterator->elem;
3567
3c09b97c 3568 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3569 continue;
3570
9c66dc4f
CB
3571 if (netdev->type != LXC_NET_VETH)
3572 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3573 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3574
3575 if (netdev->mtu)
3576 INFO("mtu ignored due to insufficient privilege");
3577
e389f2af
CB
3578 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3579 pid, hooks_version))
74c6e2b0
CB
3580 return -1;
3581 }
3582
3583 return 0;
3584}
3585
59eac805 3586static bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3587{
3588 int ret;
3589 struct lxc_list *iterator;
3590 struct lxc_list *network = &handler->conf->network;
1bd8d726 3591
811ef482
CB
3592 lxc_list_for_each(iterator, network) {
3593 char *hostveth = NULL;
3594 struct lxc_netdev *netdev = iterator->elem;
3595
3596 /* We can only delete devices whose ifindex we have. If we don't
3597 * have the index it means that we didn't create it.
3598 */
3599 if (!netdev->ifindex)
3600 continue;
3601
0104c121
CB
3602 /*
3603 * If the network device has been moved back from the
3604 * containers network namespace, update the ifindex.
3605 */
3606 netdev->ifindex = if_nametoindex(netdev->name);
3607
6509154d 3608 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3609 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3610 if (lxc_delete_l2proxy(netdev))
3611 WARN("Failed to delete all l2proxy config");
3612 /* Don't return, let the network be cleaned up as normal. */
3613 }
3614
811ef482 3615 if (netdev->type == LXC_NET_PHYS) {
bb301db7
SB
3616 /* Physical interfaces are initially returned to the parent namespace
3617 * with their transient name to avoid collisions
3618 */
3619 netdev->ifindex = if_nametoindex(netdev->transient_name);
811ef482
CB
3620 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3621 if (ret < 0)
3622 WARN("Failed to rename interface with index %d "
b809f232
CB
3623 "from \"%s\" to its initial name \"%s\"",
3624 netdev->ifindex, netdev->name, netdev->link);
0b154989 3625 else {
29589196
CB
3626 TRACE("Renamed interface with index %d from "
3627 "\"%s\" to its initial name \"%s\"",
3628 netdev->ifindex, netdev->name,
3629 netdev->link);
0b154989
TP
3630
3631 /* Restore original MTU */
3632 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3633 if (ret < 0) {
3634 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3635 netdev->link, netdev->priv.phys_attr.mtu);
3636 } else {
3637 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3638 netdev->link, netdev->priv.phys_attr.mtu);
3639 }
3640 }
b3259dc6
TP
3641
3642 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3643 if (ret < 0)
3644 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3645 netdev->ifindex, netdev->link);
66a7c406 3646 goto clear_ifindices;
811ef482
CB
3647 }
3648
3649 ret = netdev_deconf[netdev->type](handler, netdev);
3650 if (ret < 0)
3651 WARN("Failed to deconfigure network device");
3652
811ef482 3653 if (netdev->type != LXC_NET_VETH)
66a7c406 3654 goto clear_ifindices;
811ef482 3655
811ef482
CB
3656 /* Explicitly delete host veth device to prevent lingering
3657 * devices. We had issues in LXD around this.
3658 */
f2711167 3659 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3660 hostveth = netdev->priv.veth_attr.pair;
3661 else
3662 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3663 if (is_empty_string(hostveth))
66a7c406 3664 goto clear_ifindices;
811ef482 3665
1ee56cff
CB
3666 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3667 ret = lxc_netdev_delete_by_name(hostveth);
3668 if (ret < 0)
3669 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3670
1ee56cff
CB
3671 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3672 } else if (!is_empty_string(netdev->link)) {
3673 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3674 if (ret < 0)
3675 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3676
1ee56cff
CB
3677 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3678 }
811ef482 3679
66a7c406 3680clear_ifindices:
ad2ddfcd 3681 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3682 * have cached stale data which would cause it to fail on reboot
3683 * where we don't re-read the on-disk config file.
3684 */
3685 netdev->ifindex = 0;
3686 if (netdev->type == LXC_NET_PHYS) {
3687 netdev->priv.phys_attr.ifindex = 0;
3688 } else if (netdev->type == LXC_NET_VETH) {
3689 netdev->priv.veth_attr.veth1[0] = '\0';
3690 netdev->priv.veth_attr.ifindex = 0;
3691 }
bb301db7
SB
3692
3693 /* Clear transient name */
3694 if (!is_empty_string (netdev->transient_name))
3695 {
3696 netdev->transient_name[0] = '\0';
3697 }
811ef482
CB
3698 }
3699
bb84beda 3700 return true;
811ef482
CB
3701}
3702
3703int lxc_requests_empty_network(struct lxc_handler *handler)
3704{
3705 struct lxc_list *network = &handler->conf->network;
3706 struct lxc_list *iterator;
3707 bool found_none = false, found_nic = false;
3708
3709 if (lxc_list_empty(network))
3710 return 0;
3711
9c66dc4f 3712 lxc_list_for_each (iterator, network) {
811ef482
CB
3713 struct lxc_netdev *netdev = iterator->elem;
3714
3715 if (netdev->type == LXC_NET_NONE)
3716 found_none = true;
3717 else
3718 found_nic = true;
3719 }
9c66dc4f 3720
811ef482
CB
3721 if (found_none && !found_nic)
3722 return 1;
9c66dc4f 3723
811ef482
CB
3724 return 0;
3725}
3726
3727/* try to move physical nics to the init netns */
b809f232 3728int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3729{
9c66dc4f
CB
3730 __do_close int oldfd = -EBADF;
3731 int netnsfd = handler->nsfd[LXC_NS_NET];
3732 struct lxc_conf *conf = handler->conf;
811ef482 3733 int ret;
811ef482 3734 char ifname[IFNAMSIZ];
b809f232 3735 struct lxc_list *iterator;
811ef482 3736
04213960
TA
3737 /*
3738 * If we weren't asked to clone a new network namespace, there's
3739 * nothing to restore.
3740 */
3741 if (!(handler->ns_clone_flags & CLONE_NEWNET))
3742 return 0;
3743
b809f232
CB
3744 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3745 * the parent network namespace. We won't have this capability if we are
3746 * unprivileged.
3747 */
d0fbc7ba 3748 if (!handler->am_root)
b809f232 3749 return 0;
811ef482 3750
b809f232 3751 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3752
0037ab49 3753 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3754 if (oldfd < 0)
3755 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3756
b809f232 3757 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3758 if (ret < 0)
3759 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3760
b809f232
CB
3761 lxc_list_for_each(iterator, &conf->network) {
3762 struct lxc_netdev *netdev = iterator->elem;
811ef482 3763
b809f232
CB
3764 if (netdev->type != LXC_NET_PHYS)
3765 continue;
3766
3767 /* Retrieve the name of the interface in the container's network
3768 * namespace.
3769 */
3770 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3771 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3772 continue;
3773 }
b809f232 3774
bb301db7
SB
3775 /* Restore physical interfaces to host's network namespace with its transient name
3776 * to avoid collisions with the host's other interfaces.
3777 */
3778 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->transient_name);
b809f232 3779 if (ret < 0)
9c66dc4f 3780 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3781 else
9c66dc4f 3782 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3783 }
811ef482 3784
b809f232 3785 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3786 if (ret < 0)
3787 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3788
3789 return 0;
811ef482
CB
3790}
3791
3792static int setup_hw_addr(char *hwaddr, const char *ifname)
3793{
9c66dc4f 3794 __do_close int fd = -EBADF;
811ef482
CB
3795 struct sockaddr sockaddr;
3796 struct ifreq ifr;
9c66dc4f 3797 int ret;
811ef482
CB
3798
3799 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3800 if (ret)
3801 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3802
3803 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3804 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3805 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3806
ad9429e5 3807 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3808 if (fd < 0)
3809 return -1;
3810
3811 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3812 if (ret)
6d1400b5 3813 SYSERROR("Failed to perform ioctl");
3814
9c66dc4f 3815 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3816
3817 return ret;
3818}
3819
3820static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3821{
3822 struct lxc_list *iterator;
3823 int err;
3824
3825 lxc_list_for_each(iterator, ip) {
3826 struct lxc_inetdev *inetdev = iterator->elem;
3827
3828 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3829 &inetdev->bcast, inetdev->prefix);
9c66dc4f
CB
3830 if (err)
3831 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3832 }
3833
3834 return 0;
3835}
3836
3837static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3838{
3839 struct lxc_list *iterator;
3840 int err;
3841
3842 lxc_list_for_each(iterator, ip) {
3843 struct lxc_inet6dev *inet6dev = iterator->elem;
3844
3845 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3846 &inet6dev->mcast, &inet6dev->acast,
3847 inet6dev->prefix);
9c66dc4f
CB
3848 if (err)
3849 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3850 }
3851
3852 return 0;
3853}
3854
8bf64b77 3855static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3856{
811ef482 3857 int err;
009d6127 3858 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3859
3860 /* empty network namespace */
8bf64b77
CB
3861 if (!netdev->ifindex && netdev->flags & IFF_UP) {
3862 err = lxc_netdev_up("lo");
9c66dc4f
CB
3863 if (err)
3864 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3865 }
3866
811ef482 3867 /* set a mac address */
9c66dc4f
CB
3868 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3869 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3870
3871 /* setup ipv4 addresses on the interface */
9c66dc4f
CB
3872 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex))
3873 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3874
3875 /* setup ipv6 addresses on the interface */
9c66dc4f
CB
3876 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex))
3877 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3878
3879 /* set the network device up */
3880 if (netdev->flags & IFF_UP) {
8bf64b77 3881 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3882 if (err)
3883 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3884
3885 /* the network is up, make the loopback up too */
3886 err = lxc_netdev_up("lo");
9c66dc4f
CB
3887 if (err)
3888 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3889 }
3890
811ef482 3891 /* setup ipv4 gateway on the interface */
a2f9a670 3892 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3893 if (!(netdev->flags & IFF_UP))
3894 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3895
9c66dc4f
CB
3896 if (lxc_list_empty(&netdev->ipv4))
3897 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3898
a2f9a670 3899 /* Setup device route if ipv4_gateway_dev is enabled */
3900 if (netdev->ipv4_gateway_dev) {
3901 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3902 if (err < 0)
3903 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3904 } else {
009d6127 3905 /* Check the gateway address is valid */
3906 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3907 return ret_set_errno(-1, errno);
009d6127 3908
3909 /* Try adding a default route to the gateway address */
811ef482 3910 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3911 if (err < 0) {
3912 /* If adding the default route fails, this could be because the
3913 * gateway address is in a different subnet to the container's address.
3914 * To work around this, we try adding a static device route to the
3915 * gateway address first, and then try again.
3916 */
a2f9a670 3917 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3918 if (err < 0)
3919 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3920
a2f9a670 3921 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3922 if (err < 0)
3923 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3924 }
3925 }
3926 }
3927
3928 /* setup ipv6 gateway on the interface */
a2f9a670 3929 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3930 if (!(netdev->flags & IFF_UP))
3931 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3932
9c66dc4f
CB
3933 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
3934 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3935
a2f9a670 3936 /* Setup device route if ipv6_gateway_dev is enabled */
3937 if (netdev->ipv6_gateway_dev) {
3938 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3939 if (err < 0)
3940 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3941 } else {
009d6127 3942 /* Check the gateway address is valid */
3943 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3944 return ret_set_errno(-1, errno);
009d6127 3945
3946 /* Try adding a default route to the gateway address */
811ef482 3947 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3948 if (err < 0) {
3949 /* If adding the default route fails, this could be because the
3950 * gateway address is in a different subnet to the container's address.
3951 * To work around this, we try adding a static device route to the
3952 * gateway address first, and then try again.
3953 */
a2f9a670 3954 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3955 if (err < 0)
3956 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3957
a2f9a670 3958 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3959 if (err < 0)
3960 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3961 }
3962 }
3963 }
3964
8bf64b77 3965 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3966
3967 return 0;
3968}
3969
3a197a1b
CB
3970/**
3971 * Consider the following network layout:
3972 *
3973 * lxc.net.0.type = phys
3974 * lxc.net.0.link = eth2
3975 * lxc.net.0.name = eth%d
3976 *
3977 * lxc.net.1.type = phys
3978 * lxc.net.1.link = eth1
3979 * lxc.net.1.name = eth0
3980 *
3981 * If we simply follow this order and create the first network first the kernel
3982 * will allocate eth0 for the first network but the second network requests
3983 * that eth1 be renamed to eth0 in the container's network namespace which
3984 * would lead to a clash.
3985 *
3986 * Note, we don't handle cases like:
3987 *
3988 * lxc.net.0.type = phys
3989 * lxc.net.0.link = eth2
3990 * lxc.net.0.name = eth0
3991 *
3992 * lxc.net.1.type = phys
3993 * lxc.net.1.link = eth1
3994 * lxc.net.1.name = eth0
3995 *
3996 * That'll brutally fail of course but there's nothing we can do about it.
3997 */
811ef482
CB
3998int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3999 struct lxc_list *network)
4000{
4001 struct lxc_list *iterator;
3a197a1b 4002 bool needs_second_pass = false;
811ef482 4003
3a197a1b
CB
4004 if (lxc_list_empty(network))
4005 return 0;
4006
4007 /* Configure all devices that have a specific target name. */
4008 lxc_list_for_each(iterator, network) {
e389f2af 4009 struct lxc_netdev *netdev = iterator->elem;
8bf64b77 4010 int ret;
811ef482 4011
3a197a1b
CB
4012 if (is_empty_string(netdev->name) || strequal(netdev->name, "eth%d")) {
4013 needs_second_pass = true;
4014 continue;
4015 }
4016
bad2f913 4017 ret = netdev_configure_container[netdev->type](netdev);
8bf64b77
CB
4018 if (!ret)
4019 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
4020 if (ret)
4021 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482 4022 }
3a197a1b
CB
4023 INFO("Finished setting up network devices with caller assigned names");
4024
4025 if (needs_second_pass) {
4026 /* Configure all devices that have a kernel assigned name. */
4027 lxc_list_for_each(iterator, network) {
4028 struct lxc_netdev *netdev = iterator->elem;
4029 int ret;
811ef482 4030
3a197a1b
CB
4031 if (!is_empty_string(netdev->name) && !strequal(netdev->name, "eth%d"))
4032 continue;
4033
4034 ret = netdev_configure_container[netdev->type](netdev);
4035 if (!ret)
4036 ret = lxc_network_setup_in_child_namespaces_common(netdev);
4037 if (ret)
4038 return log_error_errno(-1, errno, "Failed to setup netdev");
4039 }
4040 INFO("Finished setting up network devices with kernel assigned names");
4041 }
811ef482
CB
4042
4043 return 0;
4044}
7ab1ba02 4045
3c09b97c 4046int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
4047{
4048 struct lxc_list *iterator;
4049 struct lxc_list *network = &handler->conf->network;
4050 int data_sock = handler->data_sock[0];
4051
7ab1ba02
CB
4052 lxc_list_for_each(iterator, network) {
4053 int ret;
4054 struct lxc_netdev *netdev = iterator->elem;
4055
3c09b97c 4056 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
4057 continue;
4058
7fbb15ec 4059 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 4060 if (ret < 0)
7ab1ba02 4061 return -1;
e389f2af 4062
fdd6be55 4063 ret = lxc_send_nointr(data_sock, netdev->transient_name, IFNAMSIZ, MSG_NOSIGNAL);
e389f2af
CB
4064 if (ret < 0)
4065 return -1;
4066
fdd6be55 4067 TRACE("Sent network device name \"%s\" to child", netdev->transient_name);
7ab1ba02
CB
4068 }
4069
4070 return 0;
4071}
4072
3c09b97c 4073int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
4074{
4075 struct lxc_list *iterator;
4076 struct lxc_list *network = &handler->conf->network;
4077 int data_sock = handler->data_sock[1];
4078
7ab1ba02
CB
4079 lxc_list_for_each(iterator, network) {
4080 int ret;
4081 struct lxc_netdev *netdev = iterator->elem;
4082
3c09b97c 4083 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
4084 continue;
4085
e3233f26 4086 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 4087 if (ret < 0)
7ab1ba02 4088 return -1;
e389f2af 4089
fdd6be55 4090 ret = lxc_recv_nointr(data_sock, netdev->transient_name, IFNAMSIZ, 0);
e389f2af
CB
4091 if (ret < 0)
4092 return -1;
54256301 4093
fdd6be55 4094 TRACE("Received network device name \"%s\" from parent", netdev->transient_name);
7ab1ba02
CB
4095 }
4096
4097 return 0;
4098}
a1ae535a
CB
4099
4100int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
4101{
4102 struct lxc_list *iterator, *network;
4103 int data_sock = handler->data_sock[0];
4104
4105 if (!handler->am_root)
4106 return 0;
4107
4108 network = &handler->conf->network;
4109 lxc_list_for_each(iterator, network) {
4110 int ret;
4111 struct lxc_netdev *netdev = iterator->elem;
4112
4113 /* Send network device name in the child's namespace to parent. */
7fbb15ec 4114 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 4115 if (ret < 0)
7729f8e5 4116 return -1;
a1ae535a
CB
4117
4118 /* Send network device ifindex in the child's namespace to
4119 * parent.
4120 */
7fbb15ec 4121 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 4122 if (ret < 0)
7729f8e5 4123 return -1;
a1150aa1
CB
4124
4125 TRACE("Sent network device %s with ifindex %d to parent", maybe_empty(netdev->name), netdev->ifindex);
a1ae535a
CB
4126 }
4127
e389f2af
CB
4128 if (!lxc_list_empty(network))
4129 TRACE("Sent network device names and ifindices to parent");
4130
a1ae535a 4131 return 0;
a1ae535a
CB
4132}
4133
4134int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
4135{
4136 struct lxc_list *iterator, *network;
4137 int data_sock = handler->data_sock[1];
4138
4139 if (!handler->am_root)
4140 return 0;
4141
4142 network = &handler->conf->network;
4143 lxc_list_for_each(iterator, network) {
4144 int ret;
4145 struct lxc_netdev *netdev = iterator->elem;
4146
4147 /* Receive network device name in the child's namespace to
4148 * parent.
4149 */
e3233f26 4150 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 4151 if (ret < 0)
7729f8e5 4152 return -1;
a1ae535a
CB
4153
4154 /* Receive network device ifindex in the child's namespace to
4155 * parent.
4156 */
e3233f26 4157 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 4158 if (ret < 0)
7729f8e5 4159 return -1;
a1150aa1
CB
4160
4161 TRACE("Received network device %s with ifindex %d from child", maybe_empty(netdev->name), netdev->ifindex);
a1ae535a
CB
4162 }
4163
4164 return 0;
a1ae535a 4165}
bb84beda
CB
4166
4167void lxc_delete_network(struct lxc_handler *handler)
4168{
4169 bool bret;
4170
37631ddb
CB
4171 /*
4172 * Always expose namespace fd paths to network down hooks via
4173 * environment variables. No need to complicate things by passing them
4174 * as additional hook arguments.
4175 */
4176 lxc_expose_namespace_environment(handler);
4177
bb84beda
CB
4178 if (handler->am_root)
4179 bret = lxc_delete_network_priv(handler);
4180 else
4181 bret = lxc_delete_network_unpriv(handler);
4182 if (!bret)
4183 DEBUG("Failed to delete network devices");
4184 else
4185 DEBUG("Deleted network devices");
4186}
1cd95214 4187
1cd95214
CB
4188int lxc_netns_set_nsid(int fd)
4189{
41a3300d 4190 int ret;
0ce60f0d
CB
4191 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
4192 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4193 NLMSG_ALIGN(1024)];
1cd95214 4194 struct nl_handler nlh;
a5f5cb41 4195 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
0ce60f0d
CB
4196 struct nlmsghdr *hdr;
4197 struct rtgenmsg *msg;
9d036caa
CB
4198 const __s32 ns_id = -1;
4199 const __u32 netns_fd = fd;
1cd95214 4200
a5f5cb41 4201 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
1cd95214 4202 if (ret < 0)
41a3300d 4203 return -1;
1cd95214 4204
0ce60f0d 4205 memset(buf, 0, sizeof(buf));
6ce39620
CB
4206
4207#pragma GCC diagnostic push
4208#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
4209 hdr = (struct nlmsghdr *)buf;
4210 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4211#pragma GCC diagnostic pop
1cd95214 4212
0ce60f0d
CB
4213 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4214 hdr->nlmsg_type = RTM_NEWNSID;
4215 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4216 hdr->nlmsg_pid = 0;
4217 hdr->nlmsg_seq = RTM_NEWNSID;
4218 msg->rtgen_family = AF_UNSPEC;
1cd95214 4219
9d036caa
CB
4220 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4221 if (ret < 0)
a5f5cb41 4222 return ret_errno(ENOMEM);
9d036caa
CB
4223
4224 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4225 if (ret < 0)
a5f5cb41 4226 return ret_errno(ENOMEM);
1cd95214 4227
a5f5cb41 4228 return __netlink_transaction(nlh_ptr, hdr, hdr);
1cd95214 4229}
938980ba
CB
4230
/*
 * Index the routing attributes found in the @len bytes starting at @rta by
 * attribute type.
 *
 * @tb must have room for @max + 1 entries and is zeroed first. For each
 * type <= @max the first attribute of that type is recorded; later
 * duplicates and types above @max are ignored. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	while (RTA_OK(rta, len)) {
		struct rtattr *cur = rta;

		/* Advance first so the checks below can simply skip. */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
		rta = RTA_NEXT(rta, len);
#pragma GCC diagnostic pop

		if (cur->rta_type > max)
			continue;

		if (!tb[cur->rta_type])
			tb[cur->rta_type] = cur;
	}

	return 0;
}
4250
/*
 * Return the payload of @rta interpreted as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() rather than read through a cast
 * pointer: this keeps the const qualifier intact and avoids relying on the
 * payload's alignment or effective type. The caller must make sure the
 * attribute carries at least sizeof(__s32) bytes of payload.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
4255
4256#ifndef NETNS_RTA
4257#define NETNS_RTA(r) \
4258 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
4259#endif
4260
4261int lxc_netns_get_nsid(int fd)
4262{
a5f5cb41
CB
4263 struct nl_handler nlh;
4264 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
938980ba
CB
4265 int ret;
4266 ssize_t len;
4267 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
4268 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4269 NLMSG_ALIGN(1024)];
938980ba 4270 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
938980ba
CB
4271 struct nlmsghdr *hdr;
4272 struct rtgenmsg *msg;
938980ba
CB
4273 __u32 netns_fd = fd;
4274
a5f5cb41 4275 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
938980ba
CB
4276 if (ret < 0)
4277 return -1;
4278
4279 memset(buf, 0, sizeof(buf));
6ce39620
CB
4280
4281#pragma GCC diagnostic push
4282#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4283 hdr = (struct nlmsghdr *)buf;
4284 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4285#pragma GCC diagnostic pop
938980ba
CB
4286
4287 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4288 hdr->nlmsg_type = RTM_GETNSID;
4289 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4290 hdr->nlmsg_pid = 0;
4291 hdr->nlmsg_seq = RTM_GETNSID;
4292 msg->rtgen_family = AF_UNSPEC;
4293
9d036caa 4294 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
a5f5cb41
CB
4295 if (ret < 0)
4296 return ret_errno(ENOMEM);
938980ba 4297
a5f5cb41 4298 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
938980ba
CB
4299 if (ret < 0)
4300 return -1;
4301
4302 msg = NLMSG_DATA(hdr);
4303 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4304 if (len < 0)
a5f5cb41 4305 return ret_errno(EINVAL);
938980ba 4306
6ce39620
CB
4307#pragma GCC diagnostic push
4308#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4309 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4310 if (tb[__LXC_NETNSA_NSID])
4311 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4312#pragma GCC diagnostic pop
938980ba
CB
4313
4314 return -1;
4315}
e389f2af
CB
4316
4317int lxc_create_network(struct lxc_handler *handler)
4318{
4319 int ret;
4320
e389f2af
CB
4321 if (handler->am_root) {
4322 ret = lxc_create_network_priv(handler);
4323 if (ret)
4324 return -1;
4325
4326 return lxc_network_move_created_netdev_priv(handler);
4327 }
4328
4329 return lxc_create_network_unpriv(handler);
4330}