]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
list: add new kernel-based list implementation
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
f40988c7 39#include "process_utils.h"
fdd6be55 40#include "string_utils.h"
59524108 41#include "syscall_wrappers.h"
0d204771 42#include "utils.h"
0ad19a3f 43
9de31d5a
CB
44#ifndef HAVE_STRLCPY
45#include "include/strlcpy.h"
46#endif
47
ac2cecc4 48lxc_log_define(network, lxc);
f8fee0e2 49
bad2f913
CB
50typedef int (*netdev_configure_server_cb)(struct lxc_handler *, struct lxc_netdev *);
51typedef int (*netdev_configure_container_cb)(struct lxc_netdev *);
52typedef int (*netdev_shutdown_server_cb)(struct lxc_handler *, struct lxc_netdev *);
53
3392d379
CB
54const struct lxc_network_info {
55 const char *name;
fdd6be55
CB
56 const char template[IFNAMSIZ];
57 size_t template_len;
3392d379 58} lxc_network_info[LXC_NET_MAXCONFTYPE + 1] = {
fdd6be55
CB
59 [LXC_NET_EMPTY] = { "empty", "emptXXXXXX", STRLITERALLEN("emptXXXXXX") },
60 [LXC_NET_VETH] = { "veth", "vethXXXXXX", STRLITERALLEN("vethXXXXXX") },
61 [LXC_NET_MACVLAN] = { "macvlan", "macvXXXXXX", STRLITERALLEN("macvXXXXXX") },
62 [LXC_NET_IPVLAN] = { "ipvlan", "ipvlXXXXXX", STRLITERALLEN("ipvlXXXXXX") },
63 [LXC_NET_PHYS] = { "phys", "physXXXXXX", STRLITERALLEN("physXXXXXX") },
64 [LXC_NET_VLAN] = { "vlan", "vlanXXXXXX", STRLITERALLEN("vlanXXXXXX") },
65 [LXC_NET_NONE] = { "none", "noneXXXXXX", STRLITERALLEN("noneXXXXXX") },
66 [LXC_NET_MAXCONFTYPE] = { NULL, "", 0 }
3392d379
CB
67};
68
69const char *lxc_net_type_to_str(int type)
70{
71 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
72 return NULL;
73
74 return lxc_network_info[type].name;
75}
76
77static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
78
79char *lxc_ifname_alnum_case_sensitive(char *template)
80{
81 char name[IFNAMSIZ];
82 size_t i = 0;
83#ifdef HAVE_RAND_R
84 unsigned int seed;
85
86 seed = randseed(false);
87#else
88
89 (void)randseed(true);
90#endif
91
92 if (strlen(template) >= IFNAMSIZ)
93 return NULL;
94
95 /* Generate random names until we find one that doesn't exist. */
96 for (;;) {
97 name[0] = '\0';
98 (void)strlcpy(name, template, IFNAMSIZ);
99
100 for (i = 0; i < strlen(name); i++) {
101 if (name[i] == 'X') {
102#ifdef HAVE_RAND_R
103 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
104#else
105 name[i] = padchar[rand() % strlen(padchar)];
106#endif
107 }
108 }
109
110 if (if_nametoindex(name) == 0)
111 break;
112 }
113
114 (void)strlcpy(template, name, strlen(template) + 1);
115
116 return template;
117}
3ebffb98 118static const char loop_device[] = "lo";
811ef482 119
b670016a 120static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 121{
d16bda44 122 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
8f82874c 123 struct nl_handler nlh;
d16bda44
CB
124 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
125 int addrlen, err;
8f82874c 126 struct rtmsg *rt;
8f82874c 127
128 addrlen = family == AF_INET ? sizeof(struct in_addr)
129 : sizeof(struct in6_addr);
130
d16bda44 131 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 132 if (err)
133 return err;
134
8f82874c 135 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
136 if (!nlmsg)
d16bda44 137 return -ENOMEM;
8f82874c 138
139 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
140 if (!answer)
a5f5cb41 141 return -ENOMEM;
8f82874c 142
143 nlmsg->nlmsghdr->nlmsg_flags =
144 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 145 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 146
147 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
148 if (!rt)
a5f5cb41 149 return -ENOMEM;
d16bda44 150
8f82874c 151 rt->rtm_family = family;
152 rt->rtm_table = RT_TABLE_MAIN;
153 rt->rtm_scope = RT_SCOPE_LINK;
154 rt->rtm_protocol = RTPROT_BOOT;
155 rt->rtm_type = RTN_UNICAST;
156 rt->rtm_dst_len = netmask;
157
8f82874c 158 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
159 return -EINVAL;
160
8f82874c 161 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
162 return -EINVAL;
163
164 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 165}
166
/* Add an IPv4 route for @dest/@netmask via interface @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}

/* Add an IPv6 route for @dest/@netmask via interface @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}

/* Delete the IPv4 route for @dest/@netmask via interface @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}

/* Delete the IPv6 route for @dest/@netmask via interface @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
186
d4a7da46 187static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
188{
189 struct lxc_list *iterator;
190 int err;
191
192 lxc_list_for_each(iterator, ip) {
193 struct lxc_inetdev *inetdev = iterator->elem;
194
195 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
196 if (err)
197 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 198 }
199
200 return 0;
201}
202
203static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
204{
205 struct lxc_list *iterator;
206 int err;
207
208 lxc_list_for_each(iterator, ip) {
209 struct lxc_inet6dev *inet6dev = iterator->elem;
210
211 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
212 if (err)
213 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 214 }
215
216 return 0;
217}
218
6dfa9581
TP
219static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
220{
221 struct lxc_list *iterator;
222 int err;
223
224 lxc_list_for_each(iterator, ip) {
225 struct lxc_inetdev *inetdev = iterator->elem;
226
227 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
228
229 if (err)
9c66dc4f 230 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
231 }
232
233 return 0;
234}
235
236static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
237{
238 struct lxc_list *iterator;
239 int err;
240
241 lxc_list_for_each(iterator, ip) {
242 struct lxc_inet6dev *inet6dev = iterator->elem;
243
244 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
245 if (err)
9c66dc4f 246 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
247 }
248
249 return 0;
250}
251
5fe147e9 252static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 253{
d16bda44 254 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
5fe147e9 255 struct nl_handler nlh;
d16bda44
CB
256 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
257 int addrlen, err;
5fe147e9 258 struct ndmsg *rt;
6dfa9581 259
5fe147e9 260 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 261
d16bda44 262 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
263 if (err)
264 return err;
6dfa9581 265
5fe147e9
TP
266 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
267 if (!nlmsg)
d16bda44 268 return -ENOMEM;
6dfa9581 269
5fe147e9
TP
270 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
271 if (!answer)
d16bda44 272 return -ENOMEM;
6dfa9581 273
5fe147e9
TP
274 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
275 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 276
5fe147e9
TP
277 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
278 if (!rt)
d16bda44
CB
279 return -ENOMEM;
280
5fe147e9
TP
281 rt->ndm_ifindex = ifindex;
282 rt->ndm_flags = NTF_PROXY;
283 rt->ndm_type = NDA_DST;
284 rt->ndm_family = family;
6dfa9581 285
5fe147e9 286 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 287 return -EINVAL;
6dfa9581 288
d16bda44 289 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
290}
291
/*
 * Check the "forwarding" sysctl of @ifname for the given address @family.
 *
 * The file /proc/sys/net/{ipv4,ipv6}/conf/<ifname>/forwarding is compared
 * against the expected content "1" through lxc_read_file_expect(), whose
 * result is returned unchanged.
 *
 * Returns -1 with errno EINVAL for an unsupported @family and -1 with errno
 * E2BIG when the path cannot be formatted.
 */
static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
{
	char buf[1] = "";
	char path[PATH_MAX];
	const char *proto;
	int len;

	switch (family) {
	case AF_INET:
		proto = "ipv4";
		break;
	case AF_INET6:
		proto = "ipv6";
		break;
	default:
		return ret_set_errno(-1, EINVAL);
	}

	len = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
			 proto, ifname, "forwarding");
	if (len < 0)
		return ret_set_errno(-1, E2BIG);

	return lxc_read_file_expect(path, buf, 1, "1");
}
309
622f05c7
TP
310struct bridge_vlan_info {
311 __u16 flags;
312 __u16 vid;
313};
314
315static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
316{
317 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
318 struct nl_handler nlh;
319 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
320 int err;
321 struct ifinfomsg *ifi;
322 struct rtattr *nest;
323 unsigned short bridge_flags = 0;
324 struct bridge_vlan_info vlan_info;
325
326 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
327 if (err)
328 return err;
329
330 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
331 if (!nlmsg)
332 return ret_errno(ENOMEM);
333
334 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
335 if (!answer)
336 return ret_errno(ENOMEM);
337
338 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
339 nlmsg->nlmsghdr->nlmsg_type = operation;
340
341 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
342 if (!ifi)
343 return ret_errno(ENOMEM);
344 ifi->ifi_family = AF_BRIDGE;
345 ifi->ifi_index = ifindex;
346
347 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
348 if (!nest)
349 return ret_errno(ENOMEM);
350
351 bridge_flags |= BRIDGE_FLAGS_MASTER;
352 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
353 return ret_errno(ENOMEM);
354
355 vlan_info.vid = vlan_id;
356 vlan_info.flags = 0;
357 if (!tagged)
358 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
359
360 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
361 return ret_errno(ENOMEM);
362
363 nla_end_nested(nlmsg, nest);
364
365 return netlink_transaction(nlh_ptr, nlmsg, answer);
366}
367
/* Add VLAN @vlan_id to bridge port @ifindex, tagged or as untagged PVID. */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	const unsigned short operation = RTM_SETLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, tagged);
}

/* Remove VLAN @vlan_id from bridge port @ifindex. */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	const unsigned short operation = RTM_DELLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, false);
}
377
378static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
379{
380 struct lxc_list *iterator;
381 int err;
382
383 lxc_list_for_each(iterator, vlan_ids) {
384 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
385
386 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
387 if (err)
388 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
389 }
390
391 return 0;
392}
393
33320936
TP
394static int validate_veth(struct lxc_netdev *netdev)
395{
396 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
397 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
398 if (netdev->priv.veth_attr.vlan_id_set)
399 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
400
401 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
402 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
403 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
404 }
405
406 if (netdev->priv.veth_attr.vlan_id_set) {
407 struct lxc_list *it;
408 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
409 unsigned short i = PTR_TO_USHORT(it->elem);
410 if (i == netdev->priv.veth_attr.vlan_id)
411 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
412 }
413 }
414
415 return 0;
416}
417
418static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
419{
420 int err, rc, veth1index;
421 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
422 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
423
424 /* Skip setup if no VLAN options are specified. */
425 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
426 return 0;
427
428 /* Check vlan filtering is enabled on parent bridge. */
387c1c70
CB
429 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
430 if (rc < 0)
33320936
TP
431 return -1;
432
433 rc = lxc_read_from_file(path, buf, sizeof(buf));
434 if (rc < 0)
435 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
436
437 buf[rc - 1] = '\0';
438
6ee997a7 439 if (!strequal(buf, "1"))
33320936
TP
440 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
441
442 /* Get veth1 ifindex for use with netlink. */
443 veth1index = if_nametoindex(veth1);
444 if (!veth1index)
445 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
446
447 /* Configure untagged VLAN settings on bridge port if specified. */
448 if (netdev->priv.veth_attr.vlan_id_set) {
449 unsigned short default_pvid;
450
451 /* Get the bridge's default VLAN PVID. */
387c1c70
CB
452 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
453 if (rc < 0)
33320936
TP
454 return -1;
455
456 rc = lxc_read_from_file(path, buf, sizeof(buf));
457 if (rc < 0)
458 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
459
460 buf[rc - 1] = '\0';
461 err = get_u16(&default_pvid, buf, 0);
462 if (err)
463 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
464
465 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
466 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
467 err = lxc_bridge_vlan_del(veth1index, default_pvid);
468 if (err)
469 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
470 }
471
472 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
473 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
474 if (err)
475 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
476 }
477 }
478
479 /* Configure tagged VLAN settings on bridge port if specified. */
480 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
481 if (err)
482 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
483
484 return 0;
485}
486
8f7c3358
TP
/*
 * Arguments handed to lxc_ovs_setup_bridge_vlan_exec() through
 * run_command().
 */
struct ovs_veth_vlan_args {
	/* Name of the port to configure. */
	const char *nic;

	/* Port VLAN mode. */
	const char *vlan_mode;

	/* PVID VLAN ID. */
	short vlan_id;

	/* Comma delimited list of tagged VLAN IDs (heap allocated). */
	char *trunks;
};

/*
 * Release the heap allocated trunks list of @args. The remaining members
 * are not touched.
 */
static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args *args)
{
	free_disarm(args->trunks);
}
8f7c3358
TP
498
499static int lxc_ovs_setup_bridge_vlan_exec(void *data)
500{
501 struct ovs_veth_vlan_args *args = data;
785e1540
TP
502 __do_free char *vlan_mode = NULL, *tag = NULL, *trunks = NULL;
503
504 if (!args->vlan_mode)
505 return ret_errno(EINVAL);
8f7c3358
TP
506
507 vlan_mode = must_concat(NULL, "vlan_mode=", args->vlan_mode, (char *)NULL);
508
785e1540 509 if (args->vlan_id > BRIDGE_VLAN_NONE) {
8f7c3358
TP
510 char buf[5];
511 int rc;
512
387c1c70
CB
513 rc = strnprintf(buf, sizeof(buf), "%u", args->vlan_id);
514 if (rc < 0)
72e8122b 515 return log_error_errno(-1, EINVAL, "Failed to parse ovs bridge vlan \"%d\"", args->vlan_id);
8f7c3358
TP
516
517 tag = must_concat(NULL, "tag=", buf, (char *)NULL);
518 }
519
785e1540 520 if (args->trunks)
8f7c3358
TP
521 trunks = must_concat(NULL, "trunks=", args->trunks, (char *)NULL);
522
523 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
785e1540 524 if (tag && trunks)
8f7c3358 525 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, trunks, (char *)NULL);
785e1540 526 else if (tag)
8f7c3358 527 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, (char *)NULL);
785e1540 528 else if (trunks)
8f7c3358
TP
529 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, trunks, (char *)NULL);
530 else
531 return -EINVAL;
532
533 return -errno;
534}
535
536static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
537{
538 int taggedLength = lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids);
539 struct ovs_veth_vlan_args args;
540 args.nic = veth1;
1ee07848
TP
541 args.vlan_mode = NULL;
542 args.vlan_id = BRIDGE_VLAN_NONE;
543 args.trunks = NULL;
8f7c3358
TP
544
545 /* Skip setup if no VLAN options are specified. */
546 if (!netdev->priv.veth_attr.vlan_id_set && taggedLength <= 0)
547 return 0;
548
549 /* Configure untagged VLAN settings on bridge port if specified. */
550 if (netdev->priv.veth_attr.vlan_id_set) {
551 if (netdev->priv.veth_attr.vlan_id == BRIDGE_VLAN_NONE && taggedLength <= 0)
552 return log_error_errno(-1, EINVAL, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
553
554 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
555 * Also set the vlan_mode=access, which will drop any tagged frames.
556 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
557 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
558 */
559 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
560 args.vlan_mode = "access";
561 args.vlan_id = netdev->priv.veth_attr.vlan_id;
562 }
563 }
564
565 if (taggedLength > 0) {
566 args.vlan_mode = "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
567
568 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
569 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
570 args.vlan_mode = "native-untagged";
571 }
572
573 struct lxc_list *iterator;
574 lxc_list_for_each(iterator, &netdev->priv.veth_attr.vlan_tagged_ids) {
575 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
576 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
577 int rc;
578
387c1c70
CB
579 rc = strnprintf(buf, sizeof(buf), "%u", vlan_id);
580 if (rc < 0) {
3fe6b5cf 581 free_ovs_veth_vlan_args(&args);
8f7c3358 582 return log_error_errno(-1, EINVAL, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id, veth1);
3fe6b5cf 583 }
8f7c3358 584
1ee07848
TP
585 if (args.trunks)
586 args.trunks = must_concat(NULL, args.trunks, buf, ",", (char *)NULL);
587 else
588 args.trunks = must_concat(NULL, buf, ",", (char *)NULL);
8f7c3358
TP
589 }
590 }
591
1ee07848 592 if (args.vlan_mode) {
8f7c3358
TP
593 int ret;
594 char cmd_output[PATH_MAX];
595
596 ret = run_command(cmd_output, sizeof(cmd_output), lxc_ovs_setup_bridge_vlan_exec, (void *)&args);
3fe6b5cf
TP
597 if (ret < 0) {
598 free_ovs_veth_vlan_args(&args);
8f7c3358 599 return log_error_errno(-1, ret, "Failed to setup openvswitch vlan on port \"%s\": %s", args.nic, cmd_output);
3fe6b5cf 600 }
8f7c3358
TP
601 }
602
3fe6b5cf 603 free_ovs_veth_vlan_args(&args);
8f7c3358
TP
604 return 0;
605}
606
bad2f913 607static int netdev_configure_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 608{
54256301 609 int err;
a00fbab5 610 unsigned int mtu = 1500;
811ef482
CB
611 char *veth1, *veth2;
612 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 613
33320936
TP
614 err = validate_veth(netdev);
615 if (err)
616 return err;
617
f2711167 618 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
619 veth1 = netdev->priv.veth_attr.pair;
620 if (handler->conf->reboot)
621 lxc_netdev_delete_by_name(veth1);
622 } else {
387c1c70
CB
623 err = strnprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
624 if (err < 0)
811ef482
CB
625 return -1;
626
3646ffd9 627 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
811ef482
CB
628 if (!veth1)
629 return -1;
630
631 /* store away for deconf */
632 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
633 }
634
387c1c70
CB
635 err = strnprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
636 if (err < 0)
d34212ad
CB
637 return -1;
638
3646ffd9 639 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
811ef482 640 if (!veth2)
54256301
CB
641 return -1;
642
a00fbab5
TP
643 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
644 if (netdev->mtu) {
645 if (lxc_safe_uint(netdev->mtu, &mtu))
646 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 647 } else if (!is_empty_string(netdev->link)) {
54256301 648 int ifindex_mtu;
811ef482 649
54256301
CB
650 ifindex_mtu = if_nametoindex(netdev->link);
651 if (ifindex_mtu) {
652 mtu = netdev_get_mtu(ifindex_mtu);
653 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
654 }
655 }
656
657 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
9c66dc4f
CB
658 if (err)
659 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 660
fdd6be55
CB
661 /*
662 * Veth devices are directly created in the container's network
663 * namespace so the device doesn't need to be moved into the
664 * container's network namespace. Make this explicit by setting the
665 * devices ifindex to 0.
666 */
667 netdev->ifindex = 0;
668
24190194
CB
669 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
670
fdd6be55
CB
671 /*
672 * Since the device won't be moved transient name generation won't
673 * happen. But the transient name is needed for the container to
674 * retrieve the ifindex for the device.
675 */
676 strlcpy(netdev->transient_name, veth2, IFNAMSIZ);
677
678 /*
679 * Changing the high byte of the mac address to 0xfe, the bridge interface
811ef482 680 * will always keep the host's mac address and not take the mac address
fdd6be55
CB
681 * of a container.
682 */
811ef482
CB
683 err = setup_private_host_hw_addr(veth1);
684 if (err) {
6d1400b5 685 errno = -err;
686 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
687 goto out_delete;
688 }
689
8da62485
CB
690 /* Retrieve ifindex of the host's veth device. */
691 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
692 if (!netdev->priv.veth_attr.ifindex) {
693 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
694 goto out_delete;
695 }
696
811ef482
CB
697 if (mtu) {
698 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 699 if (err) {
6d1400b5 700 errno = -err;
54256301 701 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
702 goto out_delete;
703 }
704 }
705
f2711167 706 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
26da53c3
TP
707 if (!lxc_nic_exists(netdev->link)) {
708 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
709 goto out_delete;
710 }
711
811ef482
CB
712 err = lxc_bridge_attach(netdev->link, veth1);
713 if (err) {
6d1400b5 714 errno = -err;
26da53c3 715 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
811ef482
CB
716 goto out_delete;
717 }
718 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
33320936 719
38790036
TP
720 if (is_ovs_bridge(netdev->link)) {
721 err = setup_veth_ovs_bridge_vlan(veth1, netdev);
722 if (err) {
723 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1);
724 lxc_ovs_delete_port(netdev->link, veth1);
725 goto out_delete;
726 }
727 } else {
33320936
TP
728 err = setup_veth_native_bridge_vlan(veth1, netdev);
729 if (err) {
730 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
731 goto out_delete;
732 }
733 }
811ef482
CB
734 }
735
736 err = lxc_netdev_up(veth1);
737 if (err) {
6d1400b5 738 errno = -err;
739 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
740 goto out_delete;
741 }
742
d4a7da46 743 /* setup ipv4 routes on the host interface */
744 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
745 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
746 goto out_delete;
747 }
748
749 /* setup ipv6 routes on the host interface */
750 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
751 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
752 goto out_delete;
753 }
754
6dfa9581 755 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
756 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
757 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
758 appear to be added successfully but then do not appear in the proxy list. The length of time
759 slept doesn't appear to be important, only that the process sleeps for a short period of time.
760 */
761 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
762
6dfa9581
TP
763 if (netdev->ipv4_gateway) {
764 char bufinet4[INET_ADDRSTRLEN];
765 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 766 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
767 goto out_delete;
768 }
769
770 err = lxc_ip_forwarding_on(veth1, AF_INET);
771 if (err) {
9c66dc4f 772 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
773 goto out_delete;
774 }
775
5fe147e9 776 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 777 if (err) {
9c66dc4f 778 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
779 goto out_delete;
780 }
781 }
782
783 if (netdev->ipv6_gateway) {
784 char bufinet6[INET6_ADDRSTRLEN];
785
786 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 787 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
788 goto out_delete;
789 }
790
791 /* Check for sysctl net.ipv6.conf.all.forwarding=1
792 Kernel requires this to route any packets for IPv6.
793 */
794 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
795 if (err) {
9c66dc4f 796 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
797 goto out_delete;
798 }
799
800 err = lxc_ip_forwarding_on(veth1, AF_INET6);
801 if (err) {
9c66dc4f 802 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
803 goto out_delete;
804 }
805
806 err = lxc_neigh_proxy_on(veth1, AF_INET6);
807 if (err) {
9c66dc4f 808 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
809 goto out_delete;
810 }
811
5fe147e9 812 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 813 if (err) {
9c66dc4f 814 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
815 goto out_delete;
816 }
817 }
818
819 /* setup ipv4 address routes on the host interface */
820 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
821 if (err) {
9c66dc4f 822 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
823 goto out_delete;
824 }
825
826 /* setup ipv6 address routes on the host interface */
827 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
828 if (err) {
9c66dc4f 829 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
830 goto out_delete;
831 }
832 }
833
811ef482 834 if (netdev->upscript) {
14a7b0f9
CB
835 char *argv[] = {
836 "veth",
837 netdev->link,
990b9ac3 838 veth1,
14a7b0f9
CB
839 NULL,
840 };
841
842 err = run_script_argv(handler->name,
843 handler->conf->hooks_version, "net",
844 netdev->upscript, "up", argv);
845 if (err < 0)
811ef482
CB
846 goto out_delete;
847 }
848
54256301 849 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
850
851 return 0;
852
853out_delete:
54256301 854 lxc_netdev_delete_by_name(veth1);
811ef482
CB
855 return -1;
856}
857
bad2f913 858static int netdev_configure_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 859{
8021de25 860 char peer[IFNAMSIZ];
811ef482
CB
861 int err;
862
f2711167 863 if (is_empty_string(netdev->link)) {
811ef482
CB
864 ERROR("No link for macvlan network device specified");
865 return -1;
866 }
867
387c1c70
CB
868 err = strnprintf(peer, sizeof(peer), "mcXXXXXX");
869 if (err < 0)
811ef482
CB
870 return -1;
871
3646ffd9 872 if (!lxc_ifname_alnum_case_sensitive(peer))
811ef482
CB
873 return -1;
874
875 err = lxc_macvlan_create(netdev->link, peer,
876 netdev->priv.macvlan_attr.mode);
877 if (err) {
6d1400b5 878 errno = -err;
879 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
880 peer, netdev->link);
966e9f1f 881 goto on_error;
811ef482
CB
882 }
883
9f8cf6e1
CB
884 strlcpy(netdev->created_name, peer, IFNAMSIZ);
885
811ef482
CB
886 netdev->ifindex = if_nametoindex(peer);
887 if (!netdev->ifindex) {
888 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 889 goto on_error;
811ef482
CB
890 }
891
3bef7b7b 892 if (netdev->mtu) {
54256301
CB
893 unsigned int mtu;
894
3bef7b7b
TP
895 err = lxc_safe_uint(netdev->mtu, &mtu);
896 if (err < 0) {
897 errno = -err;
898 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
899 goto on_error;
900 }
901
902 err = lxc_netdev_set_mtu(peer, mtu);
903 if (err < 0) {
904 errno = -err;
905 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
906 goto on_error;
907 }
908 }
909
811ef482 910 if (netdev->upscript) {
14a7b0f9
CB
911 char *argv[] = {
912 "macvlan",
913 netdev->link,
914 NULL,
915 };
916
917 err = run_script_argv(handler->name,
918 handler->conf->hooks_version, "net",
919 netdev->upscript, "up", argv);
920 if (err < 0)
966e9f1f 921 goto on_error;
811ef482
CB
922 }
923
4a037d61 924 DEBUG("Instantiated macvlan \"%s\" with ifindex %d and mode %d",
811ef482
CB
925 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
926
927 return 0;
966e9f1f
CB
928
929on_error:
811ef482 930 lxc_netdev_delete_by_name(peer);
811ef482
CB
931 return -1;
932}
933
0dc9a142 934static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int isolation)
c9f52382 935{
d16bda44
CB
936 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
937 struct nl_handler nlh;
938 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 939 int err, index, len;
940 struct ifinfomsg *ifi;
c9f52382 941 struct rtattr *nest, *nest2;
c9f52382 942
0dc9a142 943 len = strlen(parent);
c9f52382 944 if (len == 1 || len >= IFNAMSIZ)
d16bda44 945 return ret_errno(EINVAL);
c9f52382 946
947 len = strlen(name);
948 if (len == 1 || len >= IFNAMSIZ)
d16bda44 949 return ret_errno(EINVAL);
c9f52382 950
0dc9a142 951 index = if_nametoindex(parent);
c9f52382 952 if (!index)
d16bda44 953 return ret_errno(EINVAL);
c9f52382 954
d16bda44 955 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 956 if (err)
df62850d 957 return err;
c9f52382 958
c9f52382 959 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
960 if (!nlmsg)
d16bda44 961 return ret_errno(ENOMEM);
c9f52382 962
963 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
964 if (!answer)
d16bda44 965 return ret_errno(ENOMEM);
c9f52382 966
967 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
968 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
969
970 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
971 if (!ifi)
972 return ret_errno(ENOMEM);
c9f52382 973 ifi->ifi_family = AF_UNSPEC;
974
c9f52382 975 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
976 if (!nest)
d16bda44 977 return ret_errno(EPROTO);
c9f52382 978
979 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 980 return ret_errno(EPROTO);
c9f52382 981
5755765e
KT
982 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
983 if (!nest2)
984 return ret_errno(EPROTO);
985
3a934e2e 986 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
5755765e
KT
987 return ret_errno(EPROTO);
988
cf88a827
TP
989 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
990 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
991 * according to ipvlan docs.
5755765e 992 */
cf88a827 993 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
5755765e 994 return ret_errno(EPROTO);
c9f52382 995
5755765e 996 nla_end_nested(nlmsg, nest2);
c9f52382 997 nla_end_nested(nlmsg, nest);
998
999 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 1000 return ret_errno(EPROTO);
c9f52382 1001
1002 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
1003 return ret_errno(EPROTO);
1004
1005 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 1006}
1007
bad2f913 1008static int netdev_configure_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
c9f52382 1009{
dd119206 1010 char peer[IFNAMSIZ];
c9f52382 1011 int err;
1012
f2711167 1013 if (is_empty_string(netdev->link)) {
c9f52382 1014 ERROR("No link for ipvlan network device specified");
1015 return -1;
1016 }
1017
387c1c70
CB
1018 err = strnprintf(peer, sizeof(peer), "ipXXXXXX");
1019 if (err < 0)
c9f52382 1020 return -1;
1021
3646ffd9 1022 if (!lxc_ifname_alnum_case_sensitive(peer))
c9f52382 1023 return -1;
1024
dd119206
CB
1025 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
1026 netdev->priv.ipvlan_attr.isolation);
c9f52382 1027 if (err) {
dd119206
CB
1028 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
1029 peer, netdev->link);
c9f52382 1030 goto on_error;
1031 }
1032
e7fdd504
CB
1033 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1034
c9f52382 1035 netdev->ifindex = if_nametoindex(peer);
1036 if (!netdev->ifindex) {
1037 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
1038 goto on_error;
1039 }
1040
006e135e 1041 if (netdev->mtu) {
54256301
CB
1042 unsigned int mtu;
1043
006e135e 1044 err = lxc_safe_uint(netdev->mtu, &mtu);
1045 if (err < 0) {
1046 errno = -err;
54256301 1047 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 1048 goto on_error;
1049 }
1050
1051 err = lxc_netdev_set_mtu(peer, mtu);
1052 if (err < 0) {
1053 errno = -err;
54256301 1054 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 1055 goto on_error;
1056 }
1057 }
1058
c9f52382 1059 if (netdev->upscript) {
1060 char *argv[] = {
1061 "ipvlan",
1062 netdev->link,
1063 NULL,
1064 };
1065
dd119206
CB
1066 err = run_script_argv(handler->name, handler->conf->hooks_version,
1067 "net", netdev->upscript, "up", argv);
c9f52382 1068 if (err < 0)
1069 goto on_error;
1070 }
1071
4a037d61 1072 DEBUG("Instantiated ipvlan \"%s\" with ifindex %d and mode %d", peer,
dd119206 1073 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 1074
1075 return 0;
1076
1077on_error:
1078 lxc_netdev_delete_by_name(peer);
1079 return -1;
1080}
1081
bad2f913 1082static int netdev_configure_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482
CB
1083{
1084 char peer[IFNAMSIZ];
1085 int err;
1086 static uint16_t vlan_cntr = 0;
811ef482 1087
f2711167 1088 if (is_empty_string(netdev->link)) {
811ef482
CB
1089 ERROR("No link for vlan network device specified");
1090 return -1;
1091 }
1092
387c1c70
CB
1093 err = strnprintf(peer, sizeof(peer), "vlan%d-%d",
1094 netdev->priv.vlan_attr.vid, vlan_cntr++);
1095 if (err < 0)
811ef482
CB
1096 return -1;
1097
1098 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
1099 if (err) {
6d1400b5 1100 errno = -err;
1101 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1102 peer, netdev->link);
811ef482
CB
1103 return -1;
1104 }
1105
83530dba
CB
1106 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1107
811ef482
CB
1108 netdev->ifindex = if_nametoindex(peer);
1109 if (!netdev->ifindex) {
1110 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 1111 goto on_error;
1112 }
1113
1114 if (netdev->mtu) {
54256301
CB
1115 unsigned int mtu;
1116
3e2a7b08 1117 err = lxc_safe_uint(netdev->mtu, &mtu);
1118 if (err < 0) {
1119 errno = -err;
54256301 1120 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1121 goto on_error;
1122 }
1123
1124 err = lxc_netdev_set_mtu(peer, mtu);
54256301 1125 if (err < 0) {
3e2a7b08 1126 errno = -err;
54256301 1127 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1128 goto on_error;
1129 }
811ef482
CB
1130 }
1131
3a73d9f1 1132 if (netdev->upscript) {
1133 char *argv[] = {
1134 "vlan",
1135 netdev->link,
1136 NULL,
1137 };
1138
d4d68410
CB
1139 err = run_script_argv(handler->name, handler->conf->hooks_version,
1140 "net", netdev->upscript, "up", argv);
19abca58 1141 if (err < 0) {
3e2a7b08 1142 goto on_error;
19abca58 1143 }
3a73d9f1 1144 }
1145
4a037d61 1146 DEBUG("Instantiated vlan \"%s\" with ifindex \"%d\"", peer,
d4d68410 1147 netdev->ifindex);
811ef482
CB
1148
1149 return 0;
3e2a7b08 1150
1151on_error:
1152 lxc_netdev_delete_by_name(peer);
1153 return -1;
811ef482
CB
1154}
1155
bad2f913 1156static int netdev_configure_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1157{
0b154989 1158 int err, mtu_orig = 0;
14a7b0f9 1159
9c66dc4f
CB
1160 if (is_empty_string(netdev->link))
1161 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 1162
75b074ee
CB
1163 /*
1164 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
1165 * network namespace because we need it to move the device from the
1166 * host's network namespace to the container's network namespace later
1167 * on.
1168 * Note that netdev->link will contain the name of the physical network
1169 * device in the host's namespace.
1170 */
811ef482 1171 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
1172 if (!netdev->ifindex)
1173 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 1174
61302ef7 1175 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 1176 if (is_empty_string(netdev->name))
8bf64b77 1177 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 1178
75b074ee
CB
1179 /*
1180 * Store the ifindex of the host's network device in the host's
790255cf
CB
1181 * namespace.
1182 */
1183 netdev->priv.phys_attr.ifindex = netdev->ifindex;
1184
75b074ee
CB
1185 /*
1186 * Get original device MTU setting and store for restoration after
1187 * container shutdown.
1188 */
0b154989 1189 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
1190 if (mtu_orig < 0)
1191 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
1192
1193 netdev->priv.phys_attr.mtu = mtu_orig;
1194
3bef7b7b 1195 if (netdev->mtu) {
54256301
CB
1196 unsigned int mtu;
1197
3bef7b7b 1198 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
1199 if (err < 0)
1200 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 1201
3bef7b7b 1202 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
1203 if (err < 0)
1204 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
1205 }
1206
1207 if (netdev->upscript) {
1208 char *argv[] = {
1209 "phys",
1210 netdev->link,
1211 NULL,
1212 };
1213
75b074ee
CB
1214 err = run_script_argv(handler->name, handler->conf->hooks_version,
1215 "net", netdev->upscript, "up", argv);
9c66dc4f 1216 if (err < 0)
3bef7b7b 1217 return -1;
3bef7b7b
TP
1218 }
1219
4a037d61 1220 DEBUG("Instantiated phys \"%s\" with ifindex \"%d\"", netdev->link,
75b074ee 1221 netdev->ifindex);
811ef482
CB
1222
1223 return 0;
1224}
1225
bad2f913 1226static int netdev_configure_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1227{
14a7b0f9
CB
1228 int ret;
1229 char *argv[] = {
1230 "empty",
1231 NULL,
1232 };
1233
43e2a964
CB
1234 /* The loopback device always has index 1. */
1235 netdev->ifindex = 1;
1236
1237 if (!strequal(netdev->name, "lo"))
1238 return syserror_set(-EINVAL, "Custom loopback device names not supported");
1239
14a7b0f9
CB
1240 if (!netdev->upscript)
1241 return 0;
1242
1243 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1244 "net", netdev->upscript, "up", argv);
1245 if (ret < 0)
1246 return -1;
1247
811ef482
CB
1248 return 0;
1249}
1250
bad2f913 1251static int netdev_configure_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482
CB
1252{
1253 netdev->ifindex = 0;
1254 return 0;
1255}
1256
bad2f913
CB
1257static netdev_configure_server_cb netdev_configure_server[LXC_NET_MAXCONFTYPE + 1] = {
1258 [LXC_NET_VETH] = netdev_configure_server_veth,
1259 [LXC_NET_MACVLAN] = netdev_configure_server_macvlan,
1260 [LXC_NET_IPVLAN] = netdev_configure_server_ipvlan,
1261 [LXC_NET_VLAN] = netdev_configure_server_vlan,
1262 [LXC_NET_PHYS] = netdev_configure_server_phys,
1263 [LXC_NET_EMPTY] = netdev_configure_server_empty,
1264 [LXC_NET_NONE] = netdev_configure_server_none,
811ef482
CB
1265};
1266
bad2f913 1267static int __netdev_configure_container_common(struct lxc_netdev *netdev)
8bf64b77
CB
1268{
1269 char current_ifname[IFNAMSIZ];
1270
fdd6be55 1271 netdev->ifindex = if_nametoindex(netdev->transient_name);
8bf64b77
CB
1272 if (!netdev->ifindex)
1273 return log_error_errno(-1,
1274 errno, "Failed to retrieve ifindex for network device with name %s",
fdd6be55 1275 netdev->transient_name);
8bf64b77 1276
3473ca76 1277 if (is_empty_string(netdev->name))
8bf64b77
CB
1278 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1279
fdd6be55 1280 if (!strequal(netdev->transient_name, netdev->name)) {
8bf64b77
CB
1281 int ret;
1282
fdd6be55 1283 ret = lxc_netdev_rename_by_name(netdev->transient_name, netdev->name);
8bf64b77 1284 if (ret)
9c66dc4f 1285 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
fdd6be55 1286 netdev->transient_name, netdev->name);
8bf64b77 1287
fdd6be55 1288 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->transient_name, netdev->name);
8bf64b77
CB
1289 }
1290
1291 /*
1292 * Re-read the name of the interface because its name has changed and
1293 * would be automatically allocated by the system
1294 */
1295 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 1296 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
1297
1298 /*
1299 * Now update the recorded name of the network device to reflect the
1300 * name of the network device in the child's network namespace. We will
1301 * later on send this information back to the parent.
1302 */
1303 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
fdd6be55 1304 netdev->transient_name[0] = '\0';
8bf64b77
CB
1305
1306 return 0;
1307}
1308
/* veth devices only need the common rename-and-record handling. */
static int netdev_configure_container_veth(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1314
/* macvlan devices only need the common rename-and-record handling. */
static int netdev_configure_container_macvlan(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1319
/* ipvlan devices only need the common rename-and-record handling. */
static int netdev_configure_container_ipvlan(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1324
/* vlan devices only need the common rename-and-record handling. */
static int netdev_configure_container_vlan(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1329
/* Physical devices only need the common rename-and-record handling. */
static int netdev_configure_container_phys(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1334
/* "empty" networks need no per-device work; always succeeds. */
static int netdev_configure_container_empty(struct lxc_netdev *netdev)
{
	(void)netdev;

	return 0;
}
1339
/* "none" networks need no per-device work; always succeeds. */
static int netdev_configure_container_none(struct lxc_netdev *netdev)
{
	(void)netdev;

	return 0;
}
1344
bad2f913
CB
1345static netdev_configure_container_cb netdev_configure_container[LXC_NET_MAXCONFTYPE + 1] = {
1346 [LXC_NET_VETH] = netdev_configure_container_veth,
1347 [LXC_NET_MACVLAN] = netdev_configure_container_macvlan,
1348 [LXC_NET_IPVLAN] = netdev_configure_container_ipvlan,
1349 [LXC_NET_VLAN] = netdev_configure_container_vlan,
1350 [LXC_NET_PHYS] = netdev_configure_container_phys,
1351 [LXC_NET_EMPTY] = netdev_configure_container_empty,
1352 [LXC_NET_NONE] = netdev_configure_container_none,
8bf64b77
CB
1353};
1354
bad2f913 1355static int netdev_shutdown_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1356{
14a7b0f9
CB
1357 int ret;
1358 char *argv[] = {
1359 "veth",
1360 netdev->link,
1361 NULL,
1362 NULL,
1363 };
1364
1365 if (!netdev->downscript)
1366 return 0;
811ef482 1367
f2711167 1368 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 1369 argv[2] = netdev->priv.veth_attr.pair;
811ef482 1370 else
14a7b0f9
CB
1371 argv[2] = netdev->priv.veth_attr.veth1;
1372
1373 ret = run_script_argv(handler->name,
1374 handler->conf->hooks_version, "net",
1375 netdev->downscript, "down", argv);
1376 if (ret < 0)
1377 return -1;
811ef482 1378
811ef482
CB
1379 return 0;
1380}
1381
bad2f913 1382static int netdev_shutdown_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1383{
14a7b0f9
CB
1384 int ret;
1385 char *argv[] = {
1386 "macvlan",
1387 netdev->link,
1388 NULL,
1389 };
1390
1391 if (!netdev->downscript)
1392 return 0;
1393
1394 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1395 "net", netdev->downscript, "down", argv);
1396 if (ret < 0)
1397 return -1;
811ef482 1398
811ef482
CB
1399 return 0;
1400}
1401
bad2f913 1402static int netdev_shutdown_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
c9f52382 1403{
1404 int ret;
1405 char *argv[] = {
1406 "ipvlan",
1407 netdev->link,
1408 NULL,
1409 };
1410
1411 if (!netdev->downscript)
1412 return 0;
1413
1414 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1415 "net", netdev->downscript, "down", argv);
1416 if (ret < 0)
1417 return -1;
1418
1419 return 0;
1420}
1421
bad2f913 1422static int netdev_shutdown_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1423{
3a73d9f1 1424 int ret;
1425 char *argv[] = {
1426 "vlan",
1427 netdev->link,
1428 NULL,
1429 };
1430
1431 if (!netdev->downscript)
1432 return 0;
1433
1434 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1435 "net", netdev->downscript, "down", argv);
1436 if (ret < 0)
1437 return -1;
1438
811ef482
CB
1439 return 0;
1440}
1441
bad2f913 1442static int netdev_shutdown_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1443{
14a7b0f9
CB
1444 int ret;
1445 char *argv[] = {
1446 "phys",
1447 netdev->link,
1448 NULL,
1449 };
1450
1451 if (!netdev->downscript)
1452 return 0;
1453
1454 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1455 "net", netdev->downscript, "down", argv);
1456 if (ret < 0)
1457 return -1;
811ef482 1458
811ef482
CB
1459 return 0;
1460}
1461
bad2f913 1462static int netdev_shutdown_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1463{
14a7b0f9
CB
1464 int ret;
1465 char *argv[] = {
1466 "empty",
1467 NULL,
1468 };
1469
1470 if (!netdev->downscript)
1471 return 0;
1472
1473 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1474 "net", netdev->downscript, "down", argv);
1475 if (ret < 0)
1476 return -1;
811ef482 1477
811ef482
CB
1478 return 0;
1479}
1480
/* "none" networks have nothing to tear down; always succeeds. */
static int netdev_shutdown_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
1485
bad2f913
CB
1486static netdev_shutdown_server_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1487 [LXC_NET_VETH] = netdev_shutdown_server_veth,
1488 [LXC_NET_MACVLAN] = netdev_shutdown_server_macvlan,
1489 [LXC_NET_IPVLAN] = netdev_shutdown_server_ipvlan,
1490 [LXC_NET_VLAN] = netdev_shutdown_server_vlan,
1491 [LXC_NET_PHYS] = netdev_shutdown_server_phys,
1492 [LXC_NET_EMPTY] = netdev_shutdown_server_empty,
1493 [LXC_NET_NONE] = netdev_shutdown_server_none,
811ef482
CB
1494};
1495
0037ab49
TP
1496static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1497{
d16bda44 1498 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0037ab49 1499 struct nl_handler nlh;
d16bda44
CB
1500 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1501 int err;
0037ab49 1502 struct ifinfomsg *ifi;
0037ab49 1503
d16bda44 1504 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1505 if (err)
1506 return err;
1507
0037ab49
TP
1508 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1509 if (!nlmsg)
d16bda44 1510 return ret_errno(ENOMEM);
0037ab49
TP
1511
1512 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1513 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1514
1515 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1516 if (!ifi)
d16bda44
CB
1517 return ret_errno(ENOMEM);
1518
0037ab49
TP
1519 ifi->ifi_family = AF_UNSPEC;
1520 ifi->ifi_index = ifindex;
1521
1522 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1523 return ret_errno(ENOMEM);
0037ab49 1524
3473ca76 1525 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1526 return ret_errno(ENOMEM);
0037ab49 1527
d16bda44 1528 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1529}
1530
ebc73a67 1531int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1532{
d16bda44 1533 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0ad19a3f 1534 struct nl_handler nlh;
d16bda44
CB
1535 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1536 int err;
06f976ca 1537 struct ifinfomsg *ifi;
0ad19a3f 1538
d16bda44 1539 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1540 if (err)
1541 return err;
0ad19a3f 1542
0ad19a3f 1543 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1544 if (!nlmsg)
d16bda44 1545 return ret_errno(ENOMEM);
0ad19a3f 1546
ebc73a67 1547 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1548 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1549
1550 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1551 if (!ifi)
d16bda44
CB
1552 return ret_errno(ENOMEM);
1553
06f976ca
SZ
1554 ifi->ifi_family = AF_UNSPEC;
1555 ifi->ifi_index = ifindex;
0ad19a3f 1556
1557 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1558 return ret_errno(ENOMEM);
0ad19a3f 1559
3473ca76 1560 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1561 return ret_errno(ENOMEM);
8d357196 1562
d16bda44 1563 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1564}
1565
ebc73a67
CB
1566/* If we are asked to move a wireless interface, then we must actually move its
1567 * phyN device. Detect that condition and return the physname here. The physname
1568 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1569 */
1570#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1571char *is_wlan(const char *ifname)
e5848d39 1572{
4110345b
CB
1573 __do_fclose FILE *f = NULL;
1574 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1575 int i, ret;
e5848d39 1576 long physlen;
ebc73a67 1577 size_t len;
e5848d39 1578
ebc73a67 1579 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1580 path = must_realloc(NULL, len + 1);
387c1c70
CB
1581 ret = strnprintf(path, len, PHYSNAME, ifname);
1582 if (ret < 0)
4110345b 1583 return NULL;
ebc73a67 1584
4110345b 1585 f = fopen(path, "re");
ebc73a67 1586 if (!f)
4110345b 1587 return NULL;
ebc73a67 1588
1a0e70ac 1589 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1590 fseek(f, 0, SEEK_END);
1591 physlen = ftell(f);
1592 fseek(f, 0, SEEK_SET);
4110345b
CB
1593 if (physlen < 0)
1594 return NULL;
ebc73a67
CB
1595
1596 physname = malloc(physlen + 1);
4110345b
CB
1597 if (!physname)
1598 return NULL;
ebc73a67
CB
1599
1600 memset(physname, 0, physlen + 1);
e5848d39 1601 ret = fread(physname, 1, physlen, f);
e5848d39 1602 if (ret < 0)
4110345b 1603 return NULL;
e5848d39 1604
ebc73a67 1605 for (i = 0; i < physlen; i++) {
e5848d39
SH
1606 if (physname[i] == '\n')
1607 physname[i] = '\0';
ebc73a67 1608
e5848d39
SH
1609 if (physname[i] == '\0')
1610 break;
1611 }
1612
4110345b 1613 return move_ptr(physname);
e5848d39
SH
1614}
1615
ebc73a67
CB
/*
 * Rename network device @old to @new inside the network namespace of process
 * @pid.
 *
 * The work is done in a forked child that switches to the target namespace.
 * The parent waits for the child and returns the result of wait_for_pid(),
 * or -1 if fork() fails.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on. It must never return into the caller: doing so
	 * would leave a second copy of the parent running its code path. Exit
	 * with a failure status so the waiting parent sees the error.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1633
e4103cf6 1634int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1635 const char *newname)
e5848d39 1636{
3dd78294 1637 __do_free char *cmd = NULL;
ebc73a67 1638 pid_t fpid;
e5848d39
SH
1639
1640 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1641 * However, IIUC this involves a bit more complicated work to talk to
1642 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1643 */
1644 cmd = on_path("iw", NULL);
0ed79f45
M
1645 if (!cmd) {
1646 ERROR("Couldn't find the application iw in PATH");
3dd78294 1647 return -1;
0ed79f45 1648 }
e5848d39
SH
1649
1650 fpid = fork();
1651 if (fpid < 0)
3dd78294 1652 return -1;
ebc73a67 1653
e5848d39
SH
1654 if (fpid == 0) {
1655 char pidstr[30];
1656 sprintf(pidstr, "%d", pid);
9c66dc4f 1657 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1658 _exit(EXIT_FAILURE);
e5848d39 1659 }
ebc73a67 1660
e5848d39 1661 if (wait_for_pid(fpid))
3dd78294 1662 return -1;
e5848d39 1663
e5848d39 1664 if (newname)
3dd78294 1665 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1666
3dd78294 1667 return 0;
e5848d39
SH
1668}
1669
8d357196 1670int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1671{
3dd78294 1672 __do_free char *physname = NULL;
8befa924
SH
1673 int index;
1674
8befa924
SH
1675 if (!ifname)
1676 return -EINVAL;
1677
32571606 1678 index = if_nametoindex(ifname);
49428bf3
DY
1679 if (!index)
1680 return -EINVAL;
32571606 1681
ebc73a67
CB
1682 physname = is_wlan(ifname);
1683 if (physname)
e5848d39
SH
1684 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1685
8d357196 1686 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1687}
1688
b84f58b9 1689int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1690{
d16bda44
CB
1691 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1692 struct nl_handler nlh;
1693 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1694 int err;
ebc73a67 1695 struct ifinfomsg *ifi;
0ad19a3f 1696
d16bda44 1697 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1698 if (err)
1699 return err;
0ad19a3f 1700
0ad19a3f 1701 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1702 if (!nlmsg)
d16bda44 1703 return ret_errno(ENOMEM);
0ad19a3f 1704
06f976ca 1705 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1706 if (!answer)
d16bda44 1707 return ret_errno(ENOMEM);
0ad19a3f 1708
ebc73a67 1709 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1710 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1711
1712 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1713 if (!ifi)
d16bda44
CB
1714 return ret_errno(ENOMEM);
1715
06f976ca
SZ
1716 ifi->ifi_family = AF_UNSPEC;
1717 ifi->ifi_index = ifindex;
0ad19a3f 1718
d16bda44 1719 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1720}
1721
b84f58b9
DL
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL if the name cannot be resolved to an interface index,
 * otherwise the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1732
1733int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1734{
d16bda44
CB
1735 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1736 struct nl_handler nlh;
1737 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1738 int err, len;
06f976ca 1739 struct ifinfomsg *ifi;
b9a5bb58 1740
d16bda44 1741 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1742 if (err)
1743 return err;
b9a5bb58 1744
b84f58b9 1745 len = strlen(newname);
d16bda44
CB
1746 if (len == 1 || len >= IFNAMSIZ)
1747 return ret_errno(EINVAL);
b84f58b9 1748
b9a5bb58
DL
1749 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1750 if (!nlmsg)
d16bda44 1751 return ret_errno(ENOMEM);
b9a5bb58 1752
06f976ca 1753 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1754 if (!answer)
d16bda44 1755 return ret_errno(ENOMEM);
b9a5bb58 1756
ebc73a67 1757 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1758 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1759
1760 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1761 if (!ifi)
d16bda44
CB
1762 return ret_errno(ENOMEM);
1763
06f976ca
SZ
1764 ifi->ifi_family = AF_UNSPEC;
1765 ifi->ifi_index = ifindex;
b84f58b9
DL
1766
1767 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1768 return ret_errno(ENOMEM);
b9a5bb58 1769
d16bda44 1770 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1771}
1772
b84f58b9
DL
1773int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1774{
1775 int len, index;
1776
1777 len = strlen(oldname);
dae3fdf6 1778 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1779 return -EINVAL;
1780
1781 index = if_nametoindex(oldname);
1782 if (!index)
1783 return -EINVAL;
1784
1785 return lxc_netdev_rename_by_index(index, newname);
1786}
1787
8befa924 1788int netdev_set_flag(const char *name, int flag)
0ad19a3f 1789{
d16bda44
CB
1790 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1791 struct nl_handler nlh;
1792 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1793 int err, index, len;
06f976ca 1794 struct ifinfomsg *ifi;
0ad19a3f 1795
d16bda44 1796 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1797 if (err)
1798 return err;
0ad19a3f 1799
1800 len = strlen(name);
dae3fdf6 1801 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1802 return ret_errno(EINVAL);
0ad19a3f 1803
1804 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1805 if (!nlmsg)
d16bda44 1806 return ret_errno(ENOMEM);
0ad19a3f 1807
06f976ca 1808 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1809 if (!answer)
d16bda44 1810 return ret_errno(ENOMEM);
0ad19a3f 1811
1812 index = if_nametoindex(name);
1813 if (!index)
d16bda44 1814 return ret_errno(EINVAL);
0ad19a3f 1815
ebc73a67 1816 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1817 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1818
1819 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1820 if (!ifi)
1821 return ret_errno(ENOMEM);
1822
06f976ca
SZ
1823 ifi->ifi_family = AF_UNSPEC;
1824 ifi->ifi_index = index;
1825 ifi->ifi_change |= IFF_UP;
1826 ifi->ifi_flags |= flag;
0ad19a3f 1827
d16bda44 1828 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1829}
1830
59eac805 1831static int netdev_get_flag(const char *name, int *flag)
efa1cf45 1832{
d16bda44
CB
1833 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1834 struct nl_handler nlh;
1835 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1836 int err, index, len;
a4318300 1837 struct ifinfomsg *ifi;
efa1cf45
DY
1838
1839 if (!name)
d16bda44 1840 return ret_errno(EINVAL);
efa1cf45 1841
d16bda44 1842 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1843 if (err)
1844 return err;
1845
efa1cf45
DY
1846 len = strlen(name);
1847 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1848 return ret_errno(EINVAL);
efa1cf45 1849
efa1cf45
DY
1850 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1851 if (!nlmsg)
d16bda44 1852 return ret_errno(ENOMEM);
efa1cf45 1853
06f976ca 1854 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1855 if (!answer)
d16bda44 1856 return ret_errno(ENOMEM);
efa1cf45 1857
efa1cf45
DY
1858 index = if_nametoindex(name);
1859 if (!index)
d16bda44 1860 return ret_errno(EINVAL);
efa1cf45 1861
06f976ca
SZ
1862 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1863 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1864
1865 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1866 if (!ifi)
1867 return ret_errno(ENOMEM);
1868
06f976ca
SZ
1869 ifi->ifi_family = AF_UNSPEC;
1870 ifi->ifi_index = index;
efa1cf45 1871
d16bda44 1872 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1873 if (err)
d16bda44 1874 return ret_set_errno(-1, errno);
efa1cf45 1875
06f976ca 1876 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1877
1878 *flag = ifi->ifi_flags;
efa1cf45
DY
1879 return err;
1880}
1881
1882/*
1883 * \brief Check a interface is up or not.
1884 *
1885 * \param name: name for the interface.
1886 *
1887 * \return int.
1888 * 0 means interface is down.
1889 * 1 means interface is up.
1890 * Others means error happened, and ret-value is the error number.
1891 */
ebc73a67 1892int lxc_netdev_isup(const char *name)
efa1cf45 1893{
4db0514d
CB
1894 int err;
1895 int flag = 0;
efa1cf45
DY
1896
1897 err = netdev_get_flag(name, &flag);
1898 if (err)
ebc73a67
CB
1899 return err;
1900
efa1cf45
DY
1901 if (flag & IFF_UP)
1902 return 1;
ebc73a67 1903
efa1cf45 1904 return 0;
efa1cf45
DY
1905}
1906
0130df54
SH
1907int netdev_get_mtu(int ifindex)
1908{
a5f5cb41 1909 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54 1910 struct nl_handler nlh;
a5f5cb41
CB
1911 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1912 int readmore = 0, recv_len = 0;
1913 int answer_len, err, res;
06f976ca 1914 struct ifinfomsg *ifi;
0130df54 1915 struct nlmsghdr *msg;
0130df54 1916
a5f5cb41 1917 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0130df54
SH
1918 if (err)
1919 return err;
1920
0130df54
SH
1921 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1922 if (!nlmsg)
a5f5cb41 1923 return ret_errno(ENOMEM);
0130df54 1924
06f976ca 1925 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54 1926 if (!answer)
a5f5cb41 1927 return ret_errno(ENOMEM);
0130df54
SH
1928
1929 /* Save the answer buffer length, since it will be overwritten
1930 * on the first receive (and we might need to receive more than
ebc73a67
CB
1931 * once.
1932 */
06f976ca
SZ
1933 answer_len = answer->nlmsghdr->nlmsg_len;
1934
ebc73a67 1935 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1936 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1937
06f976ca 1938 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1939 if (!ifi)
a5f5cb41
CB
1940 return ret_errno(ENOMEM);
1941
06f976ca 1942 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1943
1944 /* Send the request for addresses, which returns all addresses
1945 * on all interfaces. */
a5f5cb41 1946 err = netlink_send(nlh_ptr, nlmsg);
0130df54 1947 if (err < 0)
a5f5cb41 1948 return ret_set_errno(-1, errno);
0130df54 1949
6ce39620
CB
1950#pragma GCC diagnostic push
1951#pragma GCC diagnostic ignored "-Wcast-align"
1952
0130df54
SH
1953 do {
1954 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1955 * overwritten by a previous receive.
1956 */
06f976ca 1957 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1958
1959 /* Get the (next) batch of reply messages */
a5f5cb41 1960 err = netlink_rcv(nlh_ptr, answer);
0130df54 1961 if (err < 0)
a5f5cb41 1962 return ret_set_errno(-1, errno);
0130df54
SH
1963
1964 recv_len = err;
0130df54
SH
1965
1966 /* Satisfy the typing for the netlink macros */
06f976ca 1967 msg = answer->nlmsghdr;
0130df54
SH
1968
1969 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1970 /* Stop reading if we see an error message */
1971 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
1972 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1973 return ret_set_errno(errmsg->error, errno);
0130df54
SH
1974 }
1975
1976 /* Stop reading if we see a NLMSG_DONE message */
1977 if (msg->nlmsg_type == NLMSG_DONE) {
1978 readmore = 0;
1979 break;
1980 }
1981
06f976ca 1982 ifi = NLMSG_DATA(msg);
0130df54
SH
1983 if (ifi->ifi_index == ifindex) {
1984 struct rtattr *rta = IFLA_RTA(ifi);
a5f5cb41
CB
1985 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1986
0130df54 1987 res = 0;
ebc73a67 1988 while (RTA_OK(rta, attr_len)) {
9c66dc4f 1989 /*
a5f5cb41 1990 * Found a local address for the
ebc73a67
CB
1991 * requested interface, return it.
1992 */
0130df54 1993 if (rta->rta_type == IFLA_MTU) {
a5f5cb41
CB
1994 memcpy(&res, RTA_DATA(rta), sizeof(int));
1995 return res;
0130df54 1996 }
a5f5cb41 1997
0130df54
SH
1998 rta = RTA_NEXT(rta, attr_len);
1999 }
0130df54
SH
2000 }
2001
ebc73a67
CB
2002 /* Keep reading more data from the socket if the last
2003 * message had the NLF_F_MULTI flag set.
2004 */
0130df54
SH
2005 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2006
ebc73a67 2007 /* Look at the next message received in this buffer. */
0130df54
SH
2008 msg = NLMSG_NEXT(msg, recv_len);
2009 }
2010 } while (readmore);
2011
6ce39620
CB
2012#pragma GCC diagnostic pop
2013
ebc73a67 2014 /* If we end up here, we didn't find any result, so signal an error. */
a5f5cb41 2015 return -1;
0130df54
SH
2016}
2017
d472214b 2018int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 2019{
a5f5cb41
CB
2020 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2021 struct nl_handler nlh;
2022 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
54256301 2023 int err, len;
06f976ca 2024 struct ifinfomsg *ifi;
75d09f83 2025
a5f5cb41 2026 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2027 if (err)
2028 return err;
75d09f83
DL
2029
2030 len = strlen(name);
dae3fdf6 2031 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2032 return ret_errno(EINVAL);
75d09f83
DL
2033
2034 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2035 if (!nlmsg)
a5f5cb41 2036 return ret_errno(ENOMEM);
75d09f83 2037
06f976ca 2038 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83 2039 if (!answer)
a5f5cb41 2040 return ret_errno(ENOMEM);
75d09f83 2041
ebc73a67 2042 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
2043 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2044
2045 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2046 if (!ifi)
2047 return ret_errno(ENOMEM);
2048
06f976ca 2049 ifi->ifi_family = AF_UNSPEC;
54256301
CB
2050
2051 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2052 return ret_errno(ENOMEM);
75d09f83
DL
2053
2054 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2055 return ret_errno(ENOMEM);
75d09f83 2056
a5f5cb41 2057 return netlink_transaction(nlh_ptr, nlmsg, answer);
75d09f83
DL
2058}
2059
d472214b 2060int lxc_netdev_up(const char *name)
0ad19a3f 2061{
d472214b 2062 return netdev_set_flag(name, IFF_UP);
0ad19a3f 2063}
2064
/*
 * Call netdev_set_flag() with a flag value of 0 for the interface called
 * @name (presumably this clears IFF_UP - confirm against netdev_set_flag()).
 */
int lxc_netdev_down(const char *name)
{
	const int wanted = 0;

	return netdev_set_flag(name, wanted);
}
2069
54256301 2070int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 2071{
a5f5cb41
CB
2072 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2073 struct nl_handler nlh;
2074 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2075 int err, len;
06f976ca 2076 struct ifinfomsg *ifi;
0ad19a3f 2077 struct rtattr *nest1, *nest2, *nest3;
0ad19a3f 2078
a5f5cb41 2079 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2080 if (err)
2081 return err;
0ad19a3f 2082
2083 len = strlen(name1);
dae3fdf6 2084 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2085 return ret_errno(EINVAL);
0ad19a3f 2086
2087 len = strlen(name2);
dae3fdf6 2088 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2089 return ret_errno(EINVAL);
0ad19a3f 2090
2091 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2092 if (!nlmsg)
a5f5cb41 2093 return ret_errno(ENOMEM);
0ad19a3f 2094
06f976ca 2095 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2096 if (!answer)
a5f5cb41 2097 return ret_errno(ENOMEM);
0ad19a3f 2098
a5f5cb41 2099 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2100 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2101
2102 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 2103 if (!ifi)
a5f5cb41
CB
2104 return ret_errno(ENOMEM);
2105
06f976ca 2106 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2107
79e68309 2108 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2109 if (!nest1)
a5f5cb41 2110 return ret_errno(EINVAL);
0ad19a3f 2111
2112 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
a5f5cb41 2113 return ret_errno(ENOMEM);
0ad19a3f 2114
2115 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2116 if (!nest2)
a5f5cb41 2117 return ret_errno(ENOMEM);
0ad19a3f 2118
2119 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
2120 if (!nest3)
a5f5cb41 2121 return ret_errno(ENOMEM);
0ad19a3f 2122
06f976ca 2123 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2124 if (!ifi)
2125 return ret_errno(ENOMEM);
0ad19a3f 2126
2127 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
a5f5cb41 2128 return ret_errno(ENOMEM);
0ad19a3f 2129
54256301 2130 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2131 return ret_errno(ENOMEM);
54256301
CB
2132
2133 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
a5f5cb41 2134 return ret_errno(ENOMEM);
54256301 2135
0ad19a3f 2136 nla_end_nested(nlmsg, nest3);
0ad19a3f 2137 nla_end_nested(nlmsg, nest2);
0ad19a3f 2138 nla_end_nested(nlmsg, nest1);
2139
2140 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
a5f5cb41 2141 return ret_errno(ENOMEM);
0ad19a3f 2142
a5f5cb41 2143 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2144}
2145
ebc73a67 2146/* TODO: merge with lxc_macvlan_create */
0dc9a142 2147int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid)
26c39028 2148{
a5f5cb41
CB
2149 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2150 struct nl_handler nlh;
2151 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2152 int err, len, lindex;
06f976ca 2153 struct ifinfomsg *ifi;
26c39028 2154 struct rtattr *nest, *nest2;
26c39028 2155
a5f5cb41 2156 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2157 if (err)
2158 return err;
26c39028 2159
0dc9a142 2160 len = strlen(parent);
dae3fdf6 2161 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2162 return ret_errno(EINVAL);
26c39028
JHS
2163
2164 len = strlen(name);
dae3fdf6 2165 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2166 return ret_errno(EINVAL);
26c39028
JHS
2167
2168 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2169 if (!nlmsg)
a5f5cb41 2170 return ret_errno(ENOMEM);
26c39028 2171
06f976ca 2172 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028 2173 if (!answer)
a5f5cb41 2174 return ret_errno(ENOMEM);
26c39028 2175
0dc9a142 2176 lindex = if_nametoindex(parent);
26c39028 2177 if (!lindex)
a5f5cb41 2178 return ret_errno(EINVAL);
26c39028 2179
a5f5cb41 2180 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2181 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2182
2183 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2184 if (!ifi)
2185 return ret_errno(ENOMEM);
2186
06f976ca 2187 ifi->ifi_family = AF_UNSPEC;
26c39028 2188
79e68309 2189 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028 2190 if (!nest)
a5f5cb41 2191 return ret_errno(ENOMEM);
26c39028
JHS
2192
2193 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
a5f5cb41 2194 return ret_errno(ENOMEM);
26c39028
JHS
2195
2196 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2197 if (!nest2)
a5f5cb41 2198 return ret_errno(ENOMEM);
e892973e 2199
26c39028 2200 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
a5f5cb41 2201 return ret_errno(ENOMEM);
e892973e 2202
26c39028 2203 nla_end_nested(nlmsg, nest2);
26c39028
JHS
2204 nla_end_nested(nlmsg, nest);
2205
2206 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
a5f5cb41 2207 return ret_errno(ENOMEM);
26c39028
JHS
2208
2209 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41
CB
2210 return ret_errno(ENOMEM);
2211
2212 return netlink_transaction(nlh_ptr, nlmsg, answer);
26c39028
JHS
2213}
2214
0dc9a142 2215int lxc_macvlan_create(const char *parent, const char *name, int mode)
0ad19a3f 2216{
a5f5cb41
CB
2217 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2218 struct nl_handler nlh;
2219 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2220 int err, index, len;
06f976ca 2221 struct ifinfomsg *ifi;
e892973e 2222 struct rtattr *nest, *nest2;
0ad19a3f 2223
a5f5cb41 2224 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2225 if (err)
2226 return err;
0ad19a3f 2227
0dc9a142 2228 len = strlen(parent);
dae3fdf6 2229 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2230 return ret_errno(EINVAL);
0ad19a3f 2231
2232 len = strlen(name);
dae3fdf6 2233 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2234 return ret_errno(EINVAL);
0ad19a3f 2235
2236 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2237 if (!nlmsg)
a5f5cb41 2238 return ret_errno(ENOMEM);
0ad19a3f 2239
06f976ca 2240 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2241 if (!answer)
a5f5cb41 2242 return ret_errno(ENOMEM);
0ad19a3f 2243
0dc9a142 2244 index = if_nametoindex(parent);
0ad19a3f 2245 if (!index)
a5f5cb41 2246 return ret_errno(EINVAL);
0ad19a3f 2247
a5f5cb41 2248 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2249 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2250
2251 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2252 if (!ifi)
2253 return ret_errno(ENOMEM);
2254
06f976ca 2255 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2256
79e68309 2257 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2258 if (!nest)
a5f5cb41 2259 return ret_errno(ENOMEM);
0ad19a3f 2260
2261 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
a5f5cb41 2262 return ret_errno(ENOMEM);
0ad19a3f 2263
e892973e
DL
2264 if (mode) {
2265 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2266 if (!nest2)
a5f5cb41 2267 return ret_errno(ENOMEM);
e892973e
DL
2268
2269 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
a5f5cb41 2270 return ret_errno(ENOMEM);
e892973e
DL
2271
2272 nla_end_nested(nlmsg, nest2);
2273 }
2274
0ad19a3f 2275 nla_end_nested(nlmsg, nest);
2276
2277 if (nla_put_u32(nlmsg, IFLA_LINK, index))
a5f5cb41 2278 return ret_errno(ENOMEM);
0ad19a3f 2279
2280 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2281 return ret_errno(ENOMEM);
0ad19a3f 2282
a5f5cb41 2283 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2284}
2285
/*
 * Write the string @value to the file at @path.
 *
 * Returns 0 on success, or -errno if the file cannot be opened or the write
 * fails.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	/* errno is captured here, before close() has a chance to change it. */
	if (lxc_write_nointr(fd, value, strlen(value)) < 0)
		ret = -errno;

	close(fd);
	return ret;
}
2301
6dfa9581 2302static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 2303{
2304 int ret;
2305 char path[PATH_MAX];
6509154d 2306
2307 if (family != AF_INET && family != AF_INET6)
6dfa9581 2308 return -EINVAL;
6509154d 2309
387c1c70
CB
2310 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2311 family == AF_INET ? "ipv4" : "ipv6", ifname,
2312 "forwarding");
2313 if (ret < 0)
6dfa9581 2314 return -E2BIG;
6509154d 2315
6dfa9581
TP
2316 return proc_sys_net_write(path, flag ? "1" : "0");
2317}
2318
/* Enable forwarding for @family on interface @name (flag value 1). */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
2323
/* Disable forwarding for @family on interface @name (flag value 0). */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int enable = 0;

	return ip_forwarding_set(name, family, enable);
}
2328
0ad19a3f 2329static int neigh_proxy_set(const char *ifname, int family, int flag)
2330{
9ba8130c 2331 int ret;
419590da 2332 char path[PATH_MAX];
0ad19a3f 2333
2334 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 2335 return -EINVAL;
0ad19a3f 2336
387c1c70
CB
2337 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2338 family == AF_INET ? "ipv4" : "ipv6", ifname,
2339 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2340 if (ret < 0)
9ba8130c 2341 return -E2BIG;
0ad19a3f 2342
ebc73a67 2343 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 2344}
2345
6509154d 2346static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2347{
2348 int ret;
2349 char path[PATH_MAX];
2350 char buf[1] = "";
2351
2352 if (family != AF_INET && family != AF_INET6)
596a002c 2353 return ret_set_errno(-1, EINVAL);
6509154d 2354
387c1c70
CB
2355 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2356 family == AF_INET ? "ipv4" : "ipv6", ifname,
2357 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2358 if (ret < 0)
596a002c 2359 return ret_set_errno(-1, E2BIG);
6509154d 2360
2361 return lxc_read_file_expect(path, buf, 1, "1");
2362}
2363
/* Enable neighbour proxying for @family on interface @name (flag value 1). */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2368
/* Disable neighbour proxying for @family on interface @name (flag value 0). */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
2373
/* Map one ASCII hex digit to its value; returns -1 if @c is not a hex digit. */
static int mac_hex_value(char c)
{
	/* <ctype.h> functions require a value representable as unsigned char. */
	if (isdigit((unsigned char)c))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are written to
 * sa_data. Each octet is one or two hex digits; octets may be separated by
 * ':'. Parsing stops at the end of the string or after ETH_ALEN octets,
 * whichever comes first - a shorter address is accepted and simply fills
 * fewer bytes.
 *
 * Returns 0 on success and -EINVAL if a character that is neither a hex
 * digit nor an accepted separator is encountered.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data = (unsigned char *)sockaddr->sa_data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned int val;
		int nibble;
		char c;

		/* The first character of an octet must be a hex digit. */
		nibble = mac_hex_value(*macaddr++);
		if (nibble < 0)
			return -EINVAL;
		val = (unsigned int)nibble << 4;

		/* The second one is either the low nibble, or a separator /
		 * end of string, in which case the octet had a single digit.
		 */
		c = *macaddr;
		nibble = mac_hex_value(c);
		if (nibble >= 0)
			val |= (unsigned int)nibble;
		else if (c == ':' || c == '\0')
			val >>= 4;
		else
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2418
ebc73a67
CB
2419static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2420 void *acast, int prefix)
0ad19a3f 2421{
a5f5cb41
CB
2422 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2423 struct nl_handler nlh;
2424 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2425 int addrlen, err;
06f976ca 2426 struct ifaddrmsg *ifa;
0ad19a3f 2427
ebc73a67
CB
2428 addrlen = family == AF_INET ? sizeof(struct in_addr)
2429 : sizeof(struct in6_addr);
4bf1968d 2430
a5f5cb41 2431 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2432 if (err)
2433 return err;
0ad19a3f 2434
0ad19a3f 2435 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2436 if (!nlmsg)
a5f5cb41 2437 return ret_errno(ENOMEM);
0ad19a3f 2438
06f976ca 2439 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2440 if (!answer)
a5f5cb41 2441 return ret_errno(ENOMEM);
0ad19a3f 2442
a5f5cb41 2443 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2444 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2445
2446 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2447 if (!ifa)
a5f5cb41
CB
2448 return ret_errno(ENOMEM);
2449
06f976ca
SZ
2450 ifa->ifa_prefixlen = prefix;
2451 ifa->ifa_index = ifindex;
2452 ifa->ifa_family = family;
2453 ifa->ifa_scope = 0;
acf47e1b 2454
4bf1968d 2455 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
a5f5cb41 2456 return ret_errno(EINVAL);
0ad19a3f 2457
4bf1968d 2458 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
a5f5cb41 2459 return ret_errno(EINVAL);
0ad19a3f 2460
d8948a52 2461 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
a5f5cb41 2462 return ret_errno(EINVAL);
1f1b18e7 2463
ebc73a67 2464 /* TODO: multicast, anycast with ipv6 */
79881dc6
DL
2465 if (family == AF_INET6 &&
2466 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2467 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
a5f5cb41 2468 return ret_errno(EPROTONOSUPPORT);
0ad19a3f 2469
a5f5cb41 2470 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2471}
2472
/*
 * IPv6 front end for ip_addr_add(): @mcast is passed in the broadcast slot
 * and @acast in the anycast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	void *address = addr;
	void *multicast = mcast;
	void *anycast = acast;

	return ip_addr_add(AF_INET6, ifindex, address, multicast, anycast, prefix);
}
2479
ebc73a67
CB
/* IPv4 front end for ip_addr_add(); no anycast address is passed (NULL). */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	void *address = addr;
	void *broadcast = bcast;

	return ip_addr_add(AF_INET, ifindex, address, broadcast, NULL, prefix);
}
2485
ebc73a67
CB
2486/* Find an IFA_LOCAL (or IFA_ADDRESS if no IFA_LOCAL is present) address from
2487 * the given RTM_NEWADDR message. Allocates memory for the address and stores
2488 * that pointer in *res (so res should be an in_addr** or in6_addr**).
19a26f82 2489 */
6ce39620
CB
2490#pragma GCC diagnostic push
2491#pragma GCC diagnostic ignored "-Wcast-align"
2492
ebc73a67
CB
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addr_hdr = NLMSG_DATA(msg);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *attr;
	int want_len;

	/* Messages for another address family are skipped, not an error. */
	if (addr_hdr->ifa_family != family)
		return 0;

	if (family == AF_INET)
		want_len = sizeof(struct in_addr);
	else
		want_len = sizeof(struct in6_addr);

	/* Walk the attributes that follow the ifaddrmsg header. */
	for (attr = IFA_RTA(addr_hdr); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * payload size; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(attr) != want_len)
			return -1;

		/* Allocate once; a later IFA_LOCAL overwrites an earlier
		 * IFA_ADDRESS in the same buffer.
		 */
		if (!*res) {
			*res = malloc(want_len);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), want_len);

		/* IFA_LOCAL is the preferred answer, so stop looking. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
2537
6ce39620
CB
2538#pragma GCC diagnostic pop
2539
19a26f82
MK
2540static int ip_addr_get(int family, int ifindex, void **res)
2541{
a5f5cb41
CB
2542 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2543 struct nl_handler nlh;
2544 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2545 int answer_len, err;
06f976ca 2546 struct ifaddrmsg *ifa;
19a26f82 2547 struct nlmsghdr *msg;
ebc73a67 2548 int readmore = 0, recv_len = 0;
19a26f82 2549
a5f5cb41 2550 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
19a26f82
MK
2551 if (err)
2552 return err;
2553
19a26f82
MK
2554 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2555 if (!nlmsg)
a5f5cb41 2556 return ret_errno(ENOMEM);
19a26f82 2557
06f976ca 2558 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82 2559 if (!answer)
a5f5cb41 2560 return ret_errno(ENOMEM);
19a26f82 2561
ebc73a67
CB
2562 /* Save the answer buffer length, since it will be overwritten on the
2563 * first receive (and we might need to receive more than once).
2564 */
06f976ca
SZ
2565 answer_len = answer->nlmsghdr->nlmsg_len;
2566
ebc73a67 2567 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2568 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2569
06f976ca 2570 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b 2571 if (!ifa)
a5f5cb41
CB
2572 return ret_errno(ENOMEM);
2573
06f976ca 2574 ifa->ifa_family = family;
19a26f82 2575
ebc73a67
CB
2576 /* Send the request for addresses, which returns all addresses on all
2577 * interfaces.
2578 */
a5f5cb41 2579 err = netlink_send(nlh_ptr, nlmsg);
19a26f82 2580 if (err < 0)
a5f5cb41 2581 return ret_set_errno(err, errno);
19a26f82 2582
6ce39620
CB
2583#pragma GCC diagnostic push
2584#pragma GCC diagnostic ignored "-Wcast-align"
2585
19a26f82
MK
2586 do {
2587 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2588 * overwritten by a previous receive.
2589 */
06f976ca 2590 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2591
ebc73a67 2592 /* Get the (next) batch of reply messages. */
a5f5cb41 2593 err = netlink_rcv(nlh_ptr, answer);
19a26f82 2594 if (err < 0)
a5f5cb41 2595 return ret_set_errno(err, errno);
19a26f82
MK
2596
2597 recv_len = err;
2598 err = 0;
2599
ebc73a67 2600 /* Satisfy the typing for the netlink macros. */
06f976ca 2601 msg = answer->nlmsghdr;
19a26f82
MK
2602
2603 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2604 /* Stop reading if we see an error message. */
19a26f82 2605 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
2606 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2607 return ret_set_errno(errmsg->error, errno);
19a26f82
MK
2608 }
2609
ebc73a67 2610 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2611 if (msg->nlmsg_type == NLMSG_DONE) {
2612 readmore = 0;
2613 break;
2614 }
2615
a5f5cb41
CB
2616 if (msg->nlmsg_type != RTM_NEWADDR)
2617 return ret_errno(EINVAL);
19a26f82 2618
06f976ca
SZ
2619 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2620 if (ifa->ifa_index == ifindex) {
a5f5cb41
CB
2621 if (ifa_get_local_ip(family, msg, res) < 0)
2622 return ret_errno(EINVAL);
51e7a874 2623
ebc73a67 2624 /* Found a result, stop searching. */
19a26f82 2625 if (*res)
a5f5cb41 2626 return 0;
19a26f82
MK
2627 }
2628
ebc73a67
CB
2629 /* Keep reading more data from the socket if the last
2630 * message had the NLF_F_MULTI flag set.
2631 */
19a26f82
MK
2632 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2633
ebc73a67 2634 /* Look at the next message received in this buffer. */
19a26f82
MK
2635 msg = NLMSG_NEXT(msg, recv_len);
2636 }
2637 } while (readmore);
2638
6ce39620
CB
2639#pragma GCC diagnostic pop
2640
19a26f82 2641 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2642 * error.
2643 */
a5f5cb41 2644 return -1;
19a26f82
MK
2645}
2646
/* IPv6 front end for ip_addr_get(); the result is stored through @res. */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2651
/* IPv4 front end for ip_addr_get(); the result is stored through @res. */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2656
f8fee0e2
MK
2657static int ip_gateway_add(int family, int ifindex, void *gw)
2658{
a5f5cb41 2659 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2660 struct nl_handler nlh;
a5f5cb41
CB
2661 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2662 int addrlen, err;
06f976ca 2663 struct rtmsg *rt;
f8fee0e2 2664
ebc73a67
CB
2665 addrlen = family == AF_INET ? sizeof(struct in_addr)
2666 : sizeof(struct in6_addr);
f8fee0e2 2667
a5f5cb41 2668 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
f8fee0e2
MK
2669 if (err)
2670 return err;
2671
f8fee0e2
MK
2672 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2673 if (!nlmsg)
a5f5cb41 2674 return ret_errno(ENOMEM);
f8fee0e2 2675
06f976ca 2676 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2 2677 if (!answer)
a5f5cb41 2678 return ret_errno(ENOMEM);
f8fee0e2 2679
a5f5cb41 2680 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2681 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2682
2683 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b 2684 if (!rt)
a5f5cb41
CB
2685 return ret_errno(ENOMEM);
2686
06f976ca
SZ
2687 rt->rtm_family = family;
2688 rt->rtm_table = RT_TABLE_MAIN;
2689 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2690 rt->rtm_protocol = RTPROT_BOOT;
2691 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2692 /* "default" destination */
06f976ca 2693 rt->rtm_dst_len = 0;
f8fee0e2 2694
a2f9a670 2695 /* If gateway address not supplied, then a device route will be created instead */
a5f5cb41
CB
2696 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2697 return ret_errno(ENOMEM);
f8fee0e2
MK
2698
2699 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2700 * addresses for the gateway.
2701 */
f8fee0e2 2702 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
a5f5cb41 2703 return ret_errno(EINVAL);
f8fee0e2 2704
a5f5cb41 2705 return netlink_transaction(nlh_ptr, nlmsg, answer);
f8fee0e2
MK
2706}
2707
/* IPv4 front end for ip_gateway_add(). */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET, ifindex, gateway);
}
2712
/* IPv6 front end for ip_gateway_add(). */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET6, ifindex, gateway);
}
581c75e7 2717bool is_ovs_bridge(const char *bridge)
0d204771 2718{
ebc73a67 2719 int ret;
0d204771 2720 struct stat sb;
ebc73a67 2721 char brdirname[22 + IFNAMSIZ + 1] = {0};
0d204771 2722
387c1c70
CB
2723 ret = strnprintf(brdirname, 22 + IFNAMSIZ + 1,
2724 "/sys/class/net/%s/bridge", bridge);
2725 if (ret < 0)
ebc73a67
CB
2726 return false;
2727
2728 ret = stat(brdirname, &sb);
2729 if (ret < 0 && errno == ENOENT)
0d204771 2730 return true;
ebc73a67 2731
0d204771
SH
2732 return false;
2733}
2734
581c75e7
CB
/* Arguments handed to the ovs-vsctl exec callbacks through a void pointer. */
struct ovs_veth_args {
	/* Bridge name, passed to ovs-vsctl before the port name. */
	const char *bridge;
	/* Port (interface) name, passed to ovs-vsctl after the bridge name. */
	const char *nic;
};
2739
cb0dc11b
CB
2740/* Called from a background thread - when nic goes away, remove it from the
2741 * bridge.
c43cbc04 2742 */
581c75e7 2743static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2744{
581c75e7 2745 struct ovs_veth_args *args = data;
cb0dc11b 2746
9c66dc4f 2747 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2748 return -1;
c43cbc04
SH
2749}
2750
581c75e7 2751int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2752{
c43cbc04 2753 int ret;
419590da 2754 char cmd_output[PATH_MAX];
581c75e7 2755 struct ovs_veth_args args;
6ad22d06 2756
581c75e7
CB
2757 args.bridge = bridge;
2758 args.nic = nic;
2759 ret = run_command(cmd_output, sizeof(cmd_output),
2760 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2761 if (ret < 0)
2762 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2763
581c75e7
CB
2764 return 0;
2765}
ebc73a67 2766
581c75e7
CB
2767static int lxc_ovs_attach_bridge_exec(void *data)
2768{
2769 struct ovs_veth_args *args = data;
ebc73a67 2770
9c66dc4f 2771 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2772 return -1;
2773}
ebc73a67 2774
581c75e7
CB
2775static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2776{
2777 int ret;
419590da 2778 char cmd_output[PATH_MAX];
581c75e7 2779 struct ovs_veth_args args;
ebc73a67 2780
581c75e7
CB
2781 args.bridge = bridge;
2782 args.nic = nic;
2783 ret = run_command(cmd_output, sizeof(cmd_output),
2784 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2785 if (ret < 0)
2786 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2787
581c75e7 2788 return 0;
0d204771 2789}
0d204771 2790
581c75e7 2791int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2792{
ebc73a67 2793 int err, fd, index;
9de31d5a 2794 size_t retlen;
0ad19a3f 2795 struct ifreq ifr;
2796
dae3fdf6 2797 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2798 return -EINVAL;
0ad19a3f 2799
2800 index = if_nametoindex(ifname);
2801 if (!index)
3cfc0f3a 2802 return -EINVAL;
0ad19a3f 2803
0d204771 2804 if (is_ovs_bridge(bridge))
581c75e7 2805 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2806
ad9429e5 2807 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2808 if (fd < 0)
3cfc0f3a 2809 return -errno;
0ad19a3f 2810
9de31d5a 2811 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2812 if (retlen >= IFNAMSIZ) {
2813 close(fd);
9de31d5a 2814 return -E2BIG;
42cc4083 2815 }
9de31d5a 2816
ebc73a67 2817 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2818 ifr.ifr_ifindex = index;
7d163508 2819 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2820 close(fd);
3cfc0f3a
MN
2821 if (err)
2822 err = -errno;
0ad19a3f 2823
2824 return err;
2825}
72d0e1cb 2826
8befa924
SH
2827int setup_private_host_hw_addr(char *veth1)
2828{
387c1c70
CB
2829 __do_close int sockfd = -EBADF;
2830 int err;
8befa924 2831 struct ifreq ifr;
8befa924 2832
ad9429e5 2833 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2834 if (sockfd < 0)
2835 return -errno;
2836
387c1c70
CB
2837 err = strnprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2838 if (err < 0)
2839 return err;
ebc73a67 2840
8befa924 2841 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
387c1c70 2842 if (err < 0)
8befa924 2843 return -errno;
8befa924
SH
2844
2845 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2846 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924
SH
2847 if (err < 0)
2848 return -errno;
2849
2850 return 0;
2851}
811ef482
CB
2852
2853int lxc_find_gateway_addresses(struct lxc_handler *handler)
2854{
2855 struct lxc_list *network = &handler->conf->network;
2856 struct lxc_list *iterator;
2857 struct lxc_netdev *netdev;
2858 int link_index;
2859
2860 lxc_list_for_each(iterator, network) {
2861 netdev = iterator->elem;
2862
2863 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2864 continue;
2865
9c66dc4f
CB
2866 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2867 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2868
f2711167 2869 if (is_empty_string(netdev->link)) {
9c66dc4f 2870 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2871 }
2872
2873 link_index = if_nametoindex(netdev->link);
2874 if (!link_index)
2875 return -EINVAL;
2876
2877 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2878 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2879 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2880 }
2881
2882 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2883 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2884 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2885 }
2886 }
2887
2888 return 0;
2889}
2890
2891#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
071d0934
CB
2892static int lxc_create_network_unpriv_exec(const char *lxcpath,
2893 const char *lxcname,
2894 struct lxc_netdev *netdev, pid_t pid,
2895 unsigned int hooks_version)
811ef482
CB
2896{
2897 int ret;
2898 pid_t child;
2899 int bytes, pipefd[2];
2900 char *token, *saveptr = NULL;
095ead80 2901 char netdev_link[IFNAMSIZ];
419590da 2902 char buffer[PATH_MAX] = {0};
94b1cade 2903 size_t retlen;
811ef482 2904
9c66dc4f 2905 if (netdev->type != LXC_NET_VETH)
071d0934
CB
2906 return log_error_errno(-1, errno,
2907 "Network type %d not support for unprivileged use",
2908 netdev->type);
811ef482
CB
2909
2910 ret = pipe(pipefd);
9c66dc4f
CB
2911 if (ret < 0)
2912 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2913
2914 child = fork();
2915 if (child < 0) {
811ef482
CB
2916 close(pipefd[0]);
2917 close(pipefd[1]);
9c66dc4f 2918 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2919 }
2920
2921 if (child == 0) {
8335fd40 2922 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2923
2924 close(pipefd[0]);
2925
2926 ret = dup2(pipefd[1], STDOUT_FILENO);
2927 if (ret >= 0)
2928 ret = dup2(pipefd[1], STDERR_FILENO);
2929 close(pipefd[1]);
2930 if (ret < 0) {
2931 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2932 _exit(EXIT_FAILURE);
811ef482
CB
2933 }
2934
f2711167 2935 if (!is_empty_string(netdev->link))
9de31d5a 2936 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2937 else
9de31d5a
CB
2938 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2939 if (retlen >= IFNAMSIZ) {
2940 SYSERROR("Invalid network device name");
2941 _exit(EXIT_FAILURE);
2942 }
811ef482 2943
387c1c70
CB
2944 ret = strnprintf(pidstr, sizeof(pidstr), "%d", pid);
2945 if (ret < 0)
78070056 2946 _exit(EXIT_FAILURE);
8335fd40 2947 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2948
2949 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
071d0934 2950 lxcname, pidstr, netdev_link, !is_empty_string(netdev->name) ? netdev->name : "(null)");
3473ca76 2951 if (!is_empty_string(netdev->name))
811ef482
CB
2952 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2953 lxcpath, lxcname, pidstr, "veth", netdev_link,
2954 netdev->name, (char *)NULL);
2955 else
2956 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2957 lxcpath, lxcname, pidstr, "veth", netdev_link,
2958 (char *)NULL);
2959 SYSERROR("Failed to execute lxc-user-nic");
78070056 2960 _exit(EXIT_FAILURE);
811ef482
CB
2961 }
2962
2963 /* close the write-end of the pipe */
2964 close(pipefd[1]);
2965
9c66dc4f 2966 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2967 if (bytes < 0) {
74c6e2b0 2968 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2969 close(pipefd[0]);
6b9f82a9
CB
2970 } else {
2971 buffer[bytes - 1] = '\0';
811ef482 2972 }
811ef482
CB
2973
2974 ret = wait_for_pid(child);
2975 close(pipefd[0]);
9c66dc4f 2976 if (ret != 0 || bytes < 0)
071d0934
CB
2977 return log_error(-1, "lxc-user-nic failed to configure requested network: %s",
2978 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2979 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2980
2981 /* netdev->name */
2982 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2983 if (!token)
2984 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2985
e389f2af
CB
2986 /*
2987 * lxc-user-nic will take care of proper network device naming. So
fdd6be55 2988 * netdev->name and netdev->transient_name need to be identical to not
e389f2af
CB
2989 * trigger another rename later on.
2990 */
2991 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
fdd6be55
CB
2992 if (retlen < IFNAMSIZ) {
2993 retlen = strlcpy(netdev->transient_name, token, IFNAMSIZ);
2994 if (retlen < IFNAMSIZ)
2995 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2996 }
9c66dc4f 2997 if (retlen >= IFNAMSIZ)
071d0934
CB
2998 return log_error_errno(-1, E2BIG,
2999 "Container side veth device name returned by lxc-user-nic is too long");
811ef482 3000
74c6e2b0 3001 /* netdev->ifindex */
811ef482 3002 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3003 if (!token)
3004 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 3005
74c6e2b0 3006 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f 3007 if (ret < 0)
071d0934
CB
3008 return log_error_errno(-1, -ret,
3009 "Failed to convert string \"%s\" to integer", token);
811ef482 3010
74c6e2b0 3011 /* netdev->priv.veth_attr.veth1 */
811ef482 3012 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3013 if (!token)
3014 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 3015
94b1cade 3016 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f 3017 if (retlen >= IFNAMSIZ)
071d0934
CB
3018 return log_error_errno(-1, E2BIG,
3019 "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
3020
3021 /* netdev->priv.veth_attr.ifindex */
3022 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3023 if (!token)
3024 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
3025
3026 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f 3027 if (ret < 0)
071d0934
CB
3028 return log_error_errno(-1, -ret,
3029 "Failed to convert string \"%s\" to integer", token);
811ef482 3030
4d781681 3031 if (netdev->upscript) {
3032 char *argv[] = {
3033 "veth",
3034 netdev->link,
3035 netdev->priv.veth_attr.veth1,
3036 NULL,
3037 };
3038
e389f2af
CB
3039 ret = run_script_argv(lxcname, hooks_version, "net",
3040 netdev->upscript, "up", argv);
4d781681 3041 if (ret < 0)
3042 return -1;
071d0934 3043 }
4d781681 3044
811ef482
CB
3045 return 0;
3046}
3047
f0ecc19d 3048static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
3049 struct lxc_netdev *netdev,
3050 const char *netns_path)
811ef482
CB
3051{
3052 int bytes, ret;
3053 pid_t child;
3054 int pipefd[2];
25619b99 3055 char buffer[PATH_MAX] = {};
811ef482 3056
9c66dc4f
CB
3057 if (netdev->type != LXC_NET_VETH)
3058 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
3059
3060 ret = pipe(pipefd);
9c66dc4f
CB
3061 if (ret < 0)
3062 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
3063
3064 child = fork();
3065 if (child < 0) {
811ef482
CB
3066 close(pipefd[0]);
3067 close(pipefd[1]);
9c66dc4f 3068 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
3069 }
3070
3071 if (child == 0) {
8843fde4 3072 char *hostveth;
811ef482
CB
3073
3074 close(pipefd[0]);
3075
3076 ret = dup2(pipefd[1], STDOUT_FILENO);
3077 if (ret >= 0)
3078 ret = dup2(pipefd[1], STDERR_FILENO);
3079 close(pipefd[1]);
3080 if (ret < 0) {
3081 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 3082 _exit(EXIT_FAILURE);
811ef482
CB
3083 }
3084
f2711167 3085 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3086 hostveth = netdev->priv.veth_attr.pair;
3087 else
3088 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3089 if (is_empty_string(hostveth)) {
74c6e2b0 3090 SYSERROR("Host side veth device name is missing");
a30b9023 3091 _exit(EXIT_FAILURE);
74c6e2b0
CB
3092 }
3093
f2711167
CB
3094 if (is_empty_string(netdev->link)) {
3095 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 3096 _exit(EXIT_FAILURE);
74c6e2b0 3097 }
811ef482 3098
811ef482 3099 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 3100 lxcname, netns_path, netdev->link, hostveth);
811ef482 3101 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
3102 lxcname, netns_path, "veth", netdev->link, hostveth,
3103 (char *)NULL);
811ef482 3104 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 3105 _exit(EXIT_FAILURE);
811ef482
CB
3106 }
3107
3108 close(pipefd[1]);
3109
9c66dc4f 3110 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
3111 if (bytes < 0) {
3112 SYSERROR("Failed to read from pipe file descriptor.");
3113 close(pipefd[0]);
6b9f82a9
CB
3114 } else {
3115 buffer[bytes - 1] = '\0';
811ef482 3116 }
811ef482 3117
6b9f82a9 3118 ret = wait_for_pid(child);
9c66dc4f
CB
3119 close_prot_errno_disarm(pipefd[0]);
3120 if (ret != 0 || bytes < 0)
3121 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
3122 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 3123
811ef482
CB
3124 return 0;
3125}
3126
59eac805 3127static bool lxc_delete_network_unpriv(struct lxc_handler *handler)
1bd8d726
CB
3128{
3129 int ret;
3130 struct lxc_list *iterator;
3131 struct lxc_list *network = &handler->conf->network;
3132 /* strlen("/proc/") = 6
3133 * +
8335fd40 3134 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
3135 * +
3136 * strlen("/fd/") = 4
3137 * +
8335fd40 3138 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
3139 * +
3140 * \0
3141 */
8335fd40 3142 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
3143
3144 *netns_path = '\0';
3145
9c66dc4f
CB
3146 if (handler->nsfd[LXC_NS_NET] < 0)
3147 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726 3148
387c1c70
CB
3149 ret = strnprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
3150 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
3151 if (ret < 0)
1bd8d726
CB
3152 return false;
3153
3154 lxc_list_for_each(iterator, network) {
3155 char *hostveth = NULL;
3156 struct lxc_netdev *netdev = iterator->elem;
3157
3158 /* We can only delete devices whose ifindex we have. If we don't
3159 * have the index it means that we didn't create it.
3160 */
3161 if (!netdev->ifindex)
3162 continue;
3163
3164 if (netdev->type == LXC_NET_PHYS) {
3165 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3166 netdev->link);
3167 if (ret < 0)
9c66dc4f 3168 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
3169 netdev->ifindex, netdev->link);
3170 else
9c66dc4f 3171 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 3172 netdev->ifindex, netdev->link);
b3259dc6
TP
3173
3174 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3175 if (ret < 0)
3176 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3177 netdev->ifindex, netdev->link);
66a7c406 3178 goto clear_ifindices;
1bd8d726
CB
3179 }
3180
3181 ret = netdev_deconf[netdev->type](handler, netdev);
3182 if (ret < 0)
3183 WARN("Failed to deconfigure network device");
3184
3185 if (netdev->type != LXC_NET_VETH)
66a7c406 3186 goto clear_ifindices;
1bd8d726 3187
f2711167 3188 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 3189 goto clear_ifindices;
1bd8d726 3190
f2711167 3191 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3192 hostveth = netdev->priv.veth_attr.pair;
3193 else
3194 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3195 if (is_empty_string(hostveth))
66a7c406 3196 goto clear_ifindices;
8843fde4 3197
1bd8d726
CB
3198 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3199 handler->name, netdev,
3200 netns_path);
3201 if (ret < 0) {
9c66dc4f 3202 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 3203 goto clear_ifindices;
1bd8d726 3204 }
9c66dc4f 3205 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
3206
3207clear_ifindices:
0858c829
CB
3208 /*
3209 * We need to clear any ifindices we recorded so liblxc won't
3210 * have cached stale data which would cause it to fail on
3211 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3212 */
3213 netdev->ifindex = 0;
3214 if (netdev->type == LXC_NET_PHYS) {
3215 netdev->priv.phys_attr.ifindex = 0;
3216 } else if (netdev->type == LXC_NET_VETH) {
3217 netdev->priv.veth_attr.veth1[0] = '\0';
3218 netdev->priv.veth_attr.ifindex = 0;
3219 }
1bd8d726
CB
3220 }
3221
bb84beda 3222 return true;
1bd8d726
CB
3223}
3224
6509154d 3225static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
3226 struct lxc_list *cur, *next;
3227 struct lxc_inetdev *inet4dev;
3228 struct lxc_inet6dev *inet6dev;
3229 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3230 int err = 0;
5fe147e9
TP
3231 unsigned int lo_ifindex = 0, link_ifindex = 0;
3232
3233 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
3234 if (link_ifindex == 0)
3235 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 3236
6509154d 3237
3238 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3239 if (!lxc_list_empty(&netdev->ipv4)) {
3240 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
3241 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3242 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 3243 }
3244
3245 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3246 if (!lxc_list_empty(&netdev->ipv6)) {
3247 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
3248 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3249 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 3250
3251 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
3252 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3253 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 3254 }
3255
b670016a 3256 /* Perform IPVLAN specific checks. */
3257 if (netdev->type == LXC_NET_IPVLAN) {
3258 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
3259 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3260 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 3261
3262 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3263 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
3264 if (lo_ifindex == 0)
3265 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 3266 }
3267
6509154d 3268 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3269 inet4dev = cur->elem;
3270 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3271 return ret_set_errno(-1, -errno);
6509154d 3272
5fe147e9 3273 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3274 return ret_set_errno(-1, EINVAL);
b670016a 3275
3276 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3277 if (netdev->type == LXC_NET_IPVLAN) {
3278 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
3279 if (err < 0)
3280 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 3281 }
6509154d 3282 }
3283
3284 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3285 inet6dev = cur->elem;
3286 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3287 return ret_set_errno(-1, -errno);
6509154d 3288
5fe147e9 3289 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3290 return ret_set_errno(-1, EINVAL);
b670016a 3291
3292 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3293 if (netdev->type == LXC_NET_IPVLAN) {
3294 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
3295 if (err < 0)
3296 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3297 }
6509154d 3298 }
3299
3300 return 0;
3301}
3302
9c66dc4f
CB
/*
 * Remove the l2proxy state for a single IPv4 address.
 *
 * @ip:         address to clean up.
 * @link:       link device name; when non-empty, the neighbour proxy entry
 *              for @ip on that device is deleted.
 * @lo_ifindex: when greater than 0, the /32 destination entry for @ip on
 *              that interface index is deleted first.
 *
 * A failure of either deletion does not stop the other from being attempted.
 * Returns 0 when everything succeeded, -1 with errno set to EINVAL otherwise.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET_ADDRSTRLEN];
	unsigned int ifindex = 0;
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* A loopback ifindex was supplied: drop the static route to the lo device. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* A link was supplied: drop the neighbour proxy entry for this IP on it. */
	if (!is_empty_string(link)) {
		ifindex = if_nametoindex(link);
		if (ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3335
9c66dc4f
CB
/*
 * Remove the l2proxy state for a single IPv6 address.
 *
 * @ip:         address to clean up.
 * @link:       link device name; when non-empty, the neighbour proxy entry
 *              for @ip on that device is deleted.
 * @lo_ifindex: when greater than 0, the /128 destination entry for @ip on
 *              that interface index is deleted first.
 *
 * A failure of either deletion does not stop the other from being attempted.
 * Returns 0 when everything succeeded, -1 with errno set to EINVAL otherwise.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET6_ADDRSTRLEN];
	unsigned int ifindex = 0;
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* A loopback ifindex was supplied: drop the static route to the lo device. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* A link was supplied: drop the neighbour proxy entry for this IP on it. */
	if (!is_empty_string(link)) {
		ifindex = if_nametoindex(link);
		if (ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3370
6509154d 3371static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3372 unsigned int lo_ifindex = 0;
3373 unsigned int errCount = 0;
6509154d 3374 struct lxc_list *cur, *next;
3375 struct lxc_inetdev *inet4dev;
3376 struct lxc_inet6dev *inet6dev;
6509154d 3377
b670016a 3378 /* Perform IPVLAN specific checks. */
3379 if (netdev->type == LXC_NET_IPVLAN) {
3380 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3381 lo_ifindex = if_nametoindex(loop_device);
b670016a 3382 if (lo_ifindex == 0) {
3383 errCount++;
3ebffb98 3384 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3385 }
b670016a 3386 }
6509154d 3387
b670016a 3388 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3389 inet4dev = cur->elem;
3390 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3391 errCount++;
6509154d 3392 }
3393
3394 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3395 inet6dev = cur->elem;
b670016a 3396 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3397 errCount++;
6509154d 3398 }
3399
b670016a 3400 if (errCount > 0)
596a002c 3401 return ret_set_errno(-1, EINVAL);
6509154d 3402
3403 return 0;
3404}
3405
e389f2af 3406static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3407{
811ef482
CB
3408 struct lxc_list *iterator;
3409 struct lxc_list *network = &handler->conf->network;
3410
811ef482
CB
3411 lxc_list_for_each(iterator, network) {
3412 struct lxc_netdev *netdev = iterator->elem;
3413
9c66dc4f
CB
3414 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3415 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3416
6509154d 3417 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3418 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3419 if (lxc_setup_l2proxy(netdev))
3420 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3421 }
3422
bad2f913 3423 if (netdev_configure_server[netdev->type](handler, netdev))
9c66dc4f 3424 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3425 }
3426
3427 return 0;
3428}
3429
fdd6be55
CB
3430/*
3431 * LXC moves network devices into the target namespace based on their created
3432 * name. The created name can either be randomly generated for e.g. veth
3433 * devices or it can be the name of the existing device in the server's
3434 * namespaces. This is e.g. the case when moving physical devices. However this
3435 * can lead to weird clashes. Consider we have a network namespace that has the
3436 * following devices:
3437
3438 * 4: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3439 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3440 * altname enp7s0
3441 * 5: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3442 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3443 * altname enp8s0
3444 *
3445 * and the user generates the following network config for their container:
3446 *
3447 * lxc.net.0.type = phys
3448 * lxc.net.0.name = eth1
3449 * lxc.net.0.link = eth2
3450 *
3451 * lxc.net.1.type = phys
3452 * lxc.net.1.name = eth2
3453 * lxc.net.1.link = eth1
3454 *
3455 * This would cause LXC to move the devices eth1 and eth2 from the server's
3456 * network namespace into the container's network namespace:
3457 *
3458 * 24: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3459 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3460 * altname enp7s0
3461 * 25: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3462 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3463 * altname enp8s0
3464 *
3465 * According to the network config above we now need to rename the network
3466 * devices in the container's network namespace. Let's say we start with
3467 * renaming eth2 to eth1. This would immediately lead to a clash since the
3468 * container's network namespace already contains a network device with that
3469 * name. Renaming the other device would have the same problem.
3470 *
3471 * There are multiple ways to fix this but I'm concerned with keeping the logic
3472 * somewhat reasonable which is why we simply start creating transient device
3473 * names that are unique which we'll use to move and rename the network device
3474 * in the container's network namespace at the same time. And then we rename
3475 * based on those random devices names to the target name.
3476 *
3477 * Note that the transient name is based on the type of network device as
3478 * specified in the LXC config. However, that doesn't mean it's correct. LXD
3479 * passes veth devices and a range of other network devices (e.g. Infiniband
3480 * VFs etc.) via LXC_NET_PHYS even though they're not really "physical" in the
3481 * sense we like to think about it so you might see a veth device being
3482 * assigned a "physXXXXXX" transient name. That's not a problem.
3483 */
3484static int create_transient_name(struct lxc_netdev *netdev)
3485{
3486 const struct lxc_network_info *info;
3487
3488 if (!is_empty_string(netdev->transient_name))
3489 return syserror_set(-EINVAL, "Network device already had a transient name %s",
3490 netdev->transient_name);
3491
3492 info = &lxc_network_info[netdev->type];
3493 strlcpy(netdev->transient_name, info->template, info->template_len + 1);
3494
3495 if (!lxc_ifname_alnum_case_sensitive(netdev->transient_name))
3496 return syserror_set(-EINVAL, "Failed to create transient name for network device %s", netdev->created_name);
3497
3498 TRACE("Created transient name %s for network device", netdev->transient_name);
3499 return 0;
3500}
3501
43e2a964
CB
3502static int netdev_requires_move(const struct lxc_netdev *netdev)
3503{
3504 if (IN_SET(netdev->type, LXC_NET_EMPTY, LXC_NET_NONE))
3505 return false;
3506
3507 /*
3508 * Veth devices are directly created in the container's network
3509 * namespace so the device doesn't need to be moved into the
3510 * container's network namespace. The transient name will
3511 * already have been set above when we created the veth tunnel.
3512 */
3513 if (!netdev->ifindex)
3514 return false;
3515
3516 return true;
3517}
3518
e389f2af 3519int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3520{
e389f2af
CB
3521 pid_t pid = handler->pid;
3522 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3523 struct lxc_list *iterator;
3524
e0010464 3525 if (am_guest_unpriv())
74c6e2b0 3526 return 0;
811ef482
CB
3527
3528 lxc_list_for_each(iterator, network) {
3dd78294 3529 __do_free char *physname = NULL;
e389f2af 3530 int ret;
811ef482
CB
3531 struct lxc_netdev *netdev = iterator->elem;
3532
43e2a964 3533 if (!netdev_requires_move(netdev))
811ef482
CB
3534 continue;
3535
fdd6be55
CB
3536 ret = create_transient_name(netdev);
3537 if (ret < 0)
3538 return ret;
3539
3dd78294
CB
3540 if (netdev->type == LXC_NET_PHYS)
3541 physname = is_wlan(netdev->link);
3542
3543 if (physname)
fdd6be55 3544 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->transient_name);
3dd78294 3545 else
fdd6be55 3546 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->transient_name);
9c66dc4f 3547 if (ret)
fdd6be55
CB
3548 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d and rename to %s",
3549 netdev->created_name, netdev->ifindex, pid, netdev->transient_name);
811ef482 3550
fdd6be55
CB
3551 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d and renamed to %s",
3552 maybe_empty(netdev->created_name), netdev->ifindex, pid, netdev->transient_name);
811ef482
CB
3553 }
3554
3555 return 0;
3556}
3557
3c09b97c
CB
3558static int network_requires_advanced_setup(int type)
3559{
3560 if (type == LXC_NET_EMPTY)
3561 return false;
3562
3563 if (type == LXC_NET_NONE)
3564 return false;
3565
3566 return true;
3567}
3568
e389f2af 3569static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3570{
e389f2af
CB
3571 int hooks_version = handler->conf->hooks_version;
3572 const char *lxcname = handler->name;
3573 const char *lxcpath = handler->lxcpath;
3574 struct lxc_list *network = &handler->conf->network;
3575 pid_t pid = handler->pid;
74c6e2b0
CB
3576 struct lxc_list *iterator;
3577
74c6e2b0
CB
3578 lxc_list_for_each(iterator, network) {
3579 struct lxc_netdev *netdev = iterator->elem;
3580
3c09b97c 3581 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3582 continue;
3583
9c66dc4f
CB
3584 if (netdev->type != LXC_NET_VETH)
3585 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3586 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3587
3588 if (netdev->mtu)
3589 INFO("mtu ignored due to insufficient privilege");
3590
e389f2af
CB
3591 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3592 pid, hooks_version))
74c6e2b0
CB
3593 return -1;
3594 }
3595
3596 return 0;
3597}
3598
59eac805 3599static bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3600{
3601 int ret;
3602 struct lxc_list *iterator;
3603 struct lxc_list *network = &handler->conf->network;
1bd8d726 3604
811ef482
CB
3605 lxc_list_for_each(iterator, network) {
3606 char *hostveth = NULL;
3607 struct lxc_netdev *netdev = iterator->elem;
3608
3609 /* We can only delete devices whose ifindex we have. If we don't
3610 * have the index it means that we didn't create it.
3611 */
3612 if (!netdev->ifindex)
3613 continue;
3614
0104c121
CB
3615 /*
3616 * If the network device has been moved back from the
3617 * containers network namespace, update the ifindex.
3618 */
3619 netdev->ifindex = if_nametoindex(netdev->name);
3620
6509154d 3621 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3622 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3623 if (lxc_delete_l2proxy(netdev))
3624 WARN("Failed to delete all l2proxy config");
3625 /* Don't return, let the network be cleaned up as normal. */
3626 }
3627
811ef482 3628 if (netdev->type == LXC_NET_PHYS) {
bb301db7
SB
3629 /* Physical interfaces are initially returned to the parent namespace
3630 * with their transient name to avoid collisions
3631 */
3632 netdev->ifindex = if_nametoindex(netdev->transient_name);
811ef482
CB
3633 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3634 if (ret < 0)
3635 WARN("Failed to rename interface with index %d "
b809f232
CB
3636 "from \"%s\" to its initial name \"%s\"",
3637 netdev->ifindex, netdev->name, netdev->link);
0b154989 3638 else {
29589196
CB
3639 TRACE("Renamed interface with index %d from "
3640 "\"%s\" to its initial name \"%s\"",
3641 netdev->ifindex, netdev->name,
3642 netdev->link);
0b154989
TP
3643
3644 /* Restore original MTU */
3645 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3646 if (ret < 0) {
3647 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3648 netdev->link, netdev->priv.phys_attr.mtu);
3649 } else {
3650 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3651 netdev->link, netdev->priv.phys_attr.mtu);
3652 }
3653 }
b3259dc6
TP
3654
3655 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3656 if (ret < 0)
3657 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3658 netdev->ifindex, netdev->link);
66a7c406 3659 goto clear_ifindices;
811ef482
CB
3660 }
3661
3662 ret = netdev_deconf[netdev->type](handler, netdev);
3663 if (ret < 0)
3664 WARN("Failed to deconfigure network device");
3665
811ef482 3666 if (netdev->type != LXC_NET_VETH)
66a7c406 3667 goto clear_ifindices;
811ef482 3668
811ef482
CB
3669 /* Explicitly delete host veth device to prevent lingering
3670 * devices. We had issues in LXD around this.
3671 */
f2711167 3672 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3673 hostveth = netdev->priv.veth_attr.pair;
3674 else
3675 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3676 if (is_empty_string(hostveth))
66a7c406 3677 goto clear_ifindices;
811ef482 3678
1ee56cff
CB
3679 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3680 ret = lxc_netdev_delete_by_name(hostveth);
3681 if (ret < 0)
3682 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3683
1ee56cff
CB
3684 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3685 } else if (!is_empty_string(netdev->link)) {
3686 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3687 if (ret < 0)
3688 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3689
1ee56cff
CB
3690 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3691 }
811ef482 3692
66a7c406 3693clear_ifindices:
ad2ddfcd 3694 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3695 * have cached stale data which would cause it to fail on reboot
3696 * we're we don't re-read the on-disk config file.
3697 */
3698 netdev->ifindex = 0;
3699 if (netdev->type == LXC_NET_PHYS) {
3700 netdev->priv.phys_attr.ifindex = 0;
3701 } else if (netdev->type == LXC_NET_VETH) {
3702 netdev->priv.veth_attr.veth1[0] = '\0';
3703 netdev->priv.veth_attr.ifindex = 0;
3704 }
bb301db7
SB
3705
3706 /* Clear transient name */
3707 if (!is_empty_string (netdev->transient_name))
3708 {
3709 netdev->transient_name[0] = '\0';
3710 }
811ef482
CB
3711 }
3712
bb84beda 3713 return true;
811ef482
CB
3714}
3715
3716int lxc_requests_empty_network(struct lxc_handler *handler)
3717{
3718 struct lxc_list *network = &handler->conf->network;
3719 struct lxc_list *iterator;
3720 bool found_none = false, found_nic = false;
3721
3722 if (lxc_list_empty(network))
3723 return 0;
3724
9c66dc4f 3725 lxc_list_for_each (iterator, network) {
811ef482
CB
3726 struct lxc_netdev *netdev = iterator->elem;
3727
3728 if (netdev->type == LXC_NET_NONE)
3729 found_none = true;
3730 else
3731 found_nic = true;
3732 }
9c66dc4f 3733
811ef482
CB
3734 if (found_none && !found_nic)
3735 return 1;
9c66dc4f 3736
811ef482
CB
3737 return 0;
3738}
3739
3740/* try to move physical nics to the init netns */
b809f232 3741int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3742{
9c66dc4f
CB
3743 __do_close int oldfd = -EBADF;
3744 int netnsfd = handler->nsfd[LXC_NS_NET];
3745 struct lxc_conf *conf = handler->conf;
811ef482 3746 int ret;
811ef482 3747 char ifname[IFNAMSIZ];
b809f232 3748 struct lxc_list *iterator;
811ef482 3749
04213960
TA
3750 /*
3751 * If we weren't asked to clone a new network namespace, there's
3752 * nothing to restore.
3753 */
3754 if (!(handler->ns_clone_flags & CLONE_NEWNET))
3755 return 0;
3756
b809f232
CB
3757 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3758 * the parent network namespace. We won't have this capability if we are
3759 * unprivileged.
3760 */
d0fbc7ba 3761 if (!handler->am_root)
b809f232 3762 return 0;
811ef482 3763
b809f232 3764 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3765
0037ab49 3766 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3767 if (oldfd < 0)
3768 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3769
b809f232 3770 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3771 if (ret < 0)
3772 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3773
b809f232
CB
3774 lxc_list_for_each(iterator, &conf->network) {
3775 struct lxc_netdev *netdev = iterator->elem;
811ef482 3776
b809f232
CB
3777 if (netdev->type != LXC_NET_PHYS)
3778 continue;
3779
3780 /* Retrieve the name of the interface in the container's network
3781 * namespace.
3782 */
3783 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3784 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3785 continue;
3786 }
b809f232 3787
bb301db7
SB
3788 /* Restore physical interfaces to host's network namespace with its transient name
3789 * to avoid collisions with the host's other interfaces.
3790 */
3791 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->transient_name);
b809f232 3792 if (ret < 0)
9c66dc4f 3793 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3794 else
9c66dc4f 3795 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3796 }
811ef482 3797
b809f232 3798 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3799 if (ret < 0)
3800 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3801
3802 return 0;
811ef482
CB
3803}
3804
3805static int setup_hw_addr(char *hwaddr, const char *ifname)
3806{
9c66dc4f 3807 __do_close int fd = -EBADF;
811ef482
CB
3808 struct sockaddr sockaddr;
3809 struct ifreq ifr;
9c66dc4f 3810 int ret;
811ef482
CB
3811
3812 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3813 if (ret)
3814 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3815
3816 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3817 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3818 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3819
ad9429e5 3820 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3821 if (fd < 0)
3822 return -1;
3823
3824 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3825 if (ret)
6d1400b5 3826 SYSERROR("Failed to perform ioctl");
3827
9c66dc4f 3828 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3829
3830 return ret;
3831}
3832
3833static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3834{
3835 struct lxc_list *iterator;
3836 int err;
3837
3838 lxc_list_for_each(iterator, ip) {
3839 struct lxc_inetdev *inetdev = iterator->elem;
3840
3841 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3842 &inetdev->bcast, inetdev->prefix);
9c66dc4f
CB
3843 if (err)
3844 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3845 }
3846
3847 return 0;
3848}
3849
3850static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3851{
3852 struct lxc_list *iterator;
3853 int err;
3854
3855 lxc_list_for_each(iterator, ip) {
3856 struct lxc_inet6dev *inet6dev = iterator->elem;
3857
3858 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3859 &inet6dev->mcast, &inet6dev->acast,
3860 inet6dev->prefix);
9c66dc4f
CB
3861 if (err)
3862 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3863 }
3864
3865 return 0;
3866}
3867
8bf64b77 3868static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3869{
811ef482 3870 int err;
009d6127 3871 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482 3872
811ef482 3873 /* set a mac address */
9c66dc4f
CB
3874 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3875 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3876
3877 /* setup ipv4 addresses on the interface */
9c66dc4f
CB
3878 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex))
3879 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3880
3881 /* setup ipv6 addresses on the interface */
9c66dc4f
CB
3882 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex))
3883 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3884
3885 /* set the network device up */
3886 if (netdev->flags & IFF_UP) {
8bf64b77 3887 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3888 if (err)
3889 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3890
3891 /* the network is up, make the loopback up too */
3892 err = lxc_netdev_up("lo");
9c66dc4f
CB
3893 if (err)
3894 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3895 }
3896
811ef482 3897 /* setup ipv4 gateway on the interface */
a2f9a670 3898 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3899 if (!(netdev->flags & IFF_UP))
3900 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3901
9c66dc4f
CB
3902 if (lxc_list_empty(&netdev->ipv4))
3903 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3904
a2f9a670 3905 /* Setup device route if ipv4_gateway_dev is enabled */
3906 if (netdev->ipv4_gateway_dev) {
3907 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3908 if (err < 0)
3909 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3910 } else {
009d6127 3911 /* Check the gateway address is valid */
3912 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3913 return ret_set_errno(-1, errno);
009d6127 3914
3915 /* Try adding a default route to the gateway address */
811ef482 3916 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3917 if (err < 0) {
3918 /* If adding the default route fails, this could be because the
3919 * gateway address is in a different subnet to the container's address.
3920 * To work around this, we try adding a static device route to the
3921 * gateway address first, and then try again.
3922 */
a2f9a670 3923 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3924 if (err < 0)
3925 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3926
a2f9a670 3927 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3928 if (err < 0)
3929 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3930 }
3931 }
3932 }
3933
3934 /* setup ipv6 gateway on the interface */
a2f9a670 3935 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3936 if (!(netdev->flags & IFF_UP))
3937 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3938
9c66dc4f
CB
3939 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
3940 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3941
a2f9a670 3942 /* Setup device route if ipv6_gateway_dev is enabled */
3943 if (netdev->ipv6_gateway_dev) {
3944 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3945 if (err < 0)
3946 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3947 } else {
009d6127 3948 /* Check the gateway address is valid */
3949 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3950 return ret_set_errno(-1, errno);
009d6127 3951
3952 /* Try adding a default route to the gateway address */
811ef482 3953 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3954 if (err < 0) {
3955 /* If adding the default route fails, this could be because the
3956 * gateway address is in a different subnet to the container's address.
3957 * To work around this, we try adding a static device route to the
3958 * gateway address first, and then try again.
3959 */
a2f9a670 3960 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3961 if (err < 0)
3962 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3963
a2f9a670 3964 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3965 if (err < 0)
3966 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3967 }
3968 }
3969 }
3970
8bf64b77 3971 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3972
3973 return 0;
3974}
3975
3a197a1b
CB
3976/**
3977 * Consider the following network layout:
3978 *
3979 * lxc.net.0.type = phys
3980 * lxc.net.0.link = eth2
3981 * lxc.net.0.name = eth%d
3982 *
3983 * lxc.net.1.type = phys
3984 * lxc.net.1.link = eth1
3985 * lxc.net.1.name = eth0
3986 *
3987 * If we simply follow this order and create the first network first the kernel
3988 * will allocate eth0 for the first network but the second network requests
3989 * that eth1 be renamed to eth0 in the container's network namespace which
3990 * would lead to a clash.
3991 *
3992 * Note, we don't handle cases like:
3993 *
3994 * lxc.net.0.type = phys
3995 * lxc.net.0.link = eth2
3996 * lxc.net.0.name = eth0
3997 *
3998 * lxc.net.1.type = phys
3999 * lxc.net.1.link = eth1
4000 * lxc.net.1.name = eth0
4001 *
4002 * That'll brutally fail of course but there's nothing we can do about it.
4003 */
811ef482
CB
4004int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
4005 struct lxc_list *network)
4006{
4007 struct lxc_list *iterator;
3a197a1b 4008 bool needs_second_pass = false;
811ef482 4009
3a197a1b
CB
4010 if (lxc_list_empty(network))
4011 return 0;
4012
4013 /* Configure all devices that have a specific target name. */
4014 lxc_list_for_each(iterator, network) {
e389f2af 4015 struct lxc_netdev *netdev = iterator->elem;
8bf64b77 4016 int ret;
811ef482 4017
3a197a1b
CB
4018 if (is_empty_string(netdev->name) || strequal(netdev->name, "eth%d")) {
4019 needs_second_pass = true;
4020 continue;
4021 }
4022
bad2f913 4023 ret = netdev_configure_container[netdev->type](netdev);
8bf64b77
CB
4024 if (!ret)
4025 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
4026 if (ret)
4027 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482 4028 }
3a197a1b
CB
4029 INFO("Finished setting up network devices with caller assigned names");
4030
4031 if (needs_second_pass) {
4032 /* Configure all devices that have a kernel assigned name. */
4033 lxc_list_for_each(iterator, network) {
4034 struct lxc_netdev *netdev = iterator->elem;
4035 int ret;
811ef482 4036
3a197a1b
CB
4037 if (!is_empty_string(netdev->name) && !strequal(netdev->name, "eth%d"))
4038 continue;
4039
4040 ret = netdev_configure_container[netdev->type](netdev);
4041 if (!ret)
4042 ret = lxc_network_setup_in_child_namespaces_common(netdev);
4043 if (ret)
4044 return log_error_errno(-1, errno, "Failed to setup netdev");
4045 }
4046 INFO("Finished setting up network devices with kernel assigned names");
4047 }
811ef482
CB
4048
4049 return 0;
4050}
7ab1ba02 4051
3c09b97c 4052int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
4053{
4054 struct lxc_list *iterator;
4055 struct lxc_list *network = &handler->conf->network;
4056 int data_sock = handler->data_sock[0];
4057
7ab1ba02
CB
4058 lxc_list_for_each(iterator, network) {
4059 int ret;
4060 struct lxc_netdev *netdev = iterator->elem;
4061
3c09b97c 4062 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
4063 continue;
4064
7fbb15ec 4065 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 4066 if (ret < 0)
7ab1ba02 4067 return -1;
e389f2af 4068
fdd6be55 4069 ret = lxc_send_nointr(data_sock, netdev->transient_name, IFNAMSIZ, MSG_NOSIGNAL);
e389f2af
CB
4070 if (ret < 0)
4071 return -1;
4072
fdd6be55 4073 TRACE("Sent network device name \"%s\" to child", netdev->transient_name);
7ab1ba02
CB
4074 }
4075
4076 return 0;
4077}
4078
3c09b97c 4079int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
4080{
4081 struct lxc_list *iterator;
4082 struct lxc_list *network = &handler->conf->network;
4083 int data_sock = handler->data_sock[1];
4084
7ab1ba02
CB
4085 lxc_list_for_each(iterator, network) {
4086 int ret;
4087 struct lxc_netdev *netdev = iterator->elem;
4088
3c09b97c 4089 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
4090 continue;
4091
e3233f26 4092 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 4093 if (ret < 0)
7ab1ba02 4094 return -1;
e389f2af 4095
fdd6be55 4096 ret = lxc_recv_nointr(data_sock, netdev->transient_name, IFNAMSIZ, 0);
e389f2af
CB
4097 if (ret < 0)
4098 return -1;
54256301 4099
fdd6be55 4100 TRACE("Received network device name \"%s\" from parent", netdev->transient_name);
7ab1ba02
CB
4101 }
4102
4103 return 0;
4104}
a1ae535a
CB
4105
4106int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
4107{
4108 struct lxc_list *iterator, *network;
4109 int data_sock = handler->data_sock[0];
4110
4111 if (!handler->am_root)
4112 return 0;
4113
4114 network = &handler->conf->network;
4115 lxc_list_for_each(iterator, network) {
4116 int ret;
4117 struct lxc_netdev *netdev = iterator->elem;
4118
4119 /* Send network device name in the child's namespace to parent. */
7fbb15ec 4120 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 4121 if (ret < 0)
7729f8e5 4122 return -1;
a1ae535a
CB
4123
4124 /* Send network device ifindex in the child's namespace to
4125 * parent.
4126 */
7fbb15ec 4127 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 4128 if (ret < 0)
7729f8e5 4129 return -1;
a1150aa1
CB
4130
4131 TRACE("Sent network device %s with ifindex %d to parent", maybe_empty(netdev->name), netdev->ifindex);
a1ae535a
CB
4132 }
4133
e389f2af
CB
4134 if (!lxc_list_empty(network))
4135 TRACE("Sent network device names and ifindices to parent");
4136
a1ae535a 4137 return 0;
a1ae535a
CB
4138}
4139
4140int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
4141{
4142 struct lxc_list *iterator, *network;
4143 int data_sock = handler->data_sock[1];
4144
4145 if (!handler->am_root)
4146 return 0;
4147
4148 network = &handler->conf->network;
4149 lxc_list_for_each(iterator, network) {
4150 int ret;
4151 struct lxc_netdev *netdev = iterator->elem;
4152
4153 /* Receive network device name in the child's namespace to
4154 * parent.
4155 */
e3233f26 4156 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 4157 if (ret < 0)
7729f8e5 4158 return -1;
a1ae535a
CB
4159
4160 /* Receive network device ifindex in the child's namespace to
4161 * parent.
4162 */
e3233f26 4163 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 4164 if (ret < 0)
7729f8e5 4165 return -1;
a1150aa1
CB
4166
4167 TRACE("Received network device %s with ifindex %d from child", maybe_empty(netdev->name), netdev->ifindex);
a1ae535a
CB
4168 }
4169
4170 return 0;
a1ae535a 4171}
bb84beda
CB
4172
4173void lxc_delete_network(struct lxc_handler *handler)
4174{
4175 bool bret;
4176
37631ddb
CB
4177 /*
4178 * Always expose namespace fd paths to network down hooks via
4179 * environment variables. No need to complicate things by passing them
4180 * as additional hook arguments.
4181 */
4182 lxc_expose_namespace_environment(handler);
4183
bb84beda
CB
4184 if (handler->am_root)
4185 bret = lxc_delete_network_priv(handler);
4186 else
4187 bret = lxc_delete_network_unpriv(handler);
4188 if (!bret)
4189 DEBUG("Failed to delete network devices");
4190 else
4191 DEBUG("Deleted network devices");
4192}
1cd95214 4193
1cd95214
CB
4194int lxc_netns_set_nsid(int fd)
4195{
41a3300d 4196 int ret;
0ce60f0d
CB
4197 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
4198 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4199 NLMSG_ALIGN(1024)];
1cd95214 4200 struct nl_handler nlh;
a5f5cb41 4201 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
0ce60f0d
CB
4202 struct nlmsghdr *hdr;
4203 struct rtgenmsg *msg;
9d036caa
CB
4204 const __s32 ns_id = -1;
4205 const __u32 netns_fd = fd;
1cd95214 4206
a5f5cb41 4207 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
1cd95214 4208 if (ret < 0)
41a3300d 4209 return -1;
1cd95214 4210
0ce60f0d 4211 memset(buf, 0, sizeof(buf));
6ce39620
CB
4212
4213#pragma GCC diagnostic push
4214#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
4215 hdr = (struct nlmsghdr *)buf;
4216 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4217#pragma GCC diagnostic pop
1cd95214 4218
0ce60f0d
CB
4219 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4220 hdr->nlmsg_type = RTM_NEWNSID;
4221 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4222 hdr->nlmsg_pid = 0;
4223 hdr->nlmsg_seq = RTM_NEWNSID;
4224 msg->rtgen_family = AF_UNSPEC;
1cd95214 4225
9d036caa
CB
4226 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4227 if (ret < 0)
a5f5cb41 4228 return ret_errno(ENOMEM);
9d036caa
CB
4229
4230 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4231 if (ret < 0)
a5f5cb41 4232 return ret_errno(ENOMEM);
1cd95214 4233
a5f5cb41 4234 return __netlink_transaction(nlh_ptr, hdr, hdr);
1cd95214 4235}
938980ba
CB
4236
/*
 * Index the routing attributes found in the @len bytes starting at @rta by
 * attribute type.
 *
 * @tb must have room for @max + 1 entries and is cleared first. For each
 * type up to @max the first attribute of that type is recorded; attributes
 * with a larger type are ignored. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(*tb) * (max + 1));

	while (RTA_OK(rta, len)) {
		const unsigned short attr_type = rta->rta_type;

		if (attr_type <= max) {
			struct rtattr **slot = &tb[attr_type];

			/* Keep the first occurrence of every type. */
			if (!*slot)
				*slot = rta;
		}

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
		rta = RTA_NEXT(rta, len);
#pragma GCC diagnostic pop
	}

	return 0;
}
4256
/* Read the payload of the routing attribute @rta as a signed 32-bit value. */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	const __s32 *payload = RTA_DATA(rta);

	return *payload;
}
4261
/*
 * Address of the first routing attribute behind the struct rtgenmsg that @r
 * points to. Only defined here when no definition exists yet.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
4266
4267int lxc_netns_get_nsid(int fd)
4268{
a5f5cb41
CB
4269 struct nl_handler nlh;
4270 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
938980ba
CB
4271 int ret;
4272 ssize_t len;
4273 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
4274 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4275 NLMSG_ALIGN(1024)];
938980ba 4276 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
938980ba
CB
4277 struct nlmsghdr *hdr;
4278 struct rtgenmsg *msg;
938980ba
CB
4279 __u32 netns_fd = fd;
4280
a5f5cb41 4281 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
938980ba
CB
4282 if (ret < 0)
4283 return -1;
4284
4285 memset(buf, 0, sizeof(buf));
6ce39620
CB
4286
4287#pragma GCC diagnostic push
4288#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4289 hdr = (struct nlmsghdr *)buf;
4290 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4291#pragma GCC diagnostic pop
938980ba
CB
4292
4293 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4294 hdr->nlmsg_type = RTM_GETNSID;
4295 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4296 hdr->nlmsg_pid = 0;
4297 hdr->nlmsg_seq = RTM_GETNSID;
4298 msg->rtgen_family = AF_UNSPEC;
4299
9d036caa 4300 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
a5f5cb41
CB
4301 if (ret < 0)
4302 return ret_errno(ENOMEM);
938980ba 4303
a5f5cb41 4304 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
938980ba
CB
4305 if (ret < 0)
4306 return -1;
4307
4308 msg = NLMSG_DATA(hdr);
4309 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4310 if (len < 0)
a5f5cb41 4311 return ret_errno(EINVAL);
938980ba 4312
6ce39620
CB
4313#pragma GCC diagnostic push
4314#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4315 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4316 if (tb[__LXC_NETNSA_NSID])
4317 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4318#pragma GCC diagnostic pop
938980ba
CB
4319
4320 return -1;
4321}
e389f2af
CB
4322
4323int lxc_create_network(struct lxc_handler *handler)
4324{
4325 int ret;
4326
e389f2af
CB
4327 if (handler->am_root) {
4328 ret = lxc_create_network_priv(handler);
4329 if (ret)
4330 return -1;
4331
4332 return lxc_network_move_created_netdev_priv(handler);
4333 }
4334
4335 return lxc_create_network_unpriv(handler);
4336}