]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: port ipv6 addresses to new list type
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
f40988c7 39#include "process_utils.h"
fdd6be55 40#include "string_utils.h"
59524108 41#include "syscall_wrappers.h"
0d204771 42#include "utils.h"
0ad19a3f 43
9de31d5a
CB
44#ifndef HAVE_STRLCPY
45#include "include/strlcpy.h"
46#endif
47
ac2cecc4 48lxc_log_define(network, lxc);
f8fee0e2 49
bad2f913
CB
50typedef int (*netdev_configure_server_cb)(struct lxc_handler *, struct lxc_netdev *);
51typedef int (*netdev_configure_container_cb)(struct lxc_netdev *);
52typedef int (*netdev_shutdown_server_cb)(struct lxc_handler *, struct lxc_netdev *);
53
3392d379
CB
54const struct lxc_network_info {
55 const char *name;
fdd6be55
CB
56 const char template[IFNAMSIZ];
57 size_t template_len;
3392d379 58} lxc_network_info[LXC_NET_MAXCONFTYPE + 1] = {
fdd6be55
CB
59 [LXC_NET_EMPTY] = { "empty", "emptXXXXXX", STRLITERALLEN("emptXXXXXX") },
60 [LXC_NET_VETH] = { "veth", "vethXXXXXX", STRLITERALLEN("vethXXXXXX") },
61 [LXC_NET_MACVLAN] = { "macvlan", "macvXXXXXX", STRLITERALLEN("macvXXXXXX") },
62 [LXC_NET_IPVLAN] = { "ipvlan", "ipvlXXXXXX", STRLITERALLEN("ipvlXXXXXX") },
63 [LXC_NET_PHYS] = { "phys", "physXXXXXX", STRLITERALLEN("physXXXXXX") },
64 [LXC_NET_VLAN] = { "vlan", "vlanXXXXXX", STRLITERALLEN("vlanXXXXXX") },
65 [LXC_NET_NONE] = { "none", "noneXXXXXX", STRLITERALLEN("noneXXXXXX") },
66 [LXC_NET_MAXCONFTYPE] = { NULL, "", 0 }
3392d379
CB
67};
68
69const char *lxc_net_type_to_str(int type)
70{
71 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
72 return NULL;
73
74 return lxc_network_info[type].name;
75}
76
77static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
78
79char *lxc_ifname_alnum_case_sensitive(char *template)
80{
81 char name[IFNAMSIZ];
82 size_t i = 0;
83#ifdef HAVE_RAND_R
84 unsigned int seed;
85
86 seed = randseed(false);
87#else
88
89 (void)randseed(true);
90#endif
91
92 if (strlen(template) >= IFNAMSIZ)
93 return NULL;
94
95 /* Generate random names until we find one that doesn't exist. */
96 for (;;) {
97 name[0] = '\0';
98 (void)strlcpy(name, template, IFNAMSIZ);
99
100 for (i = 0; i < strlen(name); i++) {
101 if (name[i] == 'X') {
102#ifdef HAVE_RAND_R
103 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
104#else
105 name[i] = padchar[rand() % strlen(padchar)];
106#endif
107 }
108 }
109
110 if (if_nametoindex(name) == 0)
111 break;
112 }
113
114 (void)strlcpy(template, name, strlen(template) + 1);
115
116 return template;
117}
3ebffb98 118static const char loop_device[] = "lo";
811ef482 119
b670016a 120static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 121{
d16bda44 122 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
8f82874c 123 struct nl_handler nlh;
d16bda44
CB
124 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
125 int addrlen, err;
8f82874c 126 struct rtmsg *rt;
8f82874c 127
128 addrlen = family == AF_INET ? sizeof(struct in_addr)
129 : sizeof(struct in6_addr);
130
d16bda44 131 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 132 if (err)
133 return err;
134
8f82874c 135 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
136 if (!nlmsg)
d16bda44 137 return -ENOMEM;
8f82874c 138
139 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
140 if (!answer)
a5f5cb41 141 return -ENOMEM;
8f82874c 142
143 nlmsg->nlmsghdr->nlmsg_flags =
144 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 145 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 146
147 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
148 if (!rt)
a5f5cb41 149 return -ENOMEM;
d16bda44 150
8f82874c 151 rt->rtm_family = family;
152 rt->rtm_table = RT_TABLE_MAIN;
153 rt->rtm_scope = RT_SCOPE_LINK;
154 rt->rtm_protocol = RTPROT_BOOT;
155 rt->rtm_type = RTN_UNICAST;
156 rt->rtm_dst_len = netmask;
157
8f82874c 158 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
159 return -EINVAL;
160
8f82874c 161 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
162 return -EINVAL;
163
164 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 165}
166
/*
 * Thin typed wrappers around lxc_ip_route_dest(): each one fixes the netlink
 * message type (add/delete) and the address family, and forwards the result.
 */

/* Add an IPv4 route to @dest/@netmask via interface @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}

/* Add an IPv6 route to @dest/@netmask via interface @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}

/* Delete the IPv4 route to @dest/@netmask via interface @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}

/* Delete the IPv6 route to @dest/@netmask via interface @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
186
d4a7da46 187static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
188{
189 struct lxc_list *iterator;
190 int err;
191
192 lxc_list_for_each(iterator, ip) {
193 struct lxc_inetdev *inetdev = iterator->elem;
194
195 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
196 if (err)
197 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 198 }
199
200 return 0;
201}
202
203static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
204{
205 struct lxc_list *iterator;
206 int err;
207
208 lxc_list_for_each(iterator, ip) {
209 struct lxc_inet6dev *inet6dev = iterator->elem;
210
211 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
212 if (err)
213 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 214 }
215
216 return 0;
217}
218
2ec31bbd 219static int setup_ipv4_addr_routes(struct lxc_netdev *netdev)
6dfa9581 220{
6dfa9581 221 int err;
2ec31bbd
CB
222 struct lxc_inetdev *inetdev;
223 int ifindex;
6dfa9581 224
2ec31bbd
CB
225 if (netdev->type != LXC_NET_VETH)
226 return ret_errno(EINVAL);
6dfa9581 227
2ec31bbd
CB
228 ifindex = netdev->priv.veth_attr.ifindex;
229 list_for_each_entry(inetdev, &netdev->ipv4_list, head) {
6dfa9581 230 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
6dfa9581 231 if (err)
9c66dc4f 232 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
233 }
234
235 return 0;
236}
237
cd32fc73 238static int setup_ipv6_addr_routes(struct lxc_netdev *netdev)
6dfa9581 239{
6dfa9581 240 int err;
cd32fc73
CB
241 struct lxc_inet6dev *inet6dev;
242 int ifindex;
6dfa9581 243
cd32fc73
CB
244 if (netdev->type != LXC_NET_VETH)
245 return ret_errno(EINVAL);
246
247 ifindex = netdev->priv.veth_attr.ifindex;
248 list_for_each_entry(inet6dev, &netdev->ipv6_list, head) {
6dfa9581
TP
249
250 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
251 if (err)
9c66dc4f 252 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
253 }
254
255 return 0;
256}
257
5fe147e9 258static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 259{
d16bda44 260 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
5fe147e9 261 struct nl_handler nlh;
d16bda44
CB
262 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
263 int addrlen, err;
5fe147e9 264 struct ndmsg *rt;
6dfa9581 265
5fe147e9 266 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 267
d16bda44 268 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
269 if (err)
270 return err;
6dfa9581 271
5fe147e9
TP
272 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
273 if (!nlmsg)
d16bda44 274 return -ENOMEM;
6dfa9581 275
5fe147e9
TP
276 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
277 if (!answer)
d16bda44 278 return -ENOMEM;
6dfa9581 279
5fe147e9
TP
280 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
281 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 282
5fe147e9
TP
283 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
284 if (!rt)
d16bda44
CB
285 return -ENOMEM;
286
5fe147e9
TP
287 rt->ndm_ifindex = ifindex;
288 rt->ndm_flags = NTF_PROXY;
289 rt->ndm_type = NDA_DST;
290 rt->ndm_family = family;
6dfa9581 291
5fe147e9 292 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 293 return -EINVAL;
6dfa9581 294
d16bda44 295 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
296}
297
298static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
299{
300 int ret;
301 char path[PATH_MAX];
302 char buf[1] = "";
303
304 if (family != AF_INET && family != AF_INET6)
596a002c 305 return ret_set_errno(-1, EINVAL);
6dfa9581 306
387c1c70
CB
307 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
308 family == AF_INET ? "ipv4" : "ipv6", ifname,
309 "forwarding");
310 if (ret < 0)
596a002c 311 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
312
313 return lxc_read_file_expect(path, buf, 1, "1");
314}
315
622f05c7
TP
/*
 * Payload sent as the IFLA_BRIDGE_VLAN_INFO attribute by lxc_bridge_vlan().
 * The field order and widths define the wire layout and must not change.
 */
struct bridge_vlan_info {
	__u16 flags;	/* BRIDGE_VLAN_INFO_* bits, 0 for a tagged membership. */
	__u16 vid;	/* VLAN ID the entry applies to. */
};
320
321static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
322{
323 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
324 struct nl_handler nlh;
325 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
326 int err;
327 struct ifinfomsg *ifi;
328 struct rtattr *nest;
329 unsigned short bridge_flags = 0;
330 struct bridge_vlan_info vlan_info;
331
332 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
333 if (err)
334 return err;
335
336 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
337 if (!nlmsg)
338 return ret_errno(ENOMEM);
339
340 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
341 if (!answer)
342 return ret_errno(ENOMEM);
343
344 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
345 nlmsg->nlmsghdr->nlmsg_type = operation;
346
347 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
348 if (!ifi)
349 return ret_errno(ENOMEM);
350 ifi->ifi_family = AF_BRIDGE;
351 ifi->ifi_index = ifindex;
352
353 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
354 if (!nest)
355 return ret_errno(ENOMEM);
356
357 bridge_flags |= BRIDGE_FLAGS_MASTER;
358 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
359 return ret_errno(ENOMEM);
360
361 vlan_info.vid = vlan_id;
362 vlan_info.flags = 0;
363 if (!tagged)
364 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
365
366 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
367 return ret_errno(ENOMEM);
368
369 nla_end_nested(nlmsg, nest);
370
371 return netlink_transaction(nlh_ptr, nlmsg, answer);
372}
373
/*
 * Add VLAN @vlan_id to bridge port @ifindex via RTM_SETLINK; @tagged selects
 * between a tagged membership and an untagged PVID membership.
 */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	const unsigned short operation = RTM_SETLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, tagged);
}

/*
 * Remove VLAN @vlan_id from bridge port @ifindex via RTM_DELLINK. The entry
 * is always described as untagged/PVID in the request.
 */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	const unsigned short operation = RTM_DELLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, false);
}
383
384static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
385{
386 struct lxc_list *iterator;
387 int err;
388
389 lxc_list_for_each(iterator, vlan_ids) {
390 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
391
392 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
393 if (err)
394 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
395 }
396
397 return 0;
398}
399
33320936
TP
400static int validate_veth(struct lxc_netdev *netdev)
401{
402 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
403 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
404 if (netdev->priv.veth_attr.vlan_id_set)
405 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
406
407 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
408 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
409 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
410 }
411
412 if (netdev->priv.veth_attr.vlan_id_set) {
413 struct lxc_list *it;
414 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
415 unsigned short i = PTR_TO_USHORT(it->elem);
416 if (i == netdev->priv.veth_attr.vlan_id)
417 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
418 }
419 }
420
421 return 0;
422}
423
424static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
425{
426 int err, rc, veth1index;
427 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
428 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
429
430 /* Skip setup if no VLAN options are specified. */
431 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
432 return 0;
433
434 /* Check vlan filtering is enabled on parent bridge. */
387c1c70
CB
435 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
436 if (rc < 0)
33320936
TP
437 return -1;
438
439 rc = lxc_read_from_file(path, buf, sizeof(buf));
440 if (rc < 0)
441 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
442
443 buf[rc - 1] = '\0';
444
6ee997a7 445 if (!strequal(buf, "1"))
33320936
TP
446 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
447
448 /* Get veth1 ifindex for use with netlink. */
449 veth1index = if_nametoindex(veth1);
450 if (!veth1index)
451 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
452
453 /* Configure untagged VLAN settings on bridge port if specified. */
454 if (netdev->priv.veth_attr.vlan_id_set) {
455 unsigned short default_pvid;
456
457 /* Get the bridge's default VLAN PVID. */
387c1c70
CB
458 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
459 if (rc < 0)
33320936
TP
460 return -1;
461
462 rc = lxc_read_from_file(path, buf, sizeof(buf));
463 if (rc < 0)
464 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
465
466 buf[rc - 1] = '\0';
467 err = get_u16(&default_pvid, buf, 0);
468 if (err)
469 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
470
471 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
472 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
473 err = lxc_bridge_vlan_del(veth1index, default_pvid);
474 if (err)
475 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
476 }
477
478 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
479 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
480 if (err)
481 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
482 }
483 }
484
485 /* Configure tagged VLAN settings on bridge port if specified. */
486 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
487 if (err)
488 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
489
490 return 0;
491}
492
8f7c3358
TP
/*
 * Arguments handed to lxc_ovs_setup_bridge_vlan_exec() when configuring the
 * VLAN settings of an openvswitch port.
 */
struct ovs_veth_vlan_args {
	const char *nic;	/* Name of the port to configure. */
	const char *vlan_mode;	/* Port VLAN mode. */
	short vlan_id;		/* PVID VLAN ID. */
	char *trunks;		/* Comma delimited list of tagged VLAN IDs. */
};

/*
 * Release the heap-allocated trunks list of @args via free_disarm(); the
 * other members are not freed here.
 */
static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args *args)
{
	free_disarm(args->trunks);
}
8f7c3358
TP
504
505static int lxc_ovs_setup_bridge_vlan_exec(void *data)
506{
507 struct ovs_veth_vlan_args *args = data;
785e1540
TP
508 __do_free char *vlan_mode = NULL, *tag = NULL, *trunks = NULL;
509
510 if (!args->vlan_mode)
511 return ret_errno(EINVAL);
8f7c3358
TP
512
513 vlan_mode = must_concat(NULL, "vlan_mode=", args->vlan_mode, (char *)NULL);
514
785e1540 515 if (args->vlan_id > BRIDGE_VLAN_NONE) {
8f7c3358
TP
516 char buf[5];
517 int rc;
518
387c1c70
CB
519 rc = strnprintf(buf, sizeof(buf), "%u", args->vlan_id);
520 if (rc < 0)
72e8122b 521 return log_error_errno(-1, EINVAL, "Failed to parse ovs bridge vlan \"%d\"", args->vlan_id);
8f7c3358
TP
522
523 tag = must_concat(NULL, "tag=", buf, (char *)NULL);
524 }
525
785e1540 526 if (args->trunks)
8f7c3358
TP
527 trunks = must_concat(NULL, "trunks=", args->trunks, (char *)NULL);
528
529 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
785e1540 530 if (tag && trunks)
8f7c3358 531 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, trunks, (char *)NULL);
785e1540 532 else if (tag)
8f7c3358 533 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, (char *)NULL);
785e1540 534 else if (trunks)
8f7c3358
TP
535 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, trunks, (char *)NULL);
536 else
537 return -EINVAL;
538
539 return -errno;
540}
541
542static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
543{
544 int taggedLength = lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids);
545 struct ovs_veth_vlan_args args;
546 args.nic = veth1;
1ee07848
TP
547 args.vlan_mode = NULL;
548 args.vlan_id = BRIDGE_VLAN_NONE;
549 args.trunks = NULL;
8f7c3358
TP
550
551 /* Skip setup if no VLAN options are specified. */
552 if (!netdev->priv.veth_attr.vlan_id_set && taggedLength <= 0)
553 return 0;
554
555 /* Configure untagged VLAN settings on bridge port if specified. */
556 if (netdev->priv.veth_attr.vlan_id_set) {
557 if (netdev->priv.veth_attr.vlan_id == BRIDGE_VLAN_NONE && taggedLength <= 0)
558 return log_error_errno(-1, EINVAL, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
559
560 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
561 * Also set the vlan_mode=access, which will drop any tagged frames.
562 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
563 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
564 */
565 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
566 args.vlan_mode = "access";
567 args.vlan_id = netdev->priv.veth_attr.vlan_id;
568 }
569 }
570
571 if (taggedLength > 0) {
572 args.vlan_mode = "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
573
574 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
575 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
576 args.vlan_mode = "native-untagged";
577 }
578
579 struct lxc_list *iterator;
580 lxc_list_for_each(iterator, &netdev->priv.veth_attr.vlan_tagged_ids) {
581 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
582 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
583 int rc;
584
387c1c70
CB
585 rc = strnprintf(buf, sizeof(buf), "%u", vlan_id);
586 if (rc < 0) {
3fe6b5cf 587 free_ovs_veth_vlan_args(&args);
8f7c3358 588 return log_error_errno(-1, EINVAL, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id, veth1);
3fe6b5cf 589 }
8f7c3358 590
1ee07848
TP
591 if (args.trunks)
592 args.trunks = must_concat(NULL, args.trunks, buf, ",", (char *)NULL);
593 else
594 args.trunks = must_concat(NULL, buf, ",", (char *)NULL);
8f7c3358
TP
595 }
596 }
597
1ee07848 598 if (args.vlan_mode) {
8f7c3358
TP
599 int ret;
600 char cmd_output[PATH_MAX];
601
602 ret = run_command(cmd_output, sizeof(cmd_output), lxc_ovs_setup_bridge_vlan_exec, (void *)&args);
3fe6b5cf
TP
603 if (ret < 0) {
604 free_ovs_veth_vlan_args(&args);
8f7c3358 605 return log_error_errno(-1, ret, "Failed to setup openvswitch vlan on port \"%s\": %s", args.nic, cmd_output);
3fe6b5cf 606 }
8f7c3358
TP
607 }
608
3fe6b5cf 609 free_ovs_veth_vlan_args(&args);
8f7c3358
TP
610 return 0;
611}
612
bad2f913 613static int netdev_configure_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 614{
54256301 615 int err;
a00fbab5 616 unsigned int mtu = 1500;
811ef482
CB
617 char *veth1, *veth2;
618 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 619
33320936
TP
620 err = validate_veth(netdev);
621 if (err)
622 return err;
623
f2711167 624 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
625 veth1 = netdev->priv.veth_attr.pair;
626 if (handler->conf->reboot)
627 lxc_netdev_delete_by_name(veth1);
628 } else {
387c1c70
CB
629 err = strnprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
630 if (err < 0)
811ef482
CB
631 return -1;
632
3646ffd9 633 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
811ef482
CB
634 if (!veth1)
635 return -1;
636
637 /* store away for deconf */
638 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
639 }
640
387c1c70
CB
641 err = strnprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
642 if (err < 0)
d34212ad
CB
643 return -1;
644
3646ffd9 645 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
811ef482 646 if (!veth2)
54256301
CB
647 return -1;
648
a00fbab5
TP
649 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
650 if (netdev->mtu) {
651 if (lxc_safe_uint(netdev->mtu, &mtu))
652 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 653 } else if (!is_empty_string(netdev->link)) {
54256301 654 int ifindex_mtu;
811ef482 655
54256301
CB
656 ifindex_mtu = if_nametoindex(netdev->link);
657 if (ifindex_mtu) {
658 mtu = netdev_get_mtu(ifindex_mtu);
659 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
660 }
661 }
662
663 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
9c66dc4f
CB
664 if (err)
665 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 666
fdd6be55
CB
667 /*
668 * Veth devices are directly created in the container's network
669 * namespace so the device doesn't need to be moved into the
670 * container's network namespace. Make this explicit by setting the
671 * devices ifindex to 0.
672 */
673 netdev->ifindex = 0;
674
24190194
CB
675 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
676
fdd6be55
CB
677 /*
678 * Since the device won't be moved transient name generation won't
679 * happen. But the transient name is needed for the container to
680 * retrieve the ifindex for the device.
681 */
682 strlcpy(netdev->transient_name, veth2, IFNAMSIZ);
683
684 /*
685 * Changing the high byte of the mac address to 0xfe, the bridge interface
811ef482 686 * will always keep the host's mac address and not take the mac address
fdd6be55
CB
687 * of a container.
688 */
811ef482
CB
689 err = setup_private_host_hw_addr(veth1);
690 if (err) {
6d1400b5 691 errno = -err;
692 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
693 goto out_delete;
694 }
695
8da62485
CB
696 /* Retrieve ifindex of the host's veth device. */
697 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
698 if (!netdev->priv.veth_attr.ifindex) {
699 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
700 goto out_delete;
701 }
702
811ef482
CB
703 if (mtu) {
704 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 705 if (err) {
6d1400b5 706 errno = -err;
54256301 707 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
708 goto out_delete;
709 }
710 }
711
f2711167 712 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
26da53c3
TP
713 if (!lxc_nic_exists(netdev->link)) {
714 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
715 goto out_delete;
716 }
717
811ef482
CB
718 err = lxc_bridge_attach(netdev->link, veth1);
719 if (err) {
6d1400b5 720 errno = -err;
26da53c3 721 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
811ef482
CB
722 goto out_delete;
723 }
724 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
33320936 725
38790036
TP
726 if (is_ovs_bridge(netdev->link)) {
727 err = setup_veth_ovs_bridge_vlan(veth1, netdev);
728 if (err) {
729 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1);
730 lxc_ovs_delete_port(netdev->link, veth1);
731 goto out_delete;
732 }
733 } else {
33320936
TP
734 err = setup_veth_native_bridge_vlan(veth1, netdev);
735 if (err) {
736 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
737 goto out_delete;
738 }
739 }
811ef482
CB
740 }
741
742 err = lxc_netdev_up(veth1);
743 if (err) {
6d1400b5 744 errno = -err;
745 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
746 goto out_delete;
747 }
748
d4a7da46 749 /* setup ipv4 routes on the host interface */
750 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
751 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
752 goto out_delete;
753 }
754
755 /* setup ipv6 routes on the host interface */
756 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
757 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
758 goto out_delete;
759 }
760
6dfa9581 761 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
762 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
763 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
764 appear to be added successfully but then do not appear in the proxy list. The length of time
765 slept doesn't appear to be important, only that the process sleeps for a short period of time.
766 */
767 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
768
6dfa9581
TP
769 if (netdev->ipv4_gateway) {
770 char bufinet4[INET_ADDRSTRLEN];
771 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 772 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
773 goto out_delete;
774 }
775
776 err = lxc_ip_forwarding_on(veth1, AF_INET);
777 if (err) {
9c66dc4f 778 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
779 goto out_delete;
780 }
781
5fe147e9 782 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 783 if (err) {
9c66dc4f 784 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
785 goto out_delete;
786 }
787 }
788
789 if (netdev->ipv6_gateway) {
790 char bufinet6[INET6_ADDRSTRLEN];
791
792 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 793 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
794 goto out_delete;
795 }
796
797 /* Check for sysctl net.ipv6.conf.all.forwarding=1
798 Kernel requires this to route any packets for IPv6.
799 */
800 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
801 if (err) {
9c66dc4f 802 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
803 goto out_delete;
804 }
805
806 err = lxc_ip_forwarding_on(veth1, AF_INET6);
807 if (err) {
9c66dc4f 808 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
809 goto out_delete;
810 }
811
812 err = lxc_neigh_proxy_on(veth1, AF_INET6);
813 if (err) {
9c66dc4f 814 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
815 goto out_delete;
816 }
817
5fe147e9 818 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 819 if (err) {
9c66dc4f 820 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
821 goto out_delete;
822 }
823 }
824
825 /* setup ipv4 address routes on the host interface */
2ec31bbd 826 err = setup_ipv4_addr_routes(netdev);
6dfa9581 827 if (err) {
9c66dc4f 828 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
829 goto out_delete;
830 }
831
832 /* setup ipv6 address routes on the host interface */
cd32fc73 833 err = setup_ipv6_addr_routes(netdev);
6dfa9581 834 if (err) {
9c66dc4f 835 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
836 goto out_delete;
837 }
838 }
839
811ef482 840 if (netdev->upscript) {
14a7b0f9
CB
841 char *argv[] = {
842 "veth",
843 netdev->link,
990b9ac3 844 veth1,
14a7b0f9
CB
845 NULL,
846 };
847
848 err = run_script_argv(handler->name,
849 handler->conf->hooks_version, "net",
850 netdev->upscript, "up", argv);
851 if (err < 0)
811ef482
CB
852 goto out_delete;
853 }
854
54256301 855 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
856
857 return 0;
858
859out_delete:
54256301 860 lxc_netdev_delete_by_name(veth1);
811ef482
CB
861 return -1;
862}
863
bad2f913 864static int netdev_configure_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 865{
8021de25 866 char peer[IFNAMSIZ];
811ef482
CB
867 int err;
868
f2711167 869 if (is_empty_string(netdev->link)) {
811ef482
CB
870 ERROR("No link for macvlan network device specified");
871 return -1;
872 }
873
387c1c70
CB
874 err = strnprintf(peer, sizeof(peer), "mcXXXXXX");
875 if (err < 0)
811ef482
CB
876 return -1;
877
3646ffd9 878 if (!lxc_ifname_alnum_case_sensitive(peer))
811ef482
CB
879 return -1;
880
881 err = lxc_macvlan_create(netdev->link, peer,
882 netdev->priv.macvlan_attr.mode);
883 if (err) {
6d1400b5 884 errno = -err;
885 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
886 peer, netdev->link);
966e9f1f 887 goto on_error;
811ef482
CB
888 }
889
9f8cf6e1
CB
890 strlcpy(netdev->created_name, peer, IFNAMSIZ);
891
811ef482
CB
892 netdev->ifindex = if_nametoindex(peer);
893 if (!netdev->ifindex) {
894 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 895 goto on_error;
811ef482
CB
896 }
897
3bef7b7b 898 if (netdev->mtu) {
54256301
CB
899 unsigned int mtu;
900
3bef7b7b
TP
901 err = lxc_safe_uint(netdev->mtu, &mtu);
902 if (err < 0) {
903 errno = -err;
904 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
905 goto on_error;
906 }
907
908 err = lxc_netdev_set_mtu(peer, mtu);
909 if (err < 0) {
910 errno = -err;
911 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
912 goto on_error;
913 }
914 }
915
811ef482 916 if (netdev->upscript) {
14a7b0f9
CB
917 char *argv[] = {
918 "macvlan",
919 netdev->link,
920 NULL,
921 };
922
923 err = run_script_argv(handler->name,
924 handler->conf->hooks_version, "net",
925 netdev->upscript, "up", argv);
926 if (err < 0)
966e9f1f 927 goto on_error;
811ef482
CB
928 }
929
4a037d61 930 DEBUG("Instantiated macvlan \"%s\" with ifindex %d and mode %d",
811ef482
CB
931 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
932
933 return 0;
966e9f1f
CB
934
935on_error:
811ef482 936 lxc_netdev_delete_by_name(peer);
811ef482
CB
937 return -1;
938}
939
0dc9a142 940static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int isolation)
c9f52382 941{
d16bda44
CB
942 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
943 struct nl_handler nlh;
944 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 945 int err, index, len;
946 struct ifinfomsg *ifi;
c9f52382 947 struct rtattr *nest, *nest2;
c9f52382 948
0dc9a142 949 len = strlen(parent);
c9f52382 950 if (len == 1 || len >= IFNAMSIZ)
d16bda44 951 return ret_errno(EINVAL);
c9f52382 952
953 len = strlen(name);
954 if (len == 1 || len >= IFNAMSIZ)
d16bda44 955 return ret_errno(EINVAL);
c9f52382 956
0dc9a142 957 index = if_nametoindex(parent);
c9f52382 958 if (!index)
d16bda44 959 return ret_errno(EINVAL);
c9f52382 960
d16bda44 961 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 962 if (err)
df62850d 963 return err;
c9f52382 964
c9f52382 965 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
966 if (!nlmsg)
d16bda44 967 return ret_errno(ENOMEM);
c9f52382 968
969 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
970 if (!answer)
d16bda44 971 return ret_errno(ENOMEM);
c9f52382 972
973 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
974 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
975
976 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
977 if (!ifi)
978 return ret_errno(ENOMEM);
c9f52382 979 ifi->ifi_family = AF_UNSPEC;
980
c9f52382 981 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
982 if (!nest)
d16bda44 983 return ret_errno(EPROTO);
c9f52382 984
985 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 986 return ret_errno(EPROTO);
c9f52382 987
5755765e
KT
988 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
989 if (!nest2)
990 return ret_errno(EPROTO);
991
3a934e2e 992 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
5755765e
KT
993 return ret_errno(EPROTO);
994
cf88a827
TP
995 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
996 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
997 * according to ipvlan docs.
5755765e 998 */
cf88a827 999 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
5755765e 1000 return ret_errno(EPROTO);
c9f52382 1001
5755765e 1002 nla_end_nested(nlmsg, nest2);
c9f52382 1003 nla_end_nested(nlmsg, nest);
1004
1005 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 1006 return ret_errno(EPROTO);
c9f52382 1007
1008 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
1009 return ret_errno(EPROTO);
1010
1011 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 1012}
1013
bad2f913 1014static int netdev_configure_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
c9f52382 1015{
dd119206 1016 char peer[IFNAMSIZ];
c9f52382 1017 int err;
1018
f2711167 1019 if (is_empty_string(netdev->link)) {
c9f52382 1020 ERROR("No link for ipvlan network device specified");
1021 return -1;
1022 }
1023
387c1c70
CB
1024 err = strnprintf(peer, sizeof(peer), "ipXXXXXX");
1025 if (err < 0)
c9f52382 1026 return -1;
1027
3646ffd9 1028 if (!lxc_ifname_alnum_case_sensitive(peer))
c9f52382 1029 return -1;
1030
dd119206
CB
1031 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
1032 netdev->priv.ipvlan_attr.isolation);
c9f52382 1033 if (err) {
dd119206
CB
1034 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
1035 peer, netdev->link);
c9f52382 1036 goto on_error;
1037 }
1038
e7fdd504
CB
1039 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1040
c9f52382 1041 netdev->ifindex = if_nametoindex(peer);
1042 if (!netdev->ifindex) {
1043 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
1044 goto on_error;
1045 }
1046
006e135e 1047 if (netdev->mtu) {
54256301
CB
1048 unsigned int mtu;
1049
006e135e 1050 err = lxc_safe_uint(netdev->mtu, &mtu);
1051 if (err < 0) {
1052 errno = -err;
54256301 1053 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 1054 goto on_error;
1055 }
1056
1057 err = lxc_netdev_set_mtu(peer, mtu);
1058 if (err < 0) {
1059 errno = -err;
54256301 1060 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 1061 goto on_error;
1062 }
1063 }
1064
c9f52382 1065 if (netdev->upscript) {
1066 char *argv[] = {
1067 "ipvlan",
1068 netdev->link,
1069 NULL,
1070 };
1071
dd119206
CB
1072 err = run_script_argv(handler->name, handler->conf->hooks_version,
1073 "net", netdev->upscript, "up", argv);
c9f52382 1074 if (err < 0)
1075 goto on_error;
1076 }
1077
4a037d61 1078 DEBUG("Instantiated ipvlan \"%s\" with ifindex %d and mode %d", peer,
dd119206 1079 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 1080
1081 return 0;
1082
1083on_error:
1084 lxc_netdev_delete_by_name(peer);
1085 return -1;
1086}
1087
bad2f913 1088static int netdev_configure_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482
CB
1089{
1090 char peer[IFNAMSIZ];
1091 int err;
1092 static uint16_t vlan_cntr = 0;
811ef482 1093
f2711167 1094 if (is_empty_string(netdev->link)) {
811ef482
CB
1095 ERROR("No link for vlan network device specified");
1096 return -1;
1097 }
1098
387c1c70
CB
1099 err = strnprintf(peer, sizeof(peer), "vlan%d-%d",
1100 netdev->priv.vlan_attr.vid, vlan_cntr++);
1101 if (err < 0)
811ef482
CB
1102 return -1;
1103
1104 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
1105 if (err) {
6d1400b5 1106 errno = -err;
1107 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1108 peer, netdev->link);
811ef482
CB
1109 return -1;
1110 }
1111
83530dba
CB
1112 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1113
811ef482
CB
1114 netdev->ifindex = if_nametoindex(peer);
1115 if (!netdev->ifindex) {
1116 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 1117 goto on_error;
1118 }
1119
1120 if (netdev->mtu) {
54256301
CB
1121 unsigned int mtu;
1122
3e2a7b08 1123 err = lxc_safe_uint(netdev->mtu, &mtu);
1124 if (err < 0) {
1125 errno = -err;
54256301 1126 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1127 goto on_error;
1128 }
1129
1130 err = lxc_netdev_set_mtu(peer, mtu);
54256301 1131 if (err < 0) {
3e2a7b08 1132 errno = -err;
54256301 1133 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1134 goto on_error;
1135 }
811ef482
CB
1136 }
1137
3a73d9f1 1138 if (netdev->upscript) {
1139 char *argv[] = {
1140 "vlan",
1141 netdev->link,
1142 NULL,
1143 };
1144
d4d68410
CB
1145 err = run_script_argv(handler->name, handler->conf->hooks_version,
1146 "net", netdev->upscript, "up", argv);
19abca58 1147 if (err < 0) {
3e2a7b08 1148 goto on_error;
19abca58 1149 }
3a73d9f1 1150 }
1151
4a037d61 1152 DEBUG("Instantiated vlan \"%s\" with ifindex \"%d\"", peer,
d4d68410 1153 netdev->ifindex);
811ef482
CB
1154
1155 return 0;
3e2a7b08 1156
1157on_error:
1158 lxc_netdev_delete_by_name(peer);
1159 return -1;
811ef482
CB
1160}
1161
bad2f913 1162static int netdev_configure_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1163{
0b154989 1164 int err, mtu_orig = 0;
14a7b0f9 1165
9c66dc4f
CB
1166 if (is_empty_string(netdev->link))
1167 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 1168
75b074ee
CB
1169 /*
1170 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
1171 * network namespace because we need it to move the device from the
1172 * host's network namespace to the container's network namespace later
1173 * on.
1174 * Note that netdev->link will contain the name of the physical network
1175 * device in the host's namespace.
1176 */
811ef482 1177 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
1178 if (!netdev->ifindex)
1179 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 1180
61302ef7 1181 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 1182 if (is_empty_string(netdev->name))
8bf64b77 1183 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 1184
75b074ee
CB
1185 /*
1186 * Store the ifindex of the host's network device in the host's
790255cf
CB
1187 * namespace.
1188 */
1189 netdev->priv.phys_attr.ifindex = netdev->ifindex;
1190
75b074ee
CB
1191 /*
1192 * Get original device MTU setting and store for restoration after
1193 * container shutdown.
1194 */
0b154989 1195 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
1196 if (mtu_orig < 0)
1197 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
1198
1199 netdev->priv.phys_attr.mtu = mtu_orig;
1200
3bef7b7b 1201 if (netdev->mtu) {
54256301
CB
1202 unsigned int mtu;
1203
3bef7b7b 1204 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
1205 if (err < 0)
1206 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 1207
3bef7b7b 1208 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
1209 if (err < 0)
1210 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
1211 }
1212
1213 if (netdev->upscript) {
1214 char *argv[] = {
1215 "phys",
1216 netdev->link,
1217 NULL,
1218 };
1219
75b074ee
CB
1220 err = run_script_argv(handler->name, handler->conf->hooks_version,
1221 "net", netdev->upscript, "up", argv);
9c66dc4f 1222 if (err < 0)
3bef7b7b 1223 return -1;
3bef7b7b
TP
1224 }
1225
4a037d61 1226 DEBUG("Instantiated phys \"%s\" with ifindex \"%d\"", netdev->link,
75b074ee 1227 netdev->ifindex);
811ef482
CB
1228
1229 return 0;
1230}
1231
bad2f913 1232static int netdev_configure_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1233{
14a7b0f9
CB
1234 int ret;
1235 char *argv[] = {
1236 "empty",
1237 NULL,
1238 };
1239
43e2a964
CB
1240 /* The loopback device always has index 1. */
1241 netdev->ifindex = 1;
1242
1243 if (!strequal(netdev->name, "lo"))
1244 return syserror_set(-EINVAL, "Custom loopback device names not supported");
1245
14a7b0f9
CB
1246 if (!netdev->upscript)
1247 return 0;
1248
1249 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1250 "net", netdev->upscript, "up", argv);
1251 if (ret < 0)
1252 return -1;
1253
811ef482
CB
1254 return 0;
1255}
1256
bad2f913 1257static int netdev_configure_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482
CB
1258{
1259 netdev->ifindex = 0;
1260 return 0;
1261}
1262
bad2f913
CB
1263static netdev_configure_server_cb netdev_configure_server[LXC_NET_MAXCONFTYPE + 1] = {
1264 [LXC_NET_VETH] = netdev_configure_server_veth,
1265 [LXC_NET_MACVLAN] = netdev_configure_server_macvlan,
1266 [LXC_NET_IPVLAN] = netdev_configure_server_ipvlan,
1267 [LXC_NET_VLAN] = netdev_configure_server_vlan,
1268 [LXC_NET_PHYS] = netdev_configure_server_phys,
1269 [LXC_NET_EMPTY] = netdev_configure_server_empty,
1270 [LXC_NET_NONE] = netdev_configure_server_none,
811ef482
CB
1271};
1272
bad2f913 1273static int __netdev_configure_container_common(struct lxc_netdev *netdev)
8bf64b77
CB
1274{
1275 char current_ifname[IFNAMSIZ];
1276
fdd6be55 1277 netdev->ifindex = if_nametoindex(netdev->transient_name);
8bf64b77
CB
1278 if (!netdev->ifindex)
1279 return log_error_errno(-1,
1280 errno, "Failed to retrieve ifindex for network device with name %s",
fdd6be55 1281 netdev->transient_name);
8bf64b77 1282
3473ca76 1283 if (is_empty_string(netdev->name))
8bf64b77
CB
1284 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1285
fdd6be55 1286 if (!strequal(netdev->transient_name, netdev->name)) {
8bf64b77
CB
1287 int ret;
1288
fdd6be55 1289 ret = lxc_netdev_rename_by_name(netdev->transient_name, netdev->name);
8bf64b77 1290 if (ret)
9c66dc4f 1291 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
fdd6be55 1292 netdev->transient_name, netdev->name);
8bf64b77 1293
fdd6be55 1294 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->transient_name, netdev->name);
8bf64b77
CB
1295 }
1296
1297 /*
1298 * Re-read the name of the interface because its name has changed and
1299 * would be automatically allocated by the system
1300 */
1301 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 1302 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
1303
1304 /*
1305 * Now update the recorded name of the network device to reflect the
1306 * name of the network device in the child's network namespace. We will
1307 * later on send this information back to the parent.
1308 */
1309 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
fdd6be55 1310 netdev->transient_name[0] = '\0';
8bf64b77
CB
1311
1312 return 0;
1313}
1314
/* veth devices need no type-specific work; use the common configuration step. */
static int netdev_configure_container_veth(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1320
/* macvlan devices need no type-specific work; use the common configuration step. */
static int netdev_configure_container_macvlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1325
/* ipvlan devices need no type-specific work; use the common configuration step. */
static int netdev_configure_container_ipvlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1330
/* vlan devices need no type-specific work; use the common configuration step. */
static int netdev_configure_container_vlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1335
/* phys devices need no type-specific work; use the common configuration step. */
static int netdev_configure_container_phys(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1340
/* Nothing to configure for network type "empty"; @netdev is left untouched. */
static int netdev_configure_container_empty(struct lxc_netdev *netdev)
{
	return 0;
}
1345
/* Nothing to configure for network type "none"; @netdev is left untouched. */
static int netdev_configure_container_none(struct lxc_netdev *netdev)
{
	return 0;
}
1350
bad2f913
CB
1351static netdev_configure_container_cb netdev_configure_container[LXC_NET_MAXCONFTYPE + 1] = {
1352 [LXC_NET_VETH] = netdev_configure_container_veth,
1353 [LXC_NET_MACVLAN] = netdev_configure_container_macvlan,
1354 [LXC_NET_IPVLAN] = netdev_configure_container_ipvlan,
1355 [LXC_NET_VLAN] = netdev_configure_container_vlan,
1356 [LXC_NET_PHYS] = netdev_configure_container_phys,
1357 [LXC_NET_EMPTY] = netdev_configure_container_empty,
1358 [LXC_NET_NONE] = netdev_configure_container_none,
8bf64b77
CB
1359};
1360
bad2f913 1361static int netdev_shutdown_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1362{
14a7b0f9
CB
1363 int ret;
1364 char *argv[] = {
1365 "veth",
1366 netdev->link,
1367 NULL,
1368 NULL,
1369 };
1370
1371 if (!netdev->downscript)
1372 return 0;
811ef482 1373
f2711167 1374 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 1375 argv[2] = netdev->priv.veth_attr.pair;
811ef482 1376 else
14a7b0f9
CB
1377 argv[2] = netdev->priv.veth_attr.veth1;
1378
1379 ret = run_script_argv(handler->name,
1380 handler->conf->hooks_version, "net",
1381 netdev->downscript, "down", argv);
1382 if (ret < 0)
1383 return -1;
811ef482 1384
811ef482
CB
1385 return 0;
1386}
1387
bad2f913 1388static int netdev_shutdown_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1389{
14a7b0f9
CB
1390 int ret;
1391 char *argv[] = {
1392 "macvlan",
1393 netdev->link,
1394 NULL,
1395 };
1396
1397 if (!netdev->downscript)
1398 return 0;
1399
1400 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1401 "net", netdev->downscript, "down", argv);
1402 if (ret < 0)
1403 return -1;
811ef482 1404
811ef482
CB
1405 return 0;
1406}
1407
bad2f913 1408static int netdev_shutdown_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
c9f52382 1409{
1410 int ret;
1411 char *argv[] = {
1412 "ipvlan",
1413 netdev->link,
1414 NULL,
1415 };
1416
1417 if (!netdev->downscript)
1418 return 0;
1419
1420 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1421 "net", netdev->downscript, "down", argv);
1422 if (ret < 0)
1423 return -1;
1424
1425 return 0;
1426}
1427
bad2f913 1428static int netdev_shutdown_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1429{
3a73d9f1 1430 int ret;
1431 char *argv[] = {
1432 "vlan",
1433 netdev->link,
1434 NULL,
1435 };
1436
1437 if (!netdev->downscript)
1438 return 0;
1439
1440 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1441 "net", netdev->downscript, "down", argv);
1442 if (ret < 0)
1443 return -1;
1444
811ef482
CB
1445 return 0;
1446}
1447
bad2f913 1448static int netdev_shutdown_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1449{
14a7b0f9
CB
1450 int ret;
1451 char *argv[] = {
1452 "phys",
1453 netdev->link,
1454 NULL,
1455 };
1456
1457 if (!netdev->downscript)
1458 return 0;
1459
1460 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1461 "net", netdev->downscript, "down", argv);
1462 if (ret < 0)
1463 return -1;
811ef482 1464
811ef482
CB
1465 return 0;
1466}
1467
bad2f913 1468static int netdev_shutdown_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1469{
14a7b0f9
CB
1470 int ret;
1471 char *argv[] = {
1472 "empty",
1473 NULL,
1474 };
1475
1476 if (!netdev->downscript)
1477 return 0;
1478
1479 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1480 "net", netdev->downscript, "down", argv);
1481 if (ret < 0)
1482 return -1;
811ef482 1483
811ef482
CB
1484 return 0;
1485}
1486
/* Network type "none" has nothing to shut down; both arguments are unused. */
static int netdev_shutdown_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
1491
bad2f913
CB
1492static netdev_shutdown_server_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1493 [LXC_NET_VETH] = netdev_shutdown_server_veth,
1494 [LXC_NET_MACVLAN] = netdev_shutdown_server_macvlan,
1495 [LXC_NET_IPVLAN] = netdev_shutdown_server_ipvlan,
1496 [LXC_NET_VLAN] = netdev_shutdown_server_vlan,
1497 [LXC_NET_PHYS] = netdev_shutdown_server_phys,
1498 [LXC_NET_EMPTY] = netdev_shutdown_server_empty,
1499 [LXC_NET_NONE] = netdev_shutdown_server_none,
811ef482
CB
1500};
1501
0037ab49
TP
1502static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1503{
d16bda44 1504 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0037ab49 1505 struct nl_handler nlh;
d16bda44
CB
1506 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1507 int err;
0037ab49 1508 struct ifinfomsg *ifi;
0037ab49 1509
d16bda44 1510 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1511 if (err)
1512 return err;
1513
0037ab49
TP
1514 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1515 if (!nlmsg)
d16bda44 1516 return ret_errno(ENOMEM);
0037ab49
TP
1517
1518 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1519 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1520
1521 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1522 if (!ifi)
d16bda44
CB
1523 return ret_errno(ENOMEM);
1524
0037ab49
TP
1525 ifi->ifi_family = AF_UNSPEC;
1526 ifi->ifi_index = ifindex;
1527
1528 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1529 return ret_errno(ENOMEM);
0037ab49 1530
3473ca76 1531 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1532 return ret_errno(ENOMEM);
0037ab49 1533
d16bda44 1534 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1535}
1536
ebc73a67 1537int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1538{
d16bda44 1539 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0ad19a3f 1540 struct nl_handler nlh;
d16bda44
CB
1541 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1542 int err;
06f976ca 1543 struct ifinfomsg *ifi;
0ad19a3f 1544
d16bda44 1545 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1546 if (err)
1547 return err;
0ad19a3f 1548
0ad19a3f 1549 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1550 if (!nlmsg)
d16bda44 1551 return ret_errno(ENOMEM);
0ad19a3f 1552
ebc73a67 1553 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1554 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1555
1556 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1557 if (!ifi)
d16bda44
CB
1558 return ret_errno(ENOMEM);
1559
06f976ca
SZ
1560 ifi->ifi_family = AF_UNSPEC;
1561 ifi->ifi_index = ifindex;
0ad19a3f 1562
1563 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1564 return ret_errno(ENOMEM);
0ad19a3f 1565
3473ca76 1566 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1567 return ret_errno(ENOMEM);
8d357196 1568
d16bda44 1569 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1570}
1571
ebc73a67
CB
1572/* If we are asked to move a wireless interface, then we must actually move its
1573 * phyN device. Detect that condition and return the physname here. The physname
1574 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1575 */
1576#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1577char *is_wlan(const char *ifname)
e5848d39 1578{
4110345b
CB
1579 __do_fclose FILE *f = NULL;
1580 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1581 int i, ret;
e5848d39 1582 long physlen;
ebc73a67 1583 size_t len;
e5848d39 1584
ebc73a67 1585 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1586 path = must_realloc(NULL, len + 1);
387c1c70
CB
1587 ret = strnprintf(path, len, PHYSNAME, ifname);
1588 if (ret < 0)
4110345b 1589 return NULL;
ebc73a67 1590
4110345b 1591 f = fopen(path, "re");
ebc73a67 1592 if (!f)
4110345b 1593 return NULL;
ebc73a67 1594
1a0e70ac 1595 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1596 fseek(f, 0, SEEK_END);
1597 physlen = ftell(f);
1598 fseek(f, 0, SEEK_SET);
4110345b
CB
1599 if (physlen < 0)
1600 return NULL;
ebc73a67
CB
1601
1602 physname = malloc(physlen + 1);
4110345b
CB
1603 if (!physname)
1604 return NULL;
ebc73a67
CB
1605
1606 memset(physname, 0, physlen + 1);
e5848d39 1607 ret = fread(physname, 1, physlen, f);
e5848d39 1608 if (ret < 0)
4110345b 1609 return NULL;
e5848d39 1610
ebc73a67 1611 for (i = 0; i < physlen; i++) {
e5848d39
SH
1612 if (physname[i] == '\n')
1613 physname[i] = '\0';
ebc73a67 1614
e5848d39
SH
1615 if (physname[i] == '\0')
1616 break;
1617 }
1618
4110345b 1619 return move_ptr(physname);
e5848d39
SH
1620}
1621
ebc73a67
CB
/*
 * Rename network device @old to @new inside the network namespace of @pid.
 *
 * The work is done in a forked child that switches into the target namespace;
 * the parent waits for it.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on. It must never return: doing so would let the
	 * child continue running the caller's code alongside the parent.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1639
e4103cf6 1640int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1641 const char *newname)
e5848d39 1642{
3dd78294 1643 __do_free char *cmd = NULL;
ebc73a67 1644 pid_t fpid;
e5848d39
SH
1645
1646 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1647 * However, IIUC this involves a bit more complicated work to talk to
1648 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1649 */
1650 cmd = on_path("iw", NULL);
0ed79f45
M
1651 if (!cmd) {
1652 ERROR("Couldn't find the application iw in PATH");
3dd78294 1653 return -1;
0ed79f45 1654 }
e5848d39
SH
1655
1656 fpid = fork();
1657 if (fpid < 0)
3dd78294 1658 return -1;
ebc73a67 1659
e5848d39
SH
1660 if (fpid == 0) {
1661 char pidstr[30];
1662 sprintf(pidstr, "%d", pid);
9c66dc4f 1663 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1664 _exit(EXIT_FAILURE);
e5848d39 1665 }
ebc73a67 1666
e5848d39 1667 if (wait_for_pid(fpid))
3dd78294 1668 return -1;
e5848d39 1669
e5848d39 1670 if (newname)
3dd78294 1671 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1672
3dd78294 1673 return 0;
e5848d39
SH
1674}
1675
8d357196 1676int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1677{
3dd78294 1678 __do_free char *physname = NULL;
8befa924
SH
1679 int index;
1680
8befa924
SH
1681 if (!ifname)
1682 return -EINVAL;
1683
32571606 1684 index = if_nametoindex(ifname);
49428bf3
DY
1685 if (!index)
1686 return -EINVAL;
32571606 1687
ebc73a67
CB
1688 physname = is_wlan(ifname);
1689 if (physname)
e5848d39
SH
1690 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1691
8d357196 1692 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1693}
1694
b84f58b9 1695int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1696{
d16bda44
CB
1697 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1698 struct nl_handler nlh;
1699 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1700 int err;
ebc73a67 1701 struct ifinfomsg *ifi;
0ad19a3f 1702
d16bda44 1703 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1704 if (err)
1705 return err;
0ad19a3f 1706
0ad19a3f 1707 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1708 if (!nlmsg)
d16bda44 1709 return ret_errno(ENOMEM);
0ad19a3f 1710
06f976ca 1711 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1712 if (!answer)
d16bda44 1713 return ret_errno(ENOMEM);
0ad19a3f 1714
ebc73a67 1715 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1716 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1717
1718 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1719 if (!ifi)
d16bda44
CB
1720 return ret_errno(ENOMEM);
1721
06f976ca
SZ
1722 ifi->ifi_family = AF_UNSPEC;
1723 ifi->ifi_index = ifindex;
0ad19a3f 1724
d16bda44 1725 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1726}
1727
b84f58b9
DL
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL if @name cannot be resolved to an interface index,
 * otherwise the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (!ifindex)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1738
1739int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1740{
d16bda44
CB
1741 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1742 struct nl_handler nlh;
1743 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1744 int err, len;
06f976ca 1745 struct ifinfomsg *ifi;
b9a5bb58 1746
d16bda44 1747 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1748 if (err)
1749 return err;
b9a5bb58 1750
b84f58b9 1751 len = strlen(newname);
d16bda44
CB
1752 if (len == 1 || len >= IFNAMSIZ)
1753 return ret_errno(EINVAL);
b84f58b9 1754
b9a5bb58
DL
1755 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1756 if (!nlmsg)
d16bda44 1757 return ret_errno(ENOMEM);
b9a5bb58 1758
06f976ca 1759 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1760 if (!answer)
d16bda44 1761 return ret_errno(ENOMEM);
b9a5bb58 1762
ebc73a67 1763 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1764 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1765
1766 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1767 if (!ifi)
d16bda44
CB
1768 return ret_errno(ENOMEM);
1769
06f976ca
SZ
1770 ifi->ifi_family = AF_UNSPEC;
1771 ifi->ifi_index = ifindex;
b84f58b9
DL
1772
1773 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1774 return ret_errno(ENOMEM);
b9a5bb58 1775
d16bda44 1776 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1777}
1778
b84f58b9
DL
1779int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1780{
1781 int len, index;
1782
1783 len = strlen(oldname);
dae3fdf6 1784 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1785 return -EINVAL;
1786
1787 index = if_nametoindex(oldname);
1788 if (!index)
1789 return -EINVAL;
1790
1791 return lxc_netdev_rename_by_index(index, newname);
1792}
1793
8befa924 1794int netdev_set_flag(const char *name, int flag)
0ad19a3f 1795{
d16bda44
CB
1796 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1797 struct nl_handler nlh;
1798 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1799 int err, index, len;
06f976ca 1800 struct ifinfomsg *ifi;
0ad19a3f 1801
d16bda44 1802 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1803 if (err)
1804 return err;
0ad19a3f 1805
1806 len = strlen(name);
dae3fdf6 1807 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1808 return ret_errno(EINVAL);
0ad19a3f 1809
1810 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1811 if (!nlmsg)
d16bda44 1812 return ret_errno(ENOMEM);
0ad19a3f 1813
06f976ca 1814 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1815 if (!answer)
d16bda44 1816 return ret_errno(ENOMEM);
0ad19a3f 1817
1818 index = if_nametoindex(name);
1819 if (!index)
d16bda44 1820 return ret_errno(EINVAL);
0ad19a3f 1821
ebc73a67 1822 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1823 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1824
1825 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1826 if (!ifi)
1827 return ret_errno(ENOMEM);
1828
06f976ca
SZ
1829 ifi->ifi_family = AF_UNSPEC;
1830 ifi->ifi_index = index;
1831 ifi->ifi_change |= IFF_UP;
1832 ifi->ifi_flags |= flag;
0ad19a3f 1833
d16bda44 1834 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1835}
1836
59eac805 1837static int netdev_get_flag(const char *name, int *flag)
efa1cf45 1838{
d16bda44
CB
1839 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1840 struct nl_handler nlh;
1841 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1842 int err, index, len;
a4318300 1843 struct ifinfomsg *ifi;
efa1cf45
DY
1844
1845 if (!name)
d16bda44 1846 return ret_errno(EINVAL);
efa1cf45 1847
d16bda44 1848 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1849 if (err)
1850 return err;
1851
efa1cf45
DY
1852 len = strlen(name);
1853 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1854 return ret_errno(EINVAL);
efa1cf45 1855
efa1cf45
DY
1856 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1857 if (!nlmsg)
d16bda44 1858 return ret_errno(ENOMEM);
efa1cf45 1859
06f976ca 1860 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1861 if (!answer)
d16bda44 1862 return ret_errno(ENOMEM);
efa1cf45 1863
efa1cf45
DY
1864 index = if_nametoindex(name);
1865 if (!index)
d16bda44 1866 return ret_errno(EINVAL);
efa1cf45 1867
06f976ca
SZ
1868 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1869 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1870
1871 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1872 if (!ifi)
1873 return ret_errno(ENOMEM);
1874
06f976ca
SZ
1875 ifi->ifi_family = AF_UNSPEC;
1876 ifi->ifi_index = index;
efa1cf45 1877
d16bda44 1878 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1879 if (err)
d16bda44 1880 return ret_set_errno(-1, errno);
efa1cf45 1881
06f976ca 1882 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1883
1884 *flag = ifi->ifi_flags;
efa1cf45
DY
1885 return err;
1886}
1887
1888/*
1889 * \brief Check a interface is up or not.
1890 *
1891 * \param name: name for the interface.
1892 *
1893 * \return int.
1894 * 0 means interface is down.
1895 * 1 means interface is up.
1896 * Others means error happened, and ret-value is the error number.
1897 */
ebc73a67 1898int lxc_netdev_isup(const char *name)
efa1cf45 1899{
4db0514d
CB
1900 int err;
1901 int flag = 0;
efa1cf45
DY
1902
1903 err = netdev_get_flag(name, &flag);
1904 if (err)
ebc73a67
CB
1905 return err;
1906
efa1cf45
DY
1907 if (flag & IFF_UP)
1908 return 1;
ebc73a67 1909
efa1cf45 1910 return 0;
efa1cf45
DY
1911}
1912
0130df54
SH
1913int netdev_get_mtu(int ifindex)
1914{
a5f5cb41 1915 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54 1916 struct nl_handler nlh;
a5f5cb41
CB
1917 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1918 int readmore = 0, recv_len = 0;
1919 int answer_len, err, res;
06f976ca 1920 struct ifinfomsg *ifi;
0130df54 1921 struct nlmsghdr *msg;
0130df54 1922
a5f5cb41 1923 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0130df54
SH
1924 if (err)
1925 return err;
1926
0130df54
SH
1927 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1928 if (!nlmsg)
a5f5cb41 1929 return ret_errno(ENOMEM);
0130df54 1930
06f976ca 1931 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54 1932 if (!answer)
a5f5cb41 1933 return ret_errno(ENOMEM);
0130df54
SH
1934
1935 /* Save the answer buffer length, since it will be overwritten
1936 * on the first receive (and we might need to receive more than
ebc73a67
CB
1937 * once.
1938 */
06f976ca
SZ
1939 answer_len = answer->nlmsghdr->nlmsg_len;
1940
ebc73a67 1941 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1942 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1943
06f976ca 1944 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1945 if (!ifi)
a5f5cb41
CB
1946 return ret_errno(ENOMEM);
1947
06f976ca 1948 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1949
1950 /* Send the request for addresses, which returns all addresses
1951 * on all interfaces. */
a5f5cb41 1952 err = netlink_send(nlh_ptr, nlmsg);
0130df54 1953 if (err < 0)
a5f5cb41 1954 return ret_set_errno(-1, errno);
0130df54 1955
6ce39620
CB
1956#pragma GCC diagnostic push
1957#pragma GCC diagnostic ignored "-Wcast-align"
1958
0130df54
SH
1959 do {
1960 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1961 * overwritten by a previous receive.
1962 */
06f976ca 1963 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1964
1965 /* Get the (next) batch of reply messages */
a5f5cb41 1966 err = netlink_rcv(nlh_ptr, answer);
0130df54 1967 if (err < 0)
a5f5cb41 1968 return ret_set_errno(-1, errno);
0130df54
SH
1969
1970 recv_len = err;
0130df54
SH
1971
1972 /* Satisfy the typing for the netlink macros */
06f976ca 1973 msg = answer->nlmsghdr;
0130df54
SH
1974
1975 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1976 /* Stop reading if we see an error message */
1977 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
1978 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1979 return ret_set_errno(errmsg->error, errno);
0130df54
SH
1980 }
1981
1982 /* Stop reading if we see a NLMSG_DONE message */
1983 if (msg->nlmsg_type == NLMSG_DONE) {
1984 readmore = 0;
1985 break;
1986 }
1987
06f976ca 1988 ifi = NLMSG_DATA(msg);
0130df54
SH
1989 if (ifi->ifi_index == ifindex) {
1990 struct rtattr *rta = IFLA_RTA(ifi);
a5f5cb41
CB
1991 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1992
0130df54 1993 res = 0;
ebc73a67 1994 while (RTA_OK(rta, attr_len)) {
9c66dc4f 1995 /*
a5f5cb41 1996 * Found a local address for the
ebc73a67
CB
1997 * requested interface, return it.
1998 */
0130df54 1999 if (rta->rta_type == IFLA_MTU) {
a5f5cb41
CB
2000 memcpy(&res, RTA_DATA(rta), sizeof(int));
2001 return res;
0130df54 2002 }
a5f5cb41 2003
0130df54
SH
2004 rta = RTA_NEXT(rta, attr_len);
2005 }
0130df54
SH
2006 }
2007
ebc73a67
CB
2008 /* Keep reading more data from the socket if the last
2009 * message had the NLF_F_MULTI flag set.
2010 */
0130df54
SH
2011 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2012
ebc73a67 2013 /* Look at the next message received in this buffer. */
0130df54
SH
2014 msg = NLMSG_NEXT(msg, recv_len);
2015 }
2016 } while (readmore);
2017
6ce39620
CB
2018#pragma GCC diagnostic pop
2019
ebc73a67 2020 /* If we end up here, we didn't find any result, so signal an error. */
a5f5cb41 2021 return -1;
0130df54
SH
2022}
2023
d472214b 2024int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 2025{
a5f5cb41
CB
2026 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2027 struct nl_handler nlh;
2028 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
54256301 2029 int err, len;
06f976ca 2030 struct ifinfomsg *ifi;
75d09f83 2031
a5f5cb41 2032 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2033 if (err)
2034 return err;
75d09f83
DL
2035
2036 len = strlen(name);
dae3fdf6 2037 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2038 return ret_errno(EINVAL);
75d09f83
DL
2039
2040 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2041 if (!nlmsg)
a5f5cb41 2042 return ret_errno(ENOMEM);
75d09f83 2043
06f976ca 2044 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83 2045 if (!answer)
a5f5cb41 2046 return ret_errno(ENOMEM);
75d09f83 2047
ebc73a67 2048 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
2049 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2050
2051 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2052 if (!ifi)
2053 return ret_errno(ENOMEM);
2054
06f976ca 2055 ifi->ifi_family = AF_UNSPEC;
54256301
CB
2056
2057 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2058 return ret_errno(ENOMEM);
75d09f83
DL
2059
2060 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2061 return ret_errno(ENOMEM);
75d09f83 2062
a5f5cb41 2063 return netlink_transaction(nlh_ptr, nlmsg, answer);
75d09f83
DL
2064}
2065
d472214b 2066int lxc_netdev_up(const char *name)
0ad19a3f 2067{
d472214b 2068 return netdev_set_flag(name, IFF_UP);
0ad19a3f 2069}
2070
/* Take interface @name down: forwards to netdev_set_flag() with no flag set. */
int lxc_netdev_down(const char *name)
{
	return netdev_set_flag(name, 0);
}
2075
54256301 2076int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 2077{
a5f5cb41
CB
2078 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2079 struct nl_handler nlh;
2080 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2081 int err, len;
06f976ca 2082 struct ifinfomsg *ifi;
0ad19a3f 2083 struct rtattr *nest1, *nest2, *nest3;
0ad19a3f 2084
a5f5cb41 2085 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2086 if (err)
2087 return err;
0ad19a3f 2088
2089 len = strlen(name1);
dae3fdf6 2090 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2091 return ret_errno(EINVAL);
0ad19a3f 2092
2093 len = strlen(name2);
dae3fdf6 2094 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2095 return ret_errno(EINVAL);
0ad19a3f 2096
2097 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2098 if (!nlmsg)
a5f5cb41 2099 return ret_errno(ENOMEM);
0ad19a3f 2100
06f976ca 2101 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2102 if (!answer)
a5f5cb41 2103 return ret_errno(ENOMEM);
0ad19a3f 2104
a5f5cb41 2105 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2106 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2107
2108 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 2109 if (!ifi)
a5f5cb41
CB
2110 return ret_errno(ENOMEM);
2111
06f976ca 2112 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2113
79e68309 2114 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2115 if (!nest1)
a5f5cb41 2116 return ret_errno(EINVAL);
0ad19a3f 2117
2118 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
a5f5cb41 2119 return ret_errno(ENOMEM);
0ad19a3f 2120
2121 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2122 if (!nest2)
a5f5cb41 2123 return ret_errno(ENOMEM);
0ad19a3f 2124
2125 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
2126 if (!nest3)
a5f5cb41 2127 return ret_errno(ENOMEM);
0ad19a3f 2128
06f976ca 2129 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2130 if (!ifi)
2131 return ret_errno(ENOMEM);
0ad19a3f 2132
2133 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
a5f5cb41 2134 return ret_errno(ENOMEM);
0ad19a3f 2135
54256301 2136 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2137 return ret_errno(ENOMEM);
54256301
CB
2138
2139 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
a5f5cb41 2140 return ret_errno(ENOMEM);
54256301 2141
0ad19a3f 2142 nla_end_nested(nlmsg, nest3);
0ad19a3f 2143 nla_end_nested(nlmsg, nest2);
0ad19a3f 2144 nla_end_nested(nlmsg, nest1);
2145
2146 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
a5f5cb41 2147 return ret_errno(ENOMEM);
0ad19a3f 2148
a5f5cb41 2149 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2150}
2151
ebc73a67 2152/* TODO: merge with lxc_macvlan_create */
0dc9a142 2153int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid)
26c39028 2154{
a5f5cb41
CB
2155 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2156 struct nl_handler nlh;
2157 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2158 int err, len, lindex;
06f976ca 2159 struct ifinfomsg *ifi;
26c39028 2160 struct rtattr *nest, *nest2;
26c39028 2161
a5f5cb41 2162 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2163 if (err)
2164 return err;
26c39028 2165
0dc9a142 2166 len = strlen(parent);
dae3fdf6 2167 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2168 return ret_errno(EINVAL);
26c39028
JHS
2169
2170 len = strlen(name);
dae3fdf6 2171 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2172 return ret_errno(EINVAL);
26c39028
JHS
2173
2174 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2175 if (!nlmsg)
a5f5cb41 2176 return ret_errno(ENOMEM);
26c39028 2177
06f976ca 2178 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028 2179 if (!answer)
a5f5cb41 2180 return ret_errno(ENOMEM);
26c39028 2181
0dc9a142 2182 lindex = if_nametoindex(parent);
26c39028 2183 if (!lindex)
a5f5cb41 2184 return ret_errno(EINVAL);
26c39028 2185
a5f5cb41 2186 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2187 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2188
2189 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2190 if (!ifi)
2191 return ret_errno(ENOMEM);
2192
06f976ca 2193 ifi->ifi_family = AF_UNSPEC;
26c39028 2194
79e68309 2195 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028 2196 if (!nest)
a5f5cb41 2197 return ret_errno(ENOMEM);
26c39028
JHS
2198
2199 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
a5f5cb41 2200 return ret_errno(ENOMEM);
26c39028
JHS
2201
2202 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2203 if (!nest2)
a5f5cb41 2204 return ret_errno(ENOMEM);
e892973e 2205
26c39028 2206 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
a5f5cb41 2207 return ret_errno(ENOMEM);
e892973e 2208
26c39028 2209 nla_end_nested(nlmsg, nest2);
26c39028
JHS
2210 nla_end_nested(nlmsg, nest);
2211
2212 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
a5f5cb41 2213 return ret_errno(ENOMEM);
26c39028
JHS
2214
2215 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41
CB
2216 return ret_errno(ENOMEM);
2217
2218 return netlink_transaction(nlh_ptr, nlmsg, answer);
26c39028
JHS
2219}
2220
0dc9a142 2221int lxc_macvlan_create(const char *parent, const char *name, int mode)
0ad19a3f 2222{
a5f5cb41
CB
2223 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2224 struct nl_handler nlh;
2225 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2226 int err, index, len;
06f976ca 2227 struct ifinfomsg *ifi;
e892973e 2228 struct rtattr *nest, *nest2;
0ad19a3f 2229
a5f5cb41 2230 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2231 if (err)
2232 return err;
0ad19a3f 2233
0dc9a142 2234 len = strlen(parent);
dae3fdf6 2235 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2236 return ret_errno(EINVAL);
0ad19a3f 2237
2238 len = strlen(name);
dae3fdf6 2239 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2240 return ret_errno(EINVAL);
0ad19a3f 2241
2242 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2243 if (!nlmsg)
a5f5cb41 2244 return ret_errno(ENOMEM);
0ad19a3f 2245
06f976ca 2246 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2247 if (!answer)
a5f5cb41 2248 return ret_errno(ENOMEM);
0ad19a3f 2249
0dc9a142 2250 index = if_nametoindex(parent);
0ad19a3f 2251 if (!index)
a5f5cb41 2252 return ret_errno(EINVAL);
0ad19a3f 2253
a5f5cb41 2254 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2255 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2256
2257 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2258 if (!ifi)
2259 return ret_errno(ENOMEM);
2260
06f976ca 2261 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2262
79e68309 2263 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2264 if (!nest)
a5f5cb41 2265 return ret_errno(ENOMEM);
0ad19a3f 2266
2267 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
a5f5cb41 2268 return ret_errno(ENOMEM);
0ad19a3f 2269
e892973e
DL
2270 if (mode) {
2271 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2272 if (!nest2)
a5f5cb41 2273 return ret_errno(ENOMEM);
e892973e
DL
2274
2275 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
a5f5cb41 2276 return ret_errno(ENOMEM);
e892973e
DL
2277
2278 nla_end_nested(nlmsg, nest2);
2279 }
2280
0ad19a3f 2281 nla_end_nested(nlmsg, nest);
2282
2283 if (nla_put_u32(nlmsg, IFLA_LINK, index))
a5f5cb41 2284 return ret_errno(ENOMEM);
0ad19a3f 2285
2286 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2287 return ret_errno(ENOMEM);
0ad19a3f 2288
a5f5cb41 2289 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2290}
2291
/*
 * Write the string @value to the file at @path.
 *
 * Returns 0 on success, or -errno if opening the file or writing to it
 * failed. The errno of a failed write is captured before the descriptor is
 * closed.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int fd, ret;

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -errno;

	ret = (lxc_write_nointr(fd, value, strlen(value)) < 0) ? -errno : 0;

	close(fd);
	return ret;
}
2307
6dfa9581 2308static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 2309{
2310 int ret;
2311 char path[PATH_MAX];
6509154d 2312
2313 if (family != AF_INET && family != AF_INET6)
6dfa9581 2314 return -EINVAL;
6509154d 2315
387c1c70
CB
2316 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2317 family == AF_INET ? "ipv4" : "ipv6", ifname,
2318 "forwarding");
2319 if (ret < 0)
6dfa9581 2320 return -E2BIG;
6509154d 2321
6dfa9581
TP
2322 return proc_sys_net_write(path, flag ? "1" : "0");
2323}
2324
/* Enable forwarding on @name for @family; see ip_forwarding_set(). */
int lxc_ip_forwarding_on(const char *name, int family)
{
	return ip_forwarding_set(name, family, 1);
}
2329
/* Disable forwarding on @name for @family; see ip_forwarding_set(). */
int lxc_ip_forwarding_off(const char *name, int family)
{
	return ip_forwarding_set(name, family, 0);
}
2334
0ad19a3f 2335static int neigh_proxy_set(const char *ifname, int family, int flag)
2336{
9ba8130c 2337 int ret;
419590da 2338 char path[PATH_MAX];
0ad19a3f 2339
2340 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 2341 return -EINVAL;
0ad19a3f 2342
387c1c70
CB
2343 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2344 family == AF_INET ? "ipv4" : "ipv6", ifname,
2345 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2346 if (ret < 0)
9ba8130c 2347 return -E2BIG;
0ad19a3f 2348
ebc73a67 2349 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 2350}
2351
6509154d 2352static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2353{
2354 int ret;
2355 char path[PATH_MAX];
2356 char buf[1] = "";
2357
2358 if (family != AF_INET && family != AF_INET6)
596a002c 2359 return ret_set_errno(-1, EINVAL);
6509154d 2360
387c1c70
CB
2361 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2362 family == AF_INET ? "ipv4" : "ipv6", ifname,
2363 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2364 if (ret < 0)
596a002c 2365 return ret_set_errno(-1, E2BIG);
6509154d 2366
2367 return lxc_read_file_expect(path, buf, 1, "1");
2368}
2369
/* Enable neighbour proxying on @name for @family; see neigh_proxy_set(). */
int lxc_neigh_proxy_on(const char *name, int family)
{
	return neigh_proxy_set(name, family, 1);
}
2374
/* Disable neighbour proxying on @name for @family; see neigh_proxy_set(). */
int lxc_neigh_proxy_off(const char *name, int family)
{
	return neigh_proxy_set(name, family, 0);
}
2379
/*
 * Return the value (0-15) of the hexadecimal digit @c, or -1 if @c is not a
 * hexadecimal digit. The cast keeps isdigit() defined for negative chars.
 */
static int mac_hex_value(char c)
{
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are stored in
 * sa_data. Each byte is written as one or two hexadecimal digits, optionally
 * followed by ':'. Parsing stops at the end of the string or after ETH_ALEN
 * bytes, whichever comes first; a shorter string is not treated as an error.
 *
 * Returns 0 on success and -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned int val;
		int nibble;
		char c;

		/* First digit of the byte is mandatory. */
		nibble = mac_hex_value(*macaddr++);
		if (nibble < 0)
			return -EINVAL;
		val = (unsigned int)nibble;

		/*
		 * The second digit is optional: a ':' or the end of the string
		 * right after the first digit yields a single-digit byte.
		 */
		c = *macaddr;
		nibble = mac_hex_value(c);
		if (nibble >= 0)
			val = (val << 4) | (unsigned int)nibble;
		else if (c != ':' && c != '\0')
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0xff);
		i++;

		/* Skip the separator that follows a two-digit byte. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2424
ebc73a67
CB
2425static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2426 void *acast, int prefix)
0ad19a3f 2427{
a5f5cb41
CB
2428 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2429 struct nl_handler nlh;
2430 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2431 int addrlen, err;
06f976ca 2432 struct ifaddrmsg *ifa;
0ad19a3f 2433
ebc73a67
CB
2434 addrlen = family == AF_INET ? sizeof(struct in_addr)
2435 : sizeof(struct in6_addr);
4bf1968d 2436
a5f5cb41 2437 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2438 if (err)
2439 return err;
0ad19a3f 2440
0ad19a3f 2441 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2442 if (!nlmsg)
a5f5cb41 2443 return ret_errno(ENOMEM);
0ad19a3f 2444
06f976ca 2445 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2446 if (!answer)
a5f5cb41 2447 return ret_errno(ENOMEM);
0ad19a3f 2448
a5f5cb41 2449 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2450 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2451
2452 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2453 if (!ifa)
a5f5cb41
CB
2454 return ret_errno(ENOMEM);
2455
06f976ca
SZ
2456 ifa->ifa_prefixlen = prefix;
2457 ifa->ifa_index = ifindex;
2458 ifa->ifa_family = family;
2459 ifa->ifa_scope = 0;
acf47e1b 2460
4bf1968d 2461 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
a5f5cb41 2462 return ret_errno(EINVAL);
0ad19a3f 2463
4bf1968d 2464 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
a5f5cb41 2465 return ret_errno(EINVAL);
0ad19a3f 2466
d8948a52 2467 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
a5f5cb41 2468 return ret_errno(EINVAL);
1f1b18e7 2469
ebc73a67 2470 /* TODO: multicast, anycast with ipv6 */
79881dc6
DL
2471 if (family == AF_INET6 &&
2472 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2473 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
a5f5cb41 2474 return ret_errno(EPROTONOSUPPORT);
0ad19a3f 2475
a5f5cb41 2476 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2477}
2478
/*
 * Add IPv6 address @addr/@prefix to interface @ifindex.
 *
 * Forwards to ip_addr_add() with AF_INET6; @mcast is passed in the position
 * of its broadcast argument and @acast as its anycast argument.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	return ip_addr_add(AF_INET6, ifindex, addr, mcast, acast, prefix);
}
2485
/*
 * Add IPv4 address @addr/@prefix with broadcast address @bcast to interface
 * @ifindex. Forwards to ip_addr_add() with AF_INET and no anycast address.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	return ip_addr_add(AF_INET, ifindex, addr, bcast, NULL, prefix);
}
2491
/* Extract an IFA_LOCAL address (or IFA_ADDRESS if no IFA_LOCAL is present)
 * from the given RTM_NEWADDR message. Memory for the address is allocated on
 * demand and the pointer stored in *res (so res should be an in_addr** or
 * in6_addr**). A message of a different family is skipped and *res is left
 * untouched.
 *
 * Returns 0 on success (including "nothing found") and -1 if an attribute has
 * an unexpected length or the allocation fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	int attr_len = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *rta;
	int addrlen;

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk the rtattr's of this message. */
	for (rta = IFA_RTA(ifa); RTA_OK(rta, attr_len); rta = RTA_NEXT(rta, attr_len)) {
		int is_local = (rta->rta_type == IFA_LOCAL);

		if (!is_local && rta->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the address
		 * length, but verify it anyway.
		 */
		if (RTA_PAYLOAD(rta) != addrlen)
			return -1;

		/* An IFA_ADDRESS stored earlier is simply overwritten by a
		 * later IFA_LOCAL, so the buffer is only allocated once.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(rta), addrlen);

		/* IFA_LOCAL is the preferred answer; stop looking. */
		if (is_local)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2545
19a26f82
MK
2546static int ip_addr_get(int family, int ifindex, void **res)
2547{
a5f5cb41
CB
2548 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2549 struct nl_handler nlh;
2550 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2551 int answer_len, err;
06f976ca 2552 struct ifaddrmsg *ifa;
19a26f82 2553 struct nlmsghdr *msg;
ebc73a67 2554 int readmore = 0, recv_len = 0;
19a26f82 2555
a5f5cb41 2556 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
19a26f82
MK
2557 if (err)
2558 return err;
2559
19a26f82
MK
2560 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2561 if (!nlmsg)
a5f5cb41 2562 return ret_errno(ENOMEM);
19a26f82 2563
06f976ca 2564 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82 2565 if (!answer)
a5f5cb41 2566 return ret_errno(ENOMEM);
19a26f82 2567
ebc73a67
CB
2568 /* Save the answer buffer length, since it will be overwritten on the
2569 * first receive (and we might need to receive more than once).
2570 */
06f976ca
SZ
2571 answer_len = answer->nlmsghdr->nlmsg_len;
2572
ebc73a67 2573 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2574 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2575
06f976ca 2576 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b 2577 if (!ifa)
a5f5cb41
CB
2578 return ret_errno(ENOMEM);
2579
06f976ca 2580 ifa->ifa_family = family;
19a26f82 2581
ebc73a67
CB
2582 /* Send the request for addresses, which returns all addresses on all
2583 * interfaces.
2584 */
a5f5cb41 2585 err = netlink_send(nlh_ptr, nlmsg);
19a26f82 2586 if (err < 0)
a5f5cb41 2587 return ret_set_errno(err, errno);
19a26f82 2588
6ce39620
CB
2589#pragma GCC diagnostic push
2590#pragma GCC diagnostic ignored "-Wcast-align"
2591
19a26f82
MK
2592 do {
2593 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2594 * overwritten by a previous receive.
2595 */
06f976ca 2596 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2597
ebc73a67 2598 /* Get the (next) batch of reply messages. */
a5f5cb41 2599 err = netlink_rcv(nlh_ptr, answer);
19a26f82 2600 if (err < 0)
a5f5cb41 2601 return ret_set_errno(err, errno);
19a26f82
MK
2602
2603 recv_len = err;
2604 err = 0;
2605
ebc73a67 2606 /* Satisfy the typing for the netlink macros. */
06f976ca 2607 msg = answer->nlmsghdr;
19a26f82
MK
2608
2609 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2610 /* Stop reading if we see an error message. */
19a26f82 2611 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
2612 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2613 return ret_set_errno(errmsg->error, errno);
19a26f82
MK
2614 }
2615
ebc73a67 2616 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2617 if (msg->nlmsg_type == NLMSG_DONE) {
2618 readmore = 0;
2619 break;
2620 }
2621
a5f5cb41
CB
2622 if (msg->nlmsg_type != RTM_NEWADDR)
2623 return ret_errno(EINVAL);
19a26f82 2624
06f976ca
SZ
2625 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2626 if (ifa->ifa_index == ifindex) {
a5f5cb41
CB
2627 if (ifa_get_local_ip(family, msg, res) < 0)
2628 return ret_errno(EINVAL);
51e7a874 2629
ebc73a67 2630 /* Found a result, stop searching. */
19a26f82 2631 if (*res)
a5f5cb41 2632 return 0;
19a26f82
MK
2633 }
2634
ebc73a67
CB
2635 /* Keep reading more data from the socket if the last
2636 * message had the NLF_F_MULTI flag set.
2637 */
19a26f82
MK
2638 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2639
ebc73a67 2640 /* Look at the next message received in this buffer. */
19a26f82
MK
2641 msg = NLMSG_NEXT(msg, recv_len);
2642 }
2643 } while (readmore);
2644
6ce39620
CB
2645#pragma GCC diagnostic pop
2646
19a26f82 2647 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2648 * error.
2649 */
a5f5cb41 2650 return -1;
19a26f82
MK
2651}
2652
/* Look up an IPv6 address of interface @ifindex; see ip_addr_get(). */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	return ip_addr_get(AF_INET6, ifindex, (void **)res);
}
2657
/* Look up an IPv4 address of interface @ifindex; see ip_addr_get(). */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	return ip_addr_get(AF_INET, ifindex, (void **)res);
}
2662
f8fee0e2
MK
2663static int ip_gateway_add(int family, int ifindex, void *gw)
2664{
a5f5cb41 2665 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2666 struct nl_handler nlh;
a5f5cb41
CB
2667 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2668 int addrlen, err;
06f976ca 2669 struct rtmsg *rt;
f8fee0e2 2670
ebc73a67
CB
2671 addrlen = family == AF_INET ? sizeof(struct in_addr)
2672 : sizeof(struct in6_addr);
f8fee0e2 2673
a5f5cb41 2674 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
f8fee0e2
MK
2675 if (err)
2676 return err;
2677
f8fee0e2
MK
2678 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2679 if (!nlmsg)
a5f5cb41 2680 return ret_errno(ENOMEM);
f8fee0e2 2681
06f976ca 2682 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2 2683 if (!answer)
a5f5cb41 2684 return ret_errno(ENOMEM);
f8fee0e2 2685
a5f5cb41 2686 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2687 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2688
2689 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b 2690 if (!rt)
a5f5cb41
CB
2691 return ret_errno(ENOMEM);
2692
06f976ca
SZ
2693 rt->rtm_family = family;
2694 rt->rtm_table = RT_TABLE_MAIN;
2695 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2696 rt->rtm_protocol = RTPROT_BOOT;
2697 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2698 /* "default" destination */
06f976ca 2699 rt->rtm_dst_len = 0;
f8fee0e2 2700
a2f9a670 2701 /* If gateway address not supplied, then a device route will be created instead */
a5f5cb41
CB
2702 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2703 return ret_errno(ENOMEM);
f8fee0e2
MK
2704
2705 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2706 * addresses for the gateway.
2707 */
f8fee0e2 2708 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
a5f5cb41 2709 return ret_errno(EINVAL);
f8fee0e2 2710
a5f5cb41 2711 return netlink_transaction(nlh_ptr, nlmsg, answer);
f8fee0e2
MK
2712}
2713
/* Add an IPv4 default route via @gw on interface @ifindex; see ip_gateway_add(). */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	return ip_gateway_add(AF_INET, ifindex, gw);
}
2718
/* Add an IPv6 default route via @gw on interface @ifindex; see ip_gateway_add(). */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	return ip_gateway_add(AF_INET6, ifindex, gw);
}
581c75e7 2723bool is_ovs_bridge(const char *bridge)
0d204771 2724{
ebc73a67 2725 int ret;
0d204771 2726 struct stat sb;
ebc73a67 2727 char brdirname[22 + IFNAMSIZ + 1] = {0};
0d204771 2728
387c1c70
CB
2729 ret = strnprintf(brdirname, 22 + IFNAMSIZ + 1,
2730 "/sys/class/net/%s/bridge", bridge);
2731 if (ret < 0)
ebc73a67
CB
2732 return false;
2733
2734 ret = stat(brdirname, &sb);
2735 if (ret < 0 && errno == ENOENT)
0d204771 2736 return true;
ebc73a67 2737
0d204771
SH
2738 return false;
2739}
2740
/* Arguments handed to the ovs-vsctl exec callbacks through run_command(). */
struct ovs_veth_args {
	const char *bridge; /* name of the openvswitch bridge */
	const char *nic;    /* name of the port to add or delete */
};
2745
cb0dc11b
CB
2746/* Called from a background thread - when nic goes away, remove it from the
2747 * bridge.
c43cbc04 2748 */
581c75e7 2749static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2750{
581c75e7 2751 struct ovs_veth_args *args = data;
cb0dc11b 2752
9c66dc4f 2753 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2754 return -1;
c43cbc04
SH
2755}
2756
581c75e7 2757int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2758{
c43cbc04 2759 int ret;
419590da 2760 char cmd_output[PATH_MAX];
581c75e7 2761 struct ovs_veth_args args;
6ad22d06 2762
581c75e7
CB
2763 args.bridge = bridge;
2764 args.nic = nic;
2765 ret = run_command(cmd_output, sizeof(cmd_output),
2766 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2767 if (ret < 0)
2768 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2769
581c75e7
CB
2770 return 0;
2771}
ebc73a67 2772
581c75e7
CB
2773static int lxc_ovs_attach_bridge_exec(void *data)
2774{
2775 struct ovs_veth_args *args = data;
ebc73a67 2776
9c66dc4f 2777 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2778 return -1;
2779}
ebc73a67 2780
581c75e7
CB
2781static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2782{
2783 int ret;
419590da 2784 char cmd_output[PATH_MAX];
581c75e7 2785 struct ovs_veth_args args;
ebc73a67 2786
581c75e7
CB
2787 args.bridge = bridge;
2788 args.nic = nic;
2789 ret = run_command(cmd_output, sizeof(cmd_output),
2790 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2791 if (ret < 0)
2792 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2793
581c75e7 2794 return 0;
0d204771 2795}
0d204771 2796
581c75e7 2797int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2798{
ebc73a67 2799 int err, fd, index;
9de31d5a 2800 size_t retlen;
0ad19a3f 2801 struct ifreq ifr;
2802
dae3fdf6 2803 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2804 return -EINVAL;
0ad19a3f 2805
2806 index = if_nametoindex(ifname);
2807 if (!index)
3cfc0f3a 2808 return -EINVAL;
0ad19a3f 2809
0d204771 2810 if (is_ovs_bridge(bridge))
581c75e7 2811 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2812
ad9429e5 2813 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2814 if (fd < 0)
3cfc0f3a 2815 return -errno;
0ad19a3f 2816
9de31d5a 2817 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2818 if (retlen >= IFNAMSIZ) {
2819 close(fd);
9de31d5a 2820 return -E2BIG;
42cc4083 2821 }
9de31d5a 2822
ebc73a67 2823 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2824 ifr.ifr_ifindex = index;
7d163508 2825 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2826 close(fd);
3cfc0f3a
MN
2827 if (err)
2828 err = -errno;
0ad19a3f 2829
2830 return err;
2831}
72d0e1cb 2832
8befa924
SH
2833int setup_private_host_hw_addr(char *veth1)
2834{
387c1c70
CB
2835 __do_close int sockfd = -EBADF;
2836 int err;
8befa924 2837 struct ifreq ifr;
8befa924 2838
ad9429e5 2839 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2840 if (sockfd < 0)
2841 return -errno;
2842
387c1c70
CB
2843 err = strnprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2844 if (err < 0)
2845 return err;
ebc73a67 2846
8befa924 2847 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
387c1c70 2848 if (err < 0)
8befa924 2849 return -errno;
8befa924
SH
2850
2851 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2852 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924
SH
2853 if (err < 0)
2854 return -errno;
2855
2856 return 0;
2857}
811ef482
CB
2858
2859int lxc_find_gateway_addresses(struct lxc_handler *handler)
2860{
811ef482
CB
2861 struct lxc_netdev *netdev;
2862 int link_index;
2863
87d0990c 2864 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
811ef482
CB
2865 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2866 continue;
2867
9c66dc4f
CB
2868 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2869 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2870
87d0990c 2871 if (is_empty_string(netdev->link))
9c66dc4f 2872 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2873
2874 link_index = if_nametoindex(netdev->link);
2875 if (!link_index)
2876 return -EINVAL;
2877
2878 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2879 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2880 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2881 }
2882
2883 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2884 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2885 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2886 }
2887 }
2888
2889 return 0;
2890}
2891
2892#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
071d0934
CB
2893static int lxc_create_network_unpriv_exec(const char *lxcpath,
2894 const char *lxcname,
2895 struct lxc_netdev *netdev, pid_t pid,
2896 unsigned int hooks_version)
811ef482
CB
2897{
2898 int ret;
2899 pid_t child;
2900 int bytes, pipefd[2];
2901 char *token, *saveptr = NULL;
095ead80 2902 char netdev_link[IFNAMSIZ];
419590da 2903 char buffer[PATH_MAX] = {0};
94b1cade 2904 size_t retlen;
811ef482 2905
9c66dc4f 2906 if (netdev->type != LXC_NET_VETH)
071d0934
CB
2907 return log_error_errno(-1, errno,
2908 "Network type %d not support for unprivileged use",
2909 netdev->type);
811ef482
CB
2910
2911 ret = pipe(pipefd);
9c66dc4f
CB
2912 if (ret < 0)
2913 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2914
2915 child = fork();
2916 if (child < 0) {
811ef482
CB
2917 close(pipefd[0]);
2918 close(pipefd[1]);
9c66dc4f 2919 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2920 }
2921
2922 if (child == 0) {
8335fd40 2923 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2924
2925 close(pipefd[0]);
2926
2927 ret = dup2(pipefd[1], STDOUT_FILENO);
2928 if (ret >= 0)
2929 ret = dup2(pipefd[1], STDERR_FILENO);
2930 close(pipefd[1]);
2931 if (ret < 0) {
2932 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2933 _exit(EXIT_FAILURE);
811ef482
CB
2934 }
2935
f2711167 2936 if (!is_empty_string(netdev->link))
9de31d5a 2937 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2938 else
9de31d5a
CB
2939 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2940 if (retlen >= IFNAMSIZ) {
2941 SYSERROR("Invalid network device name");
2942 _exit(EXIT_FAILURE);
2943 }
811ef482 2944
387c1c70
CB
2945 ret = strnprintf(pidstr, sizeof(pidstr), "%d", pid);
2946 if (ret < 0)
78070056 2947 _exit(EXIT_FAILURE);
8335fd40 2948 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2949
2950 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
071d0934 2951 lxcname, pidstr, netdev_link, !is_empty_string(netdev->name) ? netdev->name : "(null)");
3473ca76 2952 if (!is_empty_string(netdev->name))
811ef482
CB
2953 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2954 lxcpath, lxcname, pidstr, "veth", netdev_link,
2955 netdev->name, (char *)NULL);
2956 else
2957 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2958 lxcpath, lxcname, pidstr, "veth", netdev_link,
2959 (char *)NULL);
2960 SYSERROR("Failed to execute lxc-user-nic");
78070056 2961 _exit(EXIT_FAILURE);
811ef482
CB
2962 }
2963
2964 /* close the write-end of the pipe */
2965 close(pipefd[1]);
2966
9c66dc4f 2967 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2968 if (bytes < 0) {
74c6e2b0 2969 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2970 close(pipefd[0]);
6b9f82a9
CB
2971 } else {
2972 buffer[bytes - 1] = '\0';
811ef482 2973 }
811ef482
CB
2974
2975 ret = wait_for_pid(child);
2976 close(pipefd[0]);
9c66dc4f 2977 if (ret != 0 || bytes < 0)
071d0934
CB
2978 return log_error(-1, "lxc-user-nic failed to configure requested network: %s",
2979 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2980 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2981
2982 /* netdev->name */
2983 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2984 if (!token)
2985 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2986
e389f2af
CB
2987 /*
2988 * lxc-user-nic will take care of proper network device naming. So
fdd6be55 2989 * netdev->name and netdev->transient_name need to be identical to not
e389f2af
CB
2990 * trigger another rename later on.
2991 */
2992 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
fdd6be55
CB
2993 if (retlen < IFNAMSIZ) {
2994 retlen = strlcpy(netdev->transient_name, token, IFNAMSIZ);
2995 if (retlen < IFNAMSIZ)
2996 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2997 }
9c66dc4f 2998 if (retlen >= IFNAMSIZ)
071d0934
CB
2999 return log_error_errno(-1, E2BIG,
3000 "Container side veth device name returned by lxc-user-nic is too long");
811ef482 3001
74c6e2b0 3002 /* netdev->ifindex */
811ef482 3003 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3004 if (!token)
3005 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 3006
74c6e2b0 3007 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f 3008 if (ret < 0)
071d0934
CB
3009 return log_error_errno(-1, -ret,
3010 "Failed to convert string \"%s\" to integer", token);
811ef482 3011
74c6e2b0 3012 /* netdev->priv.veth_attr.veth1 */
811ef482 3013 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3014 if (!token)
3015 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 3016
94b1cade 3017 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f 3018 if (retlen >= IFNAMSIZ)
071d0934
CB
3019 return log_error_errno(-1, E2BIG,
3020 "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
3021
3022 /* netdev->priv.veth_attr.ifindex */
3023 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3024 if (!token)
3025 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
3026
3027 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f 3028 if (ret < 0)
071d0934
CB
3029 return log_error_errno(-1, -ret,
3030 "Failed to convert string \"%s\" to integer", token);
811ef482 3031
4d781681 3032 if (netdev->upscript) {
3033 char *argv[] = {
3034 "veth",
3035 netdev->link,
3036 netdev->priv.veth_attr.veth1,
3037 NULL,
3038 };
3039
e389f2af
CB
3040 ret = run_script_argv(lxcname, hooks_version, "net",
3041 netdev->upscript, "up", argv);
4d781681 3042 if (ret < 0)
3043 return -1;
071d0934 3044 }
4d781681 3045
811ef482
CB
3046 return 0;
3047}
3048
f0ecc19d 3049static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
3050 struct lxc_netdev *netdev,
3051 const char *netns_path)
811ef482
CB
3052{
3053 int bytes, ret;
3054 pid_t child;
3055 int pipefd[2];
25619b99 3056 char buffer[PATH_MAX] = {};
811ef482 3057
9c66dc4f
CB
3058 if (netdev->type != LXC_NET_VETH)
3059 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
3060
3061 ret = pipe(pipefd);
9c66dc4f
CB
3062 if (ret < 0)
3063 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
3064
3065 child = fork();
3066 if (child < 0) {
811ef482
CB
3067 close(pipefd[0]);
3068 close(pipefd[1]);
9c66dc4f 3069 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
3070 }
3071
3072 if (child == 0) {
8843fde4 3073 char *hostveth;
811ef482
CB
3074
3075 close(pipefd[0]);
3076
3077 ret = dup2(pipefd[1], STDOUT_FILENO);
3078 if (ret >= 0)
3079 ret = dup2(pipefd[1], STDERR_FILENO);
3080 close(pipefd[1]);
3081 if (ret < 0) {
3082 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 3083 _exit(EXIT_FAILURE);
811ef482
CB
3084 }
3085
f2711167 3086 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3087 hostveth = netdev->priv.veth_attr.pair;
3088 else
3089 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3090 if (is_empty_string(hostveth)) {
74c6e2b0 3091 SYSERROR("Host side veth device name is missing");
a30b9023 3092 _exit(EXIT_FAILURE);
74c6e2b0
CB
3093 }
3094
f2711167
CB
3095 if (is_empty_string(netdev->link)) {
3096 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 3097 _exit(EXIT_FAILURE);
74c6e2b0 3098 }
811ef482 3099
811ef482 3100 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 3101 lxcname, netns_path, netdev->link, hostveth);
811ef482 3102 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
3103 lxcname, netns_path, "veth", netdev->link, hostveth,
3104 (char *)NULL);
811ef482 3105 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 3106 _exit(EXIT_FAILURE);
811ef482
CB
3107 }
3108
3109 close(pipefd[1]);
3110
9c66dc4f 3111 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
3112 if (bytes < 0) {
3113 SYSERROR("Failed to read from pipe file descriptor.");
3114 close(pipefd[0]);
6b9f82a9
CB
3115 } else {
3116 buffer[bytes - 1] = '\0';
811ef482 3117 }
811ef482 3118
6b9f82a9 3119 ret = wait_for_pid(child);
9c66dc4f
CB
3120 close_prot_errno_disarm(pipefd[0]);
3121 if (ret != 0 || bytes < 0)
3122 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
3123 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 3124
811ef482
CB
3125 return 0;
3126}
3127
59eac805 3128static bool lxc_delete_network_unpriv(struct lxc_handler *handler)
1bd8d726
CB
3129{
3130 int ret;
87d0990c 3131 struct lxc_netdev *netdev;
1bd8d726
CB
3132 /* strlen("/proc/") = 6
3133 * +
8335fd40 3134 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
3135 * +
3136 * strlen("/fd/") = 4
3137 * +
8335fd40 3138 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
3139 * +
3140 * \0
3141 */
8335fd40 3142 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
3143
3144 *netns_path = '\0';
3145
9c66dc4f
CB
3146 if (handler->nsfd[LXC_NS_NET] < 0)
3147 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726 3148
387c1c70
CB
3149 ret = strnprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
3150 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
3151 if (ret < 0)
1bd8d726
CB
3152 return false;
3153
87d0990c 3154 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
1bd8d726 3155 char *hostveth = NULL;
1bd8d726
CB
3156
3157 /* We can only delete devices whose ifindex we have. If we don't
3158 * have the index it means that we didn't create it.
3159 */
3160 if (!netdev->ifindex)
3161 continue;
3162
3163 if (netdev->type == LXC_NET_PHYS) {
3164 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3165 netdev->link);
3166 if (ret < 0)
9c66dc4f 3167 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
3168 netdev->ifindex, netdev->link);
3169 else
9c66dc4f 3170 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 3171 netdev->ifindex, netdev->link);
b3259dc6
TP
3172
3173 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3174 if (ret < 0)
3175 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3176 netdev->ifindex, netdev->link);
66a7c406 3177 goto clear_ifindices;
1bd8d726
CB
3178 }
3179
3180 ret = netdev_deconf[netdev->type](handler, netdev);
3181 if (ret < 0)
3182 WARN("Failed to deconfigure network device");
3183
3184 if (netdev->type != LXC_NET_VETH)
66a7c406 3185 goto clear_ifindices;
1bd8d726 3186
f2711167 3187 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 3188 goto clear_ifindices;
1bd8d726 3189
f2711167 3190 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3191 hostveth = netdev->priv.veth_attr.pair;
3192 else
3193 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3194 if (is_empty_string(hostveth))
66a7c406 3195 goto clear_ifindices;
8843fde4 3196
1bd8d726
CB
3197 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3198 handler->name, netdev,
3199 netns_path);
3200 if (ret < 0) {
9c66dc4f 3201 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 3202 goto clear_ifindices;
1bd8d726 3203 }
9c66dc4f 3204 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
3205
3206clear_ifindices:
0858c829
CB
3207 /*
3208 * We need to clear any ifindices we recorded so liblxc won't
3209 * have cached stale data which would cause it to fail on
3210 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3211 */
3212 netdev->ifindex = 0;
3213 if (netdev->type == LXC_NET_PHYS) {
3214 netdev->priv.phys_attr.ifindex = 0;
3215 } else if (netdev->type == LXC_NET_VETH) {
3216 netdev->priv.veth_attr.veth1[0] = '\0';
3217 netdev->priv.veth_attr.ifindex = 0;
3218 }
1bd8d726
CB
3219 }
3220
bb84beda 3221 return true;
1bd8d726
CB
3222}
3223
6509154d 3224static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
6509154d 3225 struct lxc_inetdev *inet4dev;
3226 struct lxc_inet6dev *inet6dev;
3227 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3228 int err = 0;
5fe147e9
TP
3229 unsigned int lo_ifindex = 0, link_ifindex = 0;
3230
3231 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
3232 if (link_ifindex == 0)
3233 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 3234
6509154d 3235
3236 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2ec31bbd 3237 if (!list_empty(&netdev->ipv4_list)) {
6509154d 3238 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
3239 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3240 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 3241 }
3242
3243 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
cd32fc73 3244 if (!list_empty(&netdev->ipv6_list)) {
6509154d 3245 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
3246 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3247 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 3248
3249 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
3250 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3251 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 3252 }
3253
b670016a 3254 /* Perform IPVLAN specific checks. */
3255 if (netdev->type == LXC_NET_IPVLAN) {
3256 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
3257 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3258 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 3259
3260 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3261 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
3262 if (lo_ifindex == 0)
3263 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 3264 }
3265
2ec31bbd 3266 list_for_each_entry(inet4dev, &netdev->ipv4_list, head) {
6509154d 3267 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3268 return ret_set_errno(-1, -errno);
6509154d 3269
5fe147e9 3270 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3271 return ret_set_errno(-1, EINVAL);
b670016a 3272
3273 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3274 if (netdev->type == LXC_NET_IPVLAN) {
3275 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
3276 if (err < 0)
3277 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 3278 }
6509154d 3279 }
3280
cd32fc73 3281 list_for_each_entry(inet6dev, &netdev->ipv6_list, head) {
6509154d 3282 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3283 return ret_set_errno(-1, -errno);
6509154d 3284
5fe147e9 3285 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3286 return ret_set_errno(-1, EINVAL);
b670016a 3287
3288 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3289 if (netdev->type == LXC_NET_IPVLAN) {
3290 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
3291 if (err < 0)
3292 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3293 }
6509154d 3294 }
3295
3296 return 0;
3297}
3298
9c66dc4f
CB
/*
 * Remove the l2proxy state of a single IPv4 address.
 *
 * @ip:         address to clean up
 * @link:       interface holding the neighbour proxy entry; skipped if empty
 * @lo_ifindex: loopback ifindex holding the /32 destination; skipped if 0
 *
 * Both removals are attempted even if the first one fails. Returns 0 on
 * success and -1 with errno set to EINVAL on failure.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addr_str[INET_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addr_str, sizeof(addr_str)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* A loopback ifindex means a static route to lo has to be removed. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addr_str, lo_ifindex);
	}

	/* A link name means a neighbour proxy entry has to be removed. */
	if (!is_empty_string(link)) {
		unsigned int link_ifindex = if_nametoindex(link);

		if (link_ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, link_ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3331
9c66dc4f
CB
/*
 * Remove the l2proxy state of a single IPv6 address.
 *
 * @ip:         address to clean up
 * @link:       interface holding the neighbour proxy entry; skipped if empty
 * @lo_ifindex: loopback ifindex holding the /128 destination; skipped if 0
 *
 * Both removals are attempted even if the first one fails. Returns 0 on
 * success and -1 with errno set to EINVAL on failure.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addr_str[INET6_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addr_str, sizeof(addr_str)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* A loopback ifindex means a static route to lo has to be removed. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addr_str, lo_ifindex);
	}

	/* A link name means a neighbour proxy entry has to be removed. */
	if (!is_empty_string(link)) {
		unsigned int link_ifindex = if_nametoindex(link);

		if (link_ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, link_ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3366
cd32fc73
CB
3367static int lxc_delete_l2proxy(struct lxc_netdev *netdev)
3368{
b670016a 3369 unsigned int lo_ifindex = 0;
cd32fc73 3370 unsigned int err = 0;
6509154d 3371 struct lxc_inetdev *inet4dev;
3372 struct lxc_inet6dev *inet6dev;
6509154d 3373
b670016a 3374 /* Perform IPVLAN specific checks. */
3375 if (netdev->type == LXC_NET_IPVLAN) {
3376 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3377 lo_ifindex = if_nametoindex(loop_device);
b670016a 3378 if (lo_ifindex == 0) {
cd32fc73 3379 err++;
3ebffb98 3380 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3381 }
b670016a 3382 }
6509154d 3383
2ec31bbd 3384 list_for_each_entry(inet4dev, &netdev->ipv4_list, head) {
b670016a 3385 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
cd32fc73 3386 err++;
6509154d 3387 }
3388
cd32fc73 3389 list_for_each_entry(inet6dev, &netdev->ipv6_list, head) {
b670016a 3390 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
cd32fc73 3391 err++;
6509154d 3392 }
3393
cd32fc73
CB
3394 if (err > 0)
3395 return ret_errno(EINVAL);
6509154d 3396
3397 return 0;
3398}
3399
e389f2af 3400static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3401{
87d0990c 3402 struct lxc_netdev *netdev;
811ef482 3403
87d0990c 3404 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
9c66dc4f
CB
3405 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3406 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3407
6509154d 3408 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3409 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3410 if (lxc_setup_l2proxy(netdev))
3411 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3412 }
3413
bad2f913 3414 if (netdev_configure_server[netdev->type](handler, netdev))
9c66dc4f 3415 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3416 }
3417
3418 return 0;
3419}
3420
fdd6be55
CB
3421/*
3422 * LXC moves network devices into the target namespace based on their created
3423 * name. The created name can either be randomly generated for e.g. veth
3424 * devices or it can be the name of the existing device in the server's
3425 * namespaces. This is e.g. the case when moving physical devices. However this
3426 * can lead to weird clashes. Consider we have a network namespace that has the
3427 * following devices:
3428
3429 * 4: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3430 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3431 * altname enp7s0
3432 * 5: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3433 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3434 * altname enp8s0
3435 *
3436 * and the user generates the following network config for their container:
3437 *
3438 * lxc.net.0.type = phys
3439 * lxc.net.0.name = eth1
3440 * lxc.net.0.link = eth2
3441 *
3442 * lxc.net.1.type = phys
3443 * lxc.net.1.name = eth2
3444 * lxc.net.1.link = eth1
3445 *
3446 * This would cause LXC to move the devices eth1 and eth2 from the server's
3447 * network namespace into the container's network namespace:
3448 *
3449 * 24: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3450 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3451 * altname enp7s0
3452 * 25: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3453 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3454 * altname enp8s0
3455 *
3456 * According to the network config above we now need to rename the network
3457 * devices in the container's network namespace. Let's say we start with
3458 * renaming eth2 to eth1. This would immediately lead to a clash since the
3459 * container's network namespace already contains a network device with that
3460 * name. Renaming the other device would have the same problem.
3461 *
3462 * There are multiple ways to fix this but I'm concerned with keeping the logic
3463 * somewhat reasonable which is why we simply start creating transient device
3464 * names that are unique which we'll use to move and rename the network device
3465 * in the container's network namespace at the same time. And then we rename
3466 * based on those random device names to the target name.
3467 *
3468 * Note that the transient name is based on the type of network device as
3469 * specified in the LXC config. However, that doesn't mean it's correct. LXD
3470 * passes veth devices and a range of other network devices (e.g. Infiniband
3471 * VFs etc.) via LXC_NET_PHYS even though they're not really "physical" in the
3472 * sense we like to think about it so you might see a veth device being
3473 * assigned a "physXXXXXX" transient name. That's not a problem.
3474 */
3475static int create_transient_name(struct lxc_netdev *netdev)
3476{
3477 const struct lxc_network_info *info;
3478
3479 if (!is_empty_string(netdev->transient_name))
3480 return syserror_set(-EINVAL, "Network device already had a transient name %s",
3481 netdev->transient_name);
3482
3483 info = &lxc_network_info[netdev->type];
3484 strlcpy(netdev->transient_name, info->template, info->template_len + 1);
3485
3486 if (!lxc_ifname_alnum_case_sensitive(netdev->transient_name))
3487 return syserror_set(-EINVAL, "Failed to create transient name for network device %s", netdev->created_name);
3488
3489 TRACE("Created transient name %s for network device", netdev->transient_name);
3490 return 0;
3491}
3492
43e2a964
CB
3493static int netdev_requires_move(const struct lxc_netdev *netdev)
3494{
3495 if (IN_SET(netdev->type, LXC_NET_EMPTY, LXC_NET_NONE))
3496 return false;
3497
3498 /*
3499 * Veth devices are directly created in the container's network
3500 * namespace so the device doesn't need to be moved into the
3501 * container's network namespace. The transient name will
3502 * already have been set above when we created the veth tunnel.
3503 */
3504 if (!netdev->ifindex)
3505 return false;
3506
3507 return true;
3508}
3509
e389f2af 3510int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3511{
e389f2af 3512 pid_t pid = handler->pid;
87d0990c 3513 struct lxc_netdev *netdev;
811ef482 3514
e0010464 3515 if (am_guest_unpriv())
74c6e2b0 3516 return 0;
811ef482 3517
87d0990c 3518 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3dd78294 3519 __do_free char *physname = NULL;
e389f2af 3520 int ret;
811ef482 3521
43e2a964 3522 if (!netdev_requires_move(netdev))
811ef482
CB
3523 continue;
3524
fdd6be55
CB
3525 ret = create_transient_name(netdev);
3526 if (ret < 0)
3527 return ret;
3528
3dd78294
CB
3529 if (netdev->type == LXC_NET_PHYS)
3530 physname = is_wlan(netdev->link);
3531
3532 if (physname)
fdd6be55 3533 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->transient_name);
3dd78294 3534 else
fdd6be55 3535 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->transient_name);
9c66dc4f 3536 if (ret)
fdd6be55
CB
3537 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d and rename to %s",
3538 netdev->created_name, netdev->ifindex, pid, netdev->transient_name);
811ef482 3539
fdd6be55
CB
3540 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d and renamed to %s",
3541 maybe_empty(netdev->created_name), netdev->ifindex, pid, netdev->transient_name);
811ef482
CB
3542 }
3543
3544 return 0;
3545}
3546
3c09b97c
CB
3547static int network_requires_advanced_setup(int type)
3548{
3549 if (type == LXC_NET_EMPTY)
3550 return false;
3551
3552 if (type == LXC_NET_NONE)
3553 return false;
3554
3555 return true;
3556}
3557
e389f2af 3558static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3559{
e389f2af
CB
3560 int hooks_version = handler->conf->hooks_version;
3561 const char *lxcname = handler->name;
3562 const char *lxcpath = handler->lxcpath;
e389f2af 3563 pid_t pid = handler->pid;
87d0990c 3564 struct lxc_netdev *netdev;
74c6e2b0 3565
87d0990c 3566 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3c09b97c 3567 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3568 continue;
3569
9c66dc4f
CB
3570 if (netdev->type != LXC_NET_VETH)
3571 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3572 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3573
3574 if (netdev->mtu)
3575 INFO("mtu ignored due to insufficient privilege");
3576
e389f2af
CB
3577 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3578 pid, hooks_version))
74c6e2b0
CB
3579 return -1;
3580 }
3581
3582 return 0;
3583}
3584
59eac805 3585static bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3586{
3587 int ret;
87d0990c 3588 struct lxc_netdev *netdev;
1bd8d726 3589
87d0990c 3590 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
811ef482 3591 char *hostveth = NULL;
811ef482
CB
3592
3593 /* We can only delete devices whose ifindex we have. If we don't
3594 * have the index it means that we didn't create it.
3595 */
3596 if (!netdev->ifindex)
3597 continue;
3598
0104c121
CB
3599 /*
3600 * If the network device has been moved back from the
3601 * containers network namespace, update the ifindex.
3602 */
3603 netdev->ifindex = if_nametoindex(netdev->name);
3604
6509154d 3605 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3606 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3607 if (lxc_delete_l2proxy(netdev))
3608 WARN("Failed to delete all l2proxy config");
3609 /* Don't return, let the network be cleaned up as normal. */
3610 }
3611
811ef482 3612 if (netdev->type == LXC_NET_PHYS) {
bb301db7
SB
3613 /* Physical interfaces are initially returned to the parent namespace
3614 * with their transient name to avoid collisions
3615 */
3616 netdev->ifindex = if_nametoindex(netdev->transient_name);
811ef482
CB
3617 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3618 if (ret < 0)
3619 WARN("Failed to rename interface with index %d "
b809f232
CB
3620 "from \"%s\" to its initial name \"%s\"",
3621 netdev->ifindex, netdev->name, netdev->link);
0b154989 3622 else {
29589196
CB
3623 TRACE("Renamed interface with index %d from "
3624 "\"%s\" to its initial name \"%s\"",
3625 netdev->ifindex, netdev->name,
3626 netdev->link);
0b154989
TP
3627
3628 /* Restore original MTU */
3629 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3630 if (ret < 0) {
3631 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3632 netdev->link, netdev->priv.phys_attr.mtu);
3633 } else {
3634 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3635 netdev->link, netdev->priv.phys_attr.mtu);
3636 }
3637 }
b3259dc6
TP
3638
3639 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3640 if (ret < 0)
3641 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3642 netdev->ifindex, netdev->link);
66a7c406 3643 goto clear_ifindices;
811ef482
CB
3644 }
3645
3646 ret = netdev_deconf[netdev->type](handler, netdev);
3647 if (ret < 0)
3648 WARN("Failed to deconfigure network device");
3649
811ef482 3650 if (netdev->type != LXC_NET_VETH)
66a7c406 3651 goto clear_ifindices;
811ef482 3652
811ef482
CB
3653 /* Explicitly delete host veth device to prevent lingering
3654 * devices. We had issues in LXD around this.
3655 */
f2711167 3656 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3657 hostveth = netdev->priv.veth_attr.pair;
3658 else
3659 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3660 if (is_empty_string(hostveth))
66a7c406 3661 goto clear_ifindices;
811ef482 3662
1ee56cff
CB
3663 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3664 ret = lxc_netdev_delete_by_name(hostveth);
3665 if (ret < 0)
3666 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3667
1ee56cff
CB
3668 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3669 } else if (!is_empty_string(netdev->link)) {
3670 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3671 if (ret < 0)
3672 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3673
1ee56cff
CB
3674 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3675 }
811ef482 3676
66a7c406 3677clear_ifindices:
ad2ddfcd 3678 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3679 * have cached stale data which would cause it to fail on reboot
3680 * we're we don't re-read the on-disk config file.
3681 */
3682 netdev->ifindex = 0;
3683 if (netdev->type == LXC_NET_PHYS) {
3684 netdev->priv.phys_attr.ifindex = 0;
3685 } else if (netdev->type == LXC_NET_VETH) {
3686 netdev->priv.veth_attr.veth1[0] = '\0';
3687 netdev->priv.veth_attr.ifindex = 0;
3688 }
bb301db7
SB
3689
3690 /* Clear transient name */
3691 if (!is_empty_string (netdev->transient_name))
3692 {
3693 netdev->transient_name[0] = '\0';
3694 }
811ef482
CB
3695 }
3696
bb84beda 3697 return true;
811ef482
CB
3698}
3699
3700int lxc_requests_empty_network(struct lxc_handler *handler)
3701{
87d0990c 3702 struct list_head *netdevs = &handler->conf->netdevs;
811ef482 3703 bool found_none = false, found_nic = false;
87d0990c 3704 struct lxc_netdev *netdev;
811ef482 3705
87d0990c 3706 if (list_empty(netdevs))
811ef482
CB
3707 return 0;
3708
87d0990c 3709 list_for_each_entry(netdev, netdevs, head) {
811ef482
CB
3710
3711 if (netdev->type == LXC_NET_NONE)
3712 found_none = true;
3713 else
3714 found_nic = true;
3715 }
9c66dc4f 3716
811ef482
CB
3717 if (found_none && !found_nic)
3718 return 1;
9c66dc4f 3719
811ef482
CB
3720 return 0;
3721}
3722
3723/* try to move physical nics to the init netns */
b809f232 3724int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3725{
9c66dc4f
CB
3726 __do_close int oldfd = -EBADF;
3727 int netnsfd = handler->nsfd[LXC_NS_NET];
3728 struct lxc_conf *conf = handler->conf;
811ef482 3729 int ret;
811ef482 3730 char ifname[IFNAMSIZ];
87d0990c 3731 struct lxc_netdev *netdev;
811ef482 3732
04213960
TA
3733 /*
3734 * If we weren't asked to clone a new network namespace, there's
3735 * nothing to restore.
3736 */
3737 if (!(handler->ns_clone_flags & CLONE_NEWNET))
3738 return 0;
3739
b809f232
CB
3740 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3741 * the parent network namespace. We won't have this capability if we are
3742 * unprivileged.
3743 */
d0fbc7ba 3744 if (!handler->am_root)
b809f232 3745 return 0;
811ef482 3746
b809f232 3747 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3748
0037ab49 3749 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3750 if (oldfd < 0)
3751 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3752
b809f232 3753 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3754 if (ret < 0)
3755 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3756
87d0990c 3757 list_for_each_entry(netdev, &conf->netdevs, head) {
b809f232
CB
3758 if (netdev->type != LXC_NET_PHYS)
3759 continue;
3760
3761 /* Retrieve the name of the interface in the container's network
3762 * namespace.
3763 */
3764 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3765 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3766 continue;
3767 }
b809f232 3768
bb301db7
SB
3769 /* Restore physical interfaces to host's network namespace with its transient name
3770 * to avoid collisions with the host's other interfaces.
3771 */
3772 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->transient_name);
b809f232 3773 if (ret < 0)
9c66dc4f 3774 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3775 else
9c66dc4f 3776 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3777 }
811ef482 3778
b809f232 3779 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3780 if (ret < 0)
3781 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3782
3783 return 0;
811ef482
CB
3784}
3785
3786static int setup_hw_addr(char *hwaddr, const char *ifname)
3787{
9c66dc4f 3788 __do_close int fd = -EBADF;
811ef482
CB
3789 struct sockaddr sockaddr;
3790 struct ifreq ifr;
9c66dc4f 3791 int ret;
811ef482
CB
3792
3793 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3794 if (ret)
3795 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3796
3797 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3798 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3799 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3800
ad9429e5 3801 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3802 if (fd < 0)
3803 return -1;
3804
3805 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3806 if (ret)
6d1400b5 3807 SYSERROR("Failed to perform ioctl");
3808
9c66dc4f 3809 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3810
3811 return ret;
3812}
3813
2ec31bbd 3814static int setup_ipv4_addr(struct lxc_netdev *netdev)
811ef482 3815{
2ec31bbd 3816 int ifindex = netdev->ifindex;
811ef482 3817 int err;
2ec31bbd 3818 struct lxc_inetdev *inet4dev;
811ef482 3819
2ec31bbd
CB
3820 list_for_each_entry(inet4dev, &netdev->ipv4_list, head) {
3821 err = lxc_ipv4_addr_add(ifindex, &inet4dev->addr,
3822 &inet4dev->bcast, inet4dev->prefix);
9c66dc4f
CB
3823 if (err)
3824 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3825 }
3826
3827 return 0;
3828}
3829
cd32fc73 3830static int setup_ipv6_addr(struct lxc_netdev *netdev)
811ef482 3831{
811ef482 3832 int err;
cd32fc73
CB
3833 struct lxc_inet6dev *inet6dev;
3834 int ifindex = netdev->ifindex;
811ef482 3835
cd32fc73 3836 list_for_each_entry(inet6dev, &netdev->ipv6_list, head) {
811ef482
CB
3837 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3838 &inet6dev->mcast, &inet6dev->acast,
3839 inet6dev->prefix);
9c66dc4f
CB
3840 if (err)
3841 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3842 }
3843
3844 return 0;
3845}
3846
8bf64b77 3847static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3848{
811ef482 3849 int err;
009d6127 3850 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482 3851
811ef482 3852 /* set a mac address */
9c66dc4f
CB
3853 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3854 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3855
3856 /* setup ipv4 addresses on the interface */
2ec31bbd 3857 if (setup_ipv4_addr(netdev))
9c66dc4f 3858 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3859
3860 /* setup ipv6 addresses on the interface */
cd32fc73 3861 if (setup_ipv6_addr(netdev))
9c66dc4f 3862 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3863
3864 /* set the network device up */
3865 if (netdev->flags & IFF_UP) {
8bf64b77 3866 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3867 if (err)
3868 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3869
3870 /* the network is up, make the loopback up too */
3871 err = lxc_netdev_up("lo");
9c66dc4f
CB
3872 if (err)
3873 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3874 }
3875
811ef482 3876 /* setup ipv4 gateway on the interface */
a2f9a670 3877 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3878 if (!(netdev->flags & IFF_UP))
3879 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3880
2ec31bbd 3881 if (list_empty(&netdev->ipv4_list))
9c66dc4f 3882 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3883
a2f9a670 3884 /* Setup device route if ipv4_gateway_dev is enabled */
3885 if (netdev->ipv4_gateway_dev) {
3886 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3887 if (err < 0)
3888 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3889 } else {
009d6127 3890 /* Check the gateway address is valid */
3891 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3892 return ret_set_errno(-1, errno);
009d6127 3893
3894 /* Try adding a default route to the gateway address */
811ef482 3895 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3896 if (err < 0) {
3897 /* If adding the default route fails, this could be because the
3898 * gateway address is in a different subnet to the container's address.
3899 * To work around this, we try adding a static device route to the
3900 * gateway address first, and then try again.
3901 */
a2f9a670 3902 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3903 if (err < 0)
3904 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3905
a2f9a670 3906 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3907 if (err < 0)
3908 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3909 }
3910 }
3911 }
3912
3913 /* setup ipv6 gateway on the interface */
a2f9a670 3914 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3915 if (!(netdev->flags & IFF_UP))
3916 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3917
cd32fc73 3918 if (list_empty(&netdev->ipv6_list) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
9c66dc4f 3919 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3920
a2f9a670 3921 /* Setup device route if ipv6_gateway_dev is enabled */
3922 if (netdev->ipv6_gateway_dev) {
3923 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3924 if (err < 0)
3925 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3926 } else {
009d6127 3927 /* Check the gateway address is valid */
3928 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3929 return ret_set_errno(-1, errno);
009d6127 3930
3931 /* Try adding a default route to the gateway address */
811ef482 3932 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3933 if (err < 0) {
3934 /* If adding the default route fails, this could be because the
3935 * gateway address is in a different subnet to the container's address.
3936 * To work around this, we try adding a static device route to the
3937 * gateway address first, and then try again.
3938 */
a2f9a670 3939 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3940 if (err < 0)
3941 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3942
a2f9a670 3943 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3944 if (err < 0)
3945 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3946 }
3947 }
3948 }
3949
8bf64b77 3950 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3951
3952 return 0;
3953}
3954
3a197a1b
CB
3955/**
3956 * Consider the following network layout:
3957 *
3958 * lxc.net.0.type = phys
3959 * lxc.net.0.link = eth2
3960 * lxc.net.0.name = eth%d
3961 *
3962 * lxc.net.1.type = phys
3963 * lxc.net.1.link = eth1
3964 * lxc.net.1.name = eth0
3965 *
3966 * If we simply follow this order and create the first network first the kernel
3967 * will allocate eth0 for the first network but the second network requests
3968 * that eth1 be renamed to eth0 in the container's network namespace which
3969 * would lead to a clash.
3970 *
3971 * Note, we don't handle cases like:
3972 *
3973 * lxc.net.0.type = phys
3974 * lxc.net.0.link = eth2
3975 * lxc.net.0.name = eth0
3976 *
3977 * lxc.net.1.type = phys
3978 * lxc.net.1.link = eth1
3979 * lxc.net.1.name = eth0
3980 *
3981 * That'll brutally fail of course but there's nothing we can do about it.
3982 */
87d0990c 3983int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf)
811ef482 3984{
3a197a1b 3985 bool needs_second_pass = false;
87d0990c
CB
3986 struct lxc_netdev *netdev;
3987 const struct list_head *netdevs = &conf->netdevs;
811ef482 3988
87d0990c 3989 if (list_empty(netdevs))
3a197a1b
CB
3990 return 0;
3991
3992 /* Configure all devices that have a specific target name. */
87d0990c 3993 list_for_each_entry(netdev, netdevs, head) {
8bf64b77 3994 int ret;
811ef482 3995
3a197a1b
CB
3996 if (is_empty_string(netdev->name) || strequal(netdev->name, "eth%d")) {
3997 needs_second_pass = true;
3998 continue;
3999 }
4000
bad2f913 4001 ret = netdev_configure_container[netdev->type](netdev);
8bf64b77
CB
4002 if (!ret)
4003 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
4004 if (ret)
4005 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482 4006 }
3a197a1b
CB
4007 INFO("Finished setting up network devices with caller assigned names");
4008
4009 if (needs_second_pass) {
4010 /* Configure all devices that have a kernel assigned name. */
87d0990c 4011 list_for_each_entry(netdev, netdevs, head) {
3a197a1b 4012 int ret;
811ef482 4013
3a197a1b
CB
4014 if (!is_empty_string(netdev->name) && !strequal(netdev->name, "eth%d"))
4015 continue;
4016
4017 ret = netdev_configure_container[netdev->type](netdev);
4018 if (!ret)
4019 ret = lxc_network_setup_in_child_namespaces_common(netdev);
4020 if (ret)
4021 return log_error_errno(-1, errno, "Failed to setup netdev");
4022 }
4023 INFO("Finished setting up network devices with kernel assigned names");
4024 }
811ef482
CB
4025
4026 return 0;
4027}
7ab1ba02 4028
3c09b97c 4029int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02 4030{
7ab1ba02 4031 int data_sock = handler->data_sock[0];
87d0990c 4032 struct lxc_netdev *netdev;
7ab1ba02 4033
87d0990c 4034 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
7ab1ba02 4035 int ret;
7ab1ba02 4036
3c09b97c 4037 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
4038 continue;
4039
7fbb15ec 4040 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 4041 if (ret < 0)
7ab1ba02 4042 return -1;
e389f2af 4043
fdd6be55 4044 ret = lxc_send_nointr(data_sock, netdev->transient_name, IFNAMSIZ, MSG_NOSIGNAL);
e389f2af
CB
4045 if (ret < 0)
4046 return -1;
4047
fdd6be55 4048 TRACE("Sent network device name \"%s\" to child", netdev->transient_name);
7ab1ba02
CB
4049 }
4050
4051 return 0;
4052}
4053
3c09b97c 4054int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02 4055{
7ab1ba02 4056 int data_sock = handler->data_sock[1];
87d0990c 4057 struct lxc_netdev *netdev;
7ab1ba02 4058
87d0990c 4059 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
7ab1ba02 4060 int ret;
7ab1ba02 4061
3c09b97c 4062 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
4063 continue;
4064
e3233f26 4065 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 4066 if (ret < 0)
7ab1ba02 4067 return -1;
e389f2af 4068
fdd6be55 4069 ret = lxc_recv_nointr(data_sock, netdev->transient_name, IFNAMSIZ, 0);
e389f2af
CB
4070 if (ret < 0)
4071 return -1;
54256301 4072
fdd6be55 4073 TRACE("Received network device name \"%s\" from parent", netdev->transient_name);
7ab1ba02
CB
4074 }
4075
4076 return 0;
4077}
a1ae535a
CB
4078
4079int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
4080{
a1ae535a 4081 int data_sock = handler->data_sock[0];
87d0990c
CB
4082 struct lxc_netdev *netdev;
4083 struct list_head *netdevs = &handler->conf->netdevs;
a1ae535a
CB
4084
4085 if (!handler->am_root)
4086 return 0;
4087
87d0990c 4088 list_for_each_entry(netdev, netdevs, head) {
a1ae535a 4089 int ret;
a1ae535a
CB
4090
4091 /* Send network device name in the child's namespace to parent. */
7fbb15ec 4092 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 4093 if (ret < 0)
7729f8e5 4094 return -1;
a1ae535a
CB
4095
4096 /* Send network device ifindex in the child's namespace to
4097 * parent.
4098 */
7fbb15ec 4099 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 4100 if (ret < 0)
7729f8e5 4101 return -1;
a1150aa1
CB
4102
4103 TRACE("Sent network device %s with ifindex %d to parent", maybe_empty(netdev->name), netdev->ifindex);
a1ae535a
CB
4104 }
4105
87d0990c 4106 if (!list_empty(netdevs))
e389f2af
CB
4107 TRACE("Sent network device names and ifindices to parent");
4108
a1ae535a 4109 return 0;
a1ae535a
CB
4110}
4111
4112int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
4113{
a1ae535a 4114 int data_sock = handler->data_sock[1];
87d0990c 4115 struct lxc_netdev *netdev;
a1ae535a
CB
4116
4117 if (!handler->am_root)
4118 return 0;
4119
87d0990c 4120 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
a1ae535a 4121 int ret;
a1ae535a
CB
4122
4123 /* Receive network device name in the child's namespace to
4124 * parent.
4125 */
e3233f26 4126 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 4127 if (ret < 0)
7729f8e5 4128 return -1;
a1ae535a
CB
4129
4130 /* Receive network device ifindex in the child's namespace to
4131 * parent.
4132 */
e3233f26 4133 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 4134 if (ret < 0)
7729f8e5 4135 return -1;
a1150aa1
CB
4136
4137 TRACE("Received network device %s with ifindex %d from child", maybe_empty(netdev->name), netdev->ifindex);
a1ae535a
CB
4138 }
4139
4140 return 0;
a1ae535a 4141}
bb84beda
CB
4142
4143void lxc_delete_network(struct lxc_handler *handler)
4144{
4145 bool bret;
4146
37631ddb
CB
4147 /*
4148 * Always expose namespace fd paths to network down hooks via
4149 * environment variables. No need to complicate things by passing them
4150 * as additional hook arguments.
4151 */
4152 lxc_expose_namespace_environment(handler);
4153
bb84beda
CB
4154 if (handler->am_root)
4155 bret = lxc_delete_network_priv(handler);
4156 else
4157 bret = lxc_delete_network_unpriv(handler);
4158 if (!bret)
4159 DEBUG("Failed to delete network devices");
4160 else
4161 DEBUG("Deleted network devices");
4162}
1cd95214 4163
1cd95214
CB
4164int lxc_netns_set_nsid(int fd)
4165{
41a3300d 4166 int ret;
0ce60f0d
CB
4167 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
4168 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4169 NLMSG_ALIGN(1024)];
1cd95214 4170 struct nl_handler nlh;
a5f5cb41 4171 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
0ce60f0d
CB
4172 struct nlmsghdr *hdr;
4173 struct rtgenmsg *msg;
9d036caa
CB
4174 const __s32 ns_id = -1;
4175 const __u32 netns_fd = fd;
1cd95214 4176
a5f5cb41 4177 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
1cd95214 4178 if (ret < 0)
41a3300d 4179 return -1;
1cd95214 4180
0ce60f0d 4181 memset(buf, 0, sizeof(buf));
6ce39620
CB
4182
4183#pragma GCC diagnostic push
4184#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
4185 hdr = (struct nlmsghdr *)buf;
4186 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4187#pragma GCC diagnostic pop
1cd95214 4188
0ce60f0d
CB
4189 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4190 hdr->nlmsg_type = RTM_NEWNSID;
4191 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4192 hdr->nlmsg_pid = 0;
4193 hdr->nlmsg_seq = RTM_NEWNSID;
4194 msg->rtgen_family = AF_UNSPEC;
1cd95214 4195
9d036caa
CB
4196 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4197 if (ret < 0)
a5f5cb41 4198 return ret_errno(ENOMEM);
9d036caa
CB
4199
4200 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4201 if (ret < 0)
a5f5cb41 4202 return ret_errno(ENOMEM);
1cd95214 4203
a5f5cb41 4204 return __netlink_transaction(nlh_ptr, hdr, hdr);
1cd95214 4205}
938980ba
CB
4206
/*
 * Index the routing attributes found in the @len bytes starting at @rta.
 *
 * @tb must have room for @max + 1 entries. All entries are reset to NULL and
 * tb[type] is then pointed at the first attribute of each type that is not
 * larger than @max; later duplicates and larger types are ignored.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(*tb) * (max + 1));

	while (RTA_OK(rta, len)) {
		const unsigned short attr_type = rta->rta_type;

		/* Only the first occurrence of a known type is recorded. */
		if (attr_type <= max && tb[attr_type] == NULL)
			tb[attr_type] = rta;

		/* RTA_NEXT() also reduces len by the size of this attribute. */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
		rta = RTA_NEXT(rta, len);
#pragma GCC diagnostic pop
	}

	return 0;
}
4226
/*
 * Return the first sizeof(__s32) bytes of @rta's payload as a signed 32-bit
 * integer in host byte order.
 *
 * The value is copied out with memcpy() rather than read through a casted
 * pointer, so the const qualifier is kept and no particular alignment or
 * effective type of the underlying buffer is required. The caller must make
 * sure the payload holds at least sizeof(__s32) bytes.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, (const char *)rta + RTA_LENGTH(0), sizeof(value));

	return value;
}
4231
/*
 * First routing attribute of a netns message: it starts right after the
 * (netlink-aligned) struct rtgenmsg that @r points to.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
4236
4237int lxc_netns_get_nsid(int fd)
4238{
a5f5cb41
CB
4239 struct nl_handler nlh;
4240 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
938980ba
CB
4241 int ret;
4242 ssize_t len;
4243 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
4244 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4245 NLMSG_ALIGN(1024)];
938980ba 4246 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
938980ba
CB
4247 struct nlmsghdr *hdr;
4248 struct rtgenmsg *msg;
938980ba
CB
4249 __u32 netns_fd = fd;
4250
a5f5cb41 4251 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
938980ba
CB
4252 if (ret < 0)
4253 return -1;
4254
4255 memset(buf, 0, sizeof(buf));
6ce39620
CB
4256
4257#pragma GCC diagnostic push
4258#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4259 hdr = (struct nlmsghdr *)buf;
4260 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4261#pragma GCC diagnostic pop
938980ba
CB
4262
4263 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4264 hdr->nlmsg_type = RTM_GETNSID;
4265 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4266 hdr->nlmsg_pid = 0;
4267 hdr->nlmsg_seq = RTM_GETNSID;
4268 msg->rtgen_family = AF_UNSPEC;
4269
9d036caa 4270 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
a5f5cb41
CB
4271 if (ret < 0)
4272 return ret_errno(ENOMEM);
938980ba 4273
a5f5cb41 4274 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
938980ba
CB
4275 if (ret < 0)
4276 return -1;
4277
4278 msg = NLMSG_DATA(hdr);
4279 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4280 if (len < 0)
a5f5cb41 4281 return ret_errno(EINVAL);
938980ba 4282
6ce39620
CB
4283#pragma GCC diagnostic push
4284#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4285 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4286 if (tb[__LXC_NETNSA_NSID])
4287 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4288#pragma GCC diagnostic pop
938980ba
CB
4289
4290 return -1;
4291}
e389f2af
CB
4292
4293int lxc_create_network(struct lxc_handler *handler)
4294{
4295 int ret;
4296
e389f2af
CB
4297 if (handler->am_root) {
4298 ret = lxc_create_network_priv(handler);
4299 if (ret)
4300 return -1;
4301
4302 return lxc_network_move_created_netdev_priv(handler);
4303 }
4304
4305 return lxc_create_network_unpriv(handler);
4306}