]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
tree-wide: fix public lxc header inclusions
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
58db1a61 29#include "netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
f40988c7 39#include "process_utils.h"
fdd6be55 40#include "string_utils.h"
59524108 41#include "syscall_wrappers.h"
0d204771 42#include "utils.h"
0ad19a3f 43
9de31d5a 44#ifndef HAVE_STRLCPY
58db1a61 45#include "strlcpy.h"
9de31d5a
CB
46#endif
47
/* Declare this file's logging category via the project's lxc_log_define(). */
lxc_log_define(network, lxc);

/*
 * Callback signatures for per-network-type handlers.
 * NOTE(review): judging by the names these are the host-side configure,
 * container-side configure and host-side shutdown hooks - confirm against
 * the dispatch tables that use them.
 */
typedef int (*netdev_configure_server_cb)(struct lxc_handler *, struct lxc_netdev *);
typedef int (*netdev_configure_container_cb)(struct lxc_netdev *);
typedef int (*netdev_shutdown_server_cb)(struct lxc_handler *, struct lxc_netdev *);
53
3392d379
CB
54const struct lxc_network_info {
55 const char *name;
fdd6be55
CB
56 const char template[IFNAMSIZ];
57 size_t template_len;
3392d379 58} lxc_network_info[LXC_NET_MAXCONFTYPE + 1] = {
fdd6be55
CB
59 [LXC_NET_EMPTY] = { "empty", "emptXXXXXX", STRLITERALLEN("emptXXXXXX") },
60 [LXC_NET_VETH] = { "veth", "vethXXXXXX", STRLITERALLEN("vethXXXXXX") },
61 [LXC_NET_MACVLAN] = { "macvlan", "macvXXXXXX", STRLITERALLEN("macvXXXXXX") },
62 [LXC_NET_IPVLAN] = { "ipvlan", "ipvlXXXXXX", STRLITERALLEN("ipvlXXXXXX") },
63 [LXC_NET_PHYS] = { "phys", "physXXXXXX", STRLITERALLEN("physXXXXXX") },
64 [LXC_NET_VLAN] = { "vlan", "vlanXXXXXX", STRLITERALLEN("vlanXXXXXX") },
65 [LXC_NET_NONE] = { "none", "noneXXXXXX", STRLITERALLEN("noneXXXXXX") },
66 [LXC_NET_MAXCONFTYPE] = { NULL, "", 0 }
3392d379
CB
67};
68
69const char *lxc_net_type_to_str(int type)
70{
71 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
72 return NULL;
73
74 return lxc_network_info[type].name;
75}
76
77static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
78
79char *lxc_ifname_alnum_case_sensitive(char *template)
80{
81 char name[IFNAMSIZ];
82 size_t i = 0;
83#ifdef HAVE_RAND_R
84 unsigned int seed;
85
86 seed = randseed(false);
87#else
88
89 (void)randseed(true);
90#endif
91
92 if (strlen(template) >= IFNAMSIZ)
93 return NULL;
94
95 /* Generate random names until we find one that doesn't exist. */
96 for (;;) {
97 name[0] = '\0';
98 (void)strlcpy(name, template, IFNAMSIZ);
99
100 for (i = 0; i < strlen(name); i++) {
101 if (name[i] == 'X') {
102#ifdef HAVE_RAND_R
103 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
104#else
105 name[i] = padchar[rand() % strlen(padchar)];
106#endif
107 }
108 }
109
110 if (if_nametoindex(name) == 0)
111 break;
112 }
113
114 (void)strlcpy(template, name, strlen(template) + 1);
115
116 return template;
117}
/* Name of the loopback interface. */
static const char loop_device[] = "lo";
811ef482 119
b670016a 120static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 121{
d16bda44 122 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
8f82874c 123 struct nl_handler nlh;
d16bda44
CB
124 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
125 int addrlen, err;
8f82874c 126 struct rtmsg *rt;
8f82874c 127
128 addrlen = family == AF_INET ? sizeof(struct in_addr)
129 : sizeof(struct in6_addr);
130
d16bda44 131 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 132 if (err)
133 return err;
134
8f82874c 135 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
136 if (!nlmsg)
d16bda44 137 return -ENOMEM;
8f82874c 138
139 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
140 if (!answer)
a5f5cb41 141 return -ENOMEM;
8f82874c 142
143 nlmsg->nlmsghdr->nlmsg_flags =
144 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 145 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 146
147 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
148 if (!rt)
a5f5cb41 149 return -ENOMEM;
d16bda44 150
8f82874c 151 rt->rtm_family = family;
152 rt->rtm_table = RT_TABLE_MAIN;
153 rt->rtm_scope = RT_SCOPE_LINK;
154 rt->rtm_protocol = RTPROT_BOOT;
155 rt->rtm_type = RTN_UNICAST;
156 rt->rtm_dst_len = netmask;
157
8f82874c 158 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
159 return -EINVAL;
160
8f82874c 161 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
162 return -EINVAL;
163
164 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 165}
166
/* Add an IPv4 route for @dest/@netmask on device @ifindex (RTM_NEWROUTE). */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
171
/* Add an IPv6 route for @dest/@netmask on device @ifindex (RTM_NEWROUTE). */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
176
/* Remove the IPv4 route for @dest/@netmask on device @ifindex (RTM_DELROUTE). */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
181
/* Remove the IPv6 route for @dest/@netmask on device @ifindex (RTM_DELROUTE). */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
186
303707f6 187static int setup_ipv4_routes(struct lxc_netdev *netdev)
d4a7da46 188{
303707f6
CB
189 int ifindex = netdev->priv.veth_attr.ifindex;
190 struct lxc_inetdev *inetdev;
d4a7da46 191 int err;
192
303707f6 193 list_for_each_entry(inetdev, &netdev->priv.veth_attr.ipv4_routes, head) {
d4a7da46 194 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
195 if (err)
196 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 197 }
198
199 return 0;
200}
201
6bf0c06b 202static int setup_ipv6_routes(struct lxc_netdev *netdev)
d4a7da46 203{
d4a7da46 204 int err;
6bf0c06b
CB
205 struct lxc_inet6dev *inet6dev;
206 int ifindex = netdev->priv.veth_attr.ifindex;
d4a7da46 207
6bf0c06b 208 list_for_each_entry(inet6dev, &netdev->priv.veth_attr.ipv6_routes, head) {
d4a7da46 209 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
210 if (err)
211 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 212 }
213
214 return 0;
215}
216
2ec31bbd 217static int setup_ipv4_addr_routes(struct lxc_netdev *netdev)
6dfa9581 218{
6dfa9581 219 int err;
2ec31bbd
CB
220 struct lxc_inetdev *inetdev;
221 int ifindex;
6dfa9581 222
2ec31bbd
CB
223 if (netdev->type != LXC_NET_VETH)
224 return ret_errno(EINVAL);
6dfa9581 225
2ec31bbd 226 ifindex = netdev->priv.veth_attr.ifindex;
05a54a64 227 list_for_each_entry(inetdev, &netdev->ipv4_addresses, head) {
6dfa9581 228 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
6dfa9581 229 if (err)
9c66dc4f 230 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
231 }
232
233 return 0;
234}
235
cd32fc73 236static int setup_ipv6_addr_routes(struct lxc_netdev *netdev)
6dfa9581 237{
6dfa9581 238 int err;
cd32fc73
CB
239 struct lxc_inet6dev *inet6dev;
240 int ifindex;
6dfa9581 241
cd32fc73
CB
242 if (netdev->type != LXC_NET_VETH)
243 return ret_errno(EINVAL);
244
245 ifindex = netdev->priv.veth_attr.ifindex;
05a54a64 246 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
6dfa9581
TP
247
248 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
249 if (err)
9c66dc4f 250 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
251 }
252
253 return 0;
254}
255
5fe147e9 256static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 257{
d16bda44 258 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
5fe147e9 259 struct nl_handler nlh;
d16bda44
CB
260 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
261 int addrlen, err;
5fe147e9 262 struct ndmsg *rt;
6dfa9581 263
5fe147e9 264 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 265
d16bda44 266 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
267 if (err)
268 return err;
6dfa9581 269
5fe147e9
TP
270 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
271 if (!nlmsg)
d16bda44 272 return -ENOMEM;
6dfa9581 273
5fe147e9
TP
274 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
275 if (!answer)
d16bda44 276 return -ENOMEM;
6dfa9581 277
5fe147e9
TP
278 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
279 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 280
5fe147e9
TP
281 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
282 if (!rt)
d16bda44
CB
283 return -ENOMEM;
284
5fe147e9
TP
285 rt->ndm_ifindex = ifindex;
286 rt->ndm_flags = NTF_PROXY;
287 rt->ndm_type = NDA_DST;
288 rt->ndm_family = family;
6dfa9581 289
5fe147e9 290 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 291 return -EINVAL;
6dfa9581 292
d16bda44 293 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
294}
295
296static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
297{
298 int ret;
299 char path[PATH_MAX];
300 char buf[1] = "";
301
302 if (family != AF_INET && family != AF_INET6)
596a002c 303 return ret_set_errno(-1, EINVAL);
6dfa9581 304
387c1c70
CB
305 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
306 family == AF_INET ? "ipv4" : "ipv6", ifname,
307 "forwarding");
308 if (ret < 0)
596a002c 309 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
310
311 return lxc_read_file_expect(path, buf, 1, "1");
312}
313
622f05c7
TP
/*
 * Payload of the IFLA_BRIDGE_VLAN_INFO attribute sent by lxc_bridge_vlan():
 * a set of BRIDGE_VLAN_INFO_* flags followed by the VLAN ID.
 */
struct bridge_vlan_info {
	__u16 flags;	/* BRIDGE_VLAN_INFO_* bits */
	__u16 vid;	/* VLAN ID */
};
318
319static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
320{
321 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
322 struct nl_handler nlh;
323 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
324 int err;
325 struct ifinfomsg *ifi;
326 struct rtattr *nest;
327 unsigned short bridge_flags = 0;
328 struct bridge_vlan_info vlan_info;
329
330 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
331 if (err)
332 return err;
333
334 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
335 if (!nlmsg)
336 return ret_errno(ENOMEM);
337
338 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
339 if (!answer)
340 return ret_errno(ENOMEM);
341
342 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
343 nlmsg->nlmsghdr->nlmsg_type = operation;
344
345 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
346 if (!ifi)
347 return ret_errno(ENOMEM);
348 ifi->ifi_family = AF_BRIDGE;
349 ifi->ifi_index = ifindex;
350
351 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
352 if (!nest)
353 return ret_errno(ENOMEM);
354
355 bridge_flags |= BRIDGE_FLAGS_MASTER;
356 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
357 return ret_errno(ENOMEM);
358
359 vlan_info.vid = vlan_id;
360 vlan_info.flags = 0;
361 if (!tagged)
362 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
363
364 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
365 return ret_errno(ENOMEM);
366
367 nla_end_nested(nlmsg, nest);
368
369 return netlink_transaction(nlh_ptr, nlmsg, answer);
370}
371
/* Add VLAN @vlan_id to bridge port @ifindex via an RTM_SETLINK request. */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	const unsigned short operation = RTM_SETLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, tagged);
}
376
/*
 * Remove VLAN @vlan_id from bridge port @ifindex via an RTM_DELLINK
 * request. The request is always sent with tagged == false.
 */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	const unsigned short operation = RTM_DELLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, false);
}
381
382static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
383{
384 struct lxc_list *iterator;
385 int err;
386
387 lxc_list_for_each(iterator, vlan_ids) {
388 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
389
390 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
391 if (err)
392 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
393 }
394
395 return 0;
396}
397
33320936
TP
398static int validate_veth(struct lxc_netdev *netdev)
399{
400 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
401 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
402 if (netdev->priv.veth_attr.vlan_id_set)
403 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
404
405 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
406 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
407 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
408 }
409
410 if (netdev->priv.veth_attr.vlan_id_set) {
411 struct lxc_list *it;
412 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
413 unsigned short i = PTR_TO_USHORT(it->elem);
414 if (i == netdev->priv.veth_attr.vlan_id)
415 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
416 }
417 }
418
419 return 0;
420}
421
422static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
423{
424 int err, rc, veth1index;
425 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
426 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
427
428 /* Skip setup if no VLAN options are specified. */
429 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
430 return 0;
431
432 /* Check vlan filtering is enabled on parent bridge. */
387c1c70
CB
433 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
434 if (rc < 0)
33320936
TP
435 return -1;
436
437 rc = lxc_read_from_file(path, buf, sizeof(buf));
438 if (rc < 0)
439 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
440
441 buf[rc - 1] = '\0';
442
6ee997a7 443 if (!strequal(buf, "1"))
33320936
TP
444 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
445
446 /* Get veth1 ifindex for use with netlink. */
447 veth1index = if_nametoindex(veth1);
448 if (!veth1index)
449 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
450
451 /* Configure untagged VLAN settings on bridge port if specified. */
452 if (netdev->priv.veth_attr.vlan_id_set) {
453 unsigned short default_pvid;
454
455 /* Get the bridge's default VLAN PVID. */
387c1c70
CB
456 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
457 if (rc < 0)
33320936
TP
458 return -1;
459
460 rc = lxc_read_from_file(path, buf, sizeof(buf));
461 if (rc < 0)
462 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
463
464 buf[rc - 1] = '\0';
465 err = get_u16(&default_pvid, buf, 0);
466 if (err)
467 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
468
469 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
470 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
471 err = lxc_bridge_vlan_del(veth1index, default_pvid);
472 if (err)
473 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
474 }
475
476 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
477 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
478 if (err)
479 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
480 }
481 }
482
483 /* Configure tagged VLAN settings on bridge port if specified. */
484 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
485 if (err)
486 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
487
488 return 0;
489}
490
8f7c3358
TP
/*
 * Arguments handed to lxc_ovs_setup_bridge_vlan_exec(), which turns them
 * into an "ovs-vsctl set port ..." command line.
 */
struct ovs_veth_vlan_args {
	const char *nic;	/* Port (interface) name passed to ovs-vsctl. */
	const char *vlan_mode;	/* Port VLAN mode. */
	short vlan_id;		/* PVID VLAN ID. */
	char *trunks;		/* Comma delimited list of tagged VLAN IDs. */
};
497
d2f8b272
TP
498static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args *args)
499{
500 free_disarm(args->trunks);
501}
8f7c3358
TP
502
503static int lxc_ovs_setup_bridge_vlan_exec(void *data)
504{
505 struct ovs_veth_vlan_args *args = data;
785e1540
TP
506 __do_free char *vlan_mode = NULL, *tag = NULL, *trunks = NULL;
507
508 if (!args->vlan_mode)
509 return ret_errno(EINVAL);
8f7c3358
TP
510
511 vlan_mode = must_concat(NULL, "vlan_mode=", args->vlan_mode, (char *)NULL);
512
785e1540 513 if (args->vlan_id > BRIDGE_VLAN_NONE) {
8f7c3358
TP
514 char buf[5];
515 int rc;
516
387c1c70
CB
517 rc = strnprintf(buf, sizeof(buf), "%u", args->vlan_id);
518 if (rc < 0)
72e8122b 519 return log_error_errno(-1, EINVAL, "Failed to parse ovs bridge vlan \"%d\"", args->vlan_id);
8f7c3358
TP
520
521 tag = must_concat(NULL, "tag=", buf, (char *)NULL);
522 }
523
785e1540 524 if (args->trunks)
8f7c3358
TP
525 trunks = must_concat(NULL, "trunks=", args->trunks, (char *)NULL);
526
527 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
785e1540 528 if (tag && trunks)
8f7c3358 529 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, trunks, (char *)NULL);
785e1540 530 else if (tag)
8f7c3358 531 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, (char *)NULL);
785e1540 532 else if (trunks)
8f7c3358
TP
533 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, trunks, (char *)NULL);
534 else
535 return -EINVAL;
536
537 return -errno;
538}
539
540static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
541{
542 int taggedLength = lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids);
543 struct ovs_veth_vlan_args args;
544 args.nic = veth1;
1ee07848
TP
545 args.vlan_mode = NULL;
546 args.vlan_id = BRIDGE_VLAN_NONE;
547 args.trunks = NULL;
8f7c3358
TP
548
549 /* Skip setup if no VLAN options are specified. */
550 if (!netdev->priv.veth_attr.vlan_id_set && taggedLength <= 0)
551 return 0;
552
553 /* Configure untagged VLAN settings on bridge port if specified. */
554 if (netdev->priv.veth_attr.vlan_id_set) {
555 if (netdev->priv.veth_attr.vlan_id == BRIDGE_VLAN_NONE && taggedLength <= 0)
556 return log_error_errno(-1, EINVAL, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
557
558 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
559 * Also set the vlan_mode=access, which will drop any tagged frames.
560 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
561 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
562 */
563 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
564 args.vlan_mode = "access";
565 args.vlan_id = netdev->priv.veth_attr.vlan_id;
566 }
567 }
568
569 if (taggedLength > 0) {
570 args.vlan_mode = "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
571
572 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
573 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
574 args.vlan_mode = "native-untagged";
575 }
576
577 struct lxc_list *iterator;
578 lxc_list_for_each(iterator, &netdev->priv.veth_attr.vlan_tagged_ids) {
579 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
580 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
581 int rc;
582
387c1c70
CB
583 rc = strnprintf(buf, sizeof(buf), "%u", vlan_id);
584 if (rc < 0) {
3fe6b5cf 585 free_ovs_veth_vlan_args(&args);
8f7c3358 586 return log_error_errno(-1, EINVAL, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id, veth1);
3fe6b5cf 587 }
8f7c3358 588
1ee07848
TP
589 if (args.trunks)
590 args.trunks = must_concat(NULL, args.trunks, buf, ",", (char *)NULL);
591 else
592 args.trunks = must_concat(NULL, buf, ",", (char *)NULL);
8f7c3358
TP
593 }
594 }
595
1ee07848 596 if (args.vlan_mode) {
8f7c3358
TP
597 int ret;
598 char cmd_output[PATH_MAX];
599
600 ret = run_command(cmd_output, sizeof(cmd_output), lxc_ovs_setup_bridge_vlan_exec, (void *)&args);
3fe6b5cf
TP
601 if (ret < 0) {
602 free_ovs_veth_vlan_args(&args);
8f7c3358 603 return log_error_errno(-1, ret, "Failed to setup openvswitch vlan on port \"%s\": %s", args.nic, cmd_output);
3fe6b5cf 604 }
8f7c3358
TP
605 }
606
3fe6b5cf 607 free_ovs_veth_vlan_args(&args);
8f7c3358
TP
608 return 0;
609}
610
bad2f913 611static int netdev_configure_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 612{
54256301 613 int err;
a00fbab5 614 unsigned int mtu = 1500;
811ef482
CB
615 char *veth1, *veth2;
616 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 617
33320936
TP
618 err = validate_veth(netdev);
619 if (err)
620 return err;
621
f2711167 622 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
623 veth1 = netdev->priv.veth_attr.pair;
624 if (handler->conf->reboot)
625 lxc_netdev_delete_by_name(veth1);
626 } else {
387c1c70
CB
627 err = strnprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
628 if (err < 0)
811ef482
CB
629 return -1;
630
3646ffd9 631 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
811ef482
CB
632 if (!veth1)
633 return -1;
634
635 /* store away for deconf */
636 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
637 }
638
387c1c70
CB
639 err = strnprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
640 if (err < 0)
d34212ad
CB
641 return -1;
642
3646ffd9 643 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
811ef482 644 if (!veth2)
54256301
CB
645 return -1;
646
a00fbab5
TP
647 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
648 if (netdev->mtu) {
649 if (lxc_safe_uint(netdev->mtu, &mtu))
650 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 651 } else if (!is_empty_string(netdev->link)) {
54256301 652 int ifindex_mtu;
811ef482 653
54256301
CB
654 ifindex_mtu = if_nametoindex(netdev->link);
655 if (ifindex_mtu) {
656 mtu = netdev_get_mtu(ifindex_mtu);
657 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
658 }
659 }
660
661 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
9c66dc4f
CB
662 if (err)
663 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 664
fdd6be55
CB
665 /*
666 * Veth devices are directly created in the container's network
667 * namespace so the device doesn't need to be moved into the
668 * container's network namespace. Make this explicit by setting the
669 * devices ifindex to 0.
670 */
671 netdev->ifindex = 0;
672
24190194
CB
673 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
674
fdd6be55
CB
675 /*
676 * Since the device won't be moved transient name generation won't
677 * happen. But the transient name is needed for the container to
678 * retrieve the ifindex for the device.
679 */
680 strlcpy(netdev->transient_name, veth2, IFNAMSIZ);
681
682 /*
683 * Changing the high byte of the mac address to 0xfe, the bridge interface
811ef482 684 * will always keep the host's mac address and not take the mac address
fdd6be55
CB
685 * of a container.
686 */
811ef482
CB
687 err = setup_private_host_hw_addr(veth1);
688 if (err) {
6d1400b5 689 errno = -err;
690 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
691 goto out_delete;
692 }
693
8da62485
CB
694 /* Retrieve ifindex of the host's veth device. */
695 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
696 if (!netdev->priv.veth_attr.ifindex) {
697 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
698 goto out_delete;
699 }
700
811ef482
CB
701 if (mtu) {
702 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 703 if (err) {
6d1400b5 704 errno = -err;
54256301 705 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
706 goto out_delete;
707 }
708 }
709
f2711167 710 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
26da53c3
TP
711 if (!lxc_nic_exists(netdev->link)) {
712 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
713 goto out_delete;
714 }
715
811ef482
CB
716 err = lxc_bridge_attach(netdev->link, veth1);
717 if (err) {
6d1400b5 718 errno = -err;
26da53c3 719 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
811ef482
CB
720 goto out_delete;
721 }
722 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
33320936 723
38790036
TP
724 if (is_ovs_bridge(netdev->link)) {
725 err = setup_veth_ovs_bridge_vlan(veth1, netdev);
726 if (err) {
727 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1);
728 lxc_ovs_delete_port(netdev->link, veth1);
729 goto out_delete;
730 }
731 } else {
33320936
TP
732 err = setup_veth_native_bridge_vlan(veth1, netdev);
733 if (err) {
734 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
735 goto out_delete;
736 }
737 }
811ef482
CB
738 }
739
740 err = lxc_netdev_up(veth1);
741 if (err) {
6d1400b5 742 errno = -err;
743 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
744 goto out_delete;
745 }
746
d4a7da46 747 /* setup ipv4 routes on the host interface */
303707f6 748 if (setup_ipv4_routes(netdev)) {
d4a7da46 749 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
750 goto out_delete;
751 }
752
753 /* setup ipv6 routes on the host interface */
6bf0c06b 754 if (setup_ipv6_routes(netdev)) {
d4a7da46 755 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
756 goto out_delete;
757 }
758
6dfa9581 759 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
760 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
761 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
762 appear to be added successfully but then do not appear in the proxy list. The length of time
763 slept doesn't appear to be important, only that the process sleeps for a short period of time.
764 */
765 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
766
6dfa9581
TP
767 if (netdev->ipv4_gateway) {
768 char bufinet4[INET_ADDRSTRLEN];
769 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 770 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
771 goto out_delete;
772 }
773
774 err = lxc_ip_forwarding_on(veth1, AF_INET);
775 if (err) {
9c66dc4f 776 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
777 goto out_delete;
778 }
779
5fe147e9 780 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 781 if (err) {
9c66dc4f 782 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
783 goto out_delete;
784 }
785 }
786
787 if (netdev->ipv6_gateway) {
788 char bufinet6[INET6_ADDRSTRLEN];
789
790 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 791 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
792 goto out_delete;
793 }
794
795 /* Check for sysctl net.ipv6.conf.all.forwarding=1
796 Kernel requires this to route any packets for IPv6.
797 */
798 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
799 if (err) {
9c66dc4f 800 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
801 goto out_delete;
802 }
803
804 err = lxc_ip_forwarding_on(veth1, AF_INET6);
805 if (err) {
9c66dc4f 806 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
807 goto out_delete;
808 }
809
810 err = lxc_neigh_proxy_on(veth1, AF_INET6);
811 if (err) {
9c66dc4f 812 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
813 goto out_delete;
814 }
815
5fe147e9 816 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 817 if (err) {
9c66dc4f 818 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
819 goto out_delete;
820 }
821 }
822
823 /* setup ipv4 address routes on the host interface */
2ec31bbd 824 err = setup_ipv4_addr_routes(netdev);
6dfa9581 825 if (err) {
9c66dc4f 826 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
827 goto out_delete;
828 }
829
830 /* setup ipv6 address routes on the host interface */
cd32fc73 831 err = setup_ipv6_addr_routes(netdev);
6dfa9581 832 if (err) {
9c66dc4f 833 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
834 goto out_delete;
835 }
836 }
837
811ef482 838 if (netdev->upscript) {
14a7b0f9
CB
839 char *argv[] = {
840 "veth",
841 netdev->link,
990b9ac3 842 veth1,
14a7b0f9
CB
843 NULL,
844 };
845
846 err = run_script_argv(handler->name,
847 handler->conf->hooks_version, "net",
848 netdev->upscript, "up", argv);
849 if (err < 0)
811ef482
CB
850 goto out_delete;
851 }
852
54256301 853 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
854
855 return 0;
856
857out_delete:
54256301 858 lxc_netdev_delete_by_name(veth1);
811ef482
CB
859 return -1;
860}
861
bad2f913 862static int netdev_configure_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 863{
8021de25 864 char peer[IFNAMSIZ];
811ef482
CB
865 int err;
866
f2711167 867 if (is_empty_string(netdev->link)) {
811ef482
CB
868 ERROR("No link for macvlan network device specified");
869 return -1;
870 }
871
387c1c70
CB
872 err = strnprintf(peer, sizeof(peer), "mcXXXXXX");
873 if (err < 0)
811ef482
CB
874 return -1;
875
3646ffd9 876 if (!lxc_ifname_alnum_case_sensitive(peer))
811ef482
CB
877 return -1;
878
879 err = lxc_macvlan_create(netdev->link, peer,
880 netdev->priv.macvlan_attr.mode);
881 if (err) {
6d1400b5 882 errno = -err;
883 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
884 peer, netdev->link);
966e9f1f 885 goto on_error;
811ef482
CB
886 }
887
9f8cf6e1
CB
888 strlcpy(netdev->created_name, peer, IFNAMSIZ);
889
811ef482
CB
890 netdev->ifindex = if_nametoindex(peer);
891 if (!netdev->ifindex) {
892 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 893 goto on_error;
811ef482
CB
894 }
895
3bef7b7b 896 if (netdev->mtu) {
54256301
CB
897 unsigned int mtu;
898
3bef7b7b
TP
899 err = lxc_safe_uint(netdev->mtu, &mtu);
900 if (err < 0) {
901 errno = -err;
902 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
903 goto on_error;
904 }
905
906 err = lxc_netdev_set_mtu(peer, mtu);
907 if (err < 0) {
908 errno = -err;
909 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
910 goto on_error;
911 }
912 }
913
811ef482 914 if (netdev->upscript) {
14a7b0f9
CB
915 char *argv[] = {
916 "macvlan",
917 netdev->link,
918 NULL,
919 };
920
921 err = run_script_argv(handler->name,
922 handler->conf->hooks_version, "net",
923 netdev->upscript, "up", argv);
924 if (err < 0)
966e9f1f 925 goto on_error;
811ef482
CB
926 }
927
4a037d61 928 DEBUG("Instantiated macvlan \"%s\" with ifindex %d and mode %d",
811ef482
CB
929 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
930
931 return 0;
966e9f1f
CB
932
933on_error:
811ef482 934 lxc_netdev_delete_by_name(peer);
811ef482
CB
935 return -1;
936}
937
0dc9a142 938static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int isolation)
c9f52382 939{
d16bda44
CB
940 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
941 struct nl_handler nlh;
942 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 943 int err, index, len;
944 struct ifinfomsg *ifi;
c9f52382 945 struct rtattr *nest, *nest2;
c9f52382 946
0dc9a142 947 len = strlen(parent);
c9f52382 948 if (len == 1 || len >= IFNAMSIZ)
d16bda44 949 return ret_errno(EINVAL);
c9f52382 950
951 len = strlen(name);
952 if (len == 1 || len >= IFNAMSIZ)
d16bda44 953 return ret_errno(EINVAL);
c9f52382 954
0dc9a142 955 index = if_nametoindex(parent);
c9f52382 956 if (!index)
d16bda44 957 return ret_errno(EINVAL);
c9f52382 958
d16bda44 959 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 960 if (err)
df62850d 961 return err;
c9f52382 962
c9f52382 963 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
964 if (!nlmsg)
d16bda44 965 return ret_errno(ENOMEM);
c9f52382 966
967 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
968 if (!answer)
d16bda44 969 return ret_errno(ENOMEM);
c9f52382 970
971 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
972 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
973
974 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
975 if (!ifi)
976 return ret_errno(ENOMEM);
c9f52382 977 ifi->ifi_family = AF_UNSPEC;
978
c9f52382 979 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
980 if (!nest)
d16bda44 981 return ret_errno(EPROTO);
c9f52382 982
983 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 984 return ret_errno(EPROTO);
c9f52382 985
5755765e
KT
986 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
987 if (!nest2)
988 return ret_errno(EPROTO);
989
3a934e2e 990 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
5755765e
KT
991 return ret_errno(EPROTO);
992
cf88a827
TP
993 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
994 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
995 * according to ipvlan docs.
5755765e 996 */
cf88a827 997 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
5755765e 998 return ret_errno(EPROTO);
c9f52382 999
5755765e 1000 nla_end_nested(nlmsg, nest2);
c9f52382 1001 nla_end_nested(nlmsg, nest);
1002
1003 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 1004 return ret_errno(EPROTO);
c9f52382 1005
1006 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
1007 return ret_errno(EPROTO);
1008
1009 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 1010}
1011
bad2f913 1012static int netdev_configure_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
c9f52382 1013{
dd119206 1014 char peer[IFNAMSIZ];
c9f52382 1015 int err;
1016
f2711167 1017 if (is_empty_string(netdev->link)) {
c9f52382 1018 ERROR("No link for ipvlan network device specified");
1019 return -1;
1020 }
1021
387c1c70
CB
1022 err = strnprintf(peer, sizeof(peer), "ipXXXXXX");
1023 if (err < 0)
c9f52382 1024 return -1;
1025
3646ffd9 1026 if (!lxc_ifname_alnum_case_sensitive(peer))
c9f52382 1027 return -1;
1028
dd119206
CB
1029 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
1030 netdev->priv.ipvlan_attr.isolation);
c9f52382 1031 if (err) {
dd119206
CB
1032 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
1033 peer, netdev->link);
c9f52382 1034 goto on_error;
1035 }
1036
e7fdd504
CB
1037 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1038
c9f52382 1039 netdev->ifindex = if_nametoindex(peer);
1040 if (!netdev->ifindex) {
1041 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
1042 goto on_error;
1043 }
1044
006e135e 1045 if (netdev->mtu) {
54256301
CB
1046 unsigned int mtu;
1047
006e135e 1048 err = lxc_safe_uint(netdev->mtu, &mtu);
1049 if (err < 0) {
1050 errno = -err;
54256301 1051 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 1052 goto on_error;
1053 }
1054
1055 err = lxc_netdev_set_mtu(peer, mtu);
1056 if (err < 0) {
1057 errno = -err;
54256301 1058 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 1059 goto on_error;
1060 }
1061 }
1062
c9f52382 1063 if (netdev->upscript) {
1064 char *argv[] = {
1065 "ipvlan",
1066 netdev->link,
1067 NULL,
1068 };
1069
dd119206
CB
1070 err = run_script_argv(handler->name, handler->conf->hooks_version,
1071 "net", netdev->upscript, "up", argv);
c9f52382 1072 if (err < 0)
1073 goto on_error;
1074 }
1075
4a037d61 1076 DEBUG("Instantiated ipvlan \"%s\" with ifindex %d and mode %d", peer,
dd119206 1077 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 1078
1079 return 0;
1080
1081on_error:
1082 lxc_netdev_delete_by_name(peer);
1083 return -1;
1084}
1085
bad2f913 1086static int netdev_configure_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482
CB
1087{
1088 char peer[IFNAMSIZ];
1089 int err;
1090 static uint16_t vlan_cntr = 0;
811ef482 1091
f2711167 1092 if (is_empty_string(netdev->link)) {
811ef482
CB
1093 ERROR("No link for vlan network device specified");
1094 return -1;
1095 }
1096
387c1c70
CB
1097 err = strnprintf(peer, sizeof(peer), "vlan%d-%d",
1098 netdev->priv.vlan_attr.vid, vlan_cntr++);
1099 if (err < 0)
811ef482
CB
1100 return -1;
1101
1102 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
1103 if (err) {
6d1400b5 1104 errno = -err;
1105 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1106 peer, netdev->link);
811ef482
CB
1107 return -1;
1108 }
1109
83530dba
CB
1110 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1111
811ef482
CB
1112 netdev->ifindex = if_nametoindex(peer);
1113 if (!netdev->ifindex) {
1114 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 1115 goto on_error;
1116 }
1117
1118 if (netdev->mtu) {
54256301
CB
1119 unsigned int mtu;
1120
3e2a7b08 1121 err = lxc_safe_uint(netdev->mtu, &mtu);
1122 if (err < 0) {
1123 errno = -err;
54256301 1124 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1125 goto on_error;
1126 }
1127
1128 err = lxc_netdev_set_mtu(peer, mtu);
54256301 1129 if (err < 0) {
3e2a7b08 1130 errno = -err;
54256301 1131 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1132 goto on_error;
1133 }
811ef482
CB
1134 }
1135
3a73d9f1 1136 if (netdev->upscript) {
1137 char *argv[] = {
1138 "vlan",
1139 netdev->link,
1140 NULL,
1141 };
1142
d4d68410
CB
1143 err = run_script_argv(handler->name, handler->conf->hooks_version,
1144 "net", netdev->upscript, "up", argv);
19abca58 1145 if (err < 0) {
3e2a7b08 1146 goto on_error;
19abca58 1147 }
3a73d9f1 1148 }
1149
4a037d61 1150 DEBUG("Instantiated vlan \"%s\" with ifindex \"%d\"", peer,
d4d68410 1151 netdev->ifindex);
811ef482
CB
1152
1153 return 0;
3e2a7b08 1154
1155on_error:
1156 lxc_netdev_delete_by_name(peer);
1157 return -1;
811ef482
CB
1158}
1159
bad2f913 1160static int netdev_configure_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1161{
0b154989 1162 int err, mtu_orig = 0;
14a7b0f9 1163
9c66dc4f
CB
1164 if (is_empty_string(netdev->link))
1165 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 1166
75b074ee
CB
1167 /*
1168 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
1169 * network namespace because we need it to move the device from the
1170 * host's network namespace to the container's network namespace later
1171 * on.
1172 * Note that netdev->link will contain the name of the physical network
1173 * device in the host's namespace.
1174 */
811ef482 1175 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
1176 if (!netdev->ifindex)
1177 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 1178
61302ef7 1179 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 1180 if (is_empty_string(netdev->name))
8bf64b77 1181 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 1182
75b074ee
CB
1183 /*
1184 * Store the ifindex of the host's network device in the host's
790255cf
CB
1185 * namespace.
1186 */
1187 netdev->priv.phys_attr.ifindex = netdev->ifindex;
1188
75b074ee
CB
1189 /*
1190 * Get original device MTU setting and store for restoration after
1191 * container shutdown.
1192 */
0b154989 1193 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
1194 if (mtu_orig < 0)
1195 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
1196
1197 netdev->priv.phys_attr.mtu = mtu_orig;
1198
3bef7b7b 1199 if (netdev->mtu) {
54256301
CB
1200 unsigned int mtu;
1201
3bef7b7b 1202 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
1203 if (err < 0)
1204 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 1205
3bef7b7b 1206 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
1207 if (err < 0)
1208 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
1209 }
1210
1211 if (netdev->upscript) {
1212 char *argv[] = {
1213 "phys",
1214 netdev->link,
1215 NULL,
1216 };
1217
75b074ee
CB
1218 err = run_script_argv(handler->name, handler->conf->hooks_version,
1219 "net", netdev->upscript, "up", argv);
9c66dc4f 1220 if (err < 0)
3bef7b7b 1221 return -1;
3bef7b7b
TP
1222 }
1223
4a037d61 1224 DEBUG("Instantiated phys \"%s\" with ifindex \"%d\"", netdev->link,
75b074ee 1225 netdev->ifindex);
811ef482
CB
1226
1227 return 0;
1228}
1229
bad2f913 1230static int netdev_configure_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1231{
14a7b0f9
CB
1232 int ret;
1233 char *argv[] = {
1234 "empty",
1235 NULL,
1236 };
1237
43e2a964
CB
1238 /* The loopback device always has index 1. */
1239 netdev->ifindex = 1;
1240
1241 if (!strequal(netdev->name, "lo"))
1242 return syserror_set(-EINVAL, "Custom loopback device names not supported");
1243
14a7b0f9
CB
1244 if (!netdev->upscript)
1245 return 0;
1246
1247 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1248 "net", netdev->upscript, "up", argv);
1249 if (ret < 0)
1250 return -1;
1251
811ef482
CB
1252 return 0;
1253}
1254
bad2f913 1255static int netdev_configure_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482
CB
1256{
1257 netdev->ifindex = 0;
1258 return 0;
1259}
1260
bad2f913
CB
1261static netdev_configure_server_cb netdev_configure_server[LXC_NET_MAXCONFTYPE + 1] = {
1262 [LXC_NET_VETH] = netdev_configure_server_veth,
1263 [LXC_NET_MACVLAN] = netdev_configure_server_macvlan,
1264 [LXC_NET_IPVLAN] = netdev_configure_server_ipvlan,
1265 [LXC_NET_VLAN] = netdev_configure_server_vlan,
1266 [LXC_NET_PHYS] = netdev_configure_server_phys,
1267 [LXC_NET_EMPTY] = netdev_configure_server_empty,
1268 [LXC_NET_NONE] = netdev_configure_server_none,
811ef482
CB
1269};
1270
bad2f913 1271static int __netdev_configure_container_common(struct lxc_netdev *netdev)
8bf64b77
CB
1272{
1273 char current_ifname[IFNAMSIZ];
1274
fdd6be55 1275 netdev->ifindex = if_nametoindex(netdev->transient_name);
8bf64b77
CB
1276 if (!netdev->ifindex)
1277 return log_error_errno(-1,
1278 errno, "Failed to retrieve ifindex for network device with name %s",
fdd6be55 1279 netdev->transient_name);
8bf64b77 1280
3473ca76 1281 if (is_empty_string(netdev->name))
8bf64b77
CB
1282 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1283
fdd6be55 1284 if (!strequal(netdev->transient_name, netdev->name)) {
8bf64b77
CB
1285 int ret;
1286
fdd6be55 1287 ret = lxc_netdev_rename_by_name(netdev->transient_name, netdev->name);
8bf64b77 1288 if (ret)
9c66dc4f 1289 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
fdd6be55 1290 netdev->transient_name, netdev->name);
8bf64b77 1291
fdd6be55 1292 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->transient_name, netdev->name);
8bf64b77
CB
1293 }
1294
1295 /*
1296 * Re-read the name of the interface because its name has changed and
1297 * would be automatically allocated by the system
1298 */
1299 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 1300 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
1301
1302 /*
1303 * Now update the recorded name of the network device to reflect the
1304 * name of the network device in the child's network namespace. We will
1305 * later on send this information back to the parent.
1306 */
1307 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
fdd6be55 1308 netdev->transient_name[0] = '\0';
8bf64b77
CB
1309
1310 return 0;
1311}
1312
/* Container-side setup for veth devices: only the common step is needed. */
static int netdev_configure_container_veth(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1318
/* Container-side setup for macvlan devices: only the common step is needed. */
static int netdev_configure_container_macvlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1323
/* Container-side setup for ipvlan devices: only the common step is needed. */
static int netdev_configure_container_ipvlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1328
/* Container-side setup for vlan devices: only the common step is needed. */
static int netdev_configure_container_vlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1333
/* Container-side setup for physical devices: only the common step is needed. */
static int netdev_configure_container_phys(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
1338
/* Container-side setup for "empty" networks: nothing to do, always 0. */
static int netdev_configure_container_empty(struct lxc_netdev *netdev)
{
	return 0;
}
1343
/* Container-side setup for network type "none": nothing to do, always 0. */
static int netdev_configure_container_none(struct lxc_netdev *netdev)
{
	return 0;
}
1348
bad2f913
CB
1349static netdev_configure_container_cb netdev_configure_container[LXC_NET_MAXCONFTYPE + 1] = {
1350 [LXC_NET_VETH] = netdev_configure_container_veth,
1351 [LXC_NET_MACVLAN] = netdev_configure_container_macvlan,
1352 [LXC_NET_IPVLAN] = netdev_configure_container_ipvlan,
1353 [LXC_NET_VLAN] = netdev_configure_container_vlan,
1354 [LXC_NET_PHYS] = netdev_configure_container_phys,
1355 [LXC_NET_EMPTY] = netdev_configure_container_empty,
1356 [LXC_NET_NONE] = netdev_configure_container_none,
8bf64b77
CB
1357};
1358
bad2f913 1359static int netdev_shutdown_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1360{
14a7b0f9
CB
1361 int ret;
1362 char *argv[] = {
1363 "veth",
1364 netdev->link,
1365 NULL,
1366 NULL,
1367 };
1368
1369 if (!netdev->downscript)
1370 return 0;
811ef482 1371
f2711167 1372 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 1373 argv[2] = netdev->priv.veth_attr.pair;
811ef482 1374 else
14a7b0f9
CB
1375 argv[2] = netdev->priv.veth_attr.veth1;
1376
1377 ret = run_script_argv(handler->name,
1378 handler->conf->hooks_version, "net",
1379 netdev->downscript, "down", argv);
1380 if (ret < 0)
1381 return -1;
811ef482 1382
811ef482
CB
1383 return 0;
1384}
1385
bad2f913 1386static int netdev_shutdown_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1387{
14a7b0f9
CB
1388 int ret;
1389 char *argv[] = {
1390 "macvlan",
1391 netdev->link,
1392 NULL,
1393 };
1394
1395 if (!netdev->downscript)
1396 return 0;
1397
1398 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1399 "net", netdev->downscript, "down", argv);
1400 if (ret < 0)
1401 return -1;
811ef482 1402
811ef482
CB
1403 return 0;
1404}
1405
bad2f913 1406static int netdev_shutdown_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
c9f52382 1407{
1408 int ret;
1409 char *argv[] = {
1410 "ipvlan",
1411 netdev->link,
1412 NULL,
1413 };
1414
1415 if (!netdev->downscript)
1416 return 0;
1417
1418 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1419 "net", netdev->downscript, "down", argv);
1420 if (ret < 0)
1421 return -1;
1422
1423 return 0;
1424}
1425
bad2f913 1426static int netdev_shutdown_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1427{
3a73d9f1 1428 int ret;
1429 char *argv[] = {
1430 "vlan",
1431 netdev->link,
1432 NULL,
1433 };
1434
1435 if (!netdev->downscript)
1436 return 0;
1437
1438 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1439 "net", netdev->downscript, "down", argv);
1440 if (ret < 0)
1441 return -1;
1442
811ef482
CB
1443 return 0;
1444}
1445
bad2f913 1446static int netdev_shutdown_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1447{
14a7b0f9
CB
1448 int ret;
1449 char *argv[] = {
1450 "phys",
1451 netdev->link,
1452 NULL,
1453 };
1454
1455 if (!netdev->downscript)
1456 return 0;
1457
1458 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1459 "net", netdev->downscript, "down", argv);
1460 if (ret < 0)
1461 return -1;
811ef482 1462
811ef482
CB
1463 return 0;
1464}
1465
bad2f913 1466static int netdev_shutdown_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1467{
14a7b0f9
CB
1468 int ret;
1469 char *argv[] = {
1470 "empty",
1471 NULL,
1472 };
1473
1474 if (!netdev->downscript)
1475 return 0;
1476
1477 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1478 "net", netdev->downscript, "down", argv);
1479 if (ret < 0)
1480 return -1;
811ef482 1481
811ef482
CB
1482 return 0;
1483}
1484
/* Shutdown hook for network type "none": nothing to do, always 0. */
static int netdev_shutdown_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
1489
bad2f913
CB
1490static netdev_shutdown_server_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1491 [LXC_NET_VETH] = netdev_shutdown_server_veth,
1492 [LXC_NET_MACVLAN] = netdev_shutdown_server_macvlan,
1493 [LXC_NET_IPVLAN] = netdev_shutdown_server_ipvlan,
1494 [LXC_NET_VLAN] = netdev_shutdown_server_vlan,
1495 [LXC_NET_PHYS] = netdev_shutdown_server_phys,
1496 [LXC_NET_EMPTY] = netdev_shutdown_server_empty,
1497 [LXC_NET_NONE] = netdev_shutdown_server_none,
811ef482
CB
1498};
1499
0037ab49
TP
1500static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1501{
d16bda44 1502 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0037ab49 1503 struct nl_handler nlh;
d16bda44
CB
1504 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1505 int err;
0037ab49 1506 struct ifinfomsg *ifi;
0037ab49 1507
d16bda44 1508 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1509 if (err)
1510 return err;
1511
0037ab49
TP
1512 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1513 if (!nlmsg)
d16bda44 1514 return ret_errno(ENOMEM);
0037ab49
TP
1515
1516 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1517 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1518
1519 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1520 if (!ifi)
d16bda44
CB
1521 return ret_errno(ENOMEM);
1522
0037ab49
TP
1523 ifi->ifi_family = AF_UNSPEC;
1524 ifi->ifi_index = ifindex;
1525
1526 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1527 return ret_errno(ENOMEM);
0037ab49 1528
3473ca76 1529 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1530 return ret_errno(ENOMEM);
0037ab49 1531
d16bda44 1532 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1533}
1534
ebc73a67 1535int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1536{
d16bda44 1537 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0ad19a3f 1538 struct nl_handler nlh;
d16bda44
CB
1539 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1540 int err;
06f976ca 1541 struct ifinfomsg *ifi;
0ad19a3f 1542
d16bda44 1543 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1544 if (err)
1545 return err;
0ad19a3f 1546
0ad19a3f 1547 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1548 if (!nlmsg)
d16bda44 1549 return ret_errno(ENOMEM);
0ad19a3f 1550
ebc73a67 1551 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1552 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1553
1554 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1555 if (!ifi)
d16bda44
CB
1556 return ret_errno(ENOMEM);
1557
06f976ca
SZ
1558 ifi->ifi_family = AF_UNSPEC;
1559 ifi->ifi_index = ifindex;
0ad19a3f 1560
1561 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1562 return ret_errno(ENOMEM);
0ad19a3f 1563
3473ca76 1564 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1565 return ret_errno(ENOMEM);
8d357196 1566
d16bda44 1567 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1568}
1569
ebc73a67
CB
1570/* If we are asked to move a wireless interface, then we must actually move its
1571 * phyN device. Detect that condition and return the physname here. The physname
1572 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1573 */
1574#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1575char *is_wlan(const char *ifname)
e5848d39 1576{
4110345b
CB
1577 __do_fclose FILE *f = NULL;
1578 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1579 int i, ret;
e5848d39 1580 long physlen;
ebc73a67 1581 size_t len;
e5848d39 1582
ebc73a67 1583 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1584 path = must_realloc(NULL, len + 1);
387c1c70
CB
1585 ret = strnprintf(path, len, PHYSNAME, ifname);
1586 if (ret < 0)
4110345b 1587 return NULL;
ebc73a67 1588
4110345b 1589 f = fopen(path, "re");
ebc73a67 1590 if (!f)
4110345b 1591 return NULL;
ebc73a67 1592
1a0e70ac 1593 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1594 fseek(f, 0, SEEK_END);
1595 physlen = ftell(f);
1596 fseek(f, 0, SEEK_SET);
4110345b
CB
1597 if (physlen < 0)
1598 return NULL;
ebc73a67
CB
1599
1600 physname = malloc(physlen + 1);
4110345b
CB
1601 if (!physname)
1602 return NULL;
ebc73a67
CB
1603
1604 memset(physname, 0, physlen + 1);
e5848d39 1605 ret = fread(physname, 1, physlen, f);
e5848d39 1606 if (ret < 0)
4110345b 1607 return NULL;
e5848d39 1608
ebc73a67 1609 for (i = 0; i < physlen; i++) {
e5848d39
SH
1610 if (physname[i] == '\n')
1611 physname[i] = '\0';
ebc73a67 1612
e5848d39
SH
1613 if (physname[i] == '\0')
1614 break;
1615 }
1616
4110345b 1617 return move_ptr(physname);
e5848d39
SH
1618}
1619
/*
 * Rename network device @old to @new inside the network namespace of process
 * @pid.
 *
 * The work is done in a forked child that switches into the target network
 * namespace. The parent waits for the child and returns the result of
 * wait_for_pid(); -1 is returned if fork() fails.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on. It must terminate with _exit() on every path:
	 * returning would let the child continue executing the caller's code
	 * as a duplicate of the parent.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	/* Map the (negative errno) result onto a well-defined exit status. */
	_exit(lxc_netdev_rename_by_name(old, new) ? EXIT_FAILURE : EXIT_SUCCESS);
}
1637
e4103cf6 1638int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1639 const char *newname)
e5848d39 1640{
3dd78294 1641 __do_free char *cmd = NULL;
ebc73a67 1642 pid_t fpid;
e5848d39
SH
1643
1644 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1645 * However, IIUC this involves a bit more complicated work to talk to
1646 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1647 */
1648 cmd = on_path("iw", NULL);
0ed79f45
M
1649 if (!cmd) {
1650 ERROR("Couldn't find the application iw in PATH");
3dd78294 1651 return -1;
0ed79f45 1652 }
e5848d39
SH
1653
1654 fpid = fork();
1655 if (fpid < 0)
3dd78294 1656 return -1;
ebc73a67 1657
e5848d39
SH
1658 if (fpid == 0) {
1659 char pidstr[30];
1660 sprintf(pidstr, "%d", pid);
9c66dc4f 1661 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1662 _exit(EXIT_FAILURE);
e5848d39 1663 }
ebc73a67 1664
e5848d39 1665 if (wait_for_pid(fpid))
3dd78294 1666 return -1;
e5848d39 1667
e5848d39 1668 if (newname)
3dd78294 1669 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1670
3dd78294 1671 return 0;
e5848d39
SH
1672}
1673
8d357196 1674int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1675{
3dd78294 1676 __do_free char *physname = NULL;
8befa924
SH
1677 int index;
1678
8befa924
SH
1679 if (!ifname)
1680 return -EINVAL;
1681
32571606 1682 index = if_nametoindex(ifname);
49428bf3
DY
1683 if (!index)
1684 return -EINVAL;
32571606 1685
ebc73a67
CB
1686 physname = is_wlan(ifname);
1687 if (physname)
e5848d39
SH
1688 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1689
8d357196 1690 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1691}
1692
b84f58b9 1693int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1694{
d16bda44
CB
1695 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1696 struct nl_handler nlh;
1697 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1698 int err;
ebc73a67 1699 struct ifinfomsg *ifi;
0ad19a3f 1700
d16bda44 1701 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1702 if (err)
1703 return err;
0ad19a3f 1704
0ad19a3f 1705 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1706 if (!nlmsg)
d16bda44 1707 return ret_errno(ENOMEM);
0ad19a3f 1708
06f976ca 1709 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1710 if (!answer)
d16bda44 1711 return ret_errno(ENOMEM);
0ad19a3f 1712
ebc73a67 1713 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1714 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1715
1716 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1717 if (!ifi)
d16bda44
CB
1718 return ret_errno(ENOMEM);
1719
06f976ca
SZ
1720 ifi->ifi_family = AF_UNSPEC;
1721 ifi->ifi_index = ifindex;
0ad19a3f 1722
d16bda44 1723 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1724}
1725
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL if no such device is found, otherwise the result of
 * lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (!ifindex)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1736
1737int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1738{
d16bda44
CB
1739 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1740 struct nl_handler nlh;
1741 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1742 int err, len;
06f976ca 1743 struct ifinfomsg *ifi;
b9a5bb58 1744
d16bda44 1745 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1746 if (err)
1747 return err;
b9a5bb58 1748
b84f58b9 1749 len = strlen(newname);
d16bda44
CB
1750 if (len == 1 || len >= IFNAMSIZ)
1751 return ret_errno(EINVAL);
b84f58b9 1752
b9a5bb58
DL
1753 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1754 if (!nlmsg)
d16bda44 1755 return ret_errno(ENOMEM);
b9a5bb58 1756
06f976ca 1757 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1758 if (!answer)
d16bda44 1759 return ret_errno(ENOMEM);
b9a5bb58 1760
ebc73a67 1761 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1762 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1763
1764 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1765 if (!ifi)
d16bda44
CB
1766 return ret_errno(ENOMEM);
1767
06f976ca
SZ
1768 ifi->ifi_family = AF_UNSPEC;
1769 ifi->ifi_index = ifindex;
b84f58b9
DL
1770
1771 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1772 return ret_errno(ENOMEM);
b9a5bb58 1773
d16bda44 1774 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1775}
1776
b84f58b9
DL
1777int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1778{
1779 int len, index;
1780
1781 len = strlen(oldname);
dae3fdf6 1782 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1783 return -EINVAL;
1784
1785 index = if_nametoindex(oldname);
1786 if (!index)
1787 return -EINVAL;
1788
1789 return lxc_netdev_rename_by_index(index, newname);
1790}
1791
8befa924 1792int netdev_set_flag(const char *name, int flag)
0ad19a3f 1793{
d16bda44
CB
1794 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1795 struct nl_handler nlh;
1796 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1797 int err, index, len;
06f976ca 1798 struct ifinfomsg *ifi;
0ad19a3f 1799
d16bda44 1800 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1801 if (err)
1802 return err;
0ad19a3f 1803
1804 len = strlen(name);
dae3fdf6 1805 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1806 return ret_errno(EINVAL);
0ad19a3f 1807
1808 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1809 if (!nlmsg)
d16bda44 1810 return ret_errno(ENOMEM);
0ad19a3f 1811
06f976ca 1812 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1813 if (!answer)
d16bda44 1814 return ret_errno(ENOMEM);
0ad19a3f 1815
1816 index = if_nametoindex(name);
1817 if (!index)
d16bda44 1818 return ret_errno(EINVAL);
0ad19a3f 1819
ebc73a67 1820 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1821 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1822
1823 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1824 if (!ifi)
1825 return ret_errno(ENOMEM);
1826
06f976ca
SZ
1827 ifi->ifi_family = AF_UNSPEC;
1828 ifi->ifi_index = index;
1829 ifi->ifi_change |= IFF_UP;
1830 ifi->ifi_flags |= flag;
0ad19a3f 1831
d16bda44 1832 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1833}
1834
59eac805 1835static int netdev_get_flag(const char *name, int *flag)
efa1cf45 1836{
d16bda44
CB
1837 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1838 struct nl_handler nlh;
1839 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1840 int err, index, len;
a4318300 1841 struct ifinfomsg *ifi;
efa1cf45
DY
1842
1843 if (!name)
d16bda44 1844 return ret_errno(EINVAL);
efa1cf45 1845
d16bda44 1846 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1847 if (err)
1848 return err;
1849
efa1cf45
DY
1850 len = strlen(name);
1851 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1852 return ret_errno(EINVAL);
efa1cf45 1853
efa1cf45
DY
1854 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1855 if (!nlmsg)
d16bda44 1856 return ret_errno(ENOMEM);
efa1cf45 1857
06f976ca 1858 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1859 if (!answer)
d16bda44 1860 return ret_errno(ENOMEM);
efa1cf45 1861
efa1cf45
DY
1862 index = if_nametoindex(name);
1863 if (!index)
d16bda44 1864 return ret_errno(EINVAL);
efa1cf45 1865
06f976ca
SZ
1866 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1867 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1868
1869 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1870 if (!ifi)
1871 return ret_errno(ENOMEM);
1872
06f976ca
SZ
1873 ifi->ifi_family = AF_UNSPEC;
1874 ifi->ifi_index = index;
efa1cf45 1875
d16bda44 1876 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1877 if (err)
d16bda44 1878 return ret_set_errno(-1, errno);
efa1cf45 1879
06f976ca 1880 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1881
1882 *flag = ifi->ifi_flags;
efa1cf45
DY
1883 return err;
1884}
1885
1886/*
1887 * \brief Check a interface is up or not.
1888 *
1889 * \param name: name for the interface.
1890 *
1891 * \return int.
1892 * 0 means interface is down.
1893 * 1 means interface is up.
1894 * Others means error happened, and ret-value is the error number.
1895 */
ebc73a67 1896int lxc_netdev_isup(const char *name)
efa1cf45 1897{
4db0514d
CB
1898 int err;
1899 int flag = 0;
efa1cf45
DY
1900
1901 err = netdev_get_flag(name, &flag);
1902 if (err)
ebc73a67
CB
1903 return err;
1904
efa1cf45
DY
1905 if (flag & IFF_UP)
1906 return 1;
ebc73a67 1907
efa1cf45 1908 return 0;
efa1cf45
DY
1909}
1910
0130df54
SH
1911int netdev_get_mtu(int ifindex)
1912{
a5f5cb41 1913 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54 1914 struct nl_handler nlh;
a5f5cb41
CB
1915 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1916 int readmore = 0, recv_len = 0;
1917 int answer_len, err, res;
06f976ca 1918 struct ifinfomsg *ifi;
0130df54 1919 struct nlmsghdr *msg;
0130df54 1920
a5f5cb41 1921 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0130df54
SH
1922 if (err)
1923 return err;
1924
0130df54
SH
1925 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1926 if (!nlmsg)
a5f5cb41 1927 return ret_errno(ENOMEM);
0130df54 1928
06f976ca 1929 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54 1930 if (!answer)
a5f5cb41 1931 return ret_errno(ENOMEM);
0130df54
SH
1932
1933 /* Save the answer buffer length, since it will be overwritten
1934 * on the first receive (and we might need to receive more than
ebc73a67
CB
1935 * once.
1936 */
06f976ca
SZ
1937 answer_len = answer->nlmsghdr->nlmsg_len;
1938
ebc73a67 1939 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1940 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1941
06f976ca 1942 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1943 if (!ifi)
a5f5cb41
CB
1944 return ret_errno(ENOMEM);
1945
06f976ca 1946 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1947
1948 /* Send the request for addresses, which returns all addresses
1949 * on all interfaces. */
a5f5cb41 1950 err = netlink_send(nlh_ptr, nlmsg);
0130df54 1951 if (err < 0)
a5f5cb41 1952 return ret_set_errno(-1, errno);
0130df54 1953
6ce39620
CB
1954#pragma GCC diagnostic push
1955#pragma GCC diagnostic ignored "-Wcast-align"
1956
0130df54
SH
1957 do {
1958 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1959 * overwritten by a previous receive.
1960 */
06f976ca 1961 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1962
1963 /* Get the (next) batch of reply messages */
a5f5cb41 1964 err = netlink_rcv(nlh_ptr, answer);
0130df54 1965 if (err < 0)
a5f5cb41 1966 return ret_set_errno(-1, errno);
0130df54
SH
1967
1968 recv_len = err;
0130df54
SH
1969
1970 /* Satisfy the typing for the netlink macros */
06f976ca 1971 msg = answer->nlmsghdr;
0130df54
SH
1972
1973 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1974 /* Stop reading if we see an error message */
1975 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
1976 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1977 return ret_set_errno(errmsg->error, errno);
0130df54
SH
1978 }
1979
1980 /* Stop reading if we see a NLMSG_DONE message */
1981 if (msg->nlmsg_type == NLMSG_DONE) {
1982 readmore = 0;
1983 break;
1984 }
1985
06f976ca 1986 ifi = NLMSG_DATA(msg);
0130df54
SH
1987 if (ifi->ifi_index == ifindex) {
1988 struct rtattr *rta = IFLA_RTA(ifi);
a5f5cb41
CB
1989 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1990
0130df54 1991 res = 0;
ebc73a67 1992 while (RTA_OK(rta, attr_len)) {
9c66dc4f 1993 /*
a5f5cb41 1994 * Found a local address for the
ebc73a67
CB
1995 * requested interface, return it.
1996 */
0130df54 1997 if (rta->rta_type == IFLA_MTU) {
a5f5cb41
CB
1998 memcpy(&res, RTA_DATA(rta), sizeof(int));
1999 return res;
0130df54 2000 }
a5f5cb41 2001
0130df54
SH
2002 rta = RTA_NEXT(rta, attr_len);
2003 }
0130df54
SH
2004 }
2005
ebc73a67
CB
2006 /* Keep reading more data from the socket if the last
2007 * message had the NLF_F_MULTI flag set.
2008 */
0130df54
SH
2009 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2010
ebc73a67 2011 /* Look at the next message received in this buffer. */
0130df54
SH
2012 msg = NLMSG_NEXT(msg, recv_len);
2013 }
2014 } while (readmore);
2015
6ce39620
CB
2016#pragma GCC diagnostic pop
2017
ebc73a67 2018 /* If we end up here, we didn't find any result, so signal an error. */
a5f5cb41 2019 return -1;
0130df54
SH
2020}
2021
d472214b 2022int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 2023{
a5f5cb41
CB
2024 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2025 struct nl_handler nlh;
2026 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
54256301 2027 int err, len;
06f976ca 2028 struct ifinfomsg *ifi;
75d09f83 2029
a5f5cb41 2030 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2031 if (err)
2032 return err;
75d09f83
DL
2033
2034 len = strlen(name);
dae3fdf6 2035 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2036 return ret_errno(EINVAL);
75d09f83
DL
2037
2038 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2039 if (!nlmsg)
a5f5cb41 2040 return ret_errno(ENOMEM);
75d09f83 2041
06f976ca 2042 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83 2043 if (!answer)
a5f5cb41 2044 return ret_errno(ENOMEM);
75d09f83 2045
ebc73a67 2046 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
2047 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2048
2049 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2050 if (!ifi)
2051 return ret_errno(ENOMEM);
2052
06f976ca 2053 ifi->ifi_family = AF_UNSPEC;
54256301
CB
2054
2055 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2056 return ret_errno(ENOMEM);
75d09f83
DL
2057
2058 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2059 return ret_errno(ENOMEM);
75d09f83 2060
a5f5cb41 2061 return netlink_transaction(nlh_ptr, nlmsg, answer);
75d09f83
DL
2062}
2063
d472214b 2064int lxc_netdev_up(const char *name)
0ad19a3f 2065{
d472214b 2066 return netdev_set_flag(name, IFF_UP);
0ad19a3f 2067}
2068
/* Take the interface called name down by passing no flag to netdev_set_flag(). */
int lxc_netdev_down(const char *name)
{
	const int flag = 0;

	return netdev_set_flag(name, flag);
}
2073
54256301 2074int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 2075{
a5f5cb41
CB
2076 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2077 struct nl_handler nlh;
2078 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2079 int err, len;
06f976ca 2080 struct ifinfomsg *ifi;
0ad19a3f 2081 struct rtattr *nest1, *nest2, *nest3;
0ad19a3f 2082
a5f5cb41 2083 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2084 if (err)
2085 return err;
0ad19a3f 2086
2087 len = strlen(name1);
dae3fdf6 2088 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2089 return ret_errno(EINVAL);
0ad19a3f 2090
2091 len = strlen(name2);
dae3fdf6 2092 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2093 return ret_errno(EINVAL);
0ad19a3f 2094
2095 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2096 if (!nlmsg)
a5f5cb41 2097 return ret_errno(ENOMEM);
0ad19a3f 2098
06f976ca 2099 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2100 if (!answer)
a5f5cb41 2101 return ret_errno(ENOMEM);
0ad19a3f 2102
a5f5cb41 2103 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2104 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2105
2106 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 2107 if (!ifi)
a5f5cb41
CB
2108 return ret_errno(ENOMEM);
2109
06f976ca 2110 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2111
79e68309 2112 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2113 if (!nest1)
a5f5cb41 2114 return ret_errno(EINVAL);
0ad19a3f 2115
2116 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
a5f5cb41 2117 return ret_errno(ENOMEM);
0ad19a3f 2118
2119 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2120 if (!nest2)
a5f5cb41 2121 return ret_errno(ENOMEM);
0ad19a3f 2122
2123 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
2124 if (!nest3)
a5f5cb41 2125 return ret_errno(ENOMEM);
0ad19a3f 2126
06f976ca 2127 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2128 if (!ifi)
2129 return ret_errno(ENOMEM);
0ad19a3f 2130
2131 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
a5f5cb41 2132 return ret_errno(ENOMEM);
0ad19a3f 2133
54256301 2134 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2135 return ret_errno(ENOMEM);
54256301
CB
2136
2137 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
a5f5cb41 2138 return ret_errno(ENOMEM);
54256301 2139
0ad19a3f 2140 nla_end_nested(nlmsg, nest3);
0ad19a3f 2141 nla_end_nested(nlmsg, nest2);
0ad19a3f 2142 nla_end_nested(nlmsg, nest1);
2143
2144 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
a5f5cb41 2145 return ret_errno(ENOMEM);
0ad19a3f 2146
a5f5cb41 2147 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2148}
2149
ebc73a67 2150/* TODO: merge with lxc_macvlan_create */
0dc9a142 2151int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid)
26c39028 2152{
a5f5cb41
CB
2153 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2154 struct nl_handler nlh;
2155 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2156 int err, len, lindex;
06f976ca 2157 struct ifinfomsg *ifi;
26c39028 2158 struct rtattr *nest, *nest2;
26c39028 2159
a5f5cb41 2160 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2161 if (err)
2162 return err;
26c39028 2163
0dc9a142 2164 len = strlen(parent);
dae3fdf6 2165 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2166 return ret_errno(EINVAL);
26c39028
JHS
2167
2168 len = strlen(name);
dae3fdf6 2169 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2170 return ret_errno(EINVAL);
26c39028
JHS
2171
2172 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2173 if (!nlmsg)
a5f5cb41 2174 return ret_errno(ENOMEM);
26c39028 2175
06f976ca 2176 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028 2177 if (!answer)
a5f5cb41 2178 return ret_errno(ENOMEM);
26c39028 2179
0dc9a142 2180 lindex = if_nametoindex(parent);
26c39028 2181 if (!lindex)
a5f5cb41 2182 return ret_errno(EINVAL);
26c39028 2183
a5f5cb41 2184 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2185 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2186
2187 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2188 if (!ifi)
2189 return ret_errno(ENOMEM);
2190
06f976ca 2191 ifi->ifi_family = AF_UNSPEC;
26c39028 2192
79e68309 2193 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028 2194 if (!nest)
a5f5cb41 2195 return ret_errno(ENOMEM);
26c39028
JHS
2196
2197 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
a5f5cb41 2198 return ret_errno(ENOMEM);
26c39028
JHS
2199
2200 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2201 if (!nest2)
a5f5cb41 2202 return ret_errno(ENOMEM);
e892973e 2203
26c39028 2204 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
a5f5cb41 2205 return ret_errno(ENOMEM);
e892973e 2206
26c39028 2207 nla_end_nested(nlmsg, nest2);
26c39028
JHS
2208 nla_end_nested(nlmsg, nest);
2209
2210 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
a5f5cb41 2211 return ret_errno(ENOMEM);
26c39028
JHS
2212
2213 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41
CB
2214 return ret_errno(ENOMEM);
2215
2216 return netlink_transaction(nlh_ptr, nlmsg, answer);
26c39028
JHS
2217}
2218
0dc9a142 2219int lxc_macvlan_create(const char *parent, const char *name, int mode)
0ad19a3f 2220{
a5f5cb41
CB
2221 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2222 struct nl_handler nlh;
2223 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2224 int err, index, len;
06f976ca 2225 struct ifinfomsg *ifi;
e892973e 2226 struct rtattr *nest, *nest2;
0ad19a3f 2227
a5f5cb41 2228 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2229 if (err)
2230 return err;
0ad19a3f 2231
0dc9a142 2232 len = strlen(parent);
dae3fdf6 2233 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2234 return ret_errno(EINVAL);
0ad19a3f 2235
2236 len = strlen(name);
dae3fdf6 2237 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2238 return ret_errno(EINVAL);
0ad19a3f 2239
2240 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2241 if (!nlmsg)
a5f5cb41 2242 return ret_errno(ENOMEM);
0ad19a3f 2243
06f976ca 2244 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2245 if (!answer)
a5f5cb41 2246 return ret_errno(ENOMEM);
0ad19a3f 2247
0dc9a142 2248 index = if_nametoindex(parent);
0ad19a3f 2249 if (!index)
a5f5cb41 2250 return ret_errno(EINVAL);
0ad19a3f 2251
a5f5cb41 2252 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2253 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2254
2255 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2256 if (!ifi)
2257 return ret_errno(ENOMEM);
2258
06f976ca 2259 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2260
79e68309 2261 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2262 if (!nest)
a5f5cb41 2263 return ret_errno(ENOMEM);
0ad19a3f 2264
2265 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
a5f5cb41 2266 return ret_errno(ENOMEM);
0ad19a3f 2267
e892973e
DL
2268 if (mode) {
2269 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2270 if (!nest2)
a5f5cb41 2271 return ret_errno(ENOMEM);
e892973e
DL
2272
2273 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
a5f5cb41 2274 return ret_errno(ENOMEM);
e892973e
DL
2275
2276 nla_end_nested(nlmsg, nest2);
2277 }
2278
0ad19a3f 2279 nla_end_nested(nlmsg, nest);
2280
2281 if (nla_put_u32(nlmsg, IFLA_LINK, index))
a5f5cb41 2282 return ret_errno(ENOMEM);
0ad19a3f 2283
2284 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2285 return ret_errno(ENOMEM);
0ad19a3f 2286
a5f5cb41 2287 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2288}
2289
/*
 * Write the string value (without its terminating NUL) to the file at path.
 *
 * Returns 0 on success or -errno of the failing open/write.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	/* Capture the write error before close() gets a chance to touch errno. */
	if (lxc_write_nointr(fd, value, strlen(value)) < 0)
		ret = -errno;

	close(fd);

	return ret;
}
2305
6dfa9581 2306static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 2307{
2308 int ret;
2309 char path[PATH_MAX];
6509154d 2310
2311 if (family != AF_INET && family != AF_INET6)
6dfa9581 2312 return -EINVAL;
6509154d 2313
387c1c70
CB
2314 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2315 family == AF_INET ? "ipv4" : "ipv6", ifname,
2316 "forwarding");
2317 if (ret < 0)
6dfa9581 2318 return -E2BIG;
6509154d 2319
6dfa9581
TP
2320 return proc_sys_net_write(path, flag ? "1" : "0");
2321}
2322
/* Enable forwarding on interface name for the given address family. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
2327
/* Disable forwarding on interface name for the given address family. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int enable = 0;

	return ip_forwarding_set(name, family, enable);
}
2332
0ad19a3f 2333static int neigh_proxy_set(const char *ifname, int family, int flag)
2334{
9ba8130c 2335 int ret;
419590da 2336 char path[PATH_MAX];
0ad19a3f 2337
2338 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 2339 return -EINVAL;
0ad19a3f 2340
387c1c70
CB
2341 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2342 family == AF_INET ? "ipv4" : "ipv6", ifname,
2343 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2344 if (ret < 0)
9ba8130c 2345 return -E2BIG;
0ad19a3f 2346
ebc73a67 2347 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 2348}
2349
6509154d 2350static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2351{
2352 int ret;
2353 char path[PATH_MAX];
2354 char buf[1] = "";
2355
2356 if (family != AF_INET && family != AF_INET6)
596a002c 2357 return ret_set_errno(-1, EINVAL);
6509154d 2358
387c1c70
CB
2359 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2360 family == AF_INET ? "ipv4" : "ipv6", ifname,
2361 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2362 if (ret < 0)
596a002c 2363 return ret_set_errno(-1, E2BIG);
6509154d 2364
2365 return lxc_read_file_expect(path, buf, 1, "1");
2366}
2367
/* Enable neighbour proxying on interface name for the given address family. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2372
/* Disable neighbour proxying on interface name for the given address family. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
2377
/* Return the value of the hexadecimal digit c, or -1 if c is not one. */
static int mac_hex_value(char c)
{
	/* <ctype.h> functions require a value representable as unsigned char;
	 * passing a negative plain char is undefined behaviour.
	 */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse a textual MAC address into sockaddr.
 *
 * sockaddr->sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are
 * written to sockaddr->sa_data. Each byte is one or two hexadecimal digits;
 * bytes may be separated by ':'. Parsing stops at the end of the string or
 * after ETH_ALEN bytes, whichever comes first; anything after that is not
 * examined.
 *
 * Returns 0 on success and -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		unsigned int val;
		int hi, lo;
		char c;

		/* The first character of a byte must be a hex digit. */
		hi = mac_hex_value(*macaddr++);
		if (hi < 0)
			return -EINVAL;
		val = (unsigned int)hi;

		/* The second one is either the low nibble, a separator or the
		 * end of the string (single-digit byte).
		 */
		c = *macaddr;
		lo = mac_hex_value(c);
		if (lo >= 0)
			val = (val << 4) | (unsigned int)lo;
		else if (c != ':' && c != '\0')
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2422
ebc73a67
CB
2423static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2424 void *acast, int prefix)
0ad19a3f 2425{
a5f5cb41
CB
2426 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2427 struct nl_handler nlh;
2428 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2429 int addrlen, err;
06f976ca 2430 struct ifaddrmsg *ifa;
0ad19a3f 2431
ebc73a67
CB
2432 addrlen = family == AF_INET ? sizeof(struct in_addr)
2433 : sizeof(struct in6_addr);
4bf1968d 2434
a5f5cb41 2435 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2436 if (err)
2437 return err;
0ad19a3f 2438
0ad19a3f 2439 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2440 if (!nlmsg)
a5f5cb41 2441 return ret_errno(ENOMEM);
0ad19a3f 2442
06f976ca 2443 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2444 if (!answer)
a5f5cb41 2445 return ret_errno(ENOMEM);
0ad19a3f 2446
a5f5cb41 2447 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2448 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2449
2450 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2451 if (!ifa)
a5f5cb41
CB
2452 return ret_errno(ENOMEM);
2453
06f976ca
SZ
2454 ifa->ifa_prefixlen = prefix;
2455 ifa->ifa_index = ifindex;
2456 ifa->ifa_family = family;
2457 ifa->ifa_scope = 0;
acf47e1b 2458
4bf1968d 2459 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
a5f5cb41 2460 return ret_errno(EINVAL);
0ad19a3f 2461
4bf1968d 2462 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
a5f5cb41 2463 return ret_errno(EINVAL);
0ad19a3f 2464
d8948a52 2465 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
a5f5cb41 2466 return ret_errno(EINVAL);
1f1b18e7 2467
ebc73a67 2468 /* TODO: multicast, anycast with ipv6 */
79881dc6
DL
2469 if (family == AF_INET6 &&
2470 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2471 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
a5f5cb41 2472 return ret_errno(EPROTONOSUPPORT);
0ad19a3f 2473
a5f5cb41 2474 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2475}
2476
/*
 * Add the IPv6 address addr/prefix to the interface with index ifindex.
 * mcast is handed to ip_addr_add() in its broadcast slot, acast in its
 * anycast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
2483
/*
 * Add the IPv4 address addr/prefix with broadcast address bcast to the
 * interface with index ifindex. No anycast address is passed.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
2489
ebc73a67
CB
2490/* Find an IFA_LOCAL (or IFA_ADDRESS if not IFA_LOCAL is present) address from
2491 * the given RTM_NEWADDR message. Allocates memory for the address and stores
2492 * that pointer in *res (so res should be an in_addr** or in6_addr**).
19a26f82 2493 */
6ce39620
CB
2494#pragma GCC diagnostic push
2495#pragma GCC diagnostic ignored "-Wcast-align"
2496
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *hdr = NLMSG_DATA(msg);
	struct rtattr *attr = IFA_RTA(hdr);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	/* Messages for another address family are skipped, not an error. */
	if (hdr->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk the rtattr's of this message. */
	for (; RTA_OK(attr, remaining); attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL && attr->rta_type != IFA_ADDRESS)
			continue;

		/* Sanity check. The family check above should make sure the
		 * address length is correct, but check here just in case.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* Allocate once: an IFA_ADDRESS found earlier is simply
		 * overwritten by a later IFA_LOCAL.
		 */
		if (*res == NULL) {
			*res = malloc(addrlen);
			if (*res == NULL)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is the preferred answer, stop looking. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
2541
6ce39620
CB
2542#pragma GCC diagnostic pop
2543
19a26f82
MK
2544static int ip_addr_get(int family, int ifindex, void **res)
2545{
a5f5cb41
CB
2546 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2547 struct nl_handler nlh;
2548 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2549 int answer_len, err;
06f976ca 2550 struct ifaddrmsg *ifa;
19a26f82 2551 struct nlmsghdr *msg;
ebc73a67 2552 int readmore = 0, recv_len = 0;
19a26f82 2553
a5f5cb41 2554 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
19a26f82
MK
2555 if (err)
2556 return err;
2557
19a26f82
MK
2558 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2559 if (!nlmsg)
a5f5cb41 2560 return ret_errno(ENOMEM);
19a26f82 2561
06f976ca 2562 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82 2563 if (!answer)
a5f5cb41 2564 return ret_errno(ENOMEM);
19a26f82 2565
ebc73a67
CB
2566 /* Save the answer buffer length, since it will be overwritten on the
2567 * first receive (and we might need to receive more than once).
2568 */
06f976ca
SZ
2569 answer_len = answer->nlmsghdr->nlmsg_len;
2570
ebc73a67 2571 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2572 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2573
06f976ca 2574 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b 2575 if (!ifa)
a5f5cb41
CB
2576 return ret_errno(ENOMEM);
2577
06f976ca 2578 ifa->ifa_family = family;
19a26f82 2579
ebc73a67
CB
2580 /* Send the request for addresses, which returns all addresses on all
2581 * interfaces.
2582 */
a5f5cb41 2583 err = netlink_send(nlh_ptr, nlmsg);
19a26f82 2584 if (err < 0)
a5f5cb41 2585 return ret_set_errno(err, errno);
19a26f82 2586
6ce39620
CB
2587#pragma GCC diagnostic push
2588#pragma GCC diagnostic ignored "-Wcast-align"
2589
19a26f82
MK
2590 do {
2591 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2592 * overwritten by a previous receive.
2593 */
06f976ca 2594 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2595
ebc73a67 2596 /* Get the (next) batch of reply messages. */
a5f5cb41 2597 err = netlink_rcv(nlh_ptr, answer);
19a26f82 2598 if (err < 0)
a5f5cb41 2599 return ret_set_errno(err, errno);
19a26f82
MK
2600
2601 recv_len = err;
2602 err = 0;
2603
ebc73a67 2604 /* Satisfy the typing for the netlink macros. */
06f976ca 2605 msg = answer->nlmsghdr;
19a26f82
MK
2606
2607 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2608 /* Stop reading if we see an error message. */
19a26f82 2609 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
2610 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2611 return ret_set_errno(errmsg->error, errno);
19a26f82
MK
2612 }
2613
ebc73a67 2614 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2615 if (msg->nlmsg_type == NLMSG_DONE) {
2616 readmore = 0;
2617 break;
2618 }
2619
a5f5cb41
CB
2620 if (msg->nlmsg_type != RTM_NEWADDR)
2621 return ret_errno(EINVAL);
19a26f82 2622
06f976ca
SZ
2623 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2624 if (ifa->ifa_index == ifindex) {
a5f5cb41
CB
2625 if (ifa_get_local_ip(family, msg, res) < 0)
2626 return ret_errno(EINVAL);
51e7a874 2627
ebc73a67 2628 /* Found a result, stop searching. */
19a26f82 2629 if (*res)
a5f5cb41 2630 return 0;
19a26f82
MK
2631 }
2632
ebc73a67
CB
2633 /* Keep reading more data from the socket if the last
2634 * message had the NLF_F_MULTI flag set.
2635 */
19a26f82
MK
2636 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2637
ebc73a67 2638 /* Look at the next message received in this buffer. */
19a26f82
MK
2639 msg = NLMSG_NEXT(msg, recv_len);
2640 }
2641 } while (readmore);
2642
6ce39620
CB
2643#pragma GCC diagnostic pop
2644
19a26f82 2645 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2646 * error.
2647 */
a5f5cb41 2648 return -1;
19a26f82
MK
2649}
2650
/* Look up an IPv6 address of interface ifindex; see ip_addr_get(). */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2655
/* Look up an IPv4 address of interface ifindex; see ip_addr_get(). */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2660
f8fee0e2
MK
2661static int ip_gateway_add(int family, int ifindex, void *gw)
2662{
a5f5cb41 2663 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2664 struct nl_handler nlh;
a5f5cb41
CB
2665 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2666 int addrlen, err;
06f976ca 2667 struct rtmsg *rt;
f8fee0e2 2668
ebc73a67
CB
2669 addrlen = family == AF_INET ? sizeof(struct in_addr)
2670 : sizeof(struct in6_addr);
f8fee0e2 2671
a5f5cb41 2672 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
f8fee0e2
MK
2673 if (err)
2674 return err;
2675
f8fee0e2
MK
2676 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2677 if (!nlmsg)
a5f5cb41 2678 return ret_errno(ENOMEM);
f8fee0e2 2679
06f976ca 2680 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2 2681 if (!answer)
a5f5cb41 2682 return ret_errno(ENOMEM);
f8fee0e2 2683
a5f5cb41 2684 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2685 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2686
2687 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b 2688 if (!rt)
a5f5cb41
CB
2689 return ret_errno(ENOMEM);
2690
06f976ca
SZ
2691 rt->rtm_family = family;
2692 rt->rtm_table = RT_TABLE_MAIN;
2693 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2694 rt->rtm_protocol = RTPROT_BOOT;
2695 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2696 /* "default" destination */
06f976ca 2697 rt->rtm_dst_len = 0;
f8fee0e2 2698
a2f9a670 2699 /* If gateway address not supplied, then a device route will be created instead */
a5f5cb41
CB
2700 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2701 return ret_errno(ENOMEM);
f8fee0e2
MK
2702
2703 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2704 * addresses for the gateway.
2705 */
f8fee0e2 2706 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
a5f5cb41 2707 return ret_errno(EINVAL);
f8fee0e2 2708
a5f5cb41 2709 return netlink_transaction(nlh_ptr, nlmsg, answer);
f8fee0e2
MK
2710}
2711
/* Add an IPv4 default route via gw on interface ifindex; see ip_gateway_add(). */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2716
/* Add an IPv6 default route via gw on interface ifindex; see ip_gateway_add(). */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
581c75e7 2721bool is_ovs_bridge(const char *bridge)
0d204771 2722{
ebc73a67 2723 int ret;
0d204771 2724 struct stat sb;
ebc73a67 2725 char brdirname[22 + IFNAMSIZ + 1] = {0};
0d204771 2726
387c1c70
CB
2727 ret = strnprintf(brdirname, 22 + IFNAMSIZ + 1,
2728 "/sys/class/net/%s/bridge", bridge);
2729 if (ret < 0)
ebc73a67
CB
2730 return false;
2731
2732 ret = stat(brdirname, &sb);
2733 if (ret < 0 && errno == ENOENT)
0d204771 2734 return true;
ebc73a67 2735
0d204771
SH
2736 return false;
2737}
2738
/* Argument bundle handed to the ovs-vsctl exec callbacks via run_command(). */
struct ovs_veth_args {
	const char *bridge; /* passed to ovs-vsctl as the bridge argument */
	const char *nic;    /* passed to ovs-vsctl as the port argument */
};
2743
cb0dc11b
CB
2744/* Called from a background thread - when nic goes away, remove it from the
2745 * bridge.
c43cbc04 2746 */
581c75e7 2747static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2748{
581c75e7 2749 struct ovs_veth_args *args = data;
cb0dc11b 2750
9c66dc4f 2751 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2752 return -1;
c43cbc04
SH
2753}
2754
581c75e7 2755int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2756{
c43cbc04 2757 int ret;
419590da 2758 char cmd_output[PATH_MAX];
581c75e7 2759 struct ovs_veth_args args;
6ad22d06 2760
581c75e7
CB
2761 args.bridge = bridge;
2762 args.nic = nic;
2763 ret = run_command(cmd_output, sizeof(cmd_output),
2764 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2765 if (ret < 0)
2766 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2767
581c75e7
CB
2768 return 0;
2769}
ebc73a67 2770
581c75e7
CB
2771static int lxc_ovs_attach_bridge_exec(void *data)
2772{
2773 struct ovs_veth_args *args = data;
ebc73a67 2774
9c66dc4f 2775 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2776 return -1;
2777}
ebc73a67 2778
581c75e7
CB
2779static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2780{
2781 int ret;
419590da 2782 char cmd_output[PATH_MAX];
581c75e7 2783 struct ovs_veth_args args;
ebc73a67 2784
581c75e7
CB
2785 args.bridge = bridge;
2786 args.nic = nic;
2787 ret = run_command(cmd_output, sizeof(cmd_output),
2788 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2789 if (ret < 0)
2790 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2791
581c75e7 2792 return 0;
0d204771 2793}
0d204771 2794
581c75e7 2795int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2796{
ebc73a67 2797 int err, fd, index;
9de31d5a 2798 size_t retlen;
0ad19a3f 2799 struct ifreq ifr;
2800
dae3fdf6 2801 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2802 return -EINVAL;
0ad19a3f 2803
2804 index = if_nametoindex(ifname);
2805 if (!index)
3cfc0f3a 2806 return -EINVAL;
0ad19a3f 2807
0d204771 2808 if (is_ovs_bridge(bridge))
581c75e7 2809 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2810
ad9429e5 2811 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2812 if (fd < 0)
3cfc0f3a 2813 return -errno;
0ad19a3f 2814
9de31d5a 2815 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2816 if (retlen >= IFNAMSIZ) {
2817 close(fd);
9de31d5a 2818 return -E2BIG;
42cc4083 2819 }
9de31d5a 2820
ebc73a67 2821 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2822 ifr.ifr_ifindex = index;
7d163508 2823 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2824 close(fd);
3cfc0f3a
MN
2825 if (err)
2826 err = -errno;
0ad19a3f 2827
2828 return err;
2829}
72d0e1cb 2830
8befa924
SH
2831int setup_private_host_hw_addr(char *veth1)
2832{
387c1c70
CB
2833 __do_close int sockfd = -EBADF;
2834 int err;
8befa924 2835 struct ifreq ifr;
8befa924 2836
ad9429e5 2837 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2838 if (sockfd < 0)
2839 return -errno;
2840
387c1c70
CB
2841 err = strnprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2842 if (err < 0)
2843 return err;
ebc73a67 2844
8befa924 2845 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
387c1c70 2846 if (err < 0)
8befa924 2847 return -errno;
8befa924
SH
2848
2849 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2850 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924
SH
2851 if (err < 0)
2852 return -errno;
2853
2854 return 0;
2855}
811ef482
CB
2856
2857int lxc_find_gateway_addresses(struct lxc_handler *handler)
2858{
811ef482
CB
2859 struct lxc_netdev *netdev;
2860 int link_index;
2861
87d0990c 2862 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
811ef482
CB
2863 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2864 continue;
2865
9c66dc4f
CB
2866 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2867 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2868
87d0990c 2869 if (is_empty_string(netdev->link))
9c66dc4f 2870 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2871
2872 link_index = if_nametoindex(netdev->link);
2873 if (!link_index)
2874 return -EINVAL;
2875
2876 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2877 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2878 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2879 }
2880
2881 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2882 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2883 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2884 }
2885 }
2886
2887 return 0;
2888}
2889
2890#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
071d0934
CB
2891static int lxc_create_network_unpriv_exec(const char *lxcpath,
2892 const char *lxcname,
2893 struct lxc_netdev *netdev, pid_t pid,
2894 unsigned int hooks_version)
811ef482
CB
2895{
2896 int ret;
2897 pid_t child;
2898 int bytes, pipefd[2];
2899 char *token, *saveptr = NULL;
095ead80 2900 char netdev_link[IFNAMSIZ];
419590da 2901 char buffer[PATH_MAX] = {0};
94b1cade 2902 size_t retlen;
811ef482 2903
9c66dc4f 2904 if (netdev->type != LXC_NET_VETH)
071d0934
CB
2905 return log_error_errno(-1, errno,
2906 "Network type %d not support for unprivileged use",
2907 netdev->type);
811ef482
CB
2908
2909 ret = pipe(pipefd);
9c66dc4f
CB
2910 if (ret < 0)
2911 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2912
2913 child = fork();
2914 if (child < 0) {
811ef482
CB
2915 close(pipefd[0]);
2916 close(pipefd[1]);
9c66dc4f 2917 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2918 }
2919
2920 if (child == 0) {
8335fd40 2921 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2922
2923 close(pipefd[0]);
2924
2925 ret = dup2(pipefd[1], STDOUT_FILENO);
2926 if (ret >= 0)
2927 ret = dup2(pipefd[1], STDERR_FILENO);
2928 close(pipefd[1]);
2929 if (ret < 0) {
2930 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2931 _exit(EXIT_FAILURE);
811ef482
CB
2932 }
2933
f2711167 2934 if (!is_empty_string(netdev->link))
9de31d5a 2935 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2936 else
9de31d5a
CB
2937 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2938 if (retlen >= IFNAMSIZ) {
2939 SYSERROR("Invalid network device name");
2940 _exit(EXIT_FAILURE);
2941 }
811ef482 2942
387c1c70
CB
2943 ret = strnprintf(pidstr, sizeof(pidstr), "%d", pid);
2944 if (ret < 0)
78070056 2945 _exit(EXIT_FAILURE);
8335fd40 2946 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2947
2948 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
071d0934 2949 lxcname, pidstr, netdev_link, !is_empty_string(netdev->name) ? netdev->name : "(null)");
3473ca76 2950 if (!is_empty_string(netdev->name))
811ef482
CB
2951 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2952 lxcpath, lxcname, pidstr, "veth", netdev_link,
2953 netdev->name, (char *)NULL);
2954 else
2955 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2956 lxcpath, lxcname, pidstr, "veth", netdev_link,
2957 (char *)NULL);
2958 SYSERROR("Failed to execute lxc-user-nic");
78070056 2959 _exit(EXIT_FAILURE);
811ef482
CB
2960 }
2961
2962 /* close the write-end of the pipe */
2963 close(pipefd[1]);
2964
9c66dc4f 2965 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2966 if (bytes < 0) {
74c6e2b0 2967 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2968 close(pipefd[0]);
6b9f82a9
CB
2969 } else {
2970 buffer[bytes - 1] = '\0';
811ef482 2971 }
811ef482
CB
2972
2973 ret = wait_for_pid(child);
2974 close(pipefd[0]);
9c66dc4f 2975 if (ret != 0 || bytes < 0)
071d0934
CB
2976 return log_error(-1, "lxc-user-nic failed to configure requested network: %s",
2977 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2978 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2979
2980 /* netdev->name */
2981 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2982 if (!token)
2983 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2984
e389f2af
CB
2985 /*
2986 * lxc-user-nic will take care of proper network device naming. So
fdd6be55 2987 * netdev->name and netdev->transient_name need to be identical to not
e389f2af
CB
2988 * trigger another rename later on.
2989 */
2990 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
fdd6be55
CB
2991 if (retlen < IFNAMSIZ) {
2992 retlen = strlcpy(netdev->transient_name, token, IFNAMSIZ);
2993 if (retlen < IFNAMSIZ)
2994 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2995 }
9c66dc4f 2996 if (retlen >= IFNAMSIZ)
071d0934
CB
2997 return log_error_errno(-1, E2BIG,
2998 "Container side veth device name returned by lxc-user-nic is too long");
811ef482 2999
74c6e2b0 3000 /* netdev->ifindex */
811ef482 3001 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3002 if (!token)
3003 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 3004
74c6e2b0 3005 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f 3006 if (ret < 0)
071d0934
CB
3007 return log_error_errno(-1, -ret,
3008 "Failed to convert string \"%s\" to integer", token);
811ef482 3009
74c6e2b0 3010 /* netdev->priv.veth_attr.veth1 */
811ef482 3011 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3012 if (!token)
3013 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 3014
94b1cade 3015 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f 3016 if (retlen >= IFNAMSIZ)
071d0934
CB
3017 return log_error_errno(-1, E2BIG,
3018 "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
3019
3020 /* netdev->priv.veth_attr.ifindex */
3021 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3022 if (!token)
3023 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
3024
3025 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f 3026 if (ret < 0)
071d0934
CB
3027 return log_error_errno(-1, -ret,
3028 "Failed to convert string \"%s\" to integer", token);
811ef482 3029
4d781681 3030 if (netdev->upscript) {
3031 char *argv[] = {
3032 "veth",
3033 netdev->link,
3034 netdev->priv.veth_attr.veth1,
3035 NULL,
3036 };
3037
e389f2af
CB
3038 ret = run_script_argv(lxcname, hooks_version, "net",
3039 netdev->upscript, "up", argv);
4d781681 3040 if (ret < 0)
3041 return -1;
071d0934 3042 }
4d781681 3043
811ef482
CB
3044 return 0;
3045}
3046
f0ecc19d 3047static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
3048 struct lxc_netdev *netdev,
3049 const char *netns_path)
811ef482
CB
3050{
3051 int bytes, ret;
3052 pid_t child;
3053 int pipefd[2];
25619b99 3054 char buffer[PATH_MAX] = {};
811ef482 3055
9c66dc4f
CB
3056 if (netdev->type != LXC_NET_VETH)
3057 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
3058
3059 ret = pipe(pipefd);
9c66dc4f
CB
3060 if (ret < 0)
3061 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
3062
3063 child = fork();
3064 if (child < 0) {
811ef482
CB
3065 close(pipefd[0]);
3066 close(pipefd[1]);
9c66dc4f 3067 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
3068 }
3069
3070 if (child == 0) {
8843fde4 3071 char *hostveth;
811ef482
CB
3072
3073 close(pipefd[0]);
3074
3075 ret = dup2(pipefd[1], STDOUT_FILENO);
3076 if (ret >= 0)
3077 ret = dup2(pipefd[1], STDERR_FILENO);
3078 close(pipefd[1]);
3079 if (ret < 0) {
3080 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 3081 _exit(EXIT_FAILURE);
811ef482
CB
3082 }
3083
f2711167 3084 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3085 hostveth = netdev->priv.veth_attr.pair;
3086 else
3087 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3088 if (is_empty_string(hostveth)) {
74c6e2b0 3089 SYSERROR("Host side veth device name is missing");
a30b9023 3090 _exit(EXIT_FAILURE);
74c6e2b0
CB
3091 }
3092
f2711167
CB
3093 if (is_empty_string(netdev->link)) {
3094 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 3095 _exit(EXIT_FAILURE);
74c6e2b0 3096 }
811ef482 3097
811ef482 3098 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 3099 lxcname, netns_path, netdev->link, hostveth);
811ef482 3100 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
3101 lxcname, netns_path, "veth", netdev->link, hostveth,
3102 (char *)NULL);
811ef482 3103 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 3104 _exit(EXIT_FAILURE);
811ef482
CB
3105 }
3106
3107 close(pipefd[1]);
3108
9c66dc4f 3109 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
3110 if (bytes < 0) {
3111 SYSERROR("Failed to read from pipe file descriptor.");
3112 close(pipefd[0]);
6b9f82a9
CB
3113 } else {
3114 buffer[bytes - 1] = '\0';
811ef482 3115 }
811ef482 3116
6b9f82a9 3117 ret = wait_for_pid(child);
9c66dc4f
CB
3118 close_prot_errno_disarm(pipefd[0]);
3119 if (ret != 0 || bytes < 0)
3120 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
3121 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 3122
811ef482
CB
3123 return 0;
3124}
3125
59eac805 3126static bool lxc_delete_network_unpriv(struct lxc_handler *handler)
1bd8d726
CB
3127{
3128 int ret;
87d0990c 3129 struct lxc_netdev *netdev;
1bd8d726
CB
3130 /* strlen("/proc/") = 6
3131 * +
8335fd40 3132 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
3133 * +
3134 * strlen("/fd/") = 4
3135 * +
8335fd40 3136 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
3137 * +
3138 * \0
3139 */
8335fd40 3140 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
3141
3142 *netns_path = '\0';
3143
9c66dc4f
CB
3144 if (handler->nsfd[LXC_NS_NET] < 0)
3145 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726 3146
387c1c70
CB
3147 ret = strnprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
3148 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
3149 if (ret < 0)
1bd8d726
CB
3150 return false;
3151
87d0990c 3152 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
1bd8d726 3153 char *hostveth = NULL;
1bd8d726
CB
3154
3155 /* We can only delete devices whose ifindex we have. If we don't
3156 * have the index it means that we didn't create it.
3157 */
3158 if (!netdev->ifindex)
3159 continue;
3160
3161 if (netdev->type == LXC_NET_PHYS) {
3162 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3163 netdev->link);
3164 if (ret < 0)
9c66dc4f 3165 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
3166 netdev->ifindex, netdev->link);
3167 else
9c66dc4f 3168 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 3169 netdev->ifindex, netdev->link);
b3259dc6
TP
3170
3171 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3172 if (ret < 0)
3173 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3174 netdev->ifindex, netdev->link);
66a7c406 3175 goto clear_ifindices;
1bd8d726
CB
3176 }
3177
3178 ret = netdev_deconf[netdev->type](handler, netdev);
3179 if (ret < 0)
3180 WARN("Failed to deconfigure network device");
3181
3182 if (netdev->type != LXC_NET_VETH)
66a7c406 3183 goto clear_ifindices;
1bd8d726 3184
f2711167 3185 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 3186 goto clear_ifindices;
1bd8d726 3187
f2711167 3188 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3189 hostveth = netdev->priv.veth_attr.pair;
3190 else
3191 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3192 if (is_empty_string(hostveth))
66a7c406 3193 goto clear_ifindices;
8843fde4 3194
1bd8d726
CB
3195 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3196 handler->name, netdev,
3197 netns_path);
3198 if (ret < 0) {
9c66dc4f 3199 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 3200 goto clear_ifindices;
1bd8d726 3201 }
9c66dc4f 3202 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
3203
3204clear_ifindices:
0858c829
CB
3205 /*
3206 * We need to clear any ifindices we recorded so liblxc won't
3207 * have cached stale data which would cause it to fail on
3208 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3209 */
3210 netdev->ifindex = 0;
3211 if (netdev->type == LXC_NET_PHYS) {
3212 netdev->priv.phys_attr.ifindex = 0;
3213 } else if (netdev->type == LXC_NET_VETH) {
3214 netdev->priv.veth_attr.veth1[0] = '\0';
3215 netdev->priv.veth_attr.ifindex = 0;
3216 }
1bd8d726
CB
3217 }
3218
bb84beda 3219 return true;
1bd8d726
CB
3220}
3221
6509154d 3222static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
6509154d 3223 struct lxc_inetdev *inet4dev;
3224 struct lxc_inet6dev *inet6dev;
3225 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3226 int err = 0;
5fe147e9
TP
3227 unsigned int lo_ifindex = 0, link_ifindex = 0;
3228
3229 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
3230 if (link_ifindex == 0)
3231 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 3232
6509154d 3233
3234 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
05a54a64 3235 if (!list_empty(&netdev->ipv4_addresses)) {
6509154d 3236 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
3237 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3238 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 3239 }
3240
3241 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
05a54a64 3242 if (!list_empty(&netdev->ipv6_addresses)) {
6509154d 3243 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
3244 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3245 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 3246
3247 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
3248 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3249 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 3250 }
3251
b670016a 3252 /* Perform IPVLAN specific checks. */
3253 if (netdev->type == LXC_NET_IPVLAN) {
3254 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
3255 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3256 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 3257
3258 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3259 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
3260 if (lo_ifindex == 0)
3261 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 3262 }
3263
05a54a64 3264 list_for_each_entry(inet4dev, &netdev->ipv4_addresses, head) {
6509154d 3265 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3266 return ret_set_errno(-1, -errno);
6509154d 3267
5fe147e9 3268 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3269 return ret_set_errno(-1, EINVAL);
b670016a 3270
3271 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3272 if (netdev->type == LXC_NET_IPVLAN) {
3273 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
3274 if (err < 0)
3275 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 3276 }
6509154d 3277 }
3278
05a54a64 3279 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
6509154d 3280 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3281 return ret_set_errno(-1, -errno);
6509154d 3282
5fe147e9 3283 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3284 return ret_set_errno(-1, EINVAL);
b670016a 3285
3286 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3287 if (netdev->type == LXC_NET_IPVLAN) {
3288 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
3289 if (err < 0)
3290 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3291 }
6509154d 3292 }
3293
3294 return 0;
3295}
3296
9c66dc4f
CB
/*
 * Remove the l2proxy state that was set up for a single ipv4 address.
 *
 * If @lo_ifindex is non-zero the /32 route for @ip on that interface is
 * deleted. If @link is non-empty the neighbour proxy entry for @ip on @link is
 * deleted. Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success and -1 with errno set to EINVAL on failure.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET_ADDRSTRLEN];
	unsigned int ifindex = 0;
	int failures = 0;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* A loopback ifindex means a static route to lo has to go away. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failures++;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* A link name means a neighbour proxy entry has to go away. */
	if (!is_empty_string(link)) {
		ifindex = if_nametoindex(link);
		if (ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, ifindex, ip) < 0)
			failures++;
	}

	if (failures == 0)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3329
9c66dc4f
CB
/*
 * Remove the l2proxy state that was set up for a single ipv6 address.
 *
 * If @lo_ifindex is non-zero the /128 route for @ip on that interface is
 * deleted. If @link is non-empty the neighbour proxy entry for @ip on @link is
 * deleted. Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success and -1 with errno set to EINVAL on failure.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET6_ADDRSTRLEN];
	unsigned int ifindex = 0;
	int failures = 0;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* A loopback ifindex means a static route to lo has to go away. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failures++;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* A link name means a neighbour proxy entry has to go away. */
	if (!is_empty_string(link)) {
		ifindex = if_nametoindex(link);
		if (ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, ifindex, ip) < 0)
			failures++;
	}

	if (failures == 0)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3364
cd32fc73
CB
3365static int lxc_delete_l2proxy(struct lxc_netdev *netdev)
3366{
b670016a 3367 unsigned int lo_ifindex = 0;
cd32fc73 3368 unsigned int err = 0;
6509154d 3369 struct lxc_inetdev *inet4dev;
3370 struct lxc_inet6dev *inet6dev;
6509154d 3371
b670016a 3372 /* Perform IPVLAN specific checks. */
3373 if (netdev->type == LXC_NET_IPVLAN) {
3374 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3375 lo_ifindex = if_nametoindex(loop_device);
b670016a 3376 if (lo_ifindex == 0) {
cd32fc73 3377 err++;
3ebffb98 3378 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3379 }
b670016a 3380 }
6509154d 3381
05a54a64 3382 list_for_each_entry(inet4dev, &netdev->ipv4_addresses, head) {
b670016a 3383 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
cd32fc73 3384 err++;
6509154d 3385 }
3386
05a54a64 3387 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
b670016a 3388 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
cd32fc73 3389 err++;
6509154d 3390 }
3391
cd32fc73
CB
3392 if (err > 0)
3393 return ret_errno(EINVAL);
6509154d 3394
3395 return 0;
3396}
3397
e389f2af 3398static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3399{
87d0990c 3400 struct lxc_netdev *netdev;
811ef482 3401
87d0990c 3402 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
9c66dc4f
CB
3403 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3404 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3405
6509154d 3406 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3407 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3408 if (lxc_setup_l2proxy(netdev))
3409 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3410 }
3411
bad2f913 3412 if (netdev_configure_server[netdev->type](handler, netdev))
9c66dc4f 3413 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3414 }
3415
3416 return 0;
3417}
3418
fdd6be55
CB
3419/*
3420 * LXC moves network devices into the target namespace based on their created
3421 * name. The created name can either be randomly generated for e.g. veth
3422 * devices or it can be the name of the existing device in the server's
3423 * namespaces. This is e.g. the case when moving physical devices. However this
3424 * can lead to weird clashes. Consider we have a network namespace that has the
3425 * following devices:
3426
3427 * 4: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3428 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3429 * altname enp7s0
3430 * 5: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3431 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3432 * altname enp8s0
3433 *
3434 * and the user generates the following network config for their container:
3435 *
3436 * lxc.net.0.type = phys
3437 * lxc.net.0.name = eth1
3438 * lxc.net.0.link = eth2
3439 *
3440 * lxc.net.1.type = phys
3441 * lxc.net.1.name = eth2
3442 * lxc.net.1.link = eth1
3443 *
3444 * This would cause LXC to move the devices eth1 and eth2 from the server's
3445 * network namespace into the container's network namespace:
3446 *
3447 * 24: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3448 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3449 * altname enp7s0
3450 * 25: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3451 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3452 * altname enp8s0
3453 *
3454 * According to the network config above we now need to rename the network
3455 * devices in the container's network namespace. Let's say we start with
3456 * renaming eth2 to eth1. This would immediately lead to a clash since the
3457 * container's network namespace already contains a network device with that
3458 * name. Renaming the other device would have the same problem.
3459 *
3460 * There are multiple ways to fix this but I'm concerned with keeping the logic
3461 * somewhat reasonable which is why we simply start creating transient device
3462 * names that are unique which we'll use to move and rename the network device
3463 * in the container's network namespace at the same time. And then we rename
3464 * based on those random devices names to the target name.
3465 *
3466 * Note that the transient name is based on the type of network device as
3467 * specified in the LXC config. However, that doesn't mean it's correct. LXD
3468 * passes veth devices and a range of other network devices (e.g. Infiniband
3469 * VFs etc.) via LXC_NET_PHYS even though they're not really "physical" in the
3470 * sense we like to think about it so you might see a veth device being
3471 * assigned a "physXXXXXX" transient name. That's not a problem.
3472 */
3473static int create_transient_name(struct lxc_netdev *netdev)
3474{
3475 const struct lxc_network_info *info;
3476
3477 if (!is_empty_string(netdev->transient_name))
3478 return syserror_set(-EINVAL, "Network device already had a transient name %s",
3479 netdev->transient_name);
3480
3481 info = &lxc_network_info[netdev->type];
3482 strlcpy(netdev->transient_name, info->template, info->template_len + 1);
3483
3484 if (!lxc_ifname_alnum_case_sensitive(netdev->transient_name))
3485 return syserror_set(-EINVAL, "Failed to create transient name for network device %s", netdev->created_name);
3486
3487 TRACE("Created transient name %s for network device", netdev->transient_name);
3488 return 0;
3489}
3490
43e2a964
CB
3491static int netdev_requires_move(const struct lxc_netdev *netdev)
3492{
3493 if (IN_SET(netdev->type, LXC_NET_EMPTY, LXC_NET_NONE))
3494 return false;
3495
3496 /*
3497 * Veth devices are directly created in the container's network
3498 * namespace so the device doesn't need to be moved into the
3499 * container's network namespace. The transient name will
3500 * already have been set above when we created the veth tunnel.
3501 */
3502 if (!netdev->ifindex)
3503 return false;
3504
3505 return true;
3506}
3507
e389f2af 3508int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3509{
e389f2af 3510 pid_t pid = handler->pid;
87d0990c 3511 struct lxc_netdev *netdev;
811ef482 3512
e0010464 3513 if (am_guest_unpriv())
74c6e2b0 3514 return 0;
811ef482 3515
87d0990c 3516 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3dd78294 3517 __do_free char *physname = NULL;
e389f2af 3518 int ret;
811ef482 3519
43e2a964 3520 if (!netdev_requires_move(netdev))
811ef482
CB
3521 continue;
3522
fdd6be55
CB
3523 ret = create_transient_name(netdev);
3524 if (ret < 0)
3525 return ret;
3526
3dd78294
CB
3527 if (netdev->type == LXC_NET_PHYS)
3528 physname = is_wlan(netdev->link);
3529
3530 if (physname)
fdd6be55 3531 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->transient_name);
3dd78294 3532 else
fdd6be55 3533 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->transient_name);
9c66dc4f 3534 if (ret)
fdd6be55
CB
3535 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d and rename to %s",
3536 netdev->created_name, netdev->ifindex, pid, netdev->transient_name);
811ef482 3537
fdd6be55
CB
3538 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d and renamed to %s",
3539 maybe_empty(netdev->created_name), netdev->ifindex, pid, netdev->transient_name);
811ef482
CB
3540 }
3541
3542 return 0;
3543}
3544
3c09b97c
CB
3545static int network_requires_advanced_setup(int type)
3546{
3547 if (type == LXC_NET_EMPTY)
3548 return false;
3549
3550 if (type == LXC_NET_NONE)
3551 return false;
3552
3553 return true;
3554}
3555
e389f2af 3556static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3557{
e389f2af
CB
3558 int hooks_version = handler->conf->hooks_version;
3559 const char *lxcname = handler->name;
3560 const char *lxcpath = handler->lxcpath;
e389f2af 3561 pid_t pid = handler->pid;
87d0990c 3562 struct lxc_netdev *netdev;
74c6e2b0 3563
87d0990c 3564 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3c09b97c 3565 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3566 continue;
3567
9c66dc4f
CB
3568 if (netdev->type != LXC_NET_VETH)
3569 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3570 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3571
3572 if (netdev->mtu)
3573 INFO("mtu ignored due to insufficient privilege");
3574
e389f2af
CB
3575 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3576 pid, hooks_version))
74c6e2b0
CB
3577 return -1;
3578 }
3579
3580 return 0;
3581}
3582
59eac805 3583static bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3584{
3585 int ret;
87d0990c 3586 struct lxc_netdev *netdev;
1bd8d726 3587
87d0990c 3588 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
811ef482 3589 char *hostveth = NULL;
811ef482
CB
3590
3591 /* We can only delete devices whose ifindex we have. If we don't
3592 * have the index it means that we didn't create it.
3593 */
3594 if (!netdev->ifindex)
3595 continue;
3596
0104c121
CB
3597 /*
3598 * If the network device has been moved back from the
3599 * containers network namespace, update the ifindex.
3600 */
3601 netdev->ifindex = if_nametoindex(netdev->name);
3602
6509154d 3603 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3604 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3605 if (lxc_delete_l2proxy(netdev))
3606 WARN("Failed to delete all l2proxy config");
3607 /* Don't return, let the network be cleaned up as normal. */
3608 }
3609
811ef482 3610 if (netdev->type == LXC_NET_PHYS) {
bb301db7
SB
3611 /* Physical interfaces are initially returned to the parent namespace
3612 * with their transient name to avoid collisions
3613 */
3614 netdev->ifindex = if_nametoindex(netdev->transient_name);
811ef482
CB
3615 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3616 if (ret < 0)
3617 WARN("Failed to rename interface with index %d "
b809f232
CB
3618 "from \"%s\" to its initial name \"%s\"",
3619 netdev->ifindex, netdev->name, netdev->link);
0b154989 3620 else {
29589196
CB
3621 TRACE("Renamed interface with index %d from "
3622 "\"%s\" to its initial name \"%s\"",
3623 netdev->ifindex, netdev->name,
3624 netdev->link);
0b154989
TP
3625
3626 /* Restore original MTU */
3627 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3628 if (ret < 0) {
3629 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3630 netdev->link, netdev->priv.phys_attr.mtu);
3631 } else {
3632 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3633 netdev->link, netdev->priv.phys_attr.mtu);
3634 }
3635 }
b3259dc6
TP
3636
3637 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3638 if (ret < 0)
3639 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3640 netdev->ifindex, netdev->link);
66a7c406 3641 goto clear_ifindices;
811ef482
CB
3642 }
3643
3644 ret = netdev_deconf[netdev->type](handler, netdev);
3645 if (ret < 0)
3646 WARN("Failed to deconfigure network device");
3647
811ef482 3648 if (netdev->type != LXC_NET_VETH)
66a7c406 3649 goto clear_ifindices;
811ef482 3650
811ef482
CB
3651 /* Explicitly delete host veth device to prevent lingering
3652 * devices. We had issues in LXD around this.
3653 */
f2711167 3654 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3655 hostveth = netdev->priv.veth_attr.pair;
3656 else
3657 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3658 if (is_empty_string(hostveth))
66a7c406 3659 goto clear_ifindices;
811ef482 3660
1ee56cff
CB
3661 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3662 ret = lxc_netdev_delete_by_name(hostveth);
3663 if (ret < 0)
3664 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3665
1ee56cff
CB
3666 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3667 } else if (!is_empty_string(netdev->link)) {
3668 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3669 if (ret < 0)
3670 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3671
1ee56cff
CB
3672 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3673 }
811ef482 3674
66a7c406 3675clear_ifindices:
ad2ddfcd 3676 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3677 * have cached stale data which would cause it to fail on reboot
3678 * where we don't re-read the on-disk config file.
3679 */
3680 netdev->ifindex = 0;
3681 if (netdev->type == LXC_NET_PHYS) {
3682 netdev->priv.phys_attr.ifindex = 0;
3683 } else if (netdev->type == LXC_NET_VETH) {
3684 netdev->priv.veth_attr.veth1[0] = '\0';
3685 netdev->priv.veth_attr.ifindex = 0;
3686 }
bb301db7
SB
3687
3688 /* Clear transient name */
3689 if (!is_empty_string (netdev->transient_name))
3690 {
3691 netdev->transient_name[0] = '\0';
3692 }
811ef482
CB
3693 }
3694
bb84beda 3695 return true;
811ef482
CB
3696}
3697
3698int lxc_requests_empty_network(struct lxc_handler *handler)
3699{
87d0990c 3700 struct list_head *netdevs = &handler->conf->netdevs;
811ef482 3701 bool found_none = false, found_nic = false;
87d0990c 3702 struct lxc_netdev *netdev;
811ef482 3703
87d0990c 3704 if (list_empty(netdevs))
811ef482
CB
3705 return 0;
3706
87d0990c 3707 list_for_each_entry(netdev, netdevs, head) {
811ef482
CB
3708
3709 if (netdev->type == LXC_NET_NONE)
3710 found_none = true;
3711 else
3712 found_nic = true;
3713 }
9c66dc4f 3714
811ef482
CB
3715 if (found_none && !found_nic)
3716 return 1;
9c66dc4f 3717
811ef482
CB
3718 return 0;
3719}
3720
3721/* try to move physical nics to the init netns */
b809f232 3722int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3723{
9c66dc4f
CB
3724 __do_close int oldfd = -EBADF;
3725 int netnsfd = handler->nsfd[LXC_NS_NET];
3726 struct lxc_conf *conf = handler->conf;
811ef482 3727 int ret;
811ef482 3728 char ifname[IFNAMSIZ];
87d0990c 3729 struct lxc_netdev *netdev;
811ef482 3730
04213960
TA
3731 /*
3732 * If we weren't asked to clone a new network namespace, there's
3733 * nothing to restore.
3734 */
3735 if (!(handler->ns_clone_flags & CLONE_NEWNET))
3736 return 0;
3737
b809f232
CB
3738 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3739 * the parent network namespace. We won't have this capability if we are
3740 * unprivileged.
3741 */
d0fbc7ba 3742 if (!handler->am_root)
b809f232 3743 return 0;
811ef482 3744
b809f232 3745 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3746
0037ab49 3747 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3748 if (oldfd < 0)
3749 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3750
b809f232 3751 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3752 if (ret < 0)
3753 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3754
87d0990c 3755 list_for_each_entry(netdev, &conf->netdevs, head) {
b809f232
CB
3756 if (netdev->type != LXC_NET_PHYS)
3757 continue;
3758
3759 /* Retrieve the name of the interface in the container's network
3760 * namespace.
3761 */
3762 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3763 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3764 continue;
3765 }
b809f232 3766
bb301db7
SB
3767 /* Restore physical interfaces to host's network namespace with its transient name
3768 * to avoid collisions with the host's other interfaces.
3769 */
3770 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->transient_name);
b809f232 3771 if (ret < 0)
9c66dc4f 3772 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3773 else
9c66dc4f 3774 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3775 }
811ef482 3776
b809f232 3777 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3778 if (ret < 0)
3779 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3780
3781 return 0;
811ef482
CB
3782}
3783
3784static int setup_hw_addr(char *hwaddr, const char *ifname)
3785{
9c66dc4f 3786 __do_close int fd = -EBADF;
811ef482
CB
3787 struct sockaddr sockaddr;
3788 struct ifreq ifr;
9c66dc4f 3789 int ret;
811ef482
CB
3790
3791 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3792 if (ret)
3793 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3794
3795 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3796 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3797 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3798
ad9429e5 3799 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3800 if (fd < 0)
3801 return -1;
3802
3803 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3804 if (ret)
6d1400b5 3805 SYSERROR("Failed to perform ioctl");
3806
9c66dc4f 3807 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3808
3809 return ret;
3810}
3811
2ec31bbd 3812static int setup_ipv4_addr(struct lxc_netdev *netdev)
811ef482 3813{
2ec31bbd 3814 int ifindex = netdev->ifindex;
811ef482 3815 int err;
2ec31bbd 3816 struct lxc_inetdev *inet4dev;
811ef482 3817
05a54a64 3818 list_for_each_entry(inet4dev, &netdev->ipv4_addresses, head) {
2ec31bbd
CB
3819 err = lxc_ipv4_addr_add(ifindex, &inet4dev->addr,
3820 &inet4dev->bcast, inet4dev->prefix);
9c66dc4f
CB
3821 if (err)
3822 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3823 }
3824
3825 return 0;
3826}
3827
cd32fc73 3828static int setup_ipv6_addr(struct lxc_netdev *netdev)
811ef482 3829{
811ef482 3830 int err;
cd32fc73
CB
3831 struct lxc_inet6dev *inet6dev;
3832 int ifindex = netdev->ifindex;
811ef482 3833
05a54a64 3834 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
811ef482
CB
3835 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3836 &inet6dev->mcast, &inet6dev->acast,
3837 inet6dev->prefix);
9c66dc4f
CB
3838 if (err)
3839 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3840 }
3841
3842 return 0;
3843}
3844
8bf64b77 3845static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3846{
811ef482 3847 int err;
009d6127 3848 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482 3849
811ef482 3850 /* set a mac address */
9c66dc4f
CB
3851 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3852 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3853
3854 /* setup ipv4 addresses on the interface */
2ec31bbd 3855 if (setup_ipv4_addr(netdev))
9c66dc4f 3856 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3857
3858 /* setup ipv6 addresses on the interface */
cd32fc73 3859 if (setup_ipv6_addr(netdev))
9c66dc4f 3860 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3861
3862 /* set the network device up */
3863 if (netdev->flags & IFF_UP) {
8bf64b77 3864 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3865 if (err)
3866 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3867
3868 /* the network is up, make the loopback up too */
3869 err = lxc_netdev_up("lo");
9c66dc4f
CB
3870 if (err)
3871 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3872 }
3873
811ef482 3874 /* setup ipv4 gateway on the interface */
a2f9a670 3875 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3876 if (!(netdev->flags & IFF_UP))
3877 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3878
05a54a64 3879 if (list_empty(&netdev->ipv4_addresses))
9c66dc4f 3880 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3881
a2f9a670 3882 /* Setup device route if ipv4_gateway_dev is enabled */
3883 if (netdev->ipv4_gateway_dev) {
3884 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3885 if (err < 0)
3886 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3887 } else {
009d6127 3888 /* Check the gateway address is valid */
3889 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3890 return ret_set_errno(-1, errno);
009d6127 3891
3892 /* Try adding a default route to the gateway address */
811ef482 3893 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3894 if (err < 0) {
3895 /* If adding the default route fails, this could be because the
3896 * gateway address is in a different subnet to the container's address.
3897 * To work around this, we try adding a static device route to the
3898 * gateway address first, and then try again.
3899 */
a2f9a670 3900 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3901 if (err < 0)
3902 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3903
a2f9a670 3904 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3905 if (err < 0)
3906 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3907 }
3908 }
3909 }
3910
3911 /* setup ipv6 gateway on the interface */
a2f9a670 3912 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3913 if (!(netdev->flags & IFF_UP))
3914 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3915
05a54a64 3916 if (list_empty(&netdev->ipv6_addresses) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
9c66dc4f 3917 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3918
a2f9a670 3919 /* Setup device route if ipv6_gateway_dev is enabled */
3920 if (netdev->ipv6_gateway_dev) {
3921 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3922 if (err < 0)
3923 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3924 } else {
009d6127 3925 /* Check the gateway address is valid */
3926 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3927 return ret_set_errno(-1, errno);
009d6127 3928
3929 /* Try adding a default route to the gateway address */
811ef482 3930 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3931 if (err < 0) {
3932 /* If adding the default route fails, this could be because the
3933 * gateway address is in a different subnet to the container's address.
3934 * To work around this, we try adding a static device route to the
3935 * gateway address first, and then try again.
3936 */
a2f9a670 3937 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3938 if (err < 0)
3939 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3940
a2f9a670 3941 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3942 if (err < 0)
3943 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3944 }
3945 }
3946 }
3947
8bf64b77 3948 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3949
3950 return 0;
3951}
3952
3a197a1b
CB
3953/**
3954 * Consider the following network layout:
3955 *
3956 * lxc.net.0.type = phys
3957 * lxc.net.0.link = eth2
3958 * lxc.net.0.name = eth%d
3959 *
3960 * lxc.net.1.type = phys
3961 * lxc.net.1.link = eth1
3962 * lxc.net.1.name = eth0
3963 *
3964 * If we simply follow this order and create the first network first the kernel
3965 * will allocate eth0 for the first network but the second network requests
3966 * that eth1 be renamed to eth0 in the container's network namespace which
3967 * would lead to a clash.
3968 *
3969 * Note, we don't handle cases like:
3970 *
3971 * lxc.net.0.type = phys
3972 * lxc.net.0.link = eth2
3973 * lxc.net.0.name = eth0
3974 *
3975 * lxc.net.1.type = phys
3976 * lxc.net.1.link = eth1
3977 * lxc.net.1.name = eth0
3978 *
3979 * That'll brutally fail of course but there's nothing we can do about it.
3980 */
87d0990c 3981int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf)
811ef482 3982{
3a197a1b 3983 bool needs_second_pass = false;
87d0990c
CB
3984 struct lxc_netdev *netdev;
3985 const struct list_head *netdevs = &conf->netdevs;
811ef482 3986
87d0990c 3987 if (list_empty(netdevs))
3a197a1b
CB
3988 return 0;
3989
3990 /* Configure all devices that have a specific target name. */
87d0990c 3991 list_for_each_entry(netdev, netdevs, head) {
8bf64b77 3992 int ret;
811ef482 3993
3a197a1b
CB
3994 if (is_empty_string(netdev->name) || strequal(netdev->name, "eth%d")) {
3995 needs_second_pass = true;
3996 continue;
3997 }
3998
bad2f913 3999 ret = netdev_configure_container[netdev->type](netdev);
8bf64b77
CB
4000 if (!ret)
4001 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
4002 if (ret)
4003 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482 4004 }
3a197a1b
CB
4005 INFO("Finished setting up network devices with caller assigned names");
4006
4007 if (needs_second_pass) {
4008 /* Configure all devices that have a kernel assigned name. */
87d0990c 4009 list_for_each_entry(netdev, netdevs, head) {
3a197a1b 4010 int ret;
811ef482 4011
3a197a1b
CB
4012 if (!is_empty_string(netdev->name) && !strequal(netdev->name, "eth%d"))
4013 continue;
4014
4015 ret = netdev_configure_container[netdev->type](netdev);
4016 if (!ret)
4017 ret = lxc_network_setup_in_child_namespaces_common(netdev);
4018 if (ret)
4019 return log_error_errno(-1, errno, "Failed to setup netdev");
4020 }
4021 INFO("Finished setting up network devices with kernel assigned names");
4022 }
811ef482
CB
4023
4024 return 0;
4025}
7ab1ba02 4026
3c09b97c 4027int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02 4028{
7ab1ba02 4029 int data_sock = handler->data_sock[0];
87d0990c 4030 struct lxc_netdev *netdev;
7ab1ba02 4031
87d0990c 4032 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
7ab1ba02 4033 int ret;
7ab1ba02 4034
3c09b97c 4035 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
4036 continue;
4037
7fbb15ec 4038 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 4039 if (ret < 0)
7ab1ba02 4040 return -1;
e389f2af 4041
fdd6be55 4042 ret = lxc_send_nointr(data_sock, netdev->transient_name, IFNAMSIZ, MSG_NOSIGNAL);
e389f2af
CB
4043 if (ret < 0)
4044 return -1;
4045
fdd6be55 4046 TRACE("Sent network device name \"%s\" to child", netdev->transient_name);
7ab1ba02
CB
4047 }
4048
4049 return 0;
4050}
4051
3c09b97c 4052int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02 4053{
7ab1ba02 4054 int data_sock = handler->data_sock[1];
87d0990c 4055 struct lxc_netdev *netdev;
7ab1ba02 4056
87d0990c 4057 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
7ab1ba02 4058 int ret;
7ab1ba02 4059
3c09b97c 4060 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
4061 continue;
4062
e3233f26 4063 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 4064 if (ret < 0)
7ab1ba02 4065 return -1;
e389f2af 4066
fdd6be55 4067 ret = lxc_recv_nointr(data_sock, netdev->transient_name, IFNAMSIZ, 0);
e389f2af
CB
4068 if (ret < 0)
4069 return -1;
54256301 4070
fdd6be55 4071 TRACE("Received network device name \"%s\" from parent", netdev->transient_name);
7ab1ba02
CB
4072 }
4073
4074 return 0;
4075}
a1ae535a
CB
4076
4077int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
4078{
a1ae535a 4079 int data_sock = handler->data_sock[0];
87d0990c
CB
4080 struct lxc_netdev *netdev;
4081 struct list_head *netdevs = &handler->conf->netdevs;
a1ae535a
CB
4082
4083 if (!handler->am_root)
4084 return 0;
4085
87d0990c 4086 list_for_each_entry(netdev, netdevs, head) {
a1ae535a 4087 int ret;
a1ae535a
CB
4088
4089 /* Send network device name in the child's namespace to parent. */
7fbb15ec 4090 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 4091 if (ret < 0)
7729f8e5 4092 return -1;
a1ae535a
CB
4093
4094 /* Send network device ifindex in the child's namespace to
4095 * parent.
4096 */
7fbb15ec 4097 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 4098 if (ret < 0)
7729f8e5 4099 return -1;
a1150aa1
CB
4100
4101 TRACE("Sent network device %s with ifindex %d to parent", maybe_empty(netdev->name), netdev->ifindex);
a1ae535a
CB
4102 }
4103
87d0990c 4104 if (!list_empty(netdevs))
e389f2af
CB
4105 TRACE("Sent network device names and ifindices to parent");
4106
a1ae535a 4107 return 0;
a1ae535a
CB
4108}
4109
4110int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
4111{
a1ae535a 4112 int data_sock = handler->data_sock[1];
87d0990c 4113 struct lxc_netdev *netdev;
a1ae535a
CB
4114
4115 if (!handler->am_root)
4116 return 0;
4117
87d0990c 4118 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
a1ae535a 4119 int ret;
a1ae535a
CB
4120
4121 /* Receive network device name in the child's namespace to
4122 * parent.
4123 */
e3233f26 4124 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 4125 if (ret < 0)
7729f8e5 4126 return -1;
a1ae535a
CB
4127
4128 /* Receive network device ifindex in the child's namespace to
4129 * parent.
4130 */
e3233f26 4131 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 4132 if (ret < 0)
7729f8e5 4133 return -1;
a1150aa1
CB
4134
4135 TRACE("Received network device %s with ifindex %d from child", maybe_empty(netdev->name), netdev->ifindex);
a1ae535a
CB
4136 }
4137
4138 return 0;
a1ae535a 4139}
bb84beda
CB
4140
4141void lxc_delete_network(struct lxc_handler *handler)
4142{
4143 bool bret;
4144
37631ddb
CB
4145 /*
4146 * Always expose namespace fd paths to network down hooks via
4147 * environment variables. No need to complicate things by passing them
4148 * as additional hook arguments.
4149 */
4150 lxc_expose_namespace_environment(handler);
4151
bb84beda
CB
4152 if (handler->am_root)
4153 bret = lxc_delete_network_priv(handler);
4154 else
4155 bret = lxc_delete_network_unpriv(handler);
4156 if (!bret)
4157 DEBUG("Failed to delete network devices");
4158 else
4159 DEBUG("Deleted network devices");
4160}
1cd95214 4161
1cd95214
CB
4162int lxc_netns_set_nsid(int fd)
4163{
41a3300d 4164 int ret;
0ce60f0d
CB
4165 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
4166 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4167 NLMSG_ALIGN(1024)];
1cd95214 4168 struct nl_handler nlh;
a5f5cb41 4169 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
0ce60f0d
CB
4170 struct nlmsghdr *hdr;
4171 struct rtgenmsg *msg;
9d036caa
CB
4172 const __s32 ns_id = -1;
4173 const __u32 netns_fd = fd;
1cd95214 4174
a5f5cb41 4175 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
1cd95214 4176 if (ret < 0)
41a3300d 4177 return -1;
1cd95214 4178
0ce60f0d 4179 memset(buf, 0, sizeof(buf));
6ce39620
CB
4180
4181#pragma GCC diagnostic push
4182#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
4183 hdr = (struct nlmsghdr *)buf;
4184 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4185#pragma GCC diagnostic pop
1cd95214 4186
0ce60f0d
CB
4187 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4188 hdr->nlmsg_type = RTM_NEWNSID;
4189 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4190 hdr->nlmsg_pid = 0;
4191 hdr->nlmsg_seq = RTM_NEWNSID;
4192 msg->rtgen_family = AF_UNSPEC;
1cd95214 4193
9d036caa
CB
4194 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4195 if (ret < 0)
a5f5cb41 4196 return ret_errno(ENOMEM);
9d036caa
CB
4197
4198 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4199 if (ret < 0)
a5f5cb41 4200 return ret_errno(ENOMEM);
1cd95214 4201
a5f5cb41 4202 return __netlink_transaction(nlh_ptr, hdr, hdr);
1cd95214 4203}
938980ba
CB
4204
/*
 * Index the route attributes found in the @len bytes starting at @rta by
 * attribute type.
 *
 * @tb must provide room for @max + 1 entries; it is cleared first. For each
 * attribute type not larger than @max the first attribute of that type is
 * stored in tb[type]; later duplicates and larger types are ignored.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, (max + 1) * sizeof(*tb));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short attr_type = rta->rta_type;

		/* Out of range for the table, or this type was seen already. */
		if (attr_type > max || tb[attr_type])
			continue;

		tb[attr_type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
4224
/*
 * Return the payload of @rta interpreted as a signed 32-bit integer.
 *
 * The payload is copied out with memcpy() instead of being read through a
 * cast pointer, so the access does not depend on the alignment or effective
 * type of the underlying buffer. The caller must make sure the attribute
 * carries at least sizeof(__s32) bytes of payload.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
4229
/*
 * Pointer to the first route attribute that follows a struct rtgenmsg located
 * at @r, i.e. @r advanced by the netlink-aligned size of struct rtgenmsg.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
4234
4235int lxc_netns_get_nsid(int fd)
4236{
a5f5cb41
CB
4237 struct nl_handler nlh;
4238 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
938980ba
CB
4239 int ret;
4240 ssize_t len;
4241 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
4242 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4243 NLMSG_ALIGN(1024)];
938980ba 4244 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
938980ba
CB
4245 struct nlmsghdr *hdr;
4246 struct rtgenmsg *msg;
938980ba
CB
4247 __u32 netns_fd = fd;
4248
a5f5cb41 4249 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
938980ba
CB
4250 if (ret < 0)
4251 return -1;
4252
4253 memset(buf, 0, sizeof(buf));
6ce39620
CB
4254
4255#pragma GCC diagnostic push
4256#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4257 hdr = (struct nlmsghdr *)buf;
4258 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4259#pragma GCC diagnostic pop
938980ba
CB
4260
4261 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4262 hdr->nlmsg_type = RTM_GETNSID;
4263 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4264 hdr->nlmsg_pid = 0;
4265 hdr->nlmsg_seq = RTM_GETNSID;
4266 msg->rtgen_family = AF_UNSPEC;
4267
9d036caa 4268 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
a5f5cb41
CB
4269 if (ret < 0)
4270 return ret_errno(ENOMEM);
938980ba 4271
a5f5cb41 4272 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
938980ba
CB
4273 if (ret < 0)
4274 return -1;
4275
4276 msg = NLMSG_DATA(hdr);
4277 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4278 if (len < 0)
a5f5cb41 4279 return ret_errno(EINVAL);
938980ba 4280
6ce39620
CB
4281#pragma GCC diagnostic push
4282#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4283 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4284 if (tb[__LXC_NETNSA_NSID])
4285 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4286#pragma GCC diagnostic pop
938980ba
CB
4287
4288 return -1;
4289}
e389f2af
CB
4290
4291int lxc_create_network(struct lxc_handler *handler)
4292{
4293 int ret;
4294
e389f2af
CB
4295 if (handler->am_root) {
4296 ret = lxc_create_network_priv(handler);
4297 if (ret)
4298 return -1;
4299
4300 return lxc_network_move_created_netdev_priv(handler);
4301 }
4302
4303 return lxc_create_network_unpriv(handler);
4304}