]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
github: Update for main branch
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
1160ce89
CB
3#include "config.h"
4
d38dd64a 5#include <arpa/inet.h>
cb0dc11b
CB
6#include <ctype.h>
7#include <errno.h>
8#include <fcntl.h>
0ad19a3f 9#include <linux/netlink.h>
10#include <linux/rtnetlink.h>
11#include <linux/sockios.h>
cb0dc11b
CB
12#include <net/ethernet.h>
13#include <net/if.h>
14#include <net/if_arp.h>
15#include <netinet/in.h>
d38dd64a
CB
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
cb0dc11b
CB
19#include <sys/inotify.h>
20#include <sys/ioctl.h>
21#include <sys/param.h>
22#include <sys/socket.h>
23#include <sys/stat.h>
24#include <sys/types.h>
d38dd64a
CB
25#include <time.h>
26#include <unistd.h>
f549edcc 27
58db1a61 28#include "netns_ifaddrs.h"
7ab1ba02 29#include "af_unix.h"
72d0e1cb 30#include "conf.h"
e3233f26 31#include "file_utils.h"
cb0dc11b 32#include "log.h"
8335fd40 33#include "macro.h"
95ea3d1f 34#include "memory_utils.h"
cb0dc11b
CB
35#include "network.h"
36#include "nl.h"
f40988c7 37#include "process_utils.h"
fdd6be55 38#include "string_utils.h"
59524108 39#include "syscall_wrappers.h"
0d204771 40#include "utils.h"
0ad19a3f 41
34498dea 42#if !HAVE_STRLCPY
58db1a61 43#include "strlcpy.h"
9de31d5a
CB
44#endif
45
ac2cecc4 46lxc_log_define(network, lxc);
f8fee0e2 47
bad2f913
CB
/*
 * Callback signatures for per-network-type hooks.
 *
 * The "server" variants receive both the container handler and the network
 * device; the "container" variant receives only the network device. All of
 * them return an int status.
 */
typedef int (*netdev_configure_server_cb)(struct lxc_handler *, struct lxc_netdev *);
typedef int (*netdev_configure_container_cb)(struct lxc_netdev *);
typedef int (*netdev_shutdown_server_cb)(struct lxc_handler *, struct lxc_netdev *);
51
3392d379
CB
52const struct lxc_network_info {
53 const char *name;
fdd6be55
CB
54 const char template[IFNAMSIZ];
55 size_t template_len;
3392d379 56} lxc_network_info[LXC_NET_MAXCONFTYPE + 1] = {
fdd6be55
CB
57 [LXC_NET_EMPTY] = { "empty", "emptXXXXXX", STRLITERALLEN("emptXXXXXX") },
58 [LXC_NET_VETH] = { "veth", "vethXXXXXX", STRLITERALLEN("vethXXXXXX") },
59 [LXC_NET_MACVLAN] = { "macvlan", "macvXXXXXX", STRLITERALLEN("macvXXXXXX") },
60 [LXC_NET_IPVLAN] = { "ipvlan", "ipvlXXXXXX", STRLITERALLEN("ipvlXXXXXX") },
61 [LXC_NET_PHYS] = { "phys", "physXXXXXX", STRLITERALLEN("physXXXXXX") },
62 [LXC_NET_VLAN] = { "vlan", "vlanXXXXXX", STRLITERALLEN("vlanXXXXXX") },
63 [LXC_NET_NONE] = { "none", "noneXXXXXX", STRLITERALLEN("noneXXXXXX") },
64 [LXC_NET_MAXCONFTYPE] = { NULL, "", 0 }
3392d379
CB
65};
66
67const char *lxc_net_type_to_str(int type)
68{
69 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
70 return NULL;
71
72 return lxc_network_info[type].name;
73}
74
75static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
76
77char *lxc_ifname_alnum_case_sensitive(char *template)
78{
79 char name[IFNAMSIZ];
80 size_t i = 0;
81#ifdef HAVE_RAND_R
82 unsigned int seed;
83
84 seed = randseed(false);
85#else
86
87 (void)randseed(true);
88#endif
89
90 if (strlen(template) >= IFNAMSIZ)
91 return NULL;
92
93 /* Generate random names until we find one that doesn't exist. */
94 for (;;) {
95 name[0] = '\0';
96 (void)strlcpy(name, template, IFNAMSIZ);
97
98 for (i = 0; i < strlen(name); i++) {
99 if (name[i] == 'X') {
100#ifdef HAVE_RAND_R
101 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
102#else
103 name[i] = padchar[rand() % strlen(padchar)];
104#endif
105 }
106 }
107
108 if (if_nametoindex(name) == 0)
109 break;
110 }
111
112 (void)strlcpy(template, name, strlen(template) + 1);
113
114 return template;
115}
/* Interface name "lo"; presumably the loopback device (users are not visible in this section). */
static const char loop_device[] = "lo";
811ef482 117
b670016a 118static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 119{
d16bda44 120 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 121 struct nl_handler nlh = NL_HANDLER_INIT;
d16bda44
CB
122 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
123 int addrlen, err;
8f82874c 124 struct rtmsg *rt;
8f82874c 125
126 addrlen = family == AF_INET ? sizeof(struct in_addr)
127 : sizeof(struct in6_addr);
128
d16bda44 129 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 130 if (err)
131 return err;
132
8f82874c 133 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
134 if (!nlmsg)
d16bda44 135 return -ENOMEM;
8f82874c 136
137 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
138 if (!answer)
a5f5cb41 139 return -ENOMEM;
8f82874c 140
141 nlmsg->nlmsghdr->nlmsg_flags =
142 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 143 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 144
145 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
146 if (!rt)
a5f5cb41 147 return -ENOMEM;
d16bda44 148
8f82874c 149 rt->rtm_family = family;
150 rt->rtm_table = RT_TABLE_MAIN;
151 rt->rtm_scope = RT_SCOPE_LINK;
152 rt->rtm_protocol = RTPROT_BOOT;
153 rt->rtm_type = RTN_UNICAST;
154 rt->rtm_dst_len = netmask;
155
8f82874c 156 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
157 return -EINVAL;
158
8f82874c 159 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
160 return -EINVAL;
161
162 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 163}
164
/* Request a new route (RTM_NEWROUTE) to the IPv4 destination @dest/@netmask via interface @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_NEWROUTE, AF_INET, ifindex, dest, netmask);
}
169
/* Request a new route (RTM_NEWROUTE) to the IPv6 destination @dest/@netmask via interface @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_NEWROUTE, AF_INET6, ifindex, dest, netmask);
}
174
/* Request removal (RTM_DELROUTE) of the route to the IPv4 destination @dest/@netmask via interface @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_DELROUTE, AF_INET, ifindex, dest, netmask);
}
179
/* Request removal (RTM_DELROUTE) of the route to the IPv6 destination @dest/@netmask via interface @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_DELROUTE, AF_INET6, ifindex, dest, netmask);
}
184
303707f6 185static int setup_ipv4_routes(struct lxc_netdev *netdev)
d4a7da46 186{
303707f6
CB
187 int ifindex = netdev->priv.veth_attr.ifindex;
188 struct lxc_inetdev *inetdev;
d4a7da46 189 int err;
190
303707f6 191 list_for_each_entry(inetdev, &netdev->priv.veth_attr.ipv4_routes, head) {
d4a7da46 192 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
193 if (err)
194 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 195 }
196
197 return 0;
198}
199
6bf0c06b 200static int setup_ipv6_routes(struct lxc_netdev *netdev)
d4a7da46 201{
d4a7da46 202 int err;
6bf0c06b
CB
203 struct lxc_inet6dev *inet6dev;
204 int ifindex = netdev->priv.veth_attr.ifindex;
d4a7da46 205
6bf0c06b 206 list_for_each_entry(inet6dev, &netdev->priv.veth_attr.ipv6_routes, head) {
d4a7da46 207 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
208 if (err)
209 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 210 }
211
212 return 0;
213}
214
2ec31bbd 215static int setup_ipv4_addr_routes(struct lxc_netdev *netdev)
6dfa9581 216{
6dfa9581 217 int err;
2ec31bbd
CB
218 struct lxc_inetdev *inetdev;
219 int ifindex;
6dfa9581 220
2ec31bbd
CB
221 if (netdev->type != LXC_NET_VETH)
222 return ret_errno(EINVAL);
6dfa9581 223
2ec31bbd 224 ifindex = netdev->priv.veth_attr.ifindex;
05a54a64 225 list_for_each_entry(inetdev, &netdev->ipv4_addresses, head) {
6dfa9581 226 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
6dfa9581 227 if (err)
9c66dc4f 228 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
229 }
230
231 return 0;
232}
233
cd32fc73 234static int setup_ipv6_addr_routes(struct lxc_netdev *netdev)
6dfa9581 235{
6dfa9581 236 int err;
cd32fc73
CB
237 struct lxc_inet6dev *inet6dev;
238 int ifindex;
6dfa9581 239
cd32fc73
CB
240 if (netdev->type != LXC_NET_VETH)
241 return ret_errno(EINVAL);
242
243 ifindex = netdev->priv.veth_attr.ifindex;
05a54a64 244 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
6dfa9581
TP
245
246 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
247 if (err)
9c66dc4f 248 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
249 }
250
251 return 0;
252}
253
5fe147e9 254static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 255{
d16bda44 256 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 257 struct nl_handler nlh = NL_HANDLER_INIT;
d16bda44
CB
258 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
259 int addrlen, err;
5fe147e9 260 struct ndmsg *rt;
6dfa9581 261
5fe147e9 262 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 263
d16bda44 264 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
265 if (err)
266 return err;
6dfa9581 267
5fe147e9
TP
268 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
269 if (!nlmsg)
d16bda44 270 return -ENOMEM;
6dfa9581 271
5fe147e9
TP
272 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
273 if (!answer)
d16bda44 274 return -ENOMEM;
6dfa9581 275
5fe147e9
TP
276 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
277 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 278
5fe147e9
TP
279 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
280 if (!rt)
d16bda44
CB
281 return -ENOMEM;
282
5fe147e9
TP
283 rt->ndm_ifindex = ifindex;
284 rt->ndm_flags = NTF_PROXY;
285 rt->ndm_type = NDA_DST;
286 rt->ndm_family = family;
6dfa9581 287
5fe147e9 288 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 289 return -EINVAL;
6dfa9581 290
d16bda44 291 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
292}
293
294static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
295{
296 int ret;
297 char path[PATH_MAX];
298 char buf[1] = "";
299
300 if (family != AF_INET && family != AF_INET6)
596a002c 301 return ret_set_errno(-1, EINVAL);
6dfa9581 302
387c1c70
CB
303 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
304 family == AF_INET ? "ipv4" : "ipv6", ifname,
305 "forwarding");
306 if (ret < 0)
596a002c 307 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
308
309 return lxc_read_file_expect(path, buf, 1, "1");
310}
311
622f05c7
TP
/*
 * Payload of the IFLA_BRIDGE_VLAN_INFO attribute built in lxc_bridge_vlan().
 * The field order is part of the attribute layout and must not change.
 */
struct bridge_vlan_info {
	__u16 flags;	/* BRIDGE_VLAN_INFO_* flags */
	__u16 vid;	/* VLAN ID */
};
316
317static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
318{
319 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 320 struct nl_handler nlh = NL_HANDLER_INIT;
622f05c7
TP
321 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
322 int err;
323 struct ifinfomsg *ifi;
324 struct rtattr *nest;
325 unsigned short bridge_flags = 0;
326 struct bridge_vlan_info vlan_info;
327
328 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
329 if (err)
330 return err;
331
332 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
333 if (!nlmsg)
334 return ret_errno(ENOMEM);
335
336 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
337 if (!answer)
338 return ret_errno(ENOMEM);
339
340 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
341 nlmsg->nlmsghdr->nlmsg_type = operation;
342
343 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
344 if (!ifi)
345 return ret_errno(ENOMEM);
346 ifi->ifi_family = AF_BRIDGE;
347 ifi->ifi_index = ifindex;
348
349 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
350 if (!nest)
351 return ret_errno(ENOMEM);
352
353 bridge_flags |= BRIDGE_FLAGS_MASTER;
354 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
355 return ret_errno(ENOMEM);
356
357 vlan_info.vid = vlan_id;
358 vlan_info.flags = 0;
359 if (!tagged)
360 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
361
362 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
363 return ret_errno(ENOMEM);
364
365 nla_end_nested(nlmsg, nest);
366
367 return netlink_transaction(nlh_ptr, nlmsg, answer);
368}
369
/* Add VLAN @vlan_id to bridge port @ifindex (RTM_SETLINK); @tagged selects tagged vs. PVID/untagged membership. */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	return lxc_bridge_vlan(ifindex, RTM_SETLINK, vlan_id, tagged);
}
374
/* Remove VLAN @vlan_id from bridge port @ifindex (RTM_DELLINK). */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	return lxc_bridge_vlan(ifindex, RTM_DELLINK, vlan_id, false);
}
379
380static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
381{
382 struct lxc_list *iterator;
383 int err;
384
385 lxc_list_for_each(iterator, vlan_ids) {
386 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
387
388 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
389 if (err)
390 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
391 }
392
393 return 0;
394}
395
33320936
TP
396static int validate_veth(struct lxc_netdev *netdev)
397{
398 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
399 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
400 if (netdev->priv.veth_attr.vlan_id_set)
401 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
402
403 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
404 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
405 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
406 }
407
408 if (netdev->priv.veth_attr.vlan_id_set) {
409 struct lxc_list *it;
410 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
411 unsigned short i = PTR_TO_USHORT(it->elem);
412 if (i == netdev->priv.veth_attr.vlan_id)
413 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
414 }
415 }
416
417 return 0;
418}
419
420static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
421{
422 int err, rc, veth1index;
423 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
424 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
425
426 /* Skip setup if no VLAN options are specified. */
427 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
428 return 0;
429
430 /* Check vlan filtering is enabled on parent bridge. */
387c1c70
CB
431 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
432 if (rc < 0)
33320936
TP
433 return -1;
434
435 rc = lxc_read_from_file(path, buf, sizeof(buf));
436 if (rc < 0)
437 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
438
439 buf[rc - 1] = '\0';
440
6ee997a7 441 if (!strequal(buf, "1"))
33320936
TP
442 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
443
444 /* Get veth1 ifindex for use with netlink. */
445 veth1index = if_nametoindex(veth1);
446 if (!veth1index)
447 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
448
449 /* Configure untagged VLAN settings on bridge port if specified. */
450 if (netdev->priv.veth_attr.vlan_id_set) {
451 unsigned short default_pvid;
452
453 /* Get the bridge's default VLAN PVID. */
387c1c70
CB
454 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
455 if (rc < 0)
33320936
TP
456 return -1;
457
458 rc = lxc_read_from_file(path, buf, sizeof(buf));
459 if (rc < 0)
460 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
461
462 buf[rc - 1] = '\0';
463 err = get_u16(&default_pvid, buf, 0);
464 if (err)
465 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
466
467 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
468 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
469 err = lxc_bridge_vlan_del(veth1index, default_pvid);
470 if (err)
471 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
472 }
473
474 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
475 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
476 if (err)
477 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
478 }
479 }
480
481 /* Configure tagged VLAN settings on bridge port if specified. */
482 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
483 if (err)
484 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
485
486 return 0;
487}
488
8f7c3358
TP
/*
 * Arguments handed to lxc_ovs_setup_bridge_vlan_exec() through
 * run_command(). Only @trunks is released by free_ovs_veth_vlan_args().
 */
struct ovs_veth_vlan_args {
	const char *nic;	/* Name of the port to configure. */
	const char *vlan_mode;	/* Port VLAN mode. */
	short vlan_id;		/* PVID VLAN ID. */
	char *trunks;		/* Comma delimited list of tagged VLAN IDs. */
};
495
d2f8b272
TP
/* Release the trunks string held by @args via free_disarm(); @args itself is not freed. */
static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args *args)
{
	free_disarm(args->trunks);
}
8f7c3358
TP
500
501static int lxc_ovs_setup_bridge_vlan_exec(void *data)
502{
503 struct ovs_veth_vlan_args *args = data;
785e1540
TP
504 __do_free char *vlan_mode = NULL, *tag = NULL, *trunks = NULL;
505
506 if (!args->vlan_mode)
507 return ret_errno(EINVAL);
8f7c3358
TP
508
509 vlan_mode = must_concat(NULL, "vlan_mode=", args->vlan_mode, (char *)NULL);
510
785e1540 511 if (args->vlan_id > BRIDGE_VLAN_NONE) {
8f7c3358
TP
512 char buf[5];
513 int rc;
514
387c1c70
CB
515 rc = strnprintf(buf, sizeof(buf), "%u", args->vlan_id);
516 if (rc < 0)
72e8122b 517 return log_error_errno(-1, EINVAL, "Failed to parse ovs bridge vlan \"%d\"", args->vlan_id);
8f7c3358
TP
518
519 tag = must_concat(NULL, "tag=", buf, (char *)NULL);
520 }
521
785e1540 522 if (args->trunks)
8f7c3358
TP
523 trunks = must_concat(NULL, "trunks=", args->trunks, (char *)NULL);
524
525 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
785e1540 526 if (tag && trunks)
8f7c3358 527 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, trunks, (char *)NULL);
785e1540 528 else if (tag)
8f7c3358 529 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, (char *)NULL);
785e1540 530 else if (trunks)
8f7c3358
TP
531 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, trunks, (char *)NULL);
532 else
533 return -EINVAL;
534
535 return -errno;
536}
537
538static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
539{
540 int taggedLength = lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids);
541 struct ovs_veth_vlan_args args;
542 args.nic = veth1;
1ee07848
TP
543 args.vlan_mode = NULL;
544 args.vlan_id = BRIDGE_VLAN_NONE;
545 args.trunks = NULL;
8f7c3358
TP
546
547 /* Skip setup if no VLAN options are specified. */
548 if (!netdev->priv.veth_attr.vlan_id_set && taggedLength <= 0)
549 return 0;
550
551 /* Configure untagged VLAN settings on bridge port if specified. */
552 if (netdev->priv.veth_attr.vlan_id_set) {
553 if (netdev->priv.veth_attr.vlan_id == BRIDGE_VLAN_NONE && taggedLength <= 0)
554 return log_error_errno(-1, EINVAL, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
555
556 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
557 * Also set the vlan_mode=access, which will drop any tagged frames.
558 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
559 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
560 */
561 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
562 args.vlan_mode = "access";
563 args.vlan_id = netdev->priv.veth_attr.vlan_id;
564 }
565 }
566
567 if (taggedLength > 0) {
568 args.vlan_mode = "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
569
570 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
571 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
572 args.vlan_mode = "native-untagged";
573 }
574
575 struct lxc_list *iterator;
576 lxc_list_for_each(iterator, &netdev->priv.veth_attr.vlan_tagged_ids) {
577 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
578 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
579 int rc;
580
387c1c70
CB
581 rc = strnprintf(buf, sizeof(buf), "%u", vlan_id);
582 if (rc < 0) {
3fe6b5cf 583 free_ovs_veth_vlan_args(&args);
8f7c3358 584 return log_error_errno(-1, EINVAL, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id, veth1);
3fe6b5cf 585 }
8f7c3358 586
1ee07848
TP
587 if (args.trunks)
588 args.trunks = must_concat(NULL, args.trunks, buf, ",", (char *)NULL);
589 else
590 args.trunks = must_concat(NULL, buf, ",", (char *)NULL);
8f7c3358
TP
591 }
592 }
593
1ee07848 594 if (args.vlan_mode) {
8f7c3358
TP
595 int ret;
596 char cmd_output[PATH_MAX];
597
598 ret = run_command(cmd_output, sizeof(cmd_output), lxc_ovs_setup_bridge_vlan_exec, (void *)&args);
3fe6b5cf
TP
599 if (ret < 0) {
600 free_ovs_veth_vlan_args(&args);
8f7c3358 601 return log_error_errno(-1, ret, "Failed to setup openvswitch vlan on port \"%s\": %s", args.nic, cmd_output);
3fe6b5cf 602 }
8f7c3358
TP
603 }
604
3fe6b5cf 605 free_ovs_veth_vlan_args(&args);
8f7c3358
TP
606 return 0;
607}
608
bad2f913 609static int netdev_configure_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 610{
54256301 611 int err;
a00fbab5 612 unsigned int mtu = 1500;
811ef482
CB
613 char *veth1, *veth2;
614 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 615
33320936
TP
616 err = validate_veth(netdev);
617 if (err)
618 return err;
619
f2711167 620 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
621 veth1 = netdev->priv.veth_attr.pair;
622 if (handler->conf->reboot)
623 lxc_netdev_delete_by_name(veth1);
624 } else {
387c1c70
CB
625 err = strnprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
626 if (err < 0)
811ef482
CB
627 return -1;
628
3646ffd9 629 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
811ef482
CB
630 if (!veth1)
631 return -1;
632
633 /* store away for deconf */
634 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
635 }
636
387c1c70
CB
637 err = strnprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
638 if (err < 0)
d34212ad
CB
639 return -1;
640
3646ffd9 641 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
811ef482 642 if (!veth2)
54256301
CB
643 return -1;
644
a00fbab5
TP
645 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
646 if (netdev->mtu) {
647 if (lxc_safe_uint(netdev->mtu, &mtu))
648 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 649 } else if (!is_empty_string(netdev->link)) {
54256301 650 int ifindex_mtu;
811ef482 651
54256301
CB
652 ifindex_mtu = if_nametoindex(netdev->link);
653 if (ifindex_mtu) {
654 mtu = netdev_get_mtu(ifindex_mtu);
655 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
656 }
657 }
658
be538b8b
CD
659 err = lxc_veth_create(veth1, veth2, handler->pid, mtu,
660 netdev->priv.veth_attr.n_rxqueues, netdev->priv.veth_attr.n_txqueues);
9c66dc4f
CB
661 if (err)
662 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 663
fdd6be55
CB
664 /*
665 * Veth devices are directly created in the container's network
666 * namespace so the device doesn't need to be moved into the
667 * container's network namespace. Make this explicit by setting the
668 * devices ifindex to 0.
669 */
670 netdev->ifindex = 0;
671
24190194
CB
672 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
673
fdd6be55
CB
674 /*
675 * Since the device won't be moved transient name generation won't
676 * happen. But the transient name is needed for the container to
677 * retrieve the ifindex for the device.
678 */
679 strlcpy(netdev->transient_name, veth2, IFNAMSIZ);
680
681 /*
682 * Changing the high byte of the mac address to 0xfe, the bridge interface
811ef482 683 * will always keep the host's mac address and not take the mac address
fdd6be55
CB
684 * of a container.
685 */
811ef482
CB
686 err = setup_private_host_hw_addr(veth1);
687 if (err) {
6d1400b5 688 errno = -err;
689 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
690 goto out_delete;
691 }
692
8da62485
CB
693 /* Retrieve ifindex of the host's veth device. */
694 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
695 if (!netdev->priv.veth_attr.ifindex) {
696 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
697 goto out_delete;
698 }
699
811ef482
CB
700 if (mtu) {
701 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 702 if (err) {
6d1400b5 703 errno = -err;
54256301 704 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
705 goto out_delete;
706 }
707 }
708
f2711167 709 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
26da53c3
TP
710 if (!lxc_nic_exists(netdev->link)) {
711 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
712 goto out_delete;
713 }
714
811ef482
CB
715 err = lxc_bridge_attach(netdev->link, veth1);
716 if (err) {
6d1400b5 717 errno = -err;
26da53c3 718 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
811ef482
CB
719 goto out_delete;
720 }
721 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
33320936 722
38790036
TP
723 if (is_ovs_bridge(netdev->link)) {
724 err = setup_veth_ovs_bridge_vlan(veth1, netdev);
725 if (err) {
726 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1);
727 lxc_ovs_delete_port(netdev->link, veth1);
728 goto out_delete;
729 }
730 } else {
33320936
TP
731 err = setup_veth_native_bridge_vlan(veth1, netdev);
732 if (err) {
733 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
734 goto out_delete;
735 }
736 }
811ef482
CB
737 }
738
739 err = lxc_netdev_up(veth1);
740 if (err) {
6d1400b5 741 errno = -err;
742 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
743 goto out_delete;
744 }
745
d4a7da46 746 /* setup ipv4 routes on the host interface */
303707f6 747 if (setup_ipv4_routes(netdev)) {
d4a7da46 748 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
749 goto out_delete;
750 }
751
752 /* setup ipv6 routes on the host interface */
6bf0c06b 753 if (setup_ipv6_routes(netdev)) {
d4a7da46 754 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
755 goto out_delete;
756 }
757
6dfa9581 758 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
759 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
760 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
761 appear to be added successfully but then do not appear in the proxy list. The length of time
762 slept doesn't appear to be important, only that the process sleeps for a short period of time.
763 */
764 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
765
6dfa9581
TP
766 if (netdev->ipv4_gateway) {
767 char bufinet4[INET_ADDRSTRLEN];
768 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 769 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
770 goto out_delete;
771 }
772
773 err = lxc_ip_forwarding_on(veth1, AF_INET);
774 if (err) {
9c66dc4f 775 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
776 goto out_delete;
777 }
778
5fe147e9 779 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 780 if (err) {
9c66dc4f 781 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
782 goto out_delete;
783 }
784 }
785
786 if (netdev->ipv6_gateway) {
787 char bufinet6[INET6_ADDRSTRLEN];
788
789 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 790 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
791 goto out_delete;
792 }
793
794 /* Check for sysctl net.ipv6.conf.all.forwarding=1
795 Kernel requires this to route any packets for IPv6.
796 */
797 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
798 if (err) {
9c66dc4f 799 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
800 goto out_delete;
801 }
802
803 err = lxc_ip_forwarding_on(veth1, AF_INET6);
804 if (err) {
9c66dc4f 805 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
806 goto out_delete;
807 }
808
809 err = lxc_neigh_proxy_on(veth1, AF_INET6);
810 if (err) {
9c66dc4f 811 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
812 goto out_delete;
813 }
814
5fe147e9 815 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 816 if (err) {
9c66dc4f 817 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
818 goto out_delete;
819 }
820 }
821
822 /* setup ipv4 address routes on the host interface */
2ec31bbd 823 err = setup_ipv4_addr_routes(netdev);
6dfa9581 824 if (err) {
9c66dc4f 825 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
826 goto out_delete;
827 }
828
829 /* setup ipv6 address routes on the host interface */
cd32fc73 830 err = setup_ipv6_addr_routes(netdev);
6dfa9581 831 if (err) {
9c66dc4f 832 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
833 goto out_delete;
834 }
835 }
836
811ef482 837 if (netdev->upscript) {
14a7b0f9
CB
838 char *argv[] = {
839 "veth",
840 netdev->link,
990b9ac3 841 veth1,
14a7b0f9
CB
842 NULL,
843 };
844
845 err = run_script_argv(handler->name,
846 handler->conf->hooks_version, "net",
847 netdev->upscript, "up", argv);
848 if (err < 0)
811ef482
CB
849 goto out_delete;
850 }
851
54256301 852 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
853
854 return 0;
855
856out_delete:
54256301 857 lxc_netdev_delete_by_name(veth1);
811ef482
CB
858 return -1;
859}
860
bad2f913 861static int netdev_configure_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 862{
8021de25 863 char peer[IFNAMSIZ];
811ef482
CB
864 int err;
865
f2711167 866 if (is_empty_string(netdev->link)) {
811ef482
CB
867 ERROR("No link for macvlan network device specified");
868 return -1;
869 }
870
387c1c70
CB
871 err = strnprintf(peer, sizeof(peer), "mcXXXXXX");
872 if (err < 0)
811ef482
CB
873 return -1;
874
3646ffd9 875 if (!lxc_ifname_alnum_case_sensitive(peer))
811ef482
CB
876 return -1;
877
878 err = lxc_macvlan_create(netdev->link, peer,
879 netdev->priv.macvlan_attr.mode);
880 if (err) {
6d1400b5 881 errno = -err;
882 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
883 peer, netdev->link);
966e9f1f 884 goto on_error;
811ef482
CB
885 }
886
9f8cf6e1
CB
887 strlcpy(netdev->created_name, peer, IFNAMSIZ);
888
811ef482
CB
889 netdev->ifindex = if_nametoindex(peer);
890 if (!netdev->ifindex) {
891 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 892 goto on_error;
811ef482
CB
893 }
894
3bef7b7b 895 if (netdev->mtu) {
54256301
CB
896 unsigned int mtu;
897
3bef7b7b
TP
898 err = lxc_safe_uint(netdev->mtu, &mtu);
899 if (err < 0) {
900 errno = -err;
901 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
902 goto on_error;
903 }
904
905 err = lxc_netdev_set_mtu(peer, mtu);
906 if (err < 0) {
907 errno = -err;
908 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
909 goto on_error;
910 }
911 }
912
811ef482 913 if (netdev->upscript) {
14a7b0f9
CB
914 char *argv[] = {
915 "macvlan",
916 netdev->link,
917 NULL,
918 };
919
920 err = run_script_argv(handler->name,
921 handler->conf->hooks_version, "net",
922 netdev->upscript, "up", argv);
923 if (err < 0)
966e9f1f 924 goto on_error;
811ef482
CB
925 }
926
4a037d61 927 DEBUG("Instantiated macvlan \"%s\" with ifindex %d and mode %d",
811ef482
CB
928 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
929
930 return 0;
966e9f1f
CB
931
932on_error:
811ef482 933 lxc_netdev_delete_by_name(peer);
811ef482
CB
934 return -1;
935}
936
0dc9a142 937static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int isolation)
c9f52382 938{
d16bda44 939 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 940 struct nl_handler nlh = NL_HANDLER_INIT;
d16bda44 941 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 942 int err, index, len;
943 struct ifinfomsg *ifi;
c9f52382 944 struct rtattr *nest, *nest2;
c9f52382 945
0dc9a142 946 len = strlen(parent);
c9f52382 947 if (len == 1 || len >= IFNAMSIZ)
d16bda44 948 return ret_errno(EINVAL);
c9f52382 949
950 len = strlen(name);
951 if (len == 1 || len >= IFNAMSIZ)
d16bda44 952 return ret_errno(EINVAL);
c9f52382 953
0dc9a142 954 index = if_nametoindex(parent);
c9f52382 955 if (!index)
d16bda44 956 return ret_errno(EINVAL);
c9f52382 957
d16bda44 958 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 959 if (err)
df62850d 960 return err;
c9f52382 961
c9f52382 962 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
963 if (!nlmsg)
d16bda44 964 return ret_errno(ENOMEM);
c9f52382 965
966 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
967 if (!answer)
d16bda44 968 return ret_errno(ENOMEM);
c9f52382 969
970 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
971 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
972
973 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
974 if (!ifi)
975 return ret_errno(ENOMEM);
c9f52382 976 ifi->ifi_family = AF_UNSPEC;
977
c9f52382 978 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
979 if (!nest)
d16bda44 980 return ret_errno(EPROTO);
c9f52382 981
982 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 983 return ret_errno(EPROTO);
c9f52382 984
5755765e
KT
985 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
986 if (!nest2)
987 return ret_errno(EPROTO);
988
3a934e2e 989 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
5755765e
KT
990 return ret_errno(EPROTO);
991
cf88a827
TP
992 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
993 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
994 * according to ipvlan docs.
5755765e 995 */
cf88a827 996 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
5755765e 997 return ret_errno(EPROTO);
c9f52382 998
5755765e 999 nla_end_nested(nlmsg, nest2);
c9f52382 1000 nla_end_nested(nlmsg, nest);
1001
1002 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 1003 return ret_errno(EPROTO);
c9f52382 1004
1005 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
1006 return ret_errno(EPROTO);
1007
1008 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 1009}
1010
bad2f913 1011static int netdev_configure_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
c9f52382 1012{
dd119206 1013 char peer[IFNAMSIZ];
c9f52382 1014 int err;
1015
f2711167 1016 if (is_empty_string(netdev->link)) {
c9f52382 1017 ERROR("No link for ipvlan network device specified");
1018 return -1;
1019 }
1020
387c1c70
CB
1021 err = strnprintf(peer, sizeof(peer), "ipXXXXXX");
1022 if (err < 0)
c9f52382 1023 return -1;
1024
3646ffd9 1025 if (!lxc_ifname_alnum_case_sensitive(peer))
c9f52382 1026 return -1;
1027
dd119206
CB
1028 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
1029 netdev->priv.ipvlan_attr.isolation);
c9f52382 1030 if (err) {
dd119206
CB
1031 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
1032 peer, netdev->link);
c9f52382 1033 goto on_error;
1034 }
1035
e7fdd504
CB
1036 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1037
c9f52382 1038 netdev->ifindex = if_nametoindex(peer);
1039 if (!netdev->ifindex) {
1040 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
1041 goto on_error;
1042 }
1043
006e135e 1044 if (netdev->mtu) {
54256301
CB
1045 unsigned int mtu;
1046
006e135e 1047 err = lxc_safe_uint(netdev->mtu, &mtu);
1048 if (err < 0) {
1049 errno = -err;
54256301 1050 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 1051 goto on_error;
1052 }
1053
1054 err = lxc_netdev_set_mtu(peer, mtu);
1055 if (err < 0) {
1056 errno = -err;
54256301 1057 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 1058 goto on_error;
1059 }
1060 }
1061
c9f52382 1062 if (netdev->upscript) {
1063 char *argv[] = {
1064 "ipvlan",
1065 netdev->link,
1066 NULL,
1067 };
1068
dd119206
CB
1069 err = run_script_argv(handler->name, handler->conf->hooks_version,
1070 "net", netdev->upscript, "up", argv);
c9f52382 1071 if (err < 0)
1072 goto on_error;
1073 }
1074
4a037d61 1075 DEBUG("Instantiated ipvlan \"%s\" with ifindex %d and mode %d", peer,
dd119206 1076 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 1077
1078 return 0;
1079
1080on_error:
1081 lxc_netdev_delete_by_name(peer);
1082 return -1;
1083}
1084
bad2f913 1085static int netdev_configure_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482
CB
1086{
1087 char peer[IFNAMSIZ];
1088 int err;
1089 static uint16_t vlan_cntr = 0;
811ef482 1090
f2711167 1091 if (is_empty_string(netdev->link)) {
811ef482
CB
1092 ERROR("No link for vlan network device specified");
1093 return -1;
1094 }
1095
387c1c70
CB
1096 err = strnprintf(peer, sizeof(peer), "vlan%d-%d",
1097 netdev->priv.vlan_attr.vid, vlan_cntr++);
1098 if (err < 0)
811ef482
CB
1099 return -1;
1100
1101 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
1102 if (err) {
6d1400b5 1103 errno = -err;
1104 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1105 peer, netdev->link);
811ef482
CB
1106 return -1;
1107 }
1108
83530dba
CB
1109 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1110
811ef482
CB
1111 netdev->ifindex = if_nametoindex(peer);
1112 if (!netdev->ifindex) {
1113 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 1114 goto on_error;
1115 }
1116
1117 if (netdev->mtu) {
54256301
CB
1118 unsigned int mtu;
1119
3e2a7b08 1120 err = lxc_safe_uint(netdev->mtu, &mtu);
1121 if (err < 0) {
1122 errno = -err;
54256301 1123 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1124 goto on_error;
1125 }
1126
1127 err = lxc_netdev_set_mtu(peer, mtu);
54256301 1128 if (err < 0) {
3e2a7b08 1129 errno = -err;
54256301 1130 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1131 goto on_error;
1132 }
811ef482
CB
1133 }
1134
3a73d9f1 1135 if (netdev->upscript) {
1136 char *argv[] = {
1137 "vlan",
1138 netdev->link,
1139 NULL,
1140 };
1141
d4d68410
CB
1142 err = run_script_argv(handler->name, handler->conf->hooks_version,
1143 "net", netdev->upscript, "up", argv);
19abca58 1144 if (err < 0) {
3e2a7b08 1145 goto on_error;
19abca58 1146 }
3a73d9f1 1147 }
1148
4a037d61 1149 DEBUG("Instantiated vlan \"%s\" with ifindex \"%d\"", peer,
d4d68410 1150 netdev->ifindex);
811ef482
CB
1151
1152 return 0;
3e2a7b08 1153
1154on_error:
1155 lxc_netdev_delete_by_name(peer);
1156 return -1;
811ef482
CB
1157}
1158
bad2f913 1159static int netdev_configure_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1160{
0b154989 1161 int err, mtu_orig = 0;
14a7b0f9 1162
9c66dc4f
CB
1163 if (is_empty_string(netdev->link))
1164 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 1165
75b074ee
CB
1166 /*
1167 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
1168 * network namespace because we need it to move the device from the
1169 * host's network namespace to the container's network namespace later
1170 * on.
1171 * Note that netdev->link will contain the name of the physical network
1172 * device in the host's namespace.
1173 */
811ef482 1174 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
1175 if (!netdev->ifindex)
1176 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 1177
61302ef7 1178 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 1179 if (is_empty_string(netdev->name))
8bf64b77 1180 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 1181
75b074ee
CB
1182 /*
1183 * Store the ifindex of the host's network device in the host's
790255cf
CB
1184 * namespace.
1185 */
1186 netdev->priv.phys_attr.ifindex = netdev->ifindex;
1187
75b074ee
CB
1188 /*
1189 * Get original device MTU setting and store for restoration after
1190 * container shutdown.
1191 */
0b154989 1192 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
1193 if (mtu_orig < 0)
1194 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
1195
1196 netdev->priv.phys_attr.mtu = mtu_orig;
1197
3bef7b7b 1198 if (netdev->mtu) {
54256301
CB
1199 unsigned int mtu;
1200
3bef7b7b 1201 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
1202 if (err < 0)
1203 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 1204
3bef7b7b 1205 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
1206 if (err < 0)
1207 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
1208 }
1209
1210 if (netdev->upscript) {
1211 char *argv[] = {
1212 "phys",
1213 netdev->link,
1214 NULL,
1215 };
1216
75b074ee
CB
1217 err = run_script_argv(handler->name, handler->conf->hooks_version,
1218 "net", netdev->upscript, "up", argv);
9c66dc4f 1219 if (err < 0)
3bef7b7b 1220 return -1;
3bef7b7b
TP
1221 }
1222
4a037d61 1223 DEBUG("Instantiated phys \"%s\" with ifindex \"%d\"", netdev->link,
75b074ee 1224 netdev->ifindex);
811ef482
CB
1225
1226 return 0;
1227}
1228
bad2f913 1229static int netdev_configure_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1230{
14a7b0f9
CB
1231 int ret;
1232 char *argv[] = {
1233 "empty",
1234 NULL,
1235 };
1236
43e2a964
CB
1237 /* The loopback device always has index 1. */
1238 netdev->ifindex = 1;
1239
1240 if (!strequal(netdev->name, "lo"))
1241 return syserror_set(-EINVAL, "Custom loopback device names not supported");
1242
14a7b0f9
CB
1243 if (!netdev->upscript)
1244 return 0;
1245
1246 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1247 "net", netdev->upscript, "up", argv);
1248 if (ret < 0)
1249 return -1;
1250
811ef482
CB
1251 return 0;
1252}
1253
bad2f913 1254static int netdev_configure_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482
CB
1255{
1256 netdev->ifindex = 0;
1257 return 0;
1258}
1259
bad2f913
CB
1260static netdev_configure_server_cb netdev_configure_server[LXC_NET_MAXCONFTYPE + 1] = {
1261 [LXC_NET_VETH] = netdev_configure_server_veth,
1262 [LXC_NET_MACVLAN] = netdev_configure_server_macvlan,
1263 [LXC_NET_IPVLAN] = netdev_configure_server_ipvlan,
1264 [LXC_NET_VLAN] = netdev_configure_server_vlan,
1265 [LXC_NET_PHYS] = netdev_configure_server_phys,
1266 [LXC_NET_EMPTY] = netdev_configure_server_empty,
1267 [LXC_NET_NONE] = netdev_configure_server_none,
811ef482
CB
1268};
1269
bad2f913 1270static int __netdev_configure_container_common(struct lxc_netdev *netdev)
8bf64b77
CB
1271{
1272 char current_ifname[IFNAMSIZ];
1273
fdd6be55 1274 netdev->ifindex = if_nametoindex(netdev->transient_name);
8bf64b77
CB
1275 if (!netdev->ifindex)
1276 return log_error_errno(-1,
1277 errno, "Failed to retrieve ifindex for network device with name %s",
fdd6be55 1278 netdev->transient_name);
8bf64b77 1279
3473ca76 1280 if (is_empty_string(netdev->name))
8bf64b77
CB
1281 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1282
fdd6be55 1283 if (!strequal(netdev->transient_name, netdev->name)) {
8bf64b77
CB
1284 int ret;
1285
fdd6be55 1286 ret = lxc_netdev_rename_by_name(netdev->transient_name, netdev->name);
8bf64b77 1287 if (ret)
9c66dc4f 1288 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
fdd6be55 1289 netdev->transient_name, netdev->name);
8bf64b77 1290
fdd6be55 1291 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->transient_name, netdev->name);
8bf64b77
CB
1292 }
1293
1294 /*
1295 * Re-read the name of the interface because its name has changed and
1296 * would be automatically allocated by the system
1297 */
1298 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 1299 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
1300
1301 /*
1302 * Now update the recorded name of the network device to reflect the
1303 * name of the network device in the child's network namespace. We will
1304 * later on send this information back to the parent.
1305 */
1306 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
fdd6be55 1307 netdev->transient_name[0] = '\0';
8bf64b77
CB
1308
1309 return 0;
1310}
1311
/* Container-side setup for veth devices; defers to the common routine. */
static int netdev_configure_container_veth(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1317
/* Container-side setup for macvlan devices; defers to the common routine. */
static int netdev_configure_container_macvlan(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1322
/* Container-side setup for ipvlan devices; defers to the common routine. */
static int netdev_configure_container_ipvlan(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1327
/* Container-side setup for vlan devices; defers to the common routine. */
static int netdev_configure_container_vlan(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1332
/* Container-side setup for physical devices; defers to the common routine. */
static int netdev_configure_container_phys(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1337
/* Container-side setup for the "empty" type: nothing to do, returns 0. */
static int netdev_configure_container_empty(struct lxc_netdev *netdev)
{
	(void)netdev;

	return 0;
}
1342
/* Container-side setup for the "none" type: nothing to do, returns 0. */
static int netdev_configure_container_none(struct lxc_netdev *netdev)
{
	(void)netdev;

	return 0;
}
1347
bad2f913
CB
1348static netdev_configure_container_cb netdev_configure_container[LXC_NET_MAXCONFTYPE + 1] = {
1349 [LXC_NET_VETH] = netdev_configure_container_veth,
1350 [LXC_NET_MACVLAN] = netdev_configure_container_macvlan,
1351 [LXC_NET_IPVLAN] = netdev_configure_container_ipvlan,
1352 [LXC_NET_VLAN] = netdev_configure_container_vlan,
1353 [LXC_NET_PHYS] = netdev_configure_container_phys,
1354 [LXC_NET_EMPTY] = netdev_configure_container_empty,
1355 [LXC_NET_NONE] = netdev_configure_container_none,
8bf64b77
CB
1356};
1357
bad2f913 1358static int netdev_shutdown_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1359{
14a7b0f9
CB
1360 int ret;
1361 char *argv[] = {
1362 "veth",
1363 netdev->link,
1364 NULL,
1365 NULL,
1366 };
1367
1368 if (!netdev->downscript)
1369 return 0;
811ef482 1370
f2711167 1371 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 1372 argv[2] = netdev->priv.veth_attr.pair;
811ef482 1373 else
14a7b0f9
CB
1374 argv[2] = netdev->priv.veth_attr.veth1;
1375
1376 ret = run_script_argv(handler->name,
1377 handler->conf->hooks_version, "net",
1378 netdev->downscript, "down", argv);
1379 if (ret < 0)
1380 return -1;
811ef482 1381
811ef482
CB
1382 return 0;
1383}
1384
bad2f913 1385static int netdev_shutdown_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1386{
14a7b0f9
CB
1387 int ret;
1388 char *argv[] = {
1389 "macvlan",
1390 netdev->link,
1391 NULL,
1392 };
1393
1394 if (!netdev->downscript)
1395 return 0;
1396
1397 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1398 "net", netdev->downscript, "down", argv);
1399 if (ret < 0)
1400 return -1;
811ef482 1401
811ef482
CB
1402 return 0;
1403}
1404
bad2f913 1405static int netdev_shutdown_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
c9f52382 1406{
1407 int ret;
1408 char *argv[] = {
1409 "ipvlan",
1410 netdev->link,
1411 NULL,
1412 };
1413
1414 if (!netdev->downscript)
1415 return 0;
1416
1417 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1418 "net", netdev->downscript, "down", argv);
1419 if (ret < 0)
1420 return -1;
1421
1422 return 0;
1423}
1424
bad2f913 1425static int netdev_shutdown_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1426{
3a73d9f1 1427 int ret;
1428 char *argv[] = {
1429 "vlan",
1430 netdev->link,
1431 NULL,
1432 };
1433
1434 if (!netdev->downscript)
1435 return 0;
1436
1437 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1438 "net", netdev->downscript, "down", argv);
1439 if (ret < 0)
1440 return -1;
1441
811ef482
CB
1442 return 0;
1443}
1444
bad2f913 1445static int netdev_shutdown_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1446{
14a7b0f9
CB
1447 int ret;
1448 char *argv[] = {
1449 "phys",
1450 netdev->link,
1451 NULL,
1452 };
1453
1454 if (!netdev->downscript)
1455 return 0;
1456
1457 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1458 "net", netdev->downscript, "down", argv);
1459 if (ret < 0)
1460 return -1;
811ef482 1461
811ef482
CB
1462 return 0;
1463}
1464
bad2f913 1465static int netdev_shutdown_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
811ef482 1466{
14a7b0f9
CB
1467 int ret;
1468 char *argv[] = {
1469 "empty",
1470 NULL,
1471 };
1472
1473 if (!netdev->downscript)
1474 return 0;
1475
1476 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1477 "net", netdev->downscript, "down", argv);
1478 if (ret < 0)
1479 return -1;
811ef482 1480
811ef482
CB
1481 return 0;
1482}
1483
/* Host-side shutdown for the "none" type: nothing to do, returns 0. */
static int netdev_shutdown_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
1488
bad2f913
CB
1489static netdev_shutdown_server_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1490 [LXC_NET_VETH] = netdev_shutdown_server_veth,
1491 [LXC_NET_MACVLAN] = netdev_shutdown_server_macvlan,
1492 [LXC_NET_IPVLAN] = netdev_shutdown_server_ipvlan,
1493 [LXC_NET_VLAN] = netdev_shutdown_server_vlan,
1494 [LXC_NET_PHYS] = netdev_shutdown_server_phys,
1495 [LXC_NET_EMPTY] = netdev_shutdown_server_empty,
1496 [LXC_NET_NONE] = netdev_shutdown_server_none,
811ef482
CB
1497};
1498
0037ab49
TP
1499static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1500{
d16bda44 1501 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
f8aa61f9 1502 struct nl_handler nlh = NL_HANDLER_INIT;
d16bda44
CB
1503 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1504 int err;
0037ab49 1505 struct ifinfomsg *ifi;
0037ab49 1506
d16bda44 1507 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1508 if (err)
1509 return err;
1510
0037ab49
TP
1511 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1512 if (!nlmsg)
d16bda44 1513 return ret_errno(ENOMEM);
0037ab49
TP
1514
1515 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1516 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1517
1518 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1519 if (!ifi)
d16bda44
CB
1520 return ret_errno(ENOMEM);
1521
0037ab49
TP
1522 ifi->ifi_family = AF_UNSPEC;
1523 ifi->ifi_index = ifindex;
1524
1525 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1526 return ret_errno(ENOMEM);
0037ab49 1527
3473ca76 1528 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1529 return ret_errno(ENOMEM);
0037ab49 1530
d16bda44 1531 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1532}
1533
ebc73a67 1534int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1535{
d16bda44 1536 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
f8aa61f9 1537 struct nl_handler nlh = NL_HANDLER_INIT;
d16bda44
CB
1538 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1539 int err;
06f976ca 1540 struct ifinfomsg *ifi;
0ad19a3f 1541
d16bda44 1542 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1543 if (err)
1544 return err;
0ad19a3f 1545
0ad19a3f 1546 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1547 if (!nlmsg)
d16bda44 1548 return ret_errno(ENOMEM);
0ad19a3f 1549
ebc73a67 1550 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1551 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1552
1553 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1554 if (!ifi)
d16bda44
CB
1555 return ret_errno(ENOMEM);
1556
06f976ca
SZ
1557 ifi->ifi_family = AF_UNSPEC;
1558 ifi->ifi_index = ifindex;
0ad19a3f 1559
1560 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1561 return ret_errno(ENOMEM);
0ad19a3f 1562
3473ca76 1563 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1564 return ret_errno(ENOMEM);
8d357196 1565
d16bda44 1566 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1567}
1568
ebc73a67
CB
1569/* If we are asked to move a wireless interface, then we must actually move its
1570 * phyN device. Detect that condition and return the physname here. The physname
1571 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1572 */
1573#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1574char *is_wlan(const char *ifname)
e5848d39 1575{
4110345b
CB
1576 __do_fclose FILE *f = NULL;
1577 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1578 int i, ret;
e5848d39 1579 long physlen;
ebc73a67 1580 size_t len;
e5848d39 1581
ebc73a67 1582 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1583 path = must_realloc(NULL, len + 1);
387c1c70
CB
1584 ret = strnprintf(path, len, PHYSNAME, ifname);
1585 if (ret < 0)
4110345b 1586 return NULL;
ebc73a67 1587
4110345b 1588 f = fopen(path, "re");
ebc73a67 1589 if (!f)
4110345b 1590 return NULL;
ebc73a67 1591
1a0e70ac 1592 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1593 fseek(f, 0, SEEK_END);
1594 physlen = ftell(f);
1595 fseek(f, 0, SEEK_SET);
4110345b
CB
1596 if (physlen < 0)
1597 return NULL;
ebc73a67
CB
1598
1599 physname = malloc(physlen + 1);
4110345b
CB
1600 if (!physname)
1601 return NULL;
ebc73a67
CB
1602
1603 memset(physname, 0, physlen + 1);
e5848d39 1604 ret = fread(physname, 1, physlen, f);
e5848d39 1605 if (ret < 0)
4110345b 1606 return NULL;
e5848d39 1607
ebc73a67 1608 for (i = 0; i < physlen; i++) {
e5848d39
SH
1609 if (physname[i] == '\n')
1610 physname[i] = '\0';
ebc73a67 1611
e5848d39
SH
1612 if (physname[i] == '\0')
1613 break;
1614 }
1615
4110345b 1616 return move_ptr(physname);
e5848d39
SH
1617}
1618
/*
 * Rename network device @old to @new inside the network namespace of @pid.
 *
 * The work is done in a forked child that switches to the target namespace;
 * the parent only waits for it.
 *
 * Returns -1 when fork() fails, otherwise the result of wait_for_pid() on the
 * child, which exits with EXIT_SUCCESS only when the rename succeeded.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on: it must always terminate via _exit() and never
	 * return, otherwise it would keep running the caller's code as a
	 * second copy of the parent.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	/* Map the helper's result onto a well-defined exit status. */
	if (lxc_netdev_rename_by_name(old, new))
		_exit(EXIT_FAILURE);

	_exit(EXIT_SUCCESS);
}
1636
e4103cf6 1637int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1638 const char *newname)
e5848d39 1639{
3dd78294 1640 __do_free char *cmd = NULL;
ebc73a67 1641 pid_t fpid;
e5848d39
SH
1642
1643 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1644 * However, IIUC this involves a bit more complicated work to talk to
1645 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1646 */
1647 cmd = on_path("iw", NULL);
0ed79f45
M
1648 if (!cmd) {
1649 ERROR("Couldn't find the application iw in PATH");
3dd78294 1650 return -1;
0ed79f45 1651 }
e5848d39
SH
1652
1653 fpid = fork();
1654 if (fpid < 0)
3dd78294 1655 return -1;
ebc73a67 1656
e5848d39
SH
1657 if (fpid == 0) {
1658 char pidstr[30];
1659 sprintf(pidstr, "%d", pid);
9c66dc4f 1660 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1661 _exit(EXIT_FAILURE);
e5848d39 1662 }
ebc73a67 1663
e5848d39 1664 if (wait_for_pid(fpid))
3dd78294 1665 return -1;
e5848d39 1666
e5848d39 1667 if (newname)
3dd78294 1668 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1669
3dd78294 1670 return 0;
e5848d39
SH
1671}
1672
8d357196 1673int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1674{
3dd78294 1675 __do_free char *physname = NULL;
8befa924
SH
1676 int index;
1677
8befa924
SH
1678 if (!ifname)
1679 return -EINVAL;
1680
32571606 1681 index = if_nametoindex(ifname);
49428bf3
DY
1682 if (!index)
1683 return -EINVAL;
32571606 1684
ebc73a67
CB
1685 physname = is_wlan(ifname);
1686 if (physname)
e5848d39
SH
1687 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1688
8d357196 1689 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1690}
1691
b84f58b9 1692int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1693{
d16bda44 1694 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 1695 struct nl_handler nlh = NL_HANDLER_INIT;
d16bda44 1696 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1697 int err;
ebc73a67 1698 struct ifinfomsg *ifi;
0ad19a3f 1699
d16bda44 1700 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1701 if (err)
1702 return err;
0ad19a3f 1703
0ad19a3f 1704 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1705 if (!nlmsg)
d16bda44 1706 return ret_errno(ENOMEM);
0ad19a3f 1707
06f976ca 1708 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1709 if (!answer)
d16bda44 1710 return ret_errno(ENOMEM);
0ad19a3f 1711
ebc73a67 1712 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1713 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1714
1715 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1716 if (!ifi)
d16bda44
CB
1717 return ret_errno(ENOMEM);
1718
06f976ca
SZ
1719 ifi->ifi_family = AF_UNSPEC;
1720 ifi->ifi_index = ifindex;
0ad19a3f 1721
d16bda44 1722 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1723}
1724
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL when no ifindex can be found for @name, otherwise the
 * result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1735
1736int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1737{
d16bda44 1738 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 1739 struct nl_handler nlh = NL_HANDLER_INIT;
d16bda44 1740 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1741 int err, len;
06f976ca 1742 struct ifinfomsg *ifi;
b9a5bb58 1743
d16bda44 1744 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1745 if (err)
1746 return err;
b9a5bb58 1747
b84f58b9 1748 len = strlen(newname);
d16bda44
CB
1749 if (len == 1 || len >= IFNAMSIZ)
1750 return ret_errno(EINVAL);
b84f58b9 1751
b9a5bb58
DL
1752 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1753 if (!nlmsg)
d16bda44 1754 return ret_errno(ENOMEM);
b9a5bb58 1755
06f976ca 1756 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1757 if (!answer)
d16bda44 1758 return ret_errno(ENOMEM);
b9a5bb58 1759
ebc73a67 1760 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1761 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1762
1763 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1764 if (!ifi)
d16bda44
CB
1765 return ret_errno(ENOMEM);
1766
06f976ca
SZ
1767 ifi->ifi_family = AF_UNSPEC;
1768 ifi->ifi_index = ifindex;
b84f58b9
DL
1769
1770 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1771 return ret_errno(ENOMEM);
b9a5bb58 1772
d16bda44 1773 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1774}
1775
b84f58b9
DL
1776int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1777{
1778 int len, index;
1779
1780 len = strlen(oldname);
dae3fdf6 1781 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1782 return -EINVAL;
1783
1784 index = if_nametoindex(oldname);
1785 if (!index)
1786 return -EINVAL;
1787
1788 return lxc_netdev_rename_by_index(index, newname);
1789}
1790
8befa924 1791int netdev_set_flag(const char *name, int flag)
0ad19a3f 1792{
d16bda44 1793 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 1794 struct nl_handler nlh = NL_HANDLER_INIT;
d16bda44 1795 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1796 int err, index, len;
06f976ca 1797 struct ifinfomsg *ifi;
0ad19a3f 1798
d16bda44 1799 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1800 if (err)
1801 return err;
0ad19a3f 1802
1803 len = strlen(name);
dae3fdf6 1804 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1805 return ret_errno(EINVAL);
0ad19a3f 1806
1807 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1808 if (!nlmsg)
d16bda44 1809 return ret_errno(ENOMEM);
0ad19a3f 1810
06f976ca 1811 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1812 if (!answer)
d16bda44 1813 return ret_errno(ENOMEM);
0ad19a3f 1814
1815 index = if_nametoindex(name);
1816 if (!index)
d16bda44 1817 return ret_errno(EINVAL);
0ad19a3f 1818
ebc73a67 1819 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1820 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1821
1822 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1823 if (!ifi)
1824 return ret_errno(ENOMEM);
1825
06f976ca
SZ
1826 ifi->ifi_family = AF_UNSPEC;
1827 ifi->ifi_index = index;
1828 ifi->ifi_change |= IFF_UP;
1829 ifi->ifi_flags |= flag;
0ad19a3f 1830
d16bda44 1831 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1832}
1833
59eac805 1834static int netdev_get_flag(const char *name, int *flag)
efa1cf45 1835{
d16bda44 1836 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 1837 struct nl_handler nlh = NL_HANDLER_INIT;
d16bda44 1838 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1839 int err, index, len;
a4318300 1840 struct ifinfomsg *ifi;
efa1cf45
DY
1841
1842 if (!name)
d16bda44 1843 return ret_errno(EINVAL);
efa1cf45 1844
d16bda44 1845 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1846 if (err)
1847 return err;
1848
efa1cf45
DY
1849 len = strlen(name);
1850 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1851 return ret_errno(EINVAL);
efa1cf45 1852
efa1cf45
DY
1853 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1854 if (!nlmsg)
d16bda44 1855 return ret_errno(ENOMEM);
efa1cf45 1856
06f976ca 1857 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1858 if (!answer)
d16bda44 1859 return ret_errno(ENOMEM);
efa1cf45 1860
efa1cf45
DY
1861 index = if_nametoindex(name);
1862 if (!index)
d16bda44 1863 return ret_errno(EINVAL);
efa1cf45 1864
06f976ca
SZ
1865 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1866 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1867
1868 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1869 if (!ifi)
1870 return ret_errno(ENOMEM);
1871
06f976ca
SZ
1872 ifi->ifi_family = AF_UNSPEC;
1873 ifi->ifi_index = index;
efa1cf45 1874
d16bda44 1875 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1876 if (err)
d16bda44 1877 return ret_set_errno(-1, errno);
efa1cf45 1878
06f976ca 1879 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1880
1881 *flag = ifi->ifi_flags;
efa1cf45
DY
1882 return err;
1883}
1884
1885/*
1886 * \brief Check a interface is up or not.
1887 *
1888 * \param name: name for the interface.
1889 *
1890 * \return int.
1891 * 0 means interface is down.
1892 * 1 means interface is up.
1893 * Others means error happened, and ret-value is the error number.
1894 */
ebc73a67 1895int lxc_netdev_isup(const char *name)
efa1cf45 1896{
4db0514d
CB
1897 int err;
1898 int flag = 0;
efa1cf45
DY
1899
1900 err = netdev_get_flag(name, &flag);
1901 if (err)
ebc73a67
CB
1902 return err;
1903
efa1cf45
DY
1904 if (flag & IFF_UP)
1905 return 1;
ebc73a67 1906
efa1cf45 1907 return 0;
efa1cf45
DY
1908}
1909
0130df54
SH
1910int netdev_get_mtu(int ifindex)
1911{
a5f5cb41 1912 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 1913 struct nl_handler nlh = NL_HANDLER_INIT;
a5f5cb41 1914 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
df0834ed
CB
1915 int readmore = 0;
1916 __u32 recv_len = 0;
a5f5cb41 1917 int answer_len, err, res;
06f976ca 1918 struct ifinfomsg *ifi;
0130df54 1919 struct nlmsghdr *msg;
0130df54 1920
a5f5cb41 1921 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0130df54
SH
1922 if (err)
1923 return err;
1924
0130df54
SH
1925 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1926 if (!nlmsg)
a5f5cb41 1927 return ret_errno(ENOMEM);
0130df54 1928
06f976ca 1929 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54 1930 if (!answer)
a5f5cb41 1931 return ret_errno(ENOMEM);
0130df54
SH
1932
1933 /* Save the answer buffer length, since it will be overwritten
1934 * on the first receive (and we might need to receive more than
ebc73a67
CB
1935 * once.
1936 */
06f976ca
SZ
1937 answer_len = answer->nlmsghdr->nlmsg_len;
1938
ebc73a67 1939 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1940 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1941
06f976ca 1942 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1943 if (!ifi)
a5f5cb41
CB
1944 return ret_errno(ENOMEM);
1945
06f976ca 1946 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1947
1948 /* Send the request for addresses, which returns all addresses
1949 * on all interfaces. */
a5f5cb41 1950 err = netlink_send(nlh_ptr, nlmsg);
0130df54 1951 if (err < 0)
a5f5cb41 1952 return ret_set_errno(-1, errno);
0130df54 1953
6ce39620
CB
1954#pragma GCC diagnostic push
1955#pragma GCC diagnostic ignored "-Wcast-align"
1956
0130df54
SH
1957 do {
1958 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1959 * overwritten by a previous receive.
1960 */
06f976ca 1961 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1962
1963 /* Get the (next) batch of reply messages */
a5f5cb41 1964 err = netlink_rcv(nlh_ptr, answer);
0130df54 1965 if (err < 0)
a5f5cb41 1966 return ret_set_errno(-1, errno);
0130df54
SH
1967
1968 recv_len = err;
0130df54
SH
1969
1970 /* Satisfy the typing for the netlink macros */
06f976ca 1971 msg = answer->nlmsghdr;
0130df54
SH
1972
1973 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1974 /* Stop reading if we see an error message */
1975 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
1976 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1977 return ret_set_errno(errmsg->error, errno);
0130df54
SH
1978 }
1979
1980 /* Stop reading if we see a NLMSG_DONE message */
1981 if (msg->nlmsg_type == NLMSG_DONE) {
1982 readmore = 0;
1983 break;
1984 }
1985
06f976ca 1986 ifi = NLMSG_DATA(msg);
0130df54
SH
1987 if (ifi->ifi_index == ifindex) {
1988 struct rtattr *rta = IFLA_RTA(ifi);
a5f5cb41
CB
1989 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1990
0130df54 1991 res = 0;
ebc73a67 1992 while (RTA_OK(rta, attr_len)) {
9c66dc4f 1993 /*
a5f5cb41 1994 * Found a local address for the
ebc73a67
CB
1995 * requested interface, return it.
1996 */
0130df54 1997 if (rta->rta_type == IFLA_MTU) {
a5f5cb41
CB
1998 memcpy(&res, RTA_DATA(rta), sizeof(int));
1999 return res;
0130df54 2000 }
a5f5cb41 2001
0130df54
SH
2002 rta = RTA_NEXT(rta, attr_len);
2003 }
0130df54
SH
2004 }
2005
ebc73a67
CB
2006 /* Keep reading more data from the socket if the last
2007 * message had the NLF_F_MULTI flag set.
2008 */
0130df54
SH
2009 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2010
ebc73a67 2011 /* Look at the next message received in this buffer. */
0130df54
SH
2012 msg = NLMSG_NEXT(msg, recv_len);
2013 }
2014 } while (readmore);
2015
6ce39620
CB
2016#pragma GCC diagnostic pop
2017
ebc73a67 2018 /* If we end up here, we didn't find any result, so signal an error. */
a5f5cb41 2019 return -1;
0130df54
SH
2020}
2021
d472214b 2022int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 2023{
a5f5cb41 2024 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 2025 struct nl_handler nlh = NL_HANDLER_INIT;
a5f5cb41 2026 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
54256301 2027 int err, len;
06f976ca 2028 struct ifinfomsg *ifi;
75d09f83 2029
a5f5cb41 2030 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2031 if (err)
2032 return err;
75d09f83
DL
2033
2034 len = strlen(name);
dae3fdf6 2035 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2036 return ret_errno(EINVAL);
75d09f83
DL
2037
2038 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2039 if (!nlmsg)
a5f5cb41 2040 return ret_errno(ENOMEM);
75d09f83 2041
06f976ca 2042 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83 2043 if (!answer)
a5f5cb41 2044 return ret_errno(ENOMEM);
75d09f83 2045
ebc73a67 2046 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
2047 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2048
2049 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2050 if (!ifi)
2051 return ret_errno(ENOMEM);
2052
06f976ca 2053 ifi->ifi_family = AF_UNSPEC;
54256301
CB
2054
2055 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2056 return ret_errno(ENOMEM);
75d09f83
DL
2057
2058 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2059 return ret_errno(ENOMEM);
75d09f83 2060
a5f5cb41 2061 return netlink_transaction(nlh_ptr, nlmsg, answer);
75d09f83
DL
2062}
2063
d472214b 2064int lxc_netdev_up(const char *name)
0ad19a3f 2065{
d472214b 2066 return netdev_set_flag(name, IFF_UP);
0ad19a3f 2067}
2068
/*
 * Take the interface called @name down by passing an empty flag set (0) to
 * netdev_set_flag(). Returns whatever netdev_set_flag() returns.
 */
int lxc_netdev_down(const char *name)
{
	const int flags = 0;

	return netdev_set_flag(name, flags);
}
2073
be538b8b
CD
2074int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu,
2075 int n_rxqueues, int n_txqueues)
0ad19a3f 2076{
a5f5cb41 2077 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 2078 struct nl_handler nlh = NL_HANDLER_INIT;
a5f5cb41 2079 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2080 int err, len;
06f976ca 2081 struct ifinfomsg *ifi;
0ad19a3f 2082 struct rtattr *nest1, *nest2, *nest3;
0ad19a3f 2083
a5f5cb41 2084 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2085 if (err)
2086 return err;
0ad19a3f 2087
2088 len = strlen(name1);
dae3fdf6 2089 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2090 return ret_errno(EINVAL);
0ad19a3f 2091
2092 len = strlen(name2);
dae3fdf6 2093 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2094 return ret_errno(EINVAL);
0ad19a3f 2095
2096 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2097 if (!nlmsg)
a5f5cb41 2098 return ret_errno(ENOMEM);
0ad19a3f 2099
06f976ca 2100 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2101 if (!answer)
a5f5cb41 2102 return ret_errno(ENOMEM);
0ad19a3f 2103
a5f5cb41 2104 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2105 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2106
2107 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 2108 if (!ifi)
a5f5cb41
CB
2109 return ret_errno(ENOMEM);
2110
06f976ca 2111 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2112
79e68309 2113 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2114 if (!nest1)
a5f5cb41 2115 return ret_errno(EINVAL);
0ad19a3f 2116
2117 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
a5f5cb41 2118 return ret_errno(ENOMEM);
0ad19a3f 2119
2120 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2121 if (!nest2)
a5f5cb41 2122 return ret_errno(ENOMEM);
0ad19a3f 2123
2124 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
2125 if (!nest3)
a5f5cb41 2126 return ret_errno(ENOMEM);
0ad19a3f 2127
06f976ca 2128 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2129 if (!ifi)
2130 return ret_errno(ENOMEM);
0ad19a3f 2131
2132 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
a5f5cb41 2133 return ret_errno(ENOMEM);
0ad19a3f 2134
be538b8b
CD
2135 if (n_rxqueues > 0 && nla_put_u32(nlmsg, IFLA_NUM_RX_QUEUES, (unsigned int)n_rxqueues))
2136 return ret_errno(ENOMEM);
2137
2138 if (n_txqueues > 0 && nla_put_u32(nlmsg, IFLA_NUM_TX_QUEUES, (unsigned int)n_txqueues))
2139 return ret_errno(ENOMEM);
2140
54256301 2141 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2142 return ret_errno(ENOMEM);
54256301
CB
2143
2144 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
a5f5cb41 2145 return ret_errno(ENOMEM);
54256301 2146
0ad19a3f 2147 nla_end_nested(nlmsg, nest3);
0ad19a3f 2148 nla_end_nested(nlmsg, nest2);
0ad19a3f 2149 nla_end_nested(nlmsg, nest1);
2150
2151 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
a5f5cb41 2152 return ret_errno(ENOMEM);
0ad19a3f 2153
be538b8b
CD
2154 if (n_txqueues > 0 && nla_put_u32(nlmsg, IFLA_NUM_RX_QUEUES, (unsigned int)n_txqueues))
2155 return ret_errno(ENOMEM);
2156
2157 if (n_rxqueues > 0 && nla_put_u32(nlmsg, IFLA_NUM_TX_QUEUES, (unsigned int)n_rxqueues))
2158 return ret_errno(ENOMEM);
2159
a5f5cb41 2160 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2161}
2162
ebc73a67 2163/* TODO: merge with lxc_macvlan_create */
0dc9a142 2164int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid)
26c39028 2165{
a5f5cb41 2166 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 2167 struct nl_handler nlh = NL_HANDLER_INIT;
a5f5cb41 2168 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2169 int err, len, lindex;
06f976ca 2170 struct ifinfomsg *ifi;
26c39028 2171 struct rtattr *nest, *nest2;
26c39028 2172
a5f5cb41 2173 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2174 if (err)
2175 return err;
26c39028 2176
0dc9a142 2177 len = strlen(parent);
dae3fdf6 2178 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2179 return ret_errno(EINVAL);
26c39028
JHS
2180
2181 len = strlen(name);
dae3fdf6 2182 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2183 return ret_errno(EINVAL);
26c39028
JHS
2184
2185 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2186 if (!nlmsg)
a5f5cb41 2187 return ret_errno(ENOMEM);
26c39028 2188
06f976ca 2189 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028 2190 if (!answer)
a5f5cb41 2191 return ret_errno(ENOMEM);
26c39028 2192
0dc9a142 2193 lindex = if_nametoindex(parent);
26c39028 2194 if (!lindex)
a5f5cb41 2195 return ret_errno(EINVAL);
26c39028 2196
a5f5cb41 2197 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2198 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2199
2200 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2201 if (!ifi)
2202 return ret_errno(ENOMEM);
2203
06f976ca 2204 ifi->ifi_family = AF_UNSPEC;
26c39028 2205
79e68309 2206 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028 2207 if (!nest)
a5f5cb41 2208 return ret_errno(ENOMEM);
26c39028
JHS
2209
2210 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
a5f5cb41 2211 return ret_errno(ENOMEM);
26c39028
JHS
2212
2213 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2214 if (!nest2)
a5f5cb41 2215 return ret_errno(ENOMEM);
e892973e 2216
26c39028 2217 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
a5f5cb41 2218 return ret_errno(ENOMEM);
e892973e 2219
26c39028 2220 nla_end_nested(nlmsg, nest2);
26c39028
JHS
2221 nla_end_nested(nlmsg, nest);
2222
2223 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
a5f5cb41 2224 return ret_errno(ENOMEM);
26c39028
JHS
2225
2226 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41
CB
2227 return ret_errno(ENOMEM);
2228
2229 return netlink_transaction(nlh_ptr, nlmsg, answer);
26c39028
JHS
2230}
2231
0dc9a142 2232int lxc_macvlan_create(const char *parent, const char *name, int mode)
0ad19a3f 2233{
a5f5cb41 2234 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 2235 struct nl_handler nlh = NL_HANDLER_INIT;
a5f5cb41 2236 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2237 int err, index, len;
06f976ca 2238 struct ifinfomsg *ifi;
e892973e 2239 struct rtattr *nest, *nest2;
0ad19a3f 2240
a5f5cb41 2241 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2242 if (err)
2243 return err;
0ad19a3f 2244
0dc9a142 2245 len = strlen(parent);
dae3fdf6 2246 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2247 return ret_errno(EINVAL);
0ad19a3f 2248
2249 len = strlen(name);
dae3fdf6 2250 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2251 return ret_errno(EINVAL);
0ad19a3f 2252
2253 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2254 if (!nlmsg)
a5f5cb41 2255 return ret_errno(ENOMEM);
0ad19a3f 2256
06f976ca 2257 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2258 if (!answer)
a5f5cb41 2259 return ret_errno(ENOMEM);
0ad19a3f 2260
0dc9a142 2261 index = if_nametoindex(parent);
0ad19a3f 2262 if (!index)
a5f5cb41 2263 return ret_errno(EINVAL);
0ad19a3f 2264
a5f5cb41 2265 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2266 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2267
2268 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2269 if (!ifi)
2270 return ret_errno(ENOMEM);
2271
06f976ca 2272 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2273
79e68309 2274 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2275 if (!nest)
a5f5cb41 2276 return ret_errno(ENOMEM);
0ad19a3f 2277
2278 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
a5f5cb41 2279 return ret_errno(ENOMEM);
0ad19a3f 2280
e892973e
DL
2281 if (mode) {
2282 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2283 if (!nest2)
a5f5cb41 2284 return ret_errno(ENOMEM);
e892973e
DL
2285
2286 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
a5f5cb41 2287 return ret_errno(ENOMEM);
e892973e
DL
2288
2289 nla_end_nested(nlmsg, nest2);
2290 }
2291
0ad19a3f 2292 nla_end_nested(nlmsg, nest);
2293
2294 if (nla_put_u32(nlmsg, IFLA_LINK, index))
a5f5cb41 2295 return ret_errno(ENOMEM);
0ad19a3f 2296
2297 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2298 return ret_errno(ENOMEM);
0ad19a3f 2299
a5f5cb41 2300 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2301}
2302
/*
 * Open @path write-only and write the string @value (without its terminating
 * NUL byte) to it.
 *
 * Returns 0 on success and -errno when opening or writing fails. The errno of
 * a failed write is captured before the descriptor is closed.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int procfd;

	procfd = open(path, O_WRONLY);
	if (procfd < 0)
		return -errno;

	if (lxc_write_nointr(procfd, value, strlen(value)) < 0)
		ret = -errno;

	close(procfd);

	return ret;
}
2318
6dfa9581 2319static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 2320{
2321 int ret;
2322 char path[PATH_MAX];
6509154d 2323
2324 if (family != AF_INET && family != AF_INET6)
6dfa9581 2325 return -EINVAL;
6509154d 2326
387c1c70
CB
2327 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2328 family == AF_INET ? "ipv4" : "ipv6", ifname,
2329 "forwarding");
2330 if (ret < 0)
6dfa9581 2331 return -E2BIG;
6509154d 2332
6dfa9581
TP
2333 return proc_sys_net_write(path, flag ? "1" : "0");
2334}
2335
/*
 * Enable forwarding for address family @family on the interface called @name.
 * Returns whatever ip_forwarding_set() returns.
 */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
2340
/*
 * Disable forwarding for address family @family on the interface called
 * @name. Returns whatever ip_forwarding_set() returns.
 */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int enable = 0;

	return ip_forwarding_set(name, family, enable);
}
2345
0ad19a3f 2346static int neigh_proxy_set(const char *ifname, int family, int flag)
2347{
9ba8130c 2348 int ret;
419590da 2349 char path[PATH_MAX];
0ad19a3f 2350
2351 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 2352 return -EINVAL;
0ad19a3f 2353
387c1c70
CB
2354 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2355 family == AF_INET ? "ipv4" : "ipv6", ifname,
2356 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2357 if (ret < 0)
9ba8130c 2358 return -E2BIG;
0ad19a3f 2359
ebc73a67 2360 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 2361}
2362
6509154d 2363static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2364{
2365 int ret;
2366 char path[PATH_MAX];
2367 char buf[1] = "";
2368
2369 if (family != AF_INET && family != AF_INET6)
596a002c 2370 return ret_set_errno(-1, EINVAL);
6509154d 2371
387c1c70
CB
2372 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2373 family == AF_INET ? "ipv4" : "ipv6", ifname,
2374 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2375 if (ret < 0)
596a002c 2376 return ret_set_errno(-1, E2BIG);
6509154d 2377
2378 return lxc_read_file_expect(path, buf, 1, "1");
2379}
2380
/*
 * Enable neighbour proxying for address family @family on the interface
 * called @name. Returns whatever neigh_proxy_set() returns.
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2385
/*
 * Disable neighbour proxying for address family @family on the interface
 * called @name. Returns whatever neigh_proxy_set() returns.
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
2390
/*
 * Return the value (0-15) of the hexadecimal digit @c, or -1 if @c is not a
 * hexadecimal digit. The cast keeps isdigit() away from negative char values,
 * for which its behaviour is undefined.
 */
static int mac_hex_digit(char c)
{
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are stored in
 * sa_data. Every byte is written as one or two hexadecimal digits (upper or
 * lower case); bytes are separated by ':'. Parsing stops at the end of the
 * string or after ETH_ALEN bytes, whichever comes first; shorter input is not
 * treated as an error and anything after the ETH_ALEN-th byte is ignored.
 *
 * Returns 0 on success and -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned int val;
		int digit;
		char c;

		/* The first character of a byte must be a hex digit. */
		digit = mac_hex_digit(*macaddr++);
		if (digit < 0)
			return -EINVAL;
		val = (unsigned int)digit;

		/*
		 * The second character is either another hex digit or the
		 * end of a single-digit byte (':' or end of string).
		 */
		c = *macaddr;
		digit = mac_hex_digit(c);
		if (digit >= 0)
			val = (val << 4) | (unsigned int)digit;
		else if (c != ':' && c != '\0')
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator following a two-digit byte. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2435
ebc73a67
CB
2436static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2437 void *acast, int prefix)
0ad19a3f 2438{
a5f5cb41 2439 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 2440 struct nl_handler nlh = NL_HANDLER_INIT;
a5f5cb41 2441 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2442 int addrlen, err;
06f976ca 2443 struct ifaddrmsg *ifa;
0ad19a3f 2444
ebc73a67
CB
2445 addrlen = family == AF_INET ? sizeof(struct in_addr)
2446 : sizeof(struct in6_addr);
4bf1968d 2447
a5f5cb41 2448 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2449 if (err)
2450 return err;
0ad19a3f 2451
0ad19a3f 2452 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2453 if (!nlmsg)
a5f5cb41 2454 return ret_errno(ENOMEM);
0ad19a3f 2455
06f976ca 2456 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2457 if (!answer)
a5f5cb41 2458 return ret_errno(ENOMEM);
0ad19a3f 2459
a5f5cb41 2460 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2461 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2462
2463 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2464 if (!ifa)
a5f5cb41
CB
2465 return ret_errno(ENOMEM);
2466
06f976ca
SZ
2467 ifa->ifa_prefixlen = prefix;
2468 ifa->ifa_index = ifindex;
2469 ifa->ifa_family = family;
2470 ifa->ifa_scope = 0;
acf47e1b 2471
4bf1968d 2472 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
a5f5cb41 2473 return ret_errno(EINVAL);
0ad19a3f 2474
4bf1968d 2475 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
a5f5cb41 2476 return ret_errno(EINVAL);
0ad19a3f 2477
d8948a52 2478 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
a5f5cb41 2479 return ret_errno(EINVAL);
1f1b18e7 2480
ebc73a67 2481 /* TODO: multicast, anycast with ipv6 */
79881dc6
DL
2482 if (family == AF_INET6 &&
2483 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2484 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
a5f5cb41 2485 return ret_errno(EPROTONOSUPPORT);
0ad19a3f 2486
a5f5cb41 2487 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2488}
2489
/*
 * Add the IPv6 address @addr with prefix length @prefix to the interface with
 * index @ifindex. @mcast is handed to ip_addr_add() in the broadcast slot and
 * @acast in the anycast slot. Returns whatever ip_addr_add() returns.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
2496
/*
 * Add the IPv4 address @addr with prefix length @prefix and broadcast address
 * @bcast to the interface with index @ifindex. No anycast address is passed.
 * Returns whatever ip_addr_add() returns.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
2502
ebc73a67
CB
2503/* Find an IFA_LOCAL (or IFA_ADDRESS if no IFA_LOCAL is present) address from
2504 * the given RTM_NEWADDR message. Allocates memory for the address and stores
2505 * that pointer in *res (so res should be an in_addr** or in6_addr**).
19a26f82 2506 */
6ce39620
CB
2507#pragma GCC diagnostic push
2508#pragma GCC diagnostic ignored "-Wcast-align"
2509
/*
 * Extract an address from the address message @msg into *@res.
 *
 * Messages whose ifa_family differs from @family are ignored (return 0 with
 * *@res untouched). Otherwise the attributes are scanned in order: every
 * IFA_ADDRESS or IFA_LOCAL payload is copied into *@res, which is allocated
 * with malloc() when it is still NULL, and scanning stops at the first
 * IFA_LOCAL. An IFA_LOCAL therefore overrides an IFA_ADDRESS seen before it.
 *
 * Returns 0 on success (whether or not an address was found) and -1 when an
 * address attribute has an unexpected size or the allocation fails.
 */
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	int attr_len = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *rta;
	int addrlen;

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk all rtattrs attached to this message. */
	for (rta = IFA_RTA(ifa); RTA_OK(rta, attr_len); rta = RTA_NEXT(rta, attr_len)) {
		if (rta->rta_type != IFA_LOCAL && rta->rta_type != IFA_ADDRESS)
			continue;

		/*
		 * The family check above should already guarantee the right
		 * payload size; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(rta) != (unsigned int)addrlen)
			return -1;

		/* Reuse the buffer of an earlier IFA_ADDRESS if there is one. */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(rta), addrlen);

		/* IFA_LOCAL is the preferred answer, nothing left to look for. */
		if (rta->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
2554
6ce39620
CB
2555#pragma GCC diagnostic pop
2556
19a26f82
MK
2557static int ip_addr_get(int family, int ifindex, void **res)
2558{
a5f5cb41 2559 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 2560 struct nl_handler nlh = NL_HANDLER_INIT;
a5f5cb41 2561 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2562 int answer_len, err;
06f976ca 2563 struct ifaddrmsg *ifa;
19a26f82 2564 struct nlmsghdr *msg;
df0834ed
CB
2565 int readmore = 0;
2566 __u32 recv_len = 0;
19a26f82 2567
a5f5cb41 2568 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
19a26f82
MK
2569 if (err)
2570 return err;
2571
19a26f82
MK
2572 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2573 if (!nlmsg)
a5f5cb41 2574 return ret_errno(ENOMEM);
19a26f82 2575
06f976ca 2576 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82 2577 if (!answer)
a5f5cb41 2578 return ret_errno(ENOMEM);
19a26f82 2579
ebc73a67
CB
2580 /* Save the answer buffer length, since it will be overwritten on the
2581 * first receive (and we might need to receive more than once).
2582 */
06f976ca
SZ
2583 answer_len = answer->nlmsghdr->nlmsg_len;
2584
ebc73a67 2585 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2586 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2587
06f976ca 2588 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b 2589 if (!ifa)
a5f5cb41
CB
2590 return ret_errno(ENOMEM);
2591
06f976ca 2592 ifa->ifa_family = family;
19a26f82 2593
ebc73a67
CB
2594 /* Send the request for addresses, which returns all addresses on all
2595 * interfaces.
2596 */
a5f5cb41 2597 err = netlink_send(nlh_ptr, nlmsg);
19a26f82 2598 if (err < 0)
a5f5cb41 2599 return ret_set_errno(err, errno);
19a26f82 2600
6ce39620
CB
2601#pragma GCC diagnostic push
2602#pragma GCC diagnostic ignored "-Wcast-align"
2603
19a26f82
MK
2604 do {
2605 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2606 * overwritten by a previous receive.
2607 */
06f976ca 2608 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2609
ebc73a67 2610 /* Get the (next) batch of reply messages. */
a5f5cb41 2611 err = netlink_rcv(nlh_ptr, answer);
19a26f82 2612 if (err < 0)
a5f5cb41 2613 return ret_set_errno(err, errno);
19a26f82
MK
2614
2615 recv_len = err;
2616 err = 0;
2617
ebc73a67 2618 /* Satisfy the typing for the netlink macros. */
06f976ca 2619 msg = answer->nlmsghdr;
19a26f82
MK
2620
2621 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2622 /* Stop reading if we see an error message. */
19a26f82 2623 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
2624 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2625 return ret_set_errno(errmsg->error, errno);
19a26f82
MK
2626 }
2627
ebc73a67 2628 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2629 if (msg->nlmsg_type == NLMSG_DONE) {
2630 readmore = 0;
2631 break;
2632 }
2633
a5f5cb41
CB
2634 if (msg->nlmsg_type != RTM_NEWADDR)
2635 return ret_errno(EINVAL);
19a26f82 2636
06f976ca 2637 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
9a064678 2638 if (ifa->ifa_index == (__u32)ifindex) {
a5f5cb41
CB
2639 if (ifa_get_local_ip(family, msg, res) < 0)
2640 return ret_errno(EINVAL);
51e7a874 2641
ebc73a67 2642 /* Found a result, stop searching. */
19a26f82 2643 if (*res)
a5f5cb41 2644 return 0;
19a26f82
MK
2645 }
2646
ebc73a67
CB
2647 /* Keep reading more data from the socket if the last
2648 * message had the NLF_F_MULTI flag set.
2649 */
19a26f82
MK
2650 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2651
ebc73a67 2652 /* Look at the next message received in this buffer. */
19a26f82
MK
2653 msg = NLMSG_NEXT(msg, recv_len);
2654 }
2655 } while (readmore);
2656
6ce39620
CB
2657#pragma GCC diagnostic pop
2658
19a26f82 2659 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2660 * error.
2661 */
a5f5cb41 2662 return -1;
19a26f82
MK
2663}
2664
/*
 * Look up an IPv6 address of the interface with index @ifindex and store it
 * in *@res. Returns whatever ip_addr_get() returns.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2669
/*
 * Look up an IPv4 address of the interface with index @ifindex and store it
 * in *@res. Returns whatever ip_addr_get() returns.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2674
f8fee0e2
MK
2675static int ip_gateway_add(int family, int ifindex, void *gw)
2676{
a5f5cb41 2677 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8aa61f9 2678 struct nl_handler nlh = NL_HANDLER_INIT;
a5f5cb41
CB
2679 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2680 int addrlen, err;
06f976ca 2681 struct rtmsg *rt;
f8fee0e2 2682
ebc73a67
CB
2683 addrlen = family == AF_INET ? sizeof(struct in_addr)
2684 : sizeof(struct in6_addr);
f8fee0e2 2685
a5f5cb41 2686 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
f8fee0e2
MK
2687 if (err)
2688 return err;
2689
f8fee0e2
MK
2690 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2691 if (!nlmsg)
a5f5cb41 2692 return ret_errno(ENOMEM);
f8fee0e2 2693
06f976ca 2694 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2 2695 if (!answer)
a5f5cb41 2696 return ret_errno(ENOMEM);
f8fee0e2 2697
a5f5cb41 2698 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2699 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2700
2701 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b 2702 if (!rt)
a5f5cb41
CB
2703 return ret_errno(ENOMEM);
2704
06f976ca
SZ
2705 rt->rtm_family = family;
2706 rt->rtm_table = RT_TABLE_MAIN;
2707 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2708 rt->rtm_protocol = RTPROT_BOOT;
2709 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2710 /* "default" destination */
06f976ca 2711 rt->rtm_dst_len = 0;
f8fee0e2 2712
a2f9a670 2713 /* If gateway address not supplied, then a device route will be created instead */
a5f5cb41
CB
2714 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2715 return ret_errno(ENOMEM);
f8fee0e2
MK
2716
2717 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2718 * addresses for the gateway.
2719 */
f8fee0e2 2720 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
a5f5cb41 2721 return ret_errno(EINVAL);
f8fee0e2 2722
a5f5cb41 2723 return netlink_transaction(nlh_ptr, nlmsg, answer);
f8fee0e2
MK
2724}
2725
/*
 * Add an IPv4 default route through @gw on the interface with index @ifindex.
 * Returns whatever ip_gateway_add() returns.
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2730
/*
 * Add an IPv6 default route through @gw on the interface with index @ifindex.
 * Returns whatever ip_gateway_add() returns.
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
581c75e7 2735bool is_ovs_bridge(const char *bridge)
0d204771 2736{
ebc73a67 2737 int ret;
0d204771 2738 struct stat sb;
ebc73a67 2739 char brdirname[22 + IFNAMSIZ + 1] = {0};
0d204771 2740
387c1c70
CB
2741 ret = strnprintf(brdirname, 22 + IFNAMSIZ + 1,
2742 "/sys/class/net/%s/bridge", bridge);
2743 if (ret < 0)
ebc73a67
CB
2744 return false;
2745
2746 ret = stat(brdirname, &sb);
2747 if (ret < 0 && errno == ENOENT)
0d204771 2748 return true;
ebc73a67 2749
0d204771
SH
2750 return false;
2751}
2752
/* Arguments handed to the ovs-vsctl exec callbacks through run_command(). */
struct ovs_veth_args {
	/* Name of the openvswitch bridge. */
	const char *bridge;
	/* Name of the port (network interface) to add or delete. */
	const char *nic;
};
2757
cb0dc11b
CB
2758/* Called from a background thread - when nic goes away, remove it from the
2759 * bridge.
c43cbc04 2760 */
581c75e7 2761static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2762{
581c75e7 2763 struct ovs_veth_args *args = data;
cb0dc11b 2764
9c66dc4f 2765 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2766 return -1;
c43cbc04
SH
2767}
2768
581c75e7 2769int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2770{
c43cbc04 2771 int ret;
419590da 2772 char cmd_output[PATH_MAX];
581c75e7 2773 struct ovs_veth_args args;
6ad22d06 2774
581c75e7
CB
2775 args.bridge = bridge;
2776 args.nic = nic;
2777 ret = run_command(cmd_output, sizeof(cmd_output),
2778 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2779 if (ret < 0)
2780 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2781
581c75e7
CB
2782 return 0;
2783}
ebc73a67 2784
581c75e7
CB
2785static int lxc_ovs_attach_bridge_exec(void *data)
2786{
2787 struct ovs_veth_args *args = data;
ebc73a67 2788
9c66dc4f 2789 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2790 return -1;
2791}
ebc73a67 2792
581c75e7
CB
2793static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2794{
2795 int ret;
419590da 2796 char cmd_output[PATH_MAX];
581c75e7 2797 struct ovs_veth_args args;
ebc73a67 2798
581c75e7
CB
2799 args.bridge = bridge;
2800 args.nic = nic;
2801 ret = run_command(cmd_output, sizeof(cmd_output),
2802 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2803 if (ret < 0)
2804 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2805
581c75e7 2806 return 0;
0d204771 2807}
0d204771 2808
581c75e7 2809int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2810{
ebc73a67 2811 int err, fd, index;
9de31d5a 2812 size_t retlen;
0ad19a3f 2813 struct ifreq ifr;
2814
dae3fdf6 2815 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2816 return -EINVAL;
0ad19a3f 2817
2818 index = if_nametoindex(ifname);
2819 if (!index)
3cfc0f3a 2820 return -EINVAL;
0ad19a3f 2821
0d204771 2822 if (is_ovs_bridge(bridge))
581c75e7 2823 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2824
ad9429e5 2825 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2826 if (fd < 0)
3cfc0f3a 2827 return -errno;
0ad19a3f 2828
9de31d5a 2829 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2830 if (retlen >= IFNAMSIZ) {
2831 close(fd);
9de31d5a 2832 return -E2BIG;
42cc4083 2833 }
9de31d5a 2834
ebc73a67 2835 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2836 ifr.ifr_ifindex = index;
7d163508 2837 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2838 close(fd);
3cfc0f3a
MN
2839 if (err)
2840 err = -errno;
0ad19a3f 2841
2842 return err;
2843}
72d0e1cb 2844
8befa924
SH
2845int setup_private_host_hw_addr(char *veth1)
2846{
387c1c70
CB
2847 __do_close int sockfd = -EBADF;
2848 int err;
8befa924 2849 struct ifreq ifr;
8befa924 2850
ad9429e5 2851 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2852 if (sockfd < 0)
2853 return -errno;
2854
387c1c70
CB
2855 err = strnprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2856 if (err < 0)
2857 return err;
ebc73a67 2858
8befa924 2859 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
387c1c70 2860 if (err < 0)
8befa924 2861 return -errno;
8befa924
SH
2862
2863 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2864 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924
SH
2865 if (err < 0)
2866 return -errno;
2867
2868 return 0;
2869}
811ef482
CB
2870
2871int lxc_find_gateway_addresses(struct lxc_handler *handler)
2872{
811ef482
CB
2873 struct lxc_netdev *netdev;
2874 int link_index;
2875
87d0990c 2876 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
811ef482
CB
2877 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2878 continue;
2879
9c66dc4f
CB
2880 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2881 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2882
87d0990c 2883 if (is_empty_string(netdev->link))
9c66dc4f 2884 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2885
2886 link_index = if_nametoindex(netdev->link);
2887 if (!link_index)
2888 return -EINVAL;
2889
2890 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2891 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2892 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2893 }
2894
2895 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2896 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2897 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2898 }
2899 }
2900
2901 return 0;
2902}
2903
2904#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
071d0934
CB
2905static int lxc_create_network_unpriv_exec(const char *lxcpath,
2906 const char *lxcname,
2907 struct lxc_netdev *netdev, pid_t pid,
2908 unsigned int hooks_version)
811ef482
CB
2909{
2910 int ret;
2911 pid_t child;
2912 int bytes, pipefd[2];
2913 char *token, *saveptr = NULL;
095ead80 2914 char netdev_link[IFNAMSIZ];
419590da 2915 char buffer[PATH_MAX] = {0};
94b1cade 2916 size_t retlen;
811ef482 2917
9c66dc4f 2918 if (netdev->type != LXC_NET_VETH)
071d0934
CB
2919 return log_error_errno(-1, errno,
2920 "Network type %d not support for unprivileged use",
2921 netdev->type);
811ef482
CB
2922
2923 ret = pipe(pipefd);
9c66dc4f
CB
2924 if (ret < 0)
2925 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2926
2927 child = fork();
2928 if (child < 0) {
811ef482
CB
2929 close(pipefd[0]);
2930 close(pipefd[1]);
9c66dc4f 2931 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2932 }
2933
2934 if (child == 0) {
8335fd40 2935 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2936
2937 close(pipefd[0]);
2938
2939 ret = dup2(pipefd[1], STDOUT_FILENO);
2940 if (ret >= 0)
2941 ret = dup2(pipefd[1], STDERR_FILENO);
2942 close(pipefd[1]);
2943 if (ret < 0) {
2944 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2945 _exit(EXIT_FAILURE);
811ef482
CB
2946 }
2947
f2711167 2948 if (!is_empty_string(netdev->link))
9de31d5a 2949 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2950 else
9de31d5a
CB
2951 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2952 if (retlen >= IFNAMSIZ) {
2953 SYSERROR("Invalid network device name");
2954 _exit(EXIT_FAILURE);
2955 }
811ef482 2956
387c1c70
CB
2957 ret = strnprintf(pidstr, sizeof(pidstr), "%d", pid);
2958 if (ret < 0)
78070056 2959 _exit(EXIT_FAILURE);
8335fd40 2960 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2961
2962 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
071d0934 2963 lxcname, pidstr, netdev_link, !is_empty_string(netdev->name) ? netdev->name : "(null)");
3473ca76 2964 if (!is_empty_string(netdev->name))
811ef482
CB
2965 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2966 lxcpath, lxcname, pidstr, "veth", netdev_link,
2967 netdev->name, (char *)NULL);
2968 else
2969 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2970 lxcpath, lxcname, pidstr, "veth", netdev_link,
2971 (char *)NULL);
2972 SYSERROR("Failed to execute lxc-user-nic");
78070056 2973 _exit(EXIT_FAILURE);
811ef482
CB
2974 }
2975
2976 /* close the write-end of the pipe */
2977 close(pipefd[1]);
2978
9c66dc4f 2979 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2980 if (bytes < 0) {
74c6e2b0 2981 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2982 close(pipefd[0]);
6b9f82a9
CB
2983 } else {
2984 buffer[bytes - 1] = '\0';
811ef482 2985 }
811ef482
CB
2986
2987 ret = wait_for_pid(child);
2988 close(pipefd[0]);
9c66dc4f 2989 if (ret != 0 || bytes < 0)
071d0934
CB
2990 return log_error(-1, "lxc-user-nic failed to configure requested network: %s",
2991 buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2992 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2993
2994 /* netdev->name */
2995 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2996 if (!token)
2997 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2998
e389f2af
CB
2999 /*
3000 * lxc-user-nic will take care of proper network device naming. So
fdd6be55 3001 * netdev->name and netdev->transient_name need to be identical to not
e389f2af
CB
3002 * trigger another rename later on.
3003 */
3004 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
fdd6be55
CB
3005 if (retlen < IFNAMSIZ) {
3006 retlen = strlcpy(netdev->transient_name, token, IFNAMSIZ);
3007 if (retlen < IFNAMSIZ)
3008 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
3009 }
9c66dc4f 3010 if (retlen >= IFNAMSIZ)
071d0934
CB
3011 return log_error_errno(-1, E2BIG,
3012 "Container side veth device name returned by lxc-user-nic is too long");
811ef482 3013
74c6e2b0 3014 /* netdev->ifindex */
811ef482 3015 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3016 if (!token)
3017 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 3018
74c6e2b0 3019 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f 3020 if (ret < 0)
071d0934
CB
3021 return log_error_errno(-1, -ret,
3022 "Failed to convert string \"%s\" to integer", token);
811ef482 3023
74c6e2b0 3024 /* netdev->priv.veth_attr.veth1 */
811ef482 3025 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3026 if (!token)
3027 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 3028
94b1cade 3029 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f 3030 if (retlen >= IFNAMSIZ)
071d0934
CB
3031 return log_error_errno(-1, E2BIG,
3032 "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
3033
3034 /* netdev->priv.veth_attr.ifindex */
3035 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
3036 if (!token)
3037 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
3038
3039 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f 3040 if (ret < 0)
071d0934
CB
3041 return log_error_errno(-1, -ret,
3042 "Failed to convert string \"%s\" to integer", token);
811ef482 3043
4d781681 3044 if (netdev->upscript) {
3045 char *argv[] = {
3046 "veth",
3047 netdev->link,
3048 netdev->priv.veth_attr.veth1,
3049 NULL,
3050 };
3051
e389f2af
CB
3052 ret = run_script_argv(lxcname, hooks_version, "net",
3053 netdev->upscript, "up", argv);
4d781681 3054 if (ret < 0)
3055 return -1;
071d0934 3056 }
4d781681 3057
811ef482
CB
3058 return 0;
3059}
3060
f0ecc19d 3061static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
3062 struct lxc_netdev *netdev,
3063 const char *netns_path)
811ef482
CB
3064{
3065 int bytes, ret;
3066 pid_t child;
3067 int pipefd[2];
25619b99 3068 char buffer[PATH_MAX] = {};
811ef482 3069
9c66dc4f
CB
3070 if (netdev->type != LXC_NET_VETH)
3071 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
3072
3073 ret = pipe(pipefd);
9c66dc4f
CB
3074 if (ret < 0)
3075 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
3076
3077 child = fork();
3078 if (child < 0) {
811ef482
CB
3079 close(pipefd[0]);
3080 close(pipefd[1]);
9c66dc4f 3081 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
3082 }
3083
3084 if (child == 0) {
8843fde4 3085 char *hostveth;
811ef482
CB
3086
3087 close(pipefd[0]);
3088
3089 ret = dup2(pipefd[1], STDOUT_FILENO);
3090 if (ret >= 0)
3091 ret = dup2(pipefd[1], STDERR_FILENO);
3092 close(pipefd[1]);
3093 if (ret < 0) {
3094 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 3095 _exit(EXIT_FAILURE);
811ef482
CB
3096 }
3097
f2711167 3098 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3099 hostveth = netdev->priv.veth_attr.pair;
3100 else
3101 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3102 if (is_empty_string(hostveth)) {
74c6e2b0 3103 SYSERROR("Host side veth device name is missing");
a30b9023 3104 _exit(EXIT_FAILURE);
74c6e2b0
CB
3105 }
3106
f2711167
CB
3107 if (is_empty_string(netdev->link)) {
3108 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 3109 _exit(EXIT_FAILURE);
74c6e2b0 3110 }
811ef482 3111
811ef482 3112 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 3113 lxcname, netns_path, netdev->link, hostveth);
811ef482 3114 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
3115 lxcname, netns_path, "veth", netdev->link, hostveth,
3116 (char *)NULL);
811ef482 3117 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 3118 _exit(EXIT_FAILURE);
811ef482
CB
3119 }
3120
3121 close(pipefd[1]);
3122
9c66dc4f 3123 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
3124 if (bytes < 0) {
3125 SYSERROR("Failed to read from pipe file descriptor.");
3126 close(pipefd[0]);
6b9f82a9
CB
3127 } else {
3128 buffer[bytes - 1] = '\0';
811ef482 3129 }
811ef482 3130
6b9f82a9 3131 ret = wait_for_pid(child);
9c66dc4f
CB
3132 close_prot_errno_disarm(pipefd[0]);
3133 if (ret != 0 || bytes < 0)
3134 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
3135 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 3136
811ef482
CB
3137 return 0;
3138}
3139
59eac805 3140static bool lxc_delete_network_unpriv(struct lxc_handler *handler)
1bd8d726
CB
3141{
3142 int ret;
87d0990c 3143 struct lxc_netdev *netdev;
1bd8d726
CB
3144 /* strlen("/proc/") = 6
3145 * +
8335fd40 3146 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
3147 * +
3148 * strlen("/fd/") = 4
3149 * +
8335fd40 3150 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
3151 * +
3152 * \0
3153 */
8335fd40 3154 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
3155
3156 *netns_path = '\0';
3157
9c66dc4f
CB
3158 if (handler->nsfd[LXC_NS_NET] < 0)
3159 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726 3160
387c1c70
CB
3161 ret = strnprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
3162 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
3163 if (ret < 0)
1bd8d726
CB
3164 return false;
3165
87d0990c 3166 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
1bd8d726 3167 char *hostveth = NULL;
1bd8d726
CB
3168
3169 /* We can only delete devices whose ifindex we have. If we don't
3170 * have the index it means that we didn't create it.
3171 */
3172 if (!netdev->ifindex)
3173 continue;
3174
3175 if (netdev->type == LXC_NET_PHYS) {
3176 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3177 netdev->link);
3178 if (ret < 0)
9c66dc4f 3179 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
3180 netdev->ifindex, netdev->link);
3181 else
9c66dc4f 3182 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 3183 netdev->ifindex, netdev->link);
b3259dc6
TP
3184
3185 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3186 if (ret < 0)
3187 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3188 netdev->ifindex, netdev->link);
66a7c406 3189 goto clear_ifindices;
1bd8d726
CB
3190 }
3191
3192 ret = netdev_deconf[netdev->type](handler, netdev);
3193 if (ret < 0)
3194 WARN("Failed to deconfigure network device");
3195
3196 if (netdev->type != LXC_NET_VETH)
66a7c406 3197 goto clear_ifindices;
1bd8d726 3198
f2711167 3199 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 3200 goto clear_ifindices;
1bd8d726 3201
f2711167 3202 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3203 hostveth = netdev->priv.veth_attr.pair;
3204 else
3205 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3206 if (is_empty_string(hostveth))
66a7c406 3207 goto clear_ifindices;
8843fde4 3208
1bd8d726
CB
3209 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3210 handler->name, netdev,
3211 netns_path);
3212 if (ret < 0) {
9c66dc4f 3213 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 3214 goto clear_ifindices;
1bd8d726 3215 }
9c66dc4f 3216 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
3217
3218clear_ifindices:
0858c829
CB
3219 /*
3220 * We need to clear any ifindices we recorded so liblxc won't
3221 * have cached stale data which would cause it to fail on
3222 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3223 */
3224 netdev->ifindex = 0;
3225 if (netdev->type == LXC_NET_PHYS) {
3226 netdev->priv.phys_attr.ifindex = 0;
3227 } else if (netdev->type == LXC_NET_VETH) {
3228 netdev->priv.veth_attr.veth1[0] = '\0';
3229 netdev->priv.veth_attr.ifindex = 0;
3230 }
1bd8d726
CB
3231 }
3232
bb84beda 3233 return true;
1bd8d726
CB
3234}
3235
6509154d 3236static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
6509154d 3237 struct lxc_inetdev *inet4dev;
3238 struct lxc_inet6dev *inet6dev;
3239 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3240 int err = 0;
5fe147e9
TP
3241 unsigned int lo_ifindex = 0, link_ifindex = 0;
3242
3243 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
3244 if (link_ifindex == 0)
3245 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 3246
6509154d 3247
3248 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
05a54a64 3249 if (!list_empty(&netdev->ipv4_addresses)) {
6509154d 3250 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
3251 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3252 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 3253 }
3254
3255 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
05a54a64 3256 if (!list_empty(&netdev->ipv6_addresses)) {
6509154d 3257 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
3258 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3259 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 3260
3261 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
3262 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3263 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 3264 }
3265
b670016a 3266 /* Perform IPVLAN specific checks. */
3267 if (netdev->type == LXC_NET_IPVLAN) {
3268 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
3269 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3270 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 3271
3272 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3273 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
3274 if (lo_ifindex == 0)
3275 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 3276 }
3277
05a54a64 3278 list_for_each_entry(inet4dev, &netdev->ipv4_addresses, head) {
6509154d 3279 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3280 return ret_set_errno(-1, -errno);
6509154d 3281
5fe147e9 3282 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3283 return ret_set_errno(-1, EINVAL);
b670016a 3284
3285 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3286 if (netdev->type == LXC_NET_IPVLAN) {
3287 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
3288 if (err < 0)
3289 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 3290 }
6509154d 3291 }
3292
05a54a64 3293 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
6509154d 3294 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3295 return ret_set_errno(-1, -errno);
6509154d 3296
5fe147e9 3297 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3298 return ret_set_errno(-1, EINVAL);
b670016a 3299
3300 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3301 if (netdev->type == LXC_NET_IPVLAN) {
3302 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
3303 if (err < 0)
3304 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3305 }
6509154d 3306 }
3307
3308 return 0;
3309}
3310
9c66dc4f
CB
/*
 * Remove the l2proxy configuration for a single IPv4 address.
 *
 * @ip          address to clean up
 * @link        interface holding the neighbour proxy entry; skipped if empty
 * @lo_ifindex  loopback ifindex whose /32 destination route is removed;
 *              skipped if 0
 *
 * Both removals are attempted even if the first one fails. Returns 0 on
 * success and -1 with errno set to EINVAL on any failure.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET_ADDRSTRLEN];
	unsigned int ifindex = 0;
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* With a loopback ifindex, drop the static route to the lo device. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* With a link, drop the neighbour proxy entry for this address. */
	if (!is_empty_string(link)) {
		ifindex = if_nametoindex(link);
		if (ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3343
9c66dc4f
CB
/*
 * Remove the l2proxy configuration for a single IPv6 address.
 *
 * @ip          address to clean up
 * @link        interface holding the neighbour proxy entry; skipped if empty
 * @lo_ifindex  loopback ifindex whose /128 destination route is removed;
 *              skipped if 0
 *
 * Both removals are attempted even if the first one fails. Returns 0 on
 * success and -1 with errno set to EINVAL on any failure.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET6_ADDRSTRLEN];
	unsigned int ifindex = 0;
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* With a loopback ifindex, drop the static route to the lo device. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* With a link, drop the neighbour proxy entry for this address. */
	if (!is_empty_string(link)) {
		ifindex = if_nametoindex(link);
		if (ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3378
cd32fc73
CB
3379static int lxc_delete_l2proxy(struct lxc_netdev *netdev)
3380{
b670016a 3381 unsigned int lo_ifindex = 0;
cd32fc73 3382 unsigned int err = 0;
6509154d 3383 struct lxc_inetdev *inet4dev;
3384 struct lxc_inet6dev *inet6dev;
6509154d 3385
b670016a 3386 /* Perform IPVLAN specific checks. */
3387 if (netdev->type == LXC_NET_IPVLAN) {
3388 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3389 lo_ifindex = if_nametoindex(loop_device);
b670016a 3390 if (lo_ifindex == 0) {
cd32fc73 3391 err++;
3ebffb98 3392 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3393 }
b670016a 3394 }
6509154d 3395
05a54a64 3396 list_for_each_entry(inet4dev, &netdev->ipv4_addresses, head) {
b670016a 3397 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
cd32fc73 3398 err++;
6509154d 3399 }
3400
05a54a64 3401 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
b670016a 3402 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
cd32fc73 3403 err++;
6509154d 3404 }
3405
cd32fc73
CB
3406 if (err > 0)
3407 return ret_errno(EINVAL);
6509154d 3408
3409 return 0;
3410}
3411
e389f2af 3412static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3413{
87d0990c 3414 struct lxc_netdev *netdev;
811ef482 3415
87d0990c 3416 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
9c66dc4f
CB
3417 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3418 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3419
6509154d 3420 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3421 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3422 if (lxc_setup_l2proxy(netdev))
3423 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3424 }
3425
bad2f913 3426 if (netdev_configure_server[netdev->type](handler, netdev))
9c66dc4f 3427 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3428 }
3429
3430 return 0;
3431}
3432
fdd6be55
CB
3433/*
3434 * LXC moves network devices into the target namespace based on their created
3435 * name. The created name can either be randomly generated for e.g. veth
3436 * devices or it can be the name of the existing device in the server's
3437 * namespaces. This is e.g. the case when moving physical devices. However this
3438 * can lead to weird clashes. Consider we have a network namespace that has the
3439 * following devices:
3440
3441 * 4: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3442 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3443 * altname enp7s0
3444 * 5: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3445 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3446 * altname enp8s0
3447 *
3448 * and the user generates the following network config for their container:
3449 *
3450 * lxc.net.0.type = phys
3451 * lxc.net.0.name = eth1
3452 * lxc.net.0.link = eth2
3453 *
3454 * lxc.net.1.type = phys
3455 * lxc.net.1.name = eth2
3456 * lxc.net.1.link = eth1
3457 *
3458 * This would cause LXC to move the devices eth1 and eth2 from the server's
3459 * network namespace into the container's network namespace:
3460 *
3461 * 24: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3462 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3463 * altname enp7s0
3464 * 25: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3465 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3466 * altname enp8s0
3467 *
3468 * According to the network config above we now need to rename the network
3469 * devices in the container's network namespace. Let's say we start with
3470 * renaming eth2 to eth1. This would immediately lead to a clash since the
3471 * container's network namespace already contains a network device with that
3472 * name. Renaming the other device would have the same problem.
3473 *
3474 * There are multiple ways to fix this but I'm concerned with keeping the logic
3475 * somewhat reasonable which is why we simply start creating transient device
3476 * names that are unique which we'll use to move and rename the network device
3477 * in the container's network namespace at the same time. And then we rename
3478 * based on those random device names to the target name.
3479 *
3480 * Note that the transient name is based on the type of network device as
3481 * specified in the LXC config. However, that doesn't mean it's correct. LXD
3482 * passes veth devices and a range of other network devices (e.g. Infiniband
3483 * VFs etc.) via LXC_NET_PHYS even though they're not really "physical" in the
3484 * sense we like to think about it so you might see a veth device being
3485 * assigned a "physXXXXXX" transient name. That's not a problem.
3486 */
3487static int create_transient_name(struct lxc_netdev *netdev)
3488{
3489 const struct lxc_network_info *info;
3490
3491 if (!is_empty_string(netdev->transient_name))
3492 return syserror_set(-EINVAL, "Network device already had a transient name %s",
3493 netdev->transient_name);
3494
3495 info = &lxc_network_info[netdev->type];
3496 strlcpy(netdev->transient_name, info->template, info->template_len + 1);
3497
3498 if (!lxc_ifname_alnum_case_sensitive(netdev->transient_name))
3499 return syserror_set(-EINVAL, "Failed to create transient name for network device %s", netdev->created_name);
3500
3501 TRACE("Created transient name %s for network device", netdev->transient_name);
3502 return 0;
3503}
3504
43e2a964
CB
3505static int netdev_requires_move(const struct lxc_netdev *netdev)
3506{
4deaa28c 3507 if (netdev->type == LXC_NET_EMPTY || netdev->type == LXC_NET_NONE)
43e2a964
CB
3508 return false;
3509
3510 /*
3511 * Veth devices are directly created in the container's network
3512 * namespace so the device doesn't need to be moved into the
3513 * container's network namespace. The transient name will
3514 * already have been set above when we created the veth tunnel.
3515 */
3516 if (!netdev->ifindex)
3517 return false;
3518
3519 return true;
3520}
3521
e389f2af 3522int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3523{
e389f2af 3524 pid_t pid = handler->pid;
87d0990c 3525 struct lxc_netdev *netdev;
811ef482 3526
e0010464 3527 if (am_guest_unpriv())
74c6e2b0 3528 return 0;
811ef482 3529
87d0990c 3530 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3dd78294 3531 __do_free char *physname = NULL;
e389f2af 3532 int ret;
811ef482 3533
43e2a964 3534 if (!netdev_requires_move(netdev))
811ef482
CB
3535 continue;
3536
fdd6be55
CB
3537 ret = create_transient_name(netdev);
3538 if (ret < 0)
3539 return ret;
3540
3dd78294
CB
3541 if (netdev->type == LXC_NET_PHYS)
3542 physname = is_wlan(netdev->link);
3543
3544 if (physname)
fdd6be55 3545 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->transient_name);
3dd78294 3546 else
fdd6be55 3547 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->transient_name);
9c66dc4f 3548 if (ret)
fdd6be55
CB
3549 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d and rename to %s",
3550 netdev->created_name, netdev->ifindex, pid, netdev->transient_name);
811ef482 3551
fdd6be55
CB
3552 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d and renamed to %s",
3553 maybe_empty(netdev->created_name), netdev->ifindex, pid, netdev->transient_name);
811ef482
CB
3554 }
3555
3556 return 0;
3557}
3558
3c09b97c
CB
3559static int network_requires_advanced_setup(int type)
3560{
3561 if (type == LXC_NET_EMPTY)
3562 return false;
3563
3564 if (type == LXC_NET_NONE)
3565 return false;
3566
3567 return true;
3568}
3569
e389f2af 3570static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3571{
e389f2af
CB
3572 int hooks_version = handler->conf->hooks_version;
3573 const char *lxcname = handler->name;
3574 const char *lxcpath = handler->lxcpath;
e389f2af 3575 pid_t pid = handler->pid;
87d0990c 3576 struct lxc_netdev *netdev;
74c6e2b0 3577
87d0990c 3578 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3c09b97c 3579 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3580 continue;
3581
9c66dc4f
CB
3582 if (netdev->type != LXC_NET_VETH)
3583 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3584 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3585
3586 if (netdev->mtu)
3587 INFO("mtu ignored due to insufficient privilege");
3588
e389f2af
CB
3589 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3590 pid, hooks_version))
74c6e2b0
CB
3591 return -1;
3592 }
3593
3594 return 0;
3595}
3596
59eac805 3597static bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3598{
3599 int ret;
87d0990c 3600 struct lxc_netdev *netdev;
1bd8d726 3601
87d0990c 3602 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
811ef482 3603 char *hostveth = NULL;
811ef482
CB
3604
3605 /* We can only delete devices whose ifindex we have. If we don't
3606 * have the index it means that we didn't create it.
3607 */
3608 if (!netdev->ifindex)
3609 continue;
3610
0104c121
CB
3611 /*
3612 * If the network device has been moved back from the
3613 * containers network namespace, update the ifindex.
3614 */
3615 netdev->ifindex = if_nametoindex(netdev->name);
3616
6509154d 3617 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3618 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3619 if (lxc_delete_l2proxy(netdev))
3620 WARN("Failed to delete all l2proxy config");
3621 /* Don't return, let the network be cleaned up as normal. */
3622 }
3623
811ef482 3624 if (netdev->type == LXC_NET_PHYS) {
bb301db7
SB
3625 /* Physical interfaces are initially returned to the parent namespace
3626 * with their transient name to avoid collisions
3627 */
3628 netdev->ifindex = if_nametoindex(netdev->transient_name);
811ef482
CB
3629 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3630 if (ret < 0)
3631 WARN("Failed to rename interface with index %d "
b809f232
CB
3632 "from \"%s\" to its initial name \"%s\"",
3633 netdev->ifindex, netdev->name, netdev->link);
0b154989 3634 else {
29589196
CB
3635 TRACE("Renamed interface with index %d from "
3636 "\"%s\" to its initial name \"%s\"",
3637 netdev->ifindex, netdev->name,
3638 netdev->link);
0b154989
TP
3639
3640 /* Restore original MTU */
3641 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3642 if (ret < 0) {
3643 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3644 netdev->link, netdev->priv.phys_attr.mtu);
3645 } else {
3646 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3647 netdev->link, netdev->priv.phys_attr.mtu);
3648 }
3649 }
b3259dc6
TP
3650
3651 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3652 if (ret < 0)
3653 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3654 netdev->ifindex, netdev->link);
66a7c406 3655 goto clear_ifindices;
811ef482
CB
3656 }
3657
3658 ret = netdev_deconf[netdev->type](handler, netdev);
3659 if (ret < 0)
3660 WARN("Failed to deconfigure network device");
3661
811ef482 3662 if (netdev->type != LXC_NET_VETH)
66a7c406 3663 goto clear_ifindices;
811ef482 3664
811ef482
CB
3665 /* Explicitly delete host veth device to prevent lingering
3666 * devices. We had issues in LXD around this.
3667 */
f2711167 3668 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3669 hostveth = netdev->priv.veth_attr.pair;
3670 else
3671 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3672 if (is_empty_string(hostveth))
66a7c406 3673 goto clear_ifindices;
811ef482 3674
1ee56cff
CB
3675 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3676 ret = lxc_netdev_delete_by_name(hostveth);
3677 if (ret < 0)
3678 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3679
1ee56cff
CB
3680 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3681 } else if (!is_empty_string(netdev->link)) {
3682 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3683 if (ret < 0)
3684 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3685
1ee56cff
CB
3686 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3687 }
811ef482 3688
66a7c406 3689clear_ifindices:
ad2ddfcd 3690 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3691 * have cached stale data which would cause it to fail on reboot
3692 * where we don't re-read the on-disk config file.
3693 */
3694 netdev->ifindex = 0;
3695 if (netdev->type == LXC_NET_PHYS) {
3696 netdev->priv.phys_attr.ifindex = 0;
3697 } else if (netdev->type == LXC_NET_VETH) {
3698 netdev->priv.veth_attr.veth1[0] = '\0';
3699 netdev->priv.veth_attr.ifindex = 0;
3700 }
bb301db7
SB
3701
3702 /* Clear transient name */
3703 if (!is_empty_string (netdev->transient_name))
3704 {
3705 netdev->transient_name[0] = '\0';
3706 }
811ef482
CB
3707 }
3708
bb84beda 3709 return true;
811ef482
CB
3710}
3711
3712int lxc_requests_empty_network(struct lxc_handler *handler)
3713{
87d0990c 3714 struct list_head *netdevs = &handler->conf->netdevs;
811ef482 3715 bool found_none = false, found_nic = false;
87d0990c 3716 struct lxc_netdev *netdev;
811ef482 3717
87d0990c 3718 if (list_empty(netdevs))
811ef482
CB
3719 return 0;
3720
87d0990c 3721 list_for_each_entry(netdev, netdevs, head) {
811ef482
CB
3722
3723 if (netdev->type == LXC_NET_NONE)
3724 found_none = true;
3725 else
3726 found_nic = true;
3727 }
9c66dc4f 3728
811ef482
CB
3729 if (found_none && !found_nic)
3730 return 1;
9c66dc4f 3731
811ef482
CB
3732 return 0;
3733}
3734
3735/* try to move physical nics to the init netns */
b809f232 3736int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3737{
9c66dc4f
CB
3738 __do_close int oldfd = -EBADF;
3739 int netnsfd = handler->nsfd[LXC_NS_NET];
3740 struct lxc_conf *conf = handler->conf;
811ef482 3741 int ret;
811ef482 3742 char ifname[IFNAMSIZ];
87d0990c 3743 struct lxc_netdev *netdev;
811ef482 3744
04213960
TA
3745 /*
3746 * If we weren't asked to clone a new network namespace, there's
3747 * nothing to restore.
3748 */
3749 if (!(handler->ns_clone_flags & CLONE_NEWNET))
3750 return 0;
3751
b809f232
CB
3752 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3753 * the parent network namespace. We won't have this capability if we are
3754 * unprivileged.
3755 */
d0fbc7ba 3756 if (!handler->am_root)
b809f232 3757 return 0;
811ef482 3758
b809f232 3759 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3760
0037ab49 3761 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3762 if (oldfd < 0)
3763 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3764
b809f232 3765 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3766 if (ret < 0)
3767 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3768
87d0990c 3769 list_for_each_entry(netdev, &conf->netdevs, head) {
b809f232
CB
3770 if (netdev->type != LXC_NET_PHYS)
3771 continue;
3772
3773 /* Retrieve the name of the interface in the container's network
3774 * namespace.
3775 */
3776 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3777 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3778 continue;
3779 }
b809f232 3780
bb301db7
SB
3781 /* Restore physical interfaces to host's network namespace with its transient name
3782 * to avoid collisions with the host's other interfaces.
3783 */
3784 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->transient_name);
b809f232 3785 if (ret < 0)
9c66dc4f 3786 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3787 else
9c66dc4f 3788 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3789 }
811ef482 3790
b809f232 3791 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3792 if (ret < 0)
3793 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3794
3795 return 0;
811ef482
CB
3796}
3797
3798static int setup_hw_addr(char *hwaddr, const char *ifname)
3799{
9c66dc4f 3800 __do_close int fd = -EBADF;
811ef482
CB
3801 struct sockaddr sockaddr;
3802 struct ifreq ifr;
9c66dc4f 3803 int ret;
811ef482
CB
3804
3805 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3806 if (ret)
3807 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3808
3809 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3810 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3811 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3812
ad9429e5 3813 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3814 if (fd < 0)
3815 return -1;
3816
3817 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3818 if (ret)
6d1400b5 3819 SYSERROR("Failed to perform ioctl");
3820
9c66dc4f 3821 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3822
3823 return ret;
3824}
3825
2ec31bbd 3826static int setup_ipv4_addr(struct lxc_netdev *netdev)
811ef482 3827{
2ec31bbd 3828 int ifindex = netdev->ifindex;
811ef482 3829 int err;
2ec31bbd 3830 struct lxc_inetdev *inet4dev;
811ef482 3831
05a54a64 3832 list_for_each_entry(inet4dev, &netdev->ipv4_addresses, head) {
2ec31bbd
CB
3833 err = lxc_ipv4_addr_add(ifindex, &inet4dev->addr,
3834 &inet4dev->bcast, inet4dev->prefix);
9c66dc4f
CB
3835 if (err)
3836 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3837 }
3838
3839 return 0;
3840}
3841
cd32fc73 3842static int setup_ipv6_addr(struct lxc_netdev *netdev)
811ef482 3843{
811ef482 3844 int err;
cd32fc73
CB
3845 struct lxc_inet6dev *inet6dev;
3846 int ifindex = netdev->ifindex;
811ef482 3847
05a54a64 3848 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
811ef482
CB
3849 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3850 &inet6dev->mcast, &inet6dev->acast,
3851 inet6dev->prefix);
9c66dc4f
CB
3852 if (err)
3853 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3854 }
3855
3856 return 0;
3857}
3858
8bf64b77 3859static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3860{
811ef482 3861 int err;
009d6127 3862 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482 3863
811ef482 3864 /* set a mac address */
9c66dc4f
CB
3865 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3866 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3867
3868 /* setup ipv4 addresses on the interface */
2ec31bbd 3869 if (setup_ipv4_addr(netdev))
9c66dc4f 3870 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3871
3872 /* setup ipv6 addresses on the interface */
cd32fc73 3873 if (setup_ipv6_addr(netdev))
9c66dc4f 3874 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3875
3876 /* set the network device up */
3877 if (netdev->flags & IFF_UP) {
8bf64b77 3878 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3879 if (err)
3880 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3881
3882 /* the network is up, make the loopback up too */
3883 err = lxc_netdev_up("lo");
9c66dc4f
CB
3884 if (err)
3885 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3886 }
3887
811ef482 3888 /* setup ipv4 gateway on the interface */
a2f9a670 3889 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3890 if (!(netdev->flags & IFF_UP))
3891 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3892
05a54a64 3893 if (list_empty(&netdev->ipv4_addresses))
9c66dc4f 3894 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3895
a2f9a670 3896 /* Setup device route if ipv4_gateway_dev is enabled */
3897 if (netdev->ipv4_gateway_dev) {
3898 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3899 if (err < 0)
3900 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3901 } else {
009d6127 3902 /* Check the gateway address is valid */
3903 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3904 return ret_set_errno(-1, errno);
009d6127 3905
3906 /* Try adding a default route to the gateway address */
811ef482 3907 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3908 if (err < 0) {
3909 /* If adding the default route fails, this could be because the
3910 * gateway address is in a different subnet to the container's address.
3911 * To work around this, we try adding a static device route to the
3912 * gateway address first, and then try again.
3913 */
a2f9a670 3914 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3915 if (err < 0)
3916 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3917
a2f9a670 3918 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3919 if (err < 0)
3920 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3921 }
3922 }
3923 }
3924
3925 /* setup ipv6 gateway on the interface */
a2f9a670 3926 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3927 if (!(netdev->flags & IFF_UP))
3928 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3929
05a54a64 3930 if (list_empty(&netdev->ipv6_addresses) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
9c66dc4f 3931 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3932
a2f9a670 3933 /* Setup device route if ipv6_gateway_dev is enabled */
3934 if (netdev->ipv6_gateway_dev) {
3935 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3936 if (err < 0)
3937 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3938 } else {
009d6127 3939 /* Check the gateway address is valid */
3940 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3941 return ret_set_errno(-1, errno);
009d6127 3942
3943 /* Try adding a default route to the gateway address */
811ef482 3944 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3945 if (err < 0) {
3946 /* If adding the default route fails, this could be because the
3947 * gateway address is in a different subnet to the container's address.
3948 * To work around this, we try adding a static device route to the
3949 * gateway address first, and then try again.
3950 */
a2f9a670 3951 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3952 if (err < 0)
3953 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3954
a2f9a670 3955 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3956 if (err < 0)
3957 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3958 }
3959 }
3960 }
3961
8bf64b77 3962 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3963
3964 return 0;
3965}
3966
3a197a1b
CB
3967/**
3968 * Consider the following network layout:
3969 *
3970 * lxc.net.0.type = phys
3971 * lxc.net.0.link = eth2
3972 * lxc.net.0.name = eth%d
3973 *
3974 * lxc.net.1.type = phys
3975 * lxc.net.1.link = eth1
3976 * lxc.net.1.name = eth0
3977 *
3978 * If we simply follow this order and create the first network first the kernel
3979 * will allocate eth0 for the first network but the second network requests
3980 * that eth1 be renamed to eth0 in the container's network namespace which
3981 * would lead to a clash.
3982 *
3983 * Note, we don't handle cases like:
3984 *
3985 * lxc.net.0.type = phys
3986 * lxc.net.0.link = eth2
3987 * lxc.net.0.name = eth0
3988 *
3989 * lxc.net.1.type = phys
3990 * lxc.net.1.link = eth1
3991 * lxc.net.1.name = eth0
3992 *
3993 * That'll brutally fail of course but there's nothing we can do about it.
3994 */
87d0990c 3995int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf)
811ef482 3996{
3a197a1b 3997 bool needs_second_pass = false;
87d0990c
CB
3998 struct lxc_netdev *netdev;
3999 const struct list_head *netdevs = &conf->netdevs;
811ef482 4000
87d0990c 4001 if (list_empty(netdevs))
3a197a1b
CB
4002 return 0;
4003
4004 /* Configure all devices that have a specific target name. */
87d0990c 4005 list_for_each_entry(netdev, netdevs, head) {
8bf64b77 4006 int ret;
811ef482 4007
3a197a1b
CB
4008 if (is_empty_string(netdev->name) || strequal(netdev->name, "eth%d")) {
4009 needs_second_pass = true;
4010 continue;
4011 }
4012
bad2f913 4013 ret = netdev_configure_container[netdev->type](netdev);
8bf64b77
CB
4014 if (!ret)
4015 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
4016 if (ret)
4017 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482 4018 }
3a197a1b
CB
4019 INFO("Finished setting up network devices with caller assigned names");
4020
4021 if (needs_second_pass) {
4022 /* Configure all devices that have a kernel assigned name. */
87d0990c 4023 list_for_each_entry(netdev, netdevs, head) {
3a197a1b 4024 int ret;
811ef482 4025
3a197a1b
CB
4026 if (!is_empty_string(netdev->name) && !strequal(netdev->name, "eth%d"))
4027 continue;
4028
4029 ret = netdev_configure_container[netdev->type](netdev);
4030 if (!ret)
4031 ret = lxc_network_setup_in_child_namespaces_common(netdev);
4032 if (ret)
4033 return log_error_errno(-1, errno, "Failed to setup netdev");
4034 }
4035 INFO("Finished setting up network devices with kernel assigned names");
4036 }
811ef482
CB
4037
4038 return 0;
4039}
7ab1ba02 4040
3c09b97c 4041int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02 4042{
7ab1ba02 4043 int data_sock = handler->data_sock[0];
87d0990c 4044 struct lxc_netdev *netdev;
7ab1ba02 4045
87d0990c 4046 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
7ab1ba02 4047 int ret;
7ab1ba02 4048
3c09b97c 4049 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
4050 continue;
4051
7fbb15ec 4052 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 4053 if (ret < 0)
7ab1ba02 4054 return -1;
e389f2af 4055
fdd6be55 4056 ret = lxc_send_nointr(data_sock, netdev->transient_name, IFNAMSIZ, MSG_NOSIGNAL);
e389f2af
CB
4057 if (ret < 0)
4058 return -1;
4059
fdd6be55 4060 TRACE("Sent network device name \"%s\" to child", netdev->transient_name);
7ab1ba02
CB
4061 }
4062
4063 return 0;
4064}
4065
3c09b97c 4066int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02 4067{
7ab1ba02 4068 int data_sock = handler->data_sock[1];
87d0990c 4069 struct lxc_netdev *netdev;
7ab1ba02 4070
87d0990c 4071 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
7ab1ba02 4072 int ret;
7ab1ba02 4073
3c09b97c 4074 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
4075 continue;
4076
e3233f26 4077 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 4078 if (ret < 0)
7ab1ba02 4079 return -1;
e389f2af 4080
fdd6be55 4081 ret = lxc_recv_nointr(data_sock, netdev->transient_name, IFNAMSIZ, 0);
e389f2af
CB
4082 if (ret < 0)
4083 return -1;
54256301 4084
fdd6be55 4085 TRACE("Received network device name \"%s\" from parent", netdev->transient_name);
7ab1ba02
CB
4086 }
4087
4088 return 0;
4089}
a1ae535a
CB
4090
4091int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
4092{
a1ae535a 4093 int data_sock = handler->data_sock[0];
87d0990c
CB
4094 struct lxc_netdev *netdev;
4095 struct list_head *netdevs = &handler->conf->netdevs;
a1ae535a
CB
4096
4097 if (!handler->am_root)
4098 return 0;
4099
87d0990c 4100 list_for_each_entry(netdev, netdevs, head) {
a1ae535a 4101 int ret;
a1ae535a
CB
4102
4103 /* Send network device name in the child's namespace to parent. */
7fbb15ec 4104 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 4105 if (ret < 0)
7729f8e5 4106 return -1;
a1ae535a
CB
4107
4108 /* Send network device ifindex in the child's namespace to
4109 * parent.
4110 */
7fbb15ec 4111 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 4112 if (ret < 0)
7729f8e5 4113 return -1;
a1150aa1
CB
4114
4115 TRACE("Sent network device %s with ifindex %d to parent", maybe_empty(netdev->name), netdev->ifindex);
a1ae535a
CB
4116 }
4117
87d0990c 4118 if (!list_empty(netdevs))
e389f2af
CB
4119 TRACE("Sent network device names and ifindices to parent");
4120
a1ae535a 4121 return 0;
a1ae535a
CB
4122}
4123
4124int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
4125{
a1ae535a 4126 int data_sock = handler->data_sock[1];
87d0990c 4127 struct lxc_netdev *netdev;
a1ae535a
CB
4128
4129 if (!handler->am_root)
4130 return 0;
4131
87d0990c 4132 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
a1ae535a 4133 int ret;
a1ae535a
CB
4134
4135 /* Receive network device name in the child's namespace to
4136 * parent.
4137 */
e3233f26 4138 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 4139 if (ret < 0)
7729f8e5 4140 return -1;
a1ae535a
CB
4141
4142 /* Receive network device ifindex in the child's namespace to
4143 * parent.
4144 */
e3233f26 4145 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 4146 if (ret < 0)
7729f8e5 4147 return -1;
a1150aa1
CB
4148
4149 TRACE("Received network device %s with ifindex %d from child", maybe_empty(netdev->name), netdev->ifindex);
a1ae535a
CB
4150 }
4151
4152 return 0;
a1ae535a 4153}
bb84beda
CB
4154
4155void lxc_delete_network(struct lxc_handler *handler)
4156{
4157 bool bret;
4158
37631ddb
CB
4159 /*
4160 * Always expose namespace fd paths to network down hooks via
4161 * environment variables. No need to complicate things by passing them
4162 * as additional hook arguments.
4163 */
4164 lxc_expose_namespace_environment(handler);
4165
bb84beda
CB
4166 if (handler->am_root)
4167 bret = lxc_delete_network_priv(handler);
4168 else
4169 bret = lxc_delete_network_unpriv(handler);
4170 if (!bret)
4171 DEBUG("Failed to delete network devices");
4172 else
4173 DEBUG("Deleted network devices");
4174}
1cd95214 4175
1cd95214
CB
4176int lxc_netns_set_nsid(int fd)
4177{
41a3300d 4178 int ret;
0ce60f0d
CB
4179 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
4180 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4181 NLMSG_ALIGN(1024)];
f8aa61f9 4182 struct nl_handler nlh = NL_HANDLER_INIT;
a5f5cb41 4183 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
0ce60f0d
CB
4184 struct nlmsghdr *hdr;
4185 struct rtgenmsg *msg;
9d036caa
CB
4186 const __s32 ns_id = -1;
4187 const __u32 netns_fd = fd;
1cd95214 4188
a5f5cb41 4189 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
1cd95214 4190 if (ret < 0)
41a3300d 4191 return -1;
1cd95214 4192
0ce60f0d 4193 memset(buf, 0, sizeof(buf));
6ce39620
CB
4194
4195#pragma GCC diagnostic push
4196#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
4197 hdr = (struct nlmsghdr *)buf;
4198 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4199#pragma GCC diagnostic pop
1cd95214 4200
0ce60f0d
CB
4201 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4202 hdr->nlmsg_type = RTM_NEWNSID;
4203 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4204 hdr->nlmsg_pid = 0;
4205 hdr->nlmsg_seq = RTM_NEWNSID;
4206 msg->rtgen_family = AF_UNSPEC;
1cd95214 4207
9d036caa
CB
4208 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4209 if (ret < 0)
a5f5cb41 4210 return ret_errno(ENOMEM);
9d036caa
CB
4211
4212 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4213 if (ret < 0)
a5f5cb41 4214 return ret_errno(ENOMEM);
1cd95214 4215
a5f5cb41 4216 return __netlink_transaction(nlh_ptr, hdr, hdr);
1cd95214 4217}
938980ba
CB
4218
/*
 * Index a sequence of route attributes by type.
 *
 * @tb:  table with room for @max + 1 entries; cleared first, then tb[type]
 *       is set to the first attribute of that type.
 * @max: highest attribute type to record; attributes with a larger type are
 *       ignored.
 * @rta: first attribute of the sequence.
 * @len: number of bytes available starting at @rta.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	for (; RTA_OK(rta, len);) {
		unsigned short attr_type = rta->rta_type;

		/* Only the first occurrence of each type is kept. */
		if ((attr_type <= max) && (tb[attr_type] == NULL))
			tb[attr_type] = rta;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
		rta = RTA_NEXT(rta, len);
#pragma GCC diagnostic pop
	}

	return 0;
}
4238
/*
 * Read the payload of @rta as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() instead of being read through a cast
 * pointer, so the read neither drops the const qualifier nor depends on the
 * payload's alignment. The caller must ensure the attribute carries at least
 * sizeof(__s32) bytes of payload.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, (const char *)rta + RTA_LENGTH(0), sizeof(value));

	return value;
}
4243
/*
 * Address of the first route attribute that follows a struct rtgenmsg located
 * at @r.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
4248
4249int lxc_netns_get_nsid(int fd)
4250{
f8aa61f9 4251 struct nl_handler nlh = NL_HANDLER_INIT;
a5f5cb41 4252 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
938980ba
CB
4253 int ret;
4254 ssize_t len;
4255 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
4256 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4257 NLMSG_ALIGN(1024)];
938980ba 4258 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
938980ba
CB
4259 struct nlmsghdr *hdr;
4260 struct rtgenmsg *msg;
938980ba
CB
4261 __u32 netns_fd = fd;
4262
a5f5cb41 4263 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
938980ba
CB
4264 if (ret < 0)
4265 return -1;
4266
4267 memset(buf, 0, sizeof(buf));
6ce39620
CB
4268
4269#pragma GCC diagnostic push
4270#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4271 hdr = (struct nlmsghdr *)buf;
4272 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4273#pragma GCC diagnostic pop
938980ba
CB
4274
4275 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4276 hdr->nlmsg_type = RTM_GETNSID;
4277 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4278 hdr->nlmsg_pid = 0;
4279 hdr->nlmsg_seq = RTM_GETNSID;
4280 msg->rtgen_family = AF_UNSPEC;
4281
9d036caa 4282 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
a5f5cb41
CB
4283 if (ret < 0)
4284 return ret_errno(ENOMEM);
938980ba 4285
a5f5cb41 4286 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
938980ba
CB
4287 if (ret < 0)
4288 return -1;
4289
4290 msg = NLMSG_DATA(hdr);
4291 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4292 if (len < 0)
a5f5cb41 4293 return ret_errno(EINVAL);
938980ba 4294
6ce39620
CB
4295#pragma GCC diagnostic push
4296#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4297 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4298 if (tb[__LXC_NETNSA_NSID])
4299 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4300#pragma GCC diagnostic pop
938980ba
CB
4301
4302 return -1;
4303}
e389f2af
CB
4304
4305int lxc_create_network(struct lxc_handler *handler)
4306{
4307 int ret;
4308
e389f2af
CB
4309 if (handler->am_root) {
4310 ret = lxc_create_network_priv(handler);
4311 if (ret)
4312 return -1;
4313
4314 return lxc_network_move_created_netdev_priv(handler);
4315 }
4316
4317 return lxc_create_network_unpriv(handler);
4318}