]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
Merge pull request #3660 from brauner/2021-02-05/fixes_1
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
f40988c7 39#include "process_utils.h"
59524108 40#include "syscall_wrappers.h"
0d204771 41#include "utils.h"
0ad19a3f 42
9de31d5a
CB
43#ifndef HAVE_STRLCPY
44#include "include/strlcpy.h"
45#endif
46
/* Declare the logging category used by the log macros in this file. */
lxc_log_define(network, lxc);

/* Callback signature that receives the container handler and a network device. */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
/* Callback signature that receives only the network device. */
typedef int (*instantiate_ns_cb)(struct lxc_netdev *);
/* Interface name "lo". */
static const char loop_device[] = "lo";
811ef482 52
b670016a 53static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 54{
d16bda44 55 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
8f82874c 56 struct nl_handler nlh;
d16bda44
CB
57 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
58 int addrlen, err;
8f82874c 59 struct rtmsg *rt;
8f82874c 60
61 addrlen = family == AF_INET ? sizeof(struct in_addr)
62 : sizeof(struct in6_addr);
63
d16bda44 64 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 65 if (err)
66 return err;
67
8f82874c 68 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
69 if (!nlmsg)
d16bda44 70 return -ENOMEM;
8f82874c 71
72 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
73 if (!answer)
a5f5cb41 74 return -ENOMEM;
8f82874c 75
76 nlmsg->nlmsghdr->nlmsg_flags =
77 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 78 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 79
80 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
81 if (!rt)
a5f5cb41 82 return -ENOMEM;
d16bda44 83
8f82874c 84 rt->rtm_family = family;
85 rt->rtm_table = RT_TABLE_MAIN;
86 rt->rtm_scope = RT_SCOPE_LINK;
87 rt->rtm_protocol = RTPROT_BOOT;
88 rt->rtm_type = RTN_UNICAST;
89 rt->rtm_dst_len = netmask;
90
8f82874c 91 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
92 return -EINVAL;
93
8f82874c 94 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
95 return -EINVAL;
96
97 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 98}
99
/* Send an RTM_NEWROUTE request for the IPv4 destination @dest/@netmask on @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_NEWROUTE;

	return lxc_ip_route_dest(request, AF_INET, ifindex, dest, netmask);
}
104
/* Send an RTM_NEWROUTE request for the IPv6 destination @dest/@netmask on @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_NEWROUTE;

	return lxc_ip_route_dest(request, AF_INET6, ifindex, dest, netmask);
}
109
/* Send an RTM_DELROUTE request for the IPv4 destination @dest/@netmask on @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_DELROUTE;

	return lxc_ip_route_dest(request, AF_INET, ifindex, dest, netmask);
}
114
/* Send an RTM_DELROUTE request for the IPv6 destination @dest/@netmask on @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_DELROUTE;

	return lxc_ip_route_dest(request, AF_INET6, ifindex, dest, netmask);
}
119
d4a7da46 120static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
121{
122 struct lxc_list *iterator;
123 int err;
124
125 lxc_list_for_each(iterator, ip) {
126 struct lxc_inetdev *inetdev = iterator->elem;
127
128 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
129 if (err)
130 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 131 }
132
133 return 0;
134}
135
136static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
137{
138 struct lxc_list *iterator;
139 int err;
140
141 lxc_list_for_each(iterator, ip) {
142 struct lxc_inet6dev *inet6dev = iterator->elem;
143
144 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
145 if (err)
146 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 147 }
148
149 return 0;
150}
151
6dfa9581
TP
152static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
153{
154 struct lxc_list *iterator;
155 int err;
156
157 lxc_list_for_each(iterator, ip) {
158 struct lxc_inetdev *inetdev = iterator->elem;
159
160 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
161
162 if (err)
9c66dc4f 163 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
164 }
165
166 return 0;
167}
168
169static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
170{
171 struct lxc_list *iterator;
172 int err;
173
174 lxc_list_for_each(iterator, ip) {
175 struct lxc_inet6dev *inet6dev = iterator->elem;
176
177 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
178 if (err)
9c66dc4f 179 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
180 }
181
182 return 0;
183}
184
5fe147e9 185static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 186{
d16bda44 187 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
5fe147e9 188 struct nl_handler nlh;
d16bda44
CB
189 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
190 int addrlen, err;
5fe147e9 191 struct ndmsg *rt;
6dfa9581 192
5fe147e9 193 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 194
d16bda44 195 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
196 if (err)
197 return err;
6dfa9581 198
5fe147e9
TP
199 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
200 if (!nlmsg)
d16bda44 201 return -ENOMEM;
6dfa9581 202
5fe147e9
TP
203 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
204 if (!answer)
d16bda44 205 return -ENOMEM;
6dfa9581 206
5fe147e9
TP
207 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
208 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 209
5fe147e9
TP
210 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
211 if (!rt)
d16bda44
CB
212 return -ENOMEM;
213
5fe147e9
TP
214 rt->ndm_ifindex = ifindex;
215 rt->ndm_flags = NTF_PROXY;
216 rt->ndm_type = NDA_DST;
217 rt->ndm_family = family;
6dfa9581 218
5fe147e9 219 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 220 return -EINVAL;
6dfa9581 221
d16bda44 222 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
223}
224
225static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
226{
227 int ret;
228 char path[PATH_MAX];
229 char buf[1] = "";
230
231 if (family != AF_INET && family != AF_INET6)
596a002c 232 return ret_set_errno(-1, EINVAL);
6dfa9581 233
9c66dc4f 234 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581
TP
235 family == AF_INET ? "ipv4" : "ipv6", ifname,
236 "forwarding");
9c66dc4f 237 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 238 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
239
240 return lxc_read_file_expect(path, buf, 1, "1");
241}
242
622f05c7
TP
/*
 * Payload of the IFLA_BRIDGE_VLAN_INFO attribute built in lxc_bridge_vlan().
 * The member order is kept as-is because the struct is copied verbatim into
 * the netlink message.
 */
struct bridge_vlan_info {
	__u16 flags;	/* BRIDGE_VLAN_INFO_* bits. */
	__u16 vid;	/* VLAN ID. */
};
247
248static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
249{
250 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
251 struct nl_handler nlh;
252 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
253 int err;
254 struct ifinfomsg *ifi;
255 struct rtattr *nest;
256 unsigned short bridge_flags = 0;
257 struct bridge_vlan_info vlan_info;
258
259 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
260 if (err)
261 return err;
262
263 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
264 if (!nlmsg)
265 return ret_errno(ENOMEM);
266
267 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
268 if (!answer)
269 return ret_errno(ENOMEM);
270
271 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
272 nlmsg->nlmsghdr->nlmsg_type = operation;
273
274 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
275 if (!ifi)
276 return ret_errno(ENOMEM);
277 ifi->ifi_family = AF_BRIDGE;
278 ifi->ifi_index = ifindex;
279
280 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
281 if (!nest)
282 return ret_errno(ENOMEM);
283
284 bridge_flags |= BRIDGE_FLAGS_MASTER;
285 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
286 return ret_errno(ENOMEM);
287
288 vlan_info.vid = vlan_id;
289 vlan_info.flags = 0;
290 if (!tagged)
291 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
292
293 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
294 return ret_errno(ENOMEM);
295
296 nla_end_nested(nlmsg, nest);
297
298 return netlink_transaction(nlh_ptr, nlmsg, answer);
299}
300
/* Issue an RTM_SETLINK bridge VLAN request for @vlan_id on @ifindex. */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	const unsigned short operation = RTM_SETLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, tagged);
}
305
/* Issue an RTM_DELLINK bridge VLAN request for @vlan_id on @ifindex (always passed as untagged). */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	const unsigned short operation = RTM_DELLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, false);
}
310
311static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
312{
313 struct lxc_list *iterator;
314 int err;
315
316 lxc_list_for_each(iterator, vlan_ids) {
317 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
318
319 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
320 if (err)
321 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
322 }
323
324 return 0;
325}
326
33320936
TP
327static int validate_veth(struct lxc_netdev *netdev)
328{
329 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
330 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
331 if (netdev->priv.veth_attr.vlan_id_set)
332 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
333
334 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
335 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
336 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
337 }
338
339 if (netdev->priv.veth_attr.vlan_id_set) {
340 struct lxc_list *it;
341 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
342 unsigned short i = PTR_TO_USHORT(it->elem);
343 if (i == netdev->priv.veth_attr.vlan_id)
344 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
345 }
346 }
347
348 return 0;
349}
350
351static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
352{
353 int err, rc, veth1index;
354 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
355 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
356
357 /* Skip setup if no VLAN options are specified. */
358 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
359 return 0;
360
361 /* Check vlan filtering is enabled on parent bridge. */
362 rc = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
363 if (rc < 0 || (size_t)rc >= sizeof(path))
364 return -1;
365
366 rc = lxc_read_from_file(path, buf, sizeof(buf));
367 if (rc < 0)
368 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
369
370 buf[rc - 1] = '\0';
371
372 if (strcmp(buf, "1") != 0)
373 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
374
375 /* Get veth1 ifindex for use with netlink. */
376 veth1index = if_nametoindex(veth1);
377 if (!veth1index)
378 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
379
380 /* Configure untagged VLAN settings on bridge port if specified. */
381 if (netdev->priv.veth_attr.vlan_id_set) {
382 unsigned short default_pvid;
383
384 /* Get the bridge's default VLAN PVID. */
385 rc = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
386 if (rc < 0 || (size_t)rc >= sizeof(path))
387 return -1;
388
389 rc = lxc_read_from_file(path, buf, sizeof(buf));
390 if (rc < 0)
391 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
392
393 buf[rc - 1] = '\0';
394 err = get_u16(&default_pvid, buf, 0);
395 if (err)
396 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
397
398 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
399 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
400 err = lxc_bridge_vlan_del(veth1index, default_pvid);
401 if (err)
402 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
403 }
404
405 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
406 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
407 if (err)
408 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
409 }
410 }
411
412 /* Configure tagged VLAN settings on bridge port if specified. */
413 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
414 if (err)
415 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
416
417 return 0;
418}
419
8f7c3358
TP
/*
 * Arguments handed to lxc_ovs_setup_bridge_vlan_exec() through run_command().
 * Only @trunks is heap allocated; release it with free_ovs_veth_vlan_args().
 */
struct ovs_veth_vlan_args {
	const char *nic;	/* Name of the port to configure. */
	const char *vlan_mode;	/* Port VLAN mode. */
	short vlan_id;		/* PVID VLAN ID. */
	char *trunks;		/* Comma delimited list of tagged VLAN IDs. */
};
426
d2f8b272
TP
427static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args *args)
428{
429 free_disarm(args->trunks);
430}
8f7c3358
TP
431
432static int lxc_ovs_setup_bridge_vlan_exec(void *data)
433{
434 struct ovs_veth_vlan_args *args = data;
785e1540
TP
435 __do_free char *vlan_mode = NULL, *tag = NULL, *trunks = NULL;
436
437 if (!args->vlan_mode)
438 return ret_errno(EINVAL);
8f7c3358
TP
439
440 vlan_mode = must_concat(NULL, "vlan_mode=", args->vlan_mode, (char *)NULL);
441
785e1540 442 if (args->vlan_id > BRIDGE_VLAN_NONE) {
8f7c3358
TP
443 char buf[5];
444 int rc;
445
446 rc = snprintf(buf, sizeof(buf), "%u", args->vlan_id);
447 if (rc < 0 || (size_t)rc >= sizeof(buf))
72e8122b 448 return log_error_errno(-1, EINVAL, "Failed to parse ovs bridge vlan \"%d\"", args->vlan_id);
8f7c3358
TP
449
450 tag = must_concat(NULL, "tag=", buf, (char *)NULL);
451 }
452
785e1540 453 if (args->trunks)
8f7c3358
TP
454 trunks = must_concat(NULL, "trunks=", args->trunks, (char *)NULL);
455
456 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
785e1540 457 if (tag && trunks)
8f7c3358 458 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, trunks, (char *)NULL);
785e1540 459 else if (tag)
8f7c3358 460 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, (char *)NULL);
785e1540 461 else if (trunks)
8f7c3358
TP
462 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, trunks, (char *)NULL);
463 else
464 return -EINVAL;
465
466 return -errno;
467}
468
469static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
470{
471 int taggedLength = lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids);
472 struct ovs_veth_vlan_args args;
473 args.nic = veth1;
1ee07848
TP
474 args.vlan_mode = NULL;
475 args.vlan_id = BRIDGE_VLAN_NONE;
476 args.trunks = NULL;
8f7c3358
TP
477
478 /* Skip setup if no VLAN options are specified. */
479 if (!netdev->priv.veth_attr.vlan_id_set && taggedLength <= 0)
480 return 0;
481
482 /* Configure untagged VLAN settings on bridge port if specified. */
483 if (netdev->priv.veth_attr.vlan_id_set) {
484 if (netdev->priv.veth_attr.vlan_id == BRIDGE_VLAN_NONE && taggedLength <= 0)
485 return log_error_errno(-1, EINVAL, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
486
487 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
488 * Also set the vlan_mode=access, which will drop any tagged frames.
489 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
490 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
491 */
492 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
493 args.vlan_mode = "access";
494 args.vlan_id = netdev->priv.veth_attr.vlan_id;
495 }
496 }
497
498 if (taggedLength > 0) {
499 args.vlan_mode = "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
500
501 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
502 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
503 args.vlan_mode = "native-untagged";
504 }
505
506 struct lxc_list *iterator;
507 lxc_list_for_each(iterator, &netdev->priv.veth_attr.vlan_tagged_ids) {
508 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
509 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
510 int rc;
511
512 rc = snprintf(buf, sizeof(buf), "%u", vlan_id);
3fe6b5cf
TP
513 if (rc < 0 || (size_t)rc >= sizeof(buf)) {
514 free_ovs_veth_vlan_args(&args);
8f7c3358 515 return log_error_errno(-1, EINVAL, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id, veth1);
3fe6b5cf 516 }
8f7c3358 517
1ee07848
TP
518 if (args.trunks)
519 args.trunks = must_concat(NULL, args.trunks, buf, ",", (char *)NULL);
520 else
521 args.trunks = must_concat(NULL, buf, ",", (char *)NULL);
8f7c3358
TP
522 }
523 }
524
1ee07848 525 if (args.vlan_mode) {
8f7c3358
TP
526 int ret;
527 char cmd_output[PATH_MAX];
528
529 ret = run_command(cmd_output, sizeof(cmd_output), lxc_ovs_setup_bridge_vlan_exec, (void *)&args);
3fe6b5cf
TP
530 if (ret < 0) {
531 free_ovs_veth_vlan_args(&args);
8f7c3358 532 return log_error_errno(-1, ret, "Failed to setup openvswitch vlan on port \"%s\": %s", args.nic, cmd_output);
3fe6b5cf 533 }
8f7c3358
TP
534 }
535
3fe6b5cf 536 free_ovs_veth_vlan_args(&args);
8f7c3358
TP
537 return 0;
538}
539
811ef482
CB
540static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
541{
54256301 542 int err;
a00fbab5 543 unsigned int mtu = 1500;
811ef482
CB
544 char *veth1, *veth2;
545 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 546
33320936
TP
547 err = validate_veth(netdev);
548 if (err)
549 return err;
550
f2711167 551 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
552 veth1 = netdev->priv.veth_attr.pair;
553 if (handler->conf->reboot)
554 lxc_netdev_delete_by_name(veth1);
555 } else {
556 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
557 if (err < 0 || (size_t)err >= sizeof(veth1buf))
558 return -1;
559
3646ffd9 560 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
811ef482
CB
561 if (!veth1)
562 return -1;
563
564 /* store away for deconf */
565 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
566 }
567
d34212ad
CB
568 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
569 if (err < 0 || (size_t)err >= sizeof(veth2buf))
570 return -1;
571
3646ffd9 572 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
811ef482 573 if (!veth2)
54256301
CB
574 return -1;
575
a00fbab5
TP
576 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
577 if (netdev->mtu) {
578 if (lxc_safe_uint(netdev->mtu, &mtu))
579 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 580 } else if (!is_empty_string(netdev->link)) {
54256301 581 int ifindex_mtu;
811ef482 582
54256301
CB
583 ifindex_mtu = if_nametoindex(netdev->link);
584 if (ifindex_mtu) {
585 mtu = netdev_get_mtu(ifindex_mtu);
586 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
587 }
588 }
589
590 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
9c66dc4f
CB
591 if (err)
592 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 593
24190194
CB
594 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
595
811ef482
CB
596 /* changing the high byte of the mac address to 0xfe, the bridge interface
597 * will always keep the host's mac address and not take the mac address
598 * of a container */
599 err = setup_private_host_hw_addr(veth1);
600 if (err) {
6d1400b5 601 errno = -err;
602 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
603 goto out_delete;
604 }
605
8da62485
CB
606 /* Retrieve ifindex of the host's veth device. */
607 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
608 if (!netdev->priv.veth_attr.ifindex) {
609 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
610 goto out_delete;
611 }
612
811ef482
CB
613 if (mtu) {
614 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 615 if (err) {
6d1400b5 616 errno = -err;
54256301 617 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
618 goto out_delete;
619 }
620 }
621
f2711167 622 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
26da53c3
TP
623 if (!lxc_nic_exists(netdev->link)) {
624 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
625 goto out_delete;
626 }
627
811ef482
CB
628 err = lxc_bridge_attach(netdev->link, veth1);
629 if (err) {
6d1400b5 630 errno = -err;
26da53c3 631 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
811ef482
CB
632 goto out_delete;
633 }
634 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
33320936 635
38790036
TP
636 if (is_ovs_bridge(netdev->link)) {
637 err = setup_veth_ovs_bridge_vlan(veth1, netdev);
638 if (err) {
639 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1);
640 lxc_ovs_delete_port(netdev->link, veth1);
641 goto out_delete;
642 }
643 } else {
33320936
TP
644 err = setup_veth_native_bridge_vlan(veth1, netdev);
645 if (err) {
646 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
647 goto out_delete;
648 }
649 }
811ef482
CB
650 }
651
652 err = lxc_netdev_up(veth1);
653 if (err) {
6d1400b5 654 errno = -err;
655 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
656 goto out_delete;
657 }
658
d4a7da46 659 /* setup ipv4 routes on the host interface */
660 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
661 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
662 goto out_delete;
663 }
664
665 /* setup ipv6 routes on the host interface */
666 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
667 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
668 goto out_delete;
669 }
670
6dfa9581 671 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
672 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
673 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
674 appear to be added successfully but then do not appear in the proxy list. The length of time
675 slept doesn't appear to be important, only that the process sleeps for a short period of time.
676 */
677 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
678
6dfa9581
TP
679 if (netdev->ipv4_gateway) {
680 char bufinet4[INET_ADDRSTRLEN];
681 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 682 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
683 goto out_delete;
684 }
685
686 err = lxc_ip_forwarding_on(veth1, AF_INET);
687 if (err) {
9c66dc4f 688 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
689 goto out_delete;
690 }
691
5fe147e9 692 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 693 if (err) {
9c66dc4f 694 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
695 goto out_delete;
696 }
697 }
698
699 if (netdev->ipv6_gateway) {
700 char bufinet6[INET6_ADDRSTRLEN];
701
702 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 703 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
704 goto out_delete;
705 }
706
707 /* Check for sysctl net.ipv6.conf.all.forwarding=1
708 Kernel requires this to route any packets for IPv6.
709 */
710 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
711 if (err) {
9c66dc4f 712 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
713 goto out_delete;
714 }
715
716 err = lxc_ip_forwarding_on(veth1, AF_INET6);
717 if (err) {
9c66dc4f 718 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
719 goto out_delete;
720 }
721
722 err = lxc_neigh_proxy_on(veth1, AF_INET6);
723 if (err) {
9c66dc4f 724 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
725 goto out_delete;
726 }
727
5fe147e9 728 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 729 if (err) {
9c66dc4f 730 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
731 goto out_delete;
732 }
733 }
734
735 /* setup ipv4 address routes on the host interface */
736 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
737 if (err) {
9c66dc4f 738 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
739 goto out_delete;
740 }
741
742 /* setup ipv6 address routes on the host interface */
743 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
744 if (err) {
9c66dc4f 745 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
746 goto out_delete;
747 }
748 }
749
811ef482 750 if (netdev->upscript) {
14a7b0f9
CB
751 char *argv[] = {
752 "veth",
753 netdev->link,
990b9ac3 754 veth1,
14a7b0f9
CB
755 NULL,
756 };
757
758 err = run_script_argv(handler->name,
759 handler->conf->hooks_version, "net",
760 netdev->upscript, "up", argv);
761 if (err < 0)
811ef482
CB
762 goto out_delete;
763 }
764
54256301 765 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
766
767 return 0;
768
769out_delete:
54256301 770 lxc_netdev_delete_by_name(veth1);
811ef482
CB
771 return -1;
772}
773
774static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
775{
8021de25 776 char peer[IFNAMSIZ];
811ef482
CB
777 int err;
778
f2711167 779 if (is_empty_string(netdev->link)) {
811ef482
CB
780 ERROR("No link for macvlan network device specified");
781 return -1;
782 }
783
8021de25
CB
784 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
785 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
786 return -1;
787
3646ffd9 788 if (!lxc_ifname_alnum_case_sensitive(peer))
811ef482
CB
789 return -1;
790
791 err = lxc_macvlan_create(netdev->link, peer,
792 netdev->priv.macvlan_attr.mode);
793 if (err) {
6d1400b5 794 errno = -err;
795 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
796 peer, netdev->link);
966e9f1f 797 goto on_error;
811ef482
CB
798 }
799
9f8cf6e1
CB
800 strlcpy(netdev->created_name, peer, IFNAMSIZ);
801
811ef482
CB
802 netdev->ifindex = if_nametoindex(peer);
803 if (!netdev->ifindex) {
804 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 805 goto on_error;
811ef482
CB
806 }
807
3bef7b7b 808 if (netdev->mtu) {
54256301
CB
809 unsigned int mtu;
810
3bef7b7b
TP
811 err = lxc_safe_uint(netdev->mtu, &mtu);
812 if (err < 0) {
813 errno = -err;
814 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
815 goto on_error;
816 }
817
818 err = lxc_netdev_set_mtu(peer, mtu);
819 if (err < 0) {
820 errno = -err;
821 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
822 goto on_error;
823 }
824 }
825
811ef482 826 if (netdev->upscript) {
14a7b0f9
CB
827 char *argv[] = {
828 "macvlan",
829 netdev->link,
830 NULL,
831 };
832
833 err = run_script_argv(handler->name,
834 handler->conf->hooks_version, "net",
835 netdev->upscript, "up", argv);
836 if (err < 0)
966e9f1f 837 goto on_error;
811ef482
CB
838 }
839
840 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
841 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
842
843 return 0;
966e9f1f
CB
844
845on_error:
811ef482 846 lxc_netdev_delete_by_name(peer);
811ef482
CB
847 return -1;
848}
849
0dc9a142 850static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int isolation)
c9f52382 851{
d16bda44
CB
852 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
853 struct nl_handler nlh;
854 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 855 int err, index, len;
856 struct ifinfomsg *ifi;
c9f52382 857 struct rtattr *nest, *nest2;
c9f52382 858
0dc9a142 859 len = strlen(parent);
c9f52382 860 if (len == 1 || len >= IFNAMSIZ)
d16bda44 861 return ret_errno(EINVAL);
c9f52382 862
863 len = strlen(name);
864 if (len == 1 || len >= IFNAMSIZ)
d16bda44 865 return ret_errno(EINVAL);
c9f52382 866
0dc9a142 867 index = if_nametoindex(parent);
c9f52382 868 if (!index)
d16bda44 869 return ret_errno(EINVAL);
c9f52382 870
d16bda44 871 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 872 if (err)
df62850d 873 return err;
c9f52382 874
c9f52382 875 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
876 if (!nlmsg)
d16bda44 877 return ret_errno(ENOMEM);
c9f52382 878
879 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
880 if (!answer)
d16bda44 881 return ret_errno(ENOMEM);
c9f52382 882
883 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
884 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
885
886 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
887 if (!ifi)
888 return ret_errno(ENOMEM);
c9f52382 889 ifi->ifi_family = AF_UNSPEC;
890
c9f52382 891 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
892 if (!nest)
d16bda44 893 return ret_errno(EPROTO);
c9f52382 894
895 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 896 return ret_errno(EPROTO);
c9f52382 897
5755765e
KT
898 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
899 if (!nest2)
900 return ret_errno(EPROTO);
901
3a934e2e 902 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
5755765e
KT
903 return ret_errno(EPROTO);
904
cf88a827
TP
905 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
906 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
907 * according to ipvlan docs.
5755765e 908 */
cf88a827 909 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
5755765e 910 return ret_errno(EPROTO);
c9f52382 911
5755765e 912 nla_end_nested(nlmsg, nest2);
c9f52382 913 nla_end_nested(nlmsg, nest);
914
915 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 916 return ret_errno(EPROTO);
c9f52382 917
918 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
919 return ret_errno(EPROTO);
920
921 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 922}
923
924static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
925{
dd119206 926 char peer[IFNAMSIZ];
c9f52382 927 int err;
928
f2711167 929 if (is_empty_string(netdev->link)) {
c9f52382 930 ERROR("No link for ipvlan network device specified");
931 return -1;
932 }
933
dd119206
CB
934 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
935 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 936 return -1;
937
3646ffd9 938 if (!lxc_ifname_alnum_case_sensitive(peer))
c9f52382 939 return -1;
940
dd119206
CB
941 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
942 netdev->priv.ipvlan_attr.isolation);
c9f52382 943 if (err) {
dd119206
CB
944 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
945 peer, netdev->link);
c9f52382 946 goto on_error;
947 }
948
e7fdd504
CB
949 strlcpy(netdev->created_name, peer, IFNAMSIZ);
950
c9f52382 951 netdev->ifindex = if_nametoindex(peer);
952 if (!netdev->ifindex) {
953 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
954 goto on_error;
955 }
956
006e135e 957 if (netdev->mtu) {
54256301
CB
958 unsigned int mtu;
959
006e135e 960 err = lxc_safe_uint(netdev->mtu, &mtu);
961 if (err < 0) {
962 errno = -err;
54256301 963 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 964 goto on_error;
965 }
966
967 err = lxc_netdev_set_mtu(peer, mtu);
968 if (err < 0) {
969 errno = -err;
54256301 970 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 971 goto on_error;
972 }
973 }
974
c9f52382 975 if (netdev->upscript) {
976 char *argv[] = {
977 "ipvlan",
978 netdev->link,
979 NULL,
980 };
981
dd119206
CB
982 err = run_script_argv(handler->name, handler->conf->hooks_version,
983 "net", netdev->upscript, "up", argv);
c9f52382 984 if (err < 0)
985 goto on_error;
986 }
987
dd119206
CB
988 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
989 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 990
991 return 0;
992
993on_error:
994 lxc_netdev_delete_by_name(peer);
995 return -1;
996}
997
811ef482
CB
998static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
999{
1000 char peer[IFNAMSIZ];
1001 int err;
1002 static uint16_t vlan_cntr = 0;
811ef482 1003
f2711167 1004 if (is_empty_string(netdev->link)) {
811ef482
CB
1005 ERROR("No link for vlan network device specified");
1006 return -1;
1007 }
1008
d4d68410
CB
1009 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
1010 netdev->priv.vlan_attr.vid, vlan_cntr++);
811ef482
CB
1011 if (err < 0 || (size_t)err >= sizeof(peer))
1012 return -1;
1013
1014 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
1015 if (err) {
6d1400b5 1016 errno = -err;
1017 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1018 peer, netdev->link);
811ef482
CB
1019 return -1;
1020 }
1021
83530dba
CB
1022 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1023
811ef482
CB
1024 netdev->ifindex = if_nametoindex(peer);
1025 if (!netdev->ifindex) {
1026 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 1027 goto on_error;
1028 }
1029
1030 if (netdev->mtu) {
54256301
CB
1031 unsigned int mtu;
1032
3e2a7b08 1033 err = lxc_safe_uint(netdev->mtu, &mtu);
1034 if (err < 0) {
1035 errno = -err;
54256301 1036 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1037 goto on_error;
1038 }
1039
1040 err = lxc_netdev_set_mtu(peer, mtu);
54256301 1041 if (err < 0) {
3e2a7b08 1042 errno = -err;
54256301 1043 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1044 goto on_error;
1045 }
811ef482
CB
1046 }
1047
3a73d9f1 1048 if (netdev->upscript) {
1049 char *argv[] = {
1050 "vlan",
1051 netdev->link,
1052 NULL,
1053 };
1054
d4d68410
CB
1055 err = run_script_argv(handler->name, handler->conf->hooks_version,
1056 "net", netdev->upscript, "up", argv);
19abca58 1057 if (err < 0) {
3e2a7b08 1058 goto on_error;
19abca58 1059 }
3a73d9f1 1060 }
1061
d4d68410
CB
1062 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
1063 netdev->ifindex);
811ef482
CB
1064
1065 return 0;
3e2a7b08 1066
1067on_error:
1068 lxc_netdev_delete_by_name(peer);
1069 return -1;
811ef482
CB
1070}
1071
1072static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1073{
0b154989 1074 int err, mtu_orig = 0;
14a7b0f9 1075
9c66dc4f
CB
1076 if (is_empty_string(netdev->link))
1077 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 1078
75b074ee
CB
1079 /*
1080 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
1081 * network namespace because we need it to move the device from the
1082 * host's network namespace to the container's network namespace later
1083 * on.
1084 * Note that netdev->link will contain the name of the physical network
1085 * device in the host's namespace.
1086 */
811ef482 1087 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
1088 if (!netdev->ifindex)
1089 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 1090
61302ef7 1091 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 1092 if (is_empty_string(netdev->name))
8bf64b77 1093 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 1094
75b074ee
CB
1095 /*
1096 * Store the ifindex of the host's network device in the host's
790255cf
CB
1097 * namespace.
1098 */
1099 netdev->priv.phys_attr.ifindex = netdev->ifindex;
1100
75b074ee
CB
1101 /*
1102 * Get original device MTU setting and store for restoration after
1103 * container shutdown.
1104 */
0b154989 1105 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
1106 if (mtu_orig < 0)
1107 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
1108
1109 netdev->priv.phys_attr.mtu = mtu_orig;
1110
3bef7b7b 1111 if (netdev->mtu) {
54256301
CB
1112 unsigned int mtu;
1113
3bef7b7b 1114 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
1115 if (err < 0)
1116 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 1117
3bef7b7b 1118 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
1119 if (err < 0)
1120 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
1121 }
1122
1123 if (netdev->upscript) {
1124 char *argv[] = {
1125 "phys",
1126 netdev->link,
1127 NULL,
1128 };
1129
75b074ee
CB
1130 err = run_script_argv(handler->name, handler->conf->hooks_version,
1131 "net", netdev->upscript, "up", argv);
9c66dc4f 1132 if (err < 0)
3bef7b7b 1133 return -1;
3bef7b7b
TP
1134 }
1135
75b074ee
CB
1136 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
1137 netdev->ifindex);
811ef482
CB
1138
1139 return 0;
1140}
1141
1142static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1143{
14a7b0f9
CB
1144 int ret;
1145 char *argv[] = {
1146 "empty",
1147 NULL,
1148 };
1149
811ef482 1150 netdev->ifindex = 0;
14a7b0f9
CB
1151 if (!netdev->upscript)
1152 return 0;
1153
1154 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1155 "net", netdev->upscript, "up", argv);
1156 if (ret < 0)
1157 return -1;
1158
811ef482
CB
1159 return 0;
1160}
1161
1162static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
1163{
1164 netdev->ifindex = 0;
1165 return 0;
1166}
1167
1168static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
1169 [LXC_NET_VETH] = instantiate_veth,
1170 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 1171 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
1172 [LXC_NET_VLAN] = instantiate_vlan,
1173 [LXC_NET_PHYS] = instantiate_phys,
1174 [LXC_NET_EMPTY] = instantiate_empty,
1175 [LXC_NET_NONE] = instantiate_none,
1176};
1177
9d0406c7 1178static int __instantiate_ns_common(struct lxc_netdev *netdev)
8bf64b77
CB
1179{
1180 char current_ifname[IFNAMSIZ];
1181
1182 netdev->ifindex = if_nametoindex(netdev->created_name);
1183 if (!netdev->ifindex)
1184 return log_error_errno(-1,
1185 errno, "Failed to retrieve ifindex for network device with name %s",
1186 netdev->created_name);
1187
3473ca76 1188 if (is_empty_string(netdev->name))
8bf64b77
CB
1189 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1190
1191 if (strcmp(netdev->created_name, netdev->name) != 0) {
1192 int ret;
1193
1194 ret = lxc_netdev_rename_by_name(netdev->created_name, netdev->name);
1195 if (ret)
9c66dc4f 1196 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
8bf64b77
CB
1197 netdev->created_name,
1198 netdev->name);
1199
1200 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->created_name, netdev->name);
1201 }
1202
1203 /*
1204 * Re-read the name of the interface because its name has changed and
1205 * would be automatically allocated by the system
1206 */
1207 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 1208 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
1209
1210 /*
1211 * Now update the recorded name of the network device to reflect the
1212 * name of the network device in the child's network namespace. We will
1213 * later on send this information back to the parent.
1214 */
1215 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
1216
1217 return 0;
1218}
1219
/* veth devices need nothing beyond the common in-namespace setup. */
static int instantiate_ns_veth(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1225
/* macvlan devices need nothing beyond the common in-namespace setup. */
static int instantiate_ns_macvlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1230
/* ipvlan devices need nothing beyond the common in-namespace setup. */
static int instantiate_ns_ipvlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1235
/* vlan devices need nothing beyond the common in-namespace setup. */
static int instantiate_ns_vlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1240
/* Physical devices need nothing beyond the common in-namespace setup. */
static int instantiate_ns_phys(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1245
/* "empty" type: no in-namespace setup is performed. Always returns 0. */
static int instantiate_ns_empty(struct lxc_netdev *netdev)
{
	return 0;
}
1250
/* "none" type: no in-namespace setup is performed. Always returns 0. */
static int instantiate_ns_none(struct lxc_netdev *netdev)
{
	return 0;
}
1255
1256static instantiate_ns_cb netdev_ns_conf[LXC_NET_MAXCONFTYPE + 1] = {
1257 [LXC_NET_VETH] = instantiate_ns_veth,
1258 [LXC_NET_MACVLAN] = instantiate_ns_macvlan,
1259 [LXC_NET_IPVLAN] = instantiate_ns_ipvlan,
1260 [LXC_NET_VLAN] = instantiate_ns_vlan,
1261 [LXC_NET_PHYS] = instantiate_ns_phys,
1262 [LXC_NET_EMPTY] = instantiate_ns_empty,
1263 [LXC_NET_NONE] = instantiate_ns_none,
1264};
1265
811ef482
CB
1266static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
1267{
14a7b0f9
CB
1268 int ret;
1269 char *argv[] = {
1270 "veth",
1271 netdev->link,
1272 NULL,
1273 NULL,
1274 };
1275
1276 if (!netdev->downscript)
1277 return 0;
811ef482 1278
f2711167 1279 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 1280 argv[2] = netdev->priv.veth_attr.pair;
811ef482 1281 else
14a7b0f9
CB
1282 argv[2] = netdev->priv.veth_attr.veth1;
1283
1284 ret = run_script_argv(handler->name,
1285 handler->conf->hooks_version, "net",
1286 netdev->downscript, "down", argv);
1287 if (ret < 0)
1288 return -1;
811ef482 1289
811ef482
CB
1290 return 0;
1291}
1292
1293static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1294{
14a7b0f9
CB
1295 int ret;
1296 char *argv[] = {
1297 "macvlan",
1298 netdev->link,
1299 NULL,
1300 };
1301
1302 if (!netdev->downscript)
1303 return 0;
1304
1305 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1306 "net", netdev->downscript, "down", argv);
1307 if (ret < 0)
1308 return -1;
811ef482 1309
811ef482
CB
1310 return 0;
1311}
1312
c9f52382 1313static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1314{
1315 int ret;
1316 char *argv[] = {
1317 "ipvlan",
1318 netdev->link,
1319 NULL,
1320 };
1321
1322 if (!netdev->downscript)
1323 return 0;
1324
1325 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1326 "net", netdev->downscript, "down", argv);
1327 if (ret < 0)
1328 return -1;
1329
1330 return 0;
1331}
1332
811ef482
CB
1333static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1334{
3a73d9f1 1335 int ret;
1336 char *argv[] = {
1337 "vlan",
1338 netdev->link,
1339 NULL,
1340 };
1341
1342 if (!netdev->downscript)
1343 return 0;
1344
1345 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1346 "net", netdev->downscript, "down", argv);
1347 if (ret < 0)
1348 return -1;
1349
811ef482
CB
1350 return 0;
1351}
1352
1353static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1354{
14a7b0f9
CB
1355 int ret;
1356 char *argv[] = {
1357 "phys",
1358 netdev->link,
1359 NULL,
1360 };
1361
1362 if (!netdev->downscript)
1363 return 0;
1364
1365 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1366 "net", netdev->downscript, "down", argv);
1367 if (ret < 0)
1368 return -1;
811ef482 1369
811ef482
CB
1370 return 0;
1371}
1372
1373static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1374{
14a7b0f9
CB
1375 int ret;
1376 char *argv[] = {
1377 "empty",
1378 NULL,
1379 };
1380
1381 if (!netdev->downscript)
1382 return 0;
1383
1384 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1385 "net", netdev->downscript, "down", argv);
1386 if (ret < 0)
1387 return -1;
811ef482 1388
811ef482
CB
1389 return 0;
1390}
1391
/* "none" network type: nothing to shut down. Always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
1396
1397static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1398 [LXC_NET_VETH] = shutdown_veth,
1399 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 1400 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
1401 [LXC_NET_VLAN] = shutdown_vlan,
1402 [LXC_NET_PHYS] = shutdown_phys,
1403 [LXC_NET_EMPTY] = shutdown_empty,
1404 [LXC_NET_NONE] = shutdown_none,
1405};
1406
0037ab49
TP
1407static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1408{
d16bda44 1409 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0037ab49 1410 struct nl_handler nlh;
d16bda44
CB
1411 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1412 int err;
0037ab49 1413 struct ifinfomsg *ifi;
0037ab49 1414
d16bda44 1415 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1416 if (err)
1417 return err;
1418
0037ab49
TP
1419 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1420 if (!nlmsg)
d16bda44 1421 return ret_errno(ENOMEM);
0037ab49
TP
1422
1423 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1424 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1425
1426 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1427 if (!ifi)
d16bda44
CB
1428 return ret_errno(ENOMEM);
1429
0037ab49
TP
1430 ifi->ifi_family = AF_UNSPEC;
1431 ifi->ifi_index = ifindex;
1432
1433 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1434 return ret_errno(ENOMEM);
0037ab49 1435
3473ca76 1436 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1437 return ret_errno(ENOMEM);
0037ab49 1438
d16bda44 1439 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1440}
1441
ebc73a67 1442int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1443{
d16bda44 1444 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0ad19a3f 1445 struct nl_handler nlh;
d16bda44
CB
1446 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1447 int err;
06f976ca 1448 struct ifinfomsg *ifi;
0ad19a3f 1449
d16bda44 1450 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1451 if (err)
1452 return err;
0ad19a3f 1453
0ad19a3f 1454 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1455 if (!nlmsg)
d16bda44 1456 return ret_errno(ENOMEM);
0ad19a3f 1457
ebc73a67 1458 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1459 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1460
1461 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1462 if (!ifi)
d16bda44
CB
1463 return ret_errno(ENOMEM);
1464
06f976ca
SZ
1465 ifi->ifi_family = AF_UNSPEC;
1466 ifi->ifi_index = ifindex;
0ad19a3f 1467
1468 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1469 return ret_errno(ENOMEM);
0ad19a3f 1470
3473ca76 1471 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1472 return ret_errno(ENOMEM);
8d357196 1473
d16bda44 1474 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1475}
1476
ebc73a67
CB
1477/* If we are asked to move a wireless interface, then we must actually move its
1478 * phyN device. Detect that condition and return the physname here. The physname
1479 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1480 */
1481#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1482char *is_wlan(const char *ifname)
e5848d39 1483{
4110345b
CB
1484 __do_fclose FILE *f = NULL;
1485 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1486 int i, ret;
e5848d39 1487 long physlen;
ebc73a67 1488 size_t len;
e5848d39 1489
ebc73a67 1490 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1491 path = must_realloc(NULL, len + 1);
e5848d39 1492 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 1493 if (ret < 0 || (size_t)ret >= len)
4110345b 1494 return NULL;
ebc73a67 1495
4110345b 1496 f = fopen(path, "re");
ebc73a67 1497 if (!f)
4110345b 1498 return NULL;
ebc73a67 1499
1a0e70ac 1500 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1501 fseek(f, 0, SEEK_END);
1502 physlen = ftell(f);
1503 fseek(f, 0, SEEK_SET);
4110345b
CB
1504 if (physlen < 0)
1505 return NULL;
ebc73a67
CB
1506
1507 physname = malloc(physlen + 1);
4110345b
CB
1508 if (!physname)
1509 return NULL;
ebc73a67
CB
1510
1511 memset(physname, 0, physlen + 1);
e5848d39 1512 ret = fread(physname, 1, physlen, f);
e5848d39 1513 if (ret < 0)
4110345b 1514 return NULL;
e5848d39 1515
ebc73a67 1516 for (i = 0; i < physlen; i++) {
e5848d39
SH
1517 if (physname[i] == '\n')
1518 physname[i] = '\0';
ebc73a67 1519
e5848d39
SH
1520 if (physname[i] == '\0')
1521 break;
1522 }
1523
4110345b 1524 return move_ptr(physname);
e5848d39
SH
1525}
1526
ebc73a67
CB
/*
 * Rename network device "old" to "new" inside the network namespace of
 * process pid. The work is done in a forked child, so the calling process
 * does not change its namespace.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on. It must never return into the caller's code:
	 * that would leave two processes running the parent's logic.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	/* Collapse the negative errno result into a plain exit status. */
	_exit(lxc_netdev_rename_by_name(old, new) ? EXIT_FAILURE : EXIT_SUCCESS);
}
1544
e4103cf6 1545int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1546 const char *newname)
e5848d39 1547{
3dd78294 1548 __do_free char *cmd = NULL;
ebc73a67 1549 pid_t fpid;
e5848d39
SH
1550
1551 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1552 * However, IIUC this involves a bit more complicated work to talk to
1553 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1554 */
1555 cmd = on_path("iw", NULL);
9c66dc4f 1556 if (!cmd)
3dd78294 1557 return -1;
e5848d39
SH
1558
1559 fpid = fork();
1560 if (fpid < 0)
3dd78294 1561 return -1;
ebc73a67 1562
e5848d39
SH
1563 if (fpid == 0) {
1564 char pidstr[30];
1565 sprintf(pidstr, "%d", pid);
9c66dc4f 1566 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1567 _exit(EXIT_FAILURE);
e5848d39 1568 }
ebc73a67 1569
e5848d39 1570 if (wait_for_pid(fpid))
3dd78294 1571 return -1;
e5848d39 1572
e5848d39 1573 if (newname)
3dd78294 1574 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1575
3dd78294 1576 return 0;
e5848d39
SH
1577}
1578
8d357196 1579int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1580{
3dd78294 1581 __do_free char *physname = NULL;
8befa924
SH
1582 int index;
1583
8befa924
SH
1584 if (!ifname)
1585 return -EINVAL;
1586
32571606 1587 index = if_nametoindex(ifname);
49428bf3
DY
1588 if (!index)
1589 return -EINVAL;
32571606 1590
ebc73a67
CB
1591 physname = is_wlan(ifname);
1592 if (physname)
e5848d39
SH
1593 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1594
8d357196 1595 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1596}
1597
b84f58b9 1598int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1599{
d16bda44
CB
1600 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1601 struct nl_handler nlh;
1602 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1603 int err;
ebc73a67 1604 struct ifinfomsg *ifi;
0ad19a3f 1605
d16bda44 1606 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1607 if (err)
1608 return err;
0ad19a3f 1609
0ad19a3f 1610 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1611 if (!nlmsg)
d16bda44 1612 return ret_errno(ENOMEM);
0ad19a3f 1613
06f976ca 1614 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1615 if (!answer)
d16bda44 1616 return ret_errno(ENOMEM);
0ad19a3f 1617
ebc73a67 1618 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1619 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1620
1621 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1622 if (!ifi)
d16bda44
CB
1623 return ret_errno(ENOMEM);
1624
06f976ca
SZ
1625 ifi->ifi_family = AF_UNSPEC;
1626 ifi->ifi_index = ifindex;
0ad19a3f 1627
d16bda44 1628 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1629}
1630
b84f58b9
DL
/*
 * Delete the network device called name.
 *
 * Returns -EINVAL when no ifindex can be found for name, otherwise the result
 * of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	return ifindex ? lxc_netdev_delete_by_index(ifindex) : -EINVAL;
}
1641
1642int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1643{
d16bda44
CB
1644 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1645 struct nl_handler nlh;
1646 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1647 int err, len;
06f976ca 1648 struct ifinfomsg *ifi;
b9a5bb58 1649
d16bda44 1650 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1651 if (err)
1652 return err;
b9a5bb58 1653
b84f58b9 1654 len = strlen(newname);
d16bda44
CB
1655 if (len == 1 || len >= IFNAMSIZ)
1656 return ret_errno(EINVAL);
b84f58b9 1657
b9a5bb58
DL
1658 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1659 if (!nlmsg)
d16bda44 1660 return ret_errno(ENOMEM);
b9a5bb58 1661
06f976ca 1662 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1663 if (!answer)
d16bda44 1664 return ret_errno(ENOMEM);
b9a5bb58 1665
ebc73a67 1666 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1667 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1668
1669 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1670 if (!ifi)
d16bda44
CB
1671 return ret_errno(ENOMEM);
1672
06f976ca
SZ
1673 ifi->ifi_family = AF_UNSPEC;
1674 ifi->ifi_index = ifindex;
b84f58b9
DL
1675
1676 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1677 return ret_errno(ENOMEM);
b9a5bb58 1678
d16bda44 1679 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1680}
1681
b84f58b9
DL
1682int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1683{
1684 int len, index;
1685
1686 len = strlen(oldname);
dae3fdf6 1687 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1688 return -EINVAL;
1689
1690 index = if_nametoindex(oldname);
1691 if (!index)
1692 return -EINVAL;
1693
1694 return lxc_netdev_rename_by_index(index, newname);
1695}
1696
8befa924 1697int netdev_set_flag(const char *name, int flag)
0ad19a3f 1698{
d16bda44
CB
1699 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1700 struct nl_handler nlh;
1701 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1702 int err, index, len;
06f976ca 1703 struct ifinfomsg *ifi;
0ad19a3f 1704
d16bda44 1705 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1706 if (err)
1707 return err;
0ad19a3f 1708
1709 len = strlen(name);
dae3fdf6 1710 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1711 return ret_errno(EINVAL);
0ad19a3f 1712
1713 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1714 if (!nlmsg)
d16bda44 1715 return ret_errno(ENOMEM);
0ad19a3f 1716
06f976ca 1717 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1718 if (!answer)
d16bda44 1719 return ret_errno(ENOMEM);
0ad19a3f 1720
1721 index = if_nametoindex(name);
1722 if (!index)
d16bda44 1723 return ret_errno(EINVAL);
0ad19a3f 1724
ebc73a67 1725 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1726 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1727
1728 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1729 if (!ifi)
1730 return ret_errno(ENOMEM);
1731
06f976ca
SZ
1732 ifi->ifi_family = AF_UNSPEC;
1733 ifi->ifi_index = index;
1734 ifi->ifi_change |= IFF_UP;
1735 ifi->ifi_flags |= flag;
0ad19a3f 1736
d16bda44 1737 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1738}
1739
59eac805 1740static int netdev_get_flag(const char *name, int *flag)
efa1cf45 1741{
d16bda44
CB
1742 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1743 struct nl_handler nlh;
1744 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1745 int err, index, len;
a4318300 1746 struct ifinfomsg *ifi;
efa1cf45
DY
1747
1748 if (!name)
d16bda44 1749 return ret_errno(EINVAL);
efa1cf45 1750
d16bda44 1751 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1752 if (err)
1753 return err;
1754
efa1cf45
DY
1755 len = strlen(name);
1756 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1757 return ret_errno(EINVAL);
efa1cf45 1758
efa1cf45
DY
1759 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1760 if (!nlmsg)
d16bda44 1761 return ret_errno(ENOMEM);
efa1cf45 1762
06f976ca 1763 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1764 if (!answer)
d16bda44 1765 return ret_errno(ENOMEM);
efa1cf45 1766
efa1cf45
DY
1767 index = if_nametoindex(name);
1768 if (!index)
d16bda44 1769 return ret_errno(EINVAL);
efa1cf45 1770
06f976ca
SZ
1771 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1772 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1773
1774 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1775 if (!ifi)
1776 return ret_errno(ENOMEM);
1777
06f976ca
SZ
1778 ifi->ifi_family = AF_UNSPEC;
1779 ifi->ifi_index = index;
efa1cf45 1780
d16bda44 1781 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1782 if (err)
d16bda44 1783 return ret_set_errno(-1, errno);
efa1cf45 1784
06f976ca 1785 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1786
1787 *flag = ifi->ifi_flags;
efa1cf45
DY
1788 return err;
1789}
1790
1791/*
1792 * \brief Check a interface is up or not.
1793 *
1794 * \param name: name for the interface.
1795 *
1796 * \return int.
1797 * 0 means interface is down.
1798 * 1 means interface is up.
1799 * Others means error happened, and ret-value is the error number.
1800 */
ebc73a67 1801int lxc_netdev_isup(const char *name)
efa1cf45 1802{
ebc73a67 1803 int err, flag;
efa1cf45
DY
1804
1805 err = netdev_get_flag(name, &flag);
1806 if (err)
ebc73a67
CB
1807 return err;
1808
efa1cf45
DY
1809 if (flag & IFF_UP)
1810 return 1;
ebc73a67 1811
efa1cf45 1812 return 0;
efa1cf45
DY
1813}
1814
0130df54
SH
1815int netdev_get_mtu(int ifindex)
1816{
a5f5cb41 1817 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54 1818 struct nl_handler nlh;
a5f5cb41
CB
1819 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1820 int readmore = 0, recv_len = 0;
1821 int answer_len, err, res;
06f976ca 1822 struct ifinfomsg *ifi;
0130df54 1823 struct nlmsghdr *msg;
0130df54 1824
a5f5cb41 1825 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0130df54
SH
1826 if (err)
1827 return err;
1828
0130df54
SH
1829 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1830 if (!nlmsg)
a5f5cb41 1831 return ret_errno(ENOMEM);
0130df54 1832
06f976ca 1833 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54 1834 if (!answer)
a5f5cb41 1835 return ret_errno(ENOMEM);
0130df54
SH
1836
1837 /* Save the answer buffer length, since it will be overwritten
1838 * on the first receive (and we might need to receive more than
ebc73a67
CB
1839 * once.
1840 */
06f976ca
SZ
1841 answer_len = answer->nlmsghdr->nlmsg_len;
1842
ebc73a67 1843 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1844 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1845
06f976ca 1846 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1847 if (!ifi)
a5f5cb41
CB
1848 return ret_errno(ENOMEM);
1849
06f976ca 1850 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1851
1852 /* Send the request for addresses, which returns all addresses
1853 * on all interfaces. */
a5f5cb41 1854 err = netlink_send(nlh_ptr, nlmsg);
0130df54 1855 if (err < 0)
a5f5cb41 1856 return ret_set_errno(-1, errno);
0130df54 1857
6ce39620
CB
1858#pragma GCC diagnostic push
1859#pragma GCC diagnostic ignored "-Wcast-align"
1860
0130df54
SH
1861 do {
1862 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1863 * overwritten by a previous receive.
1864 */
06f976ca 1865 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1866
1867 /* Get the (next) batch of reply messages */
a5f5cb41 1868 err = netlink_rcv(nlh_ptr, answer);
0130df54 1869 if (err < 0)
a5f5cb41 1870 return ret_set_errno(-1, errno);
0130df54
SH
1871
1872 recv_len = err;
0130df54
SH
1873
1874 /* Satisfy the typing for the netlink macros */
06f976ca 1875 msg = answer->nlmsghdr;
0130df54
SH
1876
1877 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1878 /* Stop reading if we see an error message */
1879 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
1880 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1881 return ret_set_errno(errmsg->error, errno);
0130df54
SH
1882 }
1883
1884 /* Stop reading if we see a NLMSG_DONE message */
1885 if (msg->nlmsg_type == NLMSG_DONE) {
1886 readmore = 0;
1887 break;
1888 }
1889
06f976ca 1890 ifi = NLMSG_DATA(msg);
0130df54
SH
1891 if (ifi->ifi_index == ifindex) {
1892 struct rtattr *rta = IFLA_RTA(ifi);
a5f5cb41
CB
1893 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1894
0130df54 1895 res = 0;
ebc73a67 1896 while (RTA_OK(rta, attr_len)) {
9c66dc4f 1897 /*
a5f5cb41 1898 * Found a local address for the
ebc73a67
CB
1899 * requested interface, return it.
1900 */
0130df54 1901 if (rta->rta_type == IFLA_MTU) {
a5f5cb41
CB
1902 memcpy(&res, RTA_DATA(rta), sizeof(int));
1903 return res;
0130df54 1904 }
a5f5cb41 1905
0130df54
SH
1906 rta = RTA_NEXT(rta, attr_len);
1907 }
0130df54
SH
1908 }
1909
ebc73a67
CB
1910 /* Keep reading more data from the socket if the last
1911 * message had the NLF_F_MULTI flag set.
1912 */
0130df54
SH
1913 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1914
ebc73a67 1915 /* Look at the next message received in this buffer. */
0130df54
SH
1916 msg = NLMSG_NEXT(msg, recv_len);
1917 }
1918 } while (readmore);
1919
6ce39620
CB
1920#pragma GCC diagnostic pop
1921
ebc73a67 1922 /* If we end up here, we didn't find any result, so signal an error. */
a5f5cb41 1923 return -1;
0130df54
SH
1924}
1925
d472214b 1926int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1927{
a5f5cb41
CB
1928 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1929 struct nl_handler nlh;
1930 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
54256301 1931 int err, len;
06f976ca 1932 struct ifinfomsg *ifi;
75d09f83 1933
a5f5cb41 1934 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1935 if (err)
1936 return err;
75d09f83
DL
1937
1938 len = strlen(name);
dae3fdf6 1939 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1940 return ret_errno(EINVAL);
75d09f83
DL
1941
1942 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1943 if (!nlmsg)
a5f5cb41 1944 return ret_errno(ENOMEM);
75d09f83 1945
06f976ca 1946 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83 1947 if (!answer)
a5f5cb41 1948 return ret_errno(ENOMEM);
75d09f83 1949
ebc73a67 1950 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1951 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1952
1953 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
1954 if (!ifi)
1955 return ret_errno(ENOMEM);
1956
06f976ca 1957 ifi->ifi_family = AF_UNSPEC;
54256301
CB
1958
1959 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 1960 return ret_errno(ENOMEM);
75d09f83
DL
1961
1962 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 1963 return ret_errno(ENOMEM);
75d09f83 1964
a5f5cb41 1965 return netlink_transaction(nlh_ptr, nlmsg, answer);
75d09f83
DL
1966}
1967
d472214b 1968int lxc_netdev_up(const char *name)
0ad19a3f 1969{
d472214b 1970 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1971}
1972
/* Take the network device @name down by handing an empty flag set to
 * netdev_set_flag().
 */
int lxc_netdev_down(const char *name)
{
	const int flag = 0;

	return netdev_set_flag(name, flag);
}
1977
54256301 1978int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 1979{
a5f5cb41
CB
1980 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1981 struct nl_handler nlh;
1982 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1983 int err, len;
06f976ca 1984 struct ifinfomsg *ifi;
0ad19a3f 1985 struct rtattr *nest1, *nest2, *nest3;
0ad19a3f 1986
a5f5cb41 1987 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1988 if (err)
1989 return err;
0ad19a3f 1990
1991 len = strlen(name1);
dae3fdf6 1992 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1993 return ret_errno(EINVAL);
0ad19a3f 1994
1995 len = strlen(name2);
dae3fdf6 1996 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1997 return ret_errno(EINVAL);
0ad19a3f 1998
1999 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2000 if (!nlmsg)
a5f5cb41 2001 return ret_errno(ENOMEM);
0ad19a3f 2002
06f976ca 2003 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2004 if (!answer)
a5f5cb41 2005 return ret_errno(ENOMEM);
0ad19a3f 2006
a5f5cb41 2007 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2008 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2009
2010 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 2011 if (!ifi)
a5f5cb41
CB
2012 return ret_errno(ENOMEM);
2013
06f976ca 2014 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2015
79e68309 2016 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2017 if (!nest1)
a5f5cb41 2018 return ret_errno(EINVAL);
0ad19a3f 2019
2020 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
a5f5cb41 2021 return ret_errno(ENOMEM);
0ad19a3f 2022
2023 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2024 if (!nest2)
a5f5cb41 2025 return ret_errno(ENOMEM);
0ad19a3f 2026
2027 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
2028 if (!nest3)
a5f5cb41 2029 return ret_errno(ENOMEM);
0ad19a3f 2030
06f976ca 2031 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2032 if (!ifi)
2033 return ret_errno(ENOMEM);
0ad19a3f 2034
2035 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
a5f5cb41 2036 return ret_errno(ENOMEM);
0ad19a3f 2037
54256301 2038 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2039 return ret_errno(ENOMEM);
54256301
CB
2040
2041 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
a5f5cb41 2042 return ret_errno(ENOMEM);
54256301 2043
0ad19a3f 2044 nla_end_nested(nlmsg, nest3);
0ad19a3f 2045 nla_end_nested(nlmsg, nest2);
0ad19a3f 2046 nla_end_nested(nlmsg, nest1);
2047
2048 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
a5f5cb41 2049 return ret_errno(ENOMEM);
0ad19a3f 2050
a5f5cb41 2051 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2052}
2053
ebc73a67 2054/* TODO: merge with lxc_macvlan_create */
0dc9a142 2055int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid)
26c39028 2056{
a5f5cb41
CB
2057 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2058 struct nl_handler nlh;
2059 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2060 int err, len, lindex;
06f976ca 2061 struct ifinfomsg *ifi;
26c39028 2062 struct rtattr *nest, *nest2;
26c39028 2063
a5f5cb41 2064 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2065 if (err)
2066 return err;
26c39028 2067
0dc9a142 2068 len = strlen(parent);
dae3fdf6 2069 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2070 return ret_errno(EINVAL);
26c39028
JHS
2071
2072 len = strlen(name);
dae3fdf6 2073 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2074 return ret_errno(EINVAL);
26c39028
JHS
2075
2076 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2077 if (!nlmsg)
a5f5cb41 2078 return ret_errno(ENOMEM);
26c39028 2079
06f976ca 2080 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028 2081 if (!answer)
a5f5cb41 2082 return ret_errno(ENOMEM);
26c39028 2083
0dc9a142 2084 lindex = if_nametoindex(parent);
26c39028 2085 if (!lindex)
a5f5cb41 2086 return ret_errno(EINVAL);
26c39028 2087
a5f5cb41 2088 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2089 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2090
2091 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2092 if (!ifi)
2093 return ret_errno(ENOMEM);
2094
06f976ca 2095 ifi->ifi_family = AF_UNSPEC;
26c39028 2096
79e68309 2097 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028 2098 if (!nest)
a5f5cb41 2099 return ret_errno(ENOMEM);
26c39028
JHS
2100
2101 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
a5f5cb41 2102 return ret_errno(ENOMEM);
26c39028
JHS
2103
2104 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2105 if (!nest2)
a5f5cb41 2106 return ret_errno(ENOMEM);
e892973e 2107
26c39028 2108 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
a5f5cb41 2109 return ret_errno(ENOMEM);
e892973e 2110
26c39028 2111 nla_end_nested(nlmsg, nest2);
26c39028
JHS
2112 nla_end_nested(nlmsg, nest);
2113
2114 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
a5f5cb41 2115 return ret_errno(ENOMEM);
26c39028
JHS
2116
2117 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41
CB
2118 return ret_errno(ENOMEM);
2119
2120 return netlink_transaction(nlh_ptr, nlmsg, answer);
26c39028
JHS
2121}
2122
0dc9a142 2123int lxc_macvlan_create(const char *parent, const char *name, int mode)
0ad19a3f 2124{
a5f5cb41
CB
2125 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2126 struct nl_handler nlh;
2127 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2128 int err, index, len;
06f976ca 2129 struct ifinfomsg *ifi;
e892973e 2130 struct rtattr *nest, *nest2;
0ad19a3f 2131
a5f5cb41 2132 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2133 if (err)
2134 return err;
0ad19a3f 2135
0dc9a142 2136 len = strlen(parent);
dae3fdf6 2137 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2138 return ret_errno(EINVAL);
0ad19a3f 2139
2140 len = strlen(name);
dae3fdf6 2141 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2142 return ret_errno(EINVAL);
0ad19a3f 2143
2144 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2145 if (!nlmsg)
a5f5cb41 2146 return ret_errno(ENOMEM);
0ad19a3f 2147
06f976ca 2148 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2149 if (!answer)
a5f5cb41 2150 return ret_errno(ENOMEM);
0ad19a3f 2151
0dc9a142 2152 index = if_nametoindex(parent);
0ad19a3f 2153 if (!index)
a5f5cb41 2154 return ret_errno(EINVAL);
0ad19a3f 2155
a5f5cb41 2156 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2157 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2158
2159 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2160 if (!ifi)
2161 return ret_errno(ENOMEM);
2162
06f976ca 2163 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2164
79e68309 2165 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2166 if (!nest)
a5f5cb41 2167 return ret_errno(ENOMEM);
0ad19a3f 2168
2169 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
a5f5cb41 2170 return ret_errno(ENOMEM);
0ad19a3f 2171
e892973e
DL
2172 if (mode) {
2173 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2174 if (!nest2)
a5f5cb41 2175 return ret_errno(ENOMEM);
e892973e
DL
2176
2177 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
a5f5cb41 2178 return ret_errno(ENOMEM);
e892973e
DL
2179
2180 nla_end_nested(nlmsg, nest2);
2181 }
2182
0ad19a3f 2183 nla_end_nested(nlmsg, nest);
2184
2185 if (nla_put_u32(nlmsg, IFLA_LINK, index))
a5f5cb41 2186 return ret_errno(ENOMEM);
0ad19a3f 2187
2188 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2189 return ret_errno(ENOMEM);
0ad19a3f 2190
a5f5cb41 2191 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2192}
2193
2194static int proc_sys_net_write(const char *path, const char *value)
2195{
ebc73a67
CB
2196 int fd;
2197 int err = 0;
0ad19a3f 2198
2199 fd = open(path, O_WRONLY);
2200 if (fd < 0)
2201 return -errno;
2202
f640cf46 2203 if (lxc_write_nointr(fd, value, strlen(value)) < 0)
0ad19a3f 2204 err = -errno;
2205
2206 close(fd);
2207 return err;
2208}
2209
6dfa9581 2210static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 2211{
2212 int ret;
2213 char path[PATH_MAX];
6509154d 2214
2215 if (family != AF_INET && family != AF_INET6)
6dfa9581 2216 return -EINVAL;
6509154d 2217
9c66dc4f 2218 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581 2219 family == AF_INET ? "ipv4" : "ipv6", ifname, "forwarding");
9c66dc4f 2220 if (ret < 0 || (size_t)ret >= sizeof(path))
6dfa9581 2221 return -E2BIG;
6509154d 2222
6dfa9581
TP
2223 return proc_sys_net_write(path, flag ? "1" : "0");
2224}
2225
/* Enable IP forwarding of the given address @family on interface @name. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
2230
/* Disable IP forwarding of the given address @family on interface @name. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int enable = 0;

	return ip_forwarding_set(name, family, enable);
}
2235
0ad19a3f 2236static int neigh_proxy_set(const char *ifname, int family, int flag)
2237{
9ba8130c 2238 int ret;
419590da 2239 char path[PATH_MAX];
0ad19a3f 2240
2241 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 2242 return -EINVAL;
0ad19a3f 2243
9c66dc4f 2244 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
2245 family == AF_INET ? "ipv4" : "ipv6", ifname,
2246 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 2247 if (ret < 0 || (size_t)ret >= sizeof(path))
9ba8130c 2248 return -E2BIG;
0ad19a3f 2249
ebc73a67 2250 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 2251}
2252
6509154d 2253static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2254{
2255 int ret;
2256 char path[PATH_MAX];
2257 char buf[1] = "";
2258
2259 if (family != AF_INET && family != AF_INET6)
596a002c 2260 return ret_set_errno(-1, EINVAL);
6509154d 2261
9c66dc4f 2262 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6509154d 2263 family == AF_INET ? "ipv4" : "ipv6", ifname,
2264 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 2265 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 2266 return ret_set_errno(-1, E2BIG);
6509154d 2267
2268 return lxc_read_file_expect(path, buf, 1, "1");
2269}
2270
/* Enable neighbour proxying of the given address @family on interface @name. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2275
/* Disable neighbour proxying of the given address @family on interface @name. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
2280
/* Return the value of the hexadecimal digit @c, or -1 if @c is not one. */
static int mac_hex_value(char c)
{
	/* <ctype.h> functions require a value representable as unsigned char. */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address @macaddr (groups of one or two hex digits,
 * optionally separated by ':') into @sockaddr.
 *
 * @sockaddr->sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are
 * stored in @sockaddr->sa_data. Parsing stops at the end of the string or
 * after ETH_ALEN bytes, whichever comes first; a shorter address is not
 * treated as an error.
 *
 * Returns 0 on success and -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned int val;
		int high, low;
		char c;

		/* The first character of a group must be a hex digit. */
		high = mac_hex_value(*macaddr++);
		if (high < 0)
			return -EINVAL;

		/* The second one is either another digit, or the group is
		 * a single digit terminated by ':' or the end of the string.
		 */
		c = *macaddr;
		low = mac_hex_value(c);
		if (low >= 0)
			val = ((unsigned int)high << 4) | (unsigned int)low;
		else if (c == ':' || c == '\0')
			val = (unsigned int)high;
		else
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator that follows a two-digit group. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2325
ebc73a67
CB
2326static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2327 void *acast, int prefix)
0ad19a3f 2328{
a5f5cb41
CB
2329 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2330 struct nl_handler nlh;
2331 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2332 int addrlen, err;
06f976ca 2333 struct ifaddrmsg *ifa;
0ad19a3f 2334
ebc73a67
CB
2335 addrlen = family == AF_INET ? sizeof(struct in_addr)
2336 : sizeof(struct in6_addr);
4bf1968d 2337
a5f5cb41 2338 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2339 if (err)
2340 return err;
0ad19a3f 2341
0ad19a3f 2342 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2343 if (!nlmsg)
a5f5cb41 2344 return ret_errno(ENOMEM);
0ad19a3f 2345
06f976ca 2346 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2347 if (!answer)
a5f5cb41 2348 return ret_errno(ENOMEM);
0ad19a3f 2349
a5f5cb41 2350 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2351 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2352
2353 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2354 if (!ifa)
a5f5cb41
CB
2355 return ret_errno(ENOMEM);
2356
06f976ca
SZ
2357 ifa->ifa_prefixlen = prefix;
2358 ifa->ifa_index = ifindex;
2359 ifa->ifa_family = family;
2360 ifa->ifa_scope = 0;
acf47e1b 2361
4bf1968d 2362 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
a5f5cb41 2363 return ret_errno(EINVAL);
0ad19a3f 2364
4bf1968d 2365 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
a5f5cb41 2366 return ret_errno(EINVAL);
0ad19a3f 2367
d8948a52 2368 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
a5f5cb41 2369 return ret_errno(EINVAL);
1f1b18e7 2370
ebc73a67 2371 /* TODO: multicast, anycast with ipv6 */
79881dc6
DL
2372 if (family == AF_INET6 &&
2373 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2374 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
a5f5cb41 2375 return ret_errno(EPROTONOSUPPORT);
0ad19a3f 2376
a5f5cb41 2377 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2378}
2379
/*
 * Add the IPv6 address @addr/@prefix to the interface with index @ifindex.
 * @mcast is handed to ip_addr_add() in the broadcast slot and @acast in the
 * anycast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	void *local = addr;
	void *bcast_slot = mcast;
	void *acast_slot = acast;

	return ip_addr_add(AF_INET6, ifindex, local, bcast_slot, acast_slot, prefix);
}
2386
ebc73a67
CB
/*
 * Add the IPv4 address @addr/@prefix with broadcast address @bcast to the
 * interface with index @ifindex. No anycast address is passed on.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	void *local = addr;
	void *bcast_slot = bcast;

	return ip_addr_add(AF_INET, ifindex, local, bcast_slot, NULL, prefix);
}
2392
ebc73a67
CB
/* Find an IFA_LOCAL address (or an IFA_ADDRESS if no IFA_LOCAL is present) in
 * the given RTM_NEWADDR message. Allocates memory for the address and stores
 * that pointer in *res (so res should be an in_addr** or in6_addr**).
 */
6ce39620
CB
2397#pragma GCC diagnostic push
2398#pragma GCC diagnostic ignored "-Wcast-align"
2399
ebc73a67
CB
/*
 * Returns 0 when the message was processed (including the case where it
 * belongs to a different address family or carries no usable attribute, in
 * which case *res is left untouched) and -1 when an address attribute has an
 * unexpected length or the allocation fails.
 */
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addr_hdr = NLMSG_DATA(msg);
	struct rtattr *attr = IFA_RTA(addr_hdr);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	if (addr_hdr->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk all attributes attached to this address message. */
	for (; RTA_OK(attr, remaining); attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * length, verify it nevertheless before copying.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* An IFA_ADDRESS stored earlier is simply overwritten, so
		 * the buffer is only allocated the first time around.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is the preferred result, nothing can beat it. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
2444
6ce39620
CB
2445#pragma GCC diagnostic pop
2446
19a26f82
MK
2447static int ip_addr_get(int family, int ifindex, void **res)
2448{
a5f5cb41
CB
2449 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2450 struct nl_handler nlh;
2451 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2452 int answer_len, err;
06f976ca 2453 struct ifaddrmsg *ifa;
19a26f82 2454 struct nlmsghdr *msg;
ebc73a67 2455 int readmore = 0, recv_len = 0;
19a26f82 2456
a5f5cb41 2457 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
19a26f82
MK
2458 if (err)
2459 return err;
2460
19a26f82
MK
2461 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2462 if (!nlmsg)
a5f5cb41 2463 return ret_errno(ENOMEM);
19a26f82 2464
06f976ca 2465 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82 2466 if (!answer)
a5f5cb41 2467 return ret_errno(ENOMEM);
19a26f82 2468
ebc73a67
CB
2469 /* Save the answer buffer length, since it will be overwritten on the
2470 * first receive (and we might need to receive more than once).
2471 */
06f976ca
SZ
2472 answer_len = answer->nlmsghdr->nlmsg_len;
2473
ebc73a67 2474 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2475 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2476
06f976ca 2477 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b 2478 if (!ifa)
a5f5cb41
CB
2479 return ret_errno(ENOMEM);
2480
06f976ca 2481 ifa->ifa_family = family;
19a26f82 2482
ebc73a67
CB
2483 /* Send the request for addresses, which returns all addresses on all
2484 * interfaces.
2485 */
a5f5cb41 2486 err = netlink_send(nlh_ptr, nlmsg);
19a26f82 2487 if (err < 0)
a5f5cb41 2488 return ret_set_errno(err, errno);
19a26f82 2489
6ce39620
CB
2490#pragma GCC diagnostic push
2491#pragma GCC diagnostic ignored "-Wcast-align"
2492
19a26f82
MK
2493 do {
2494 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2495 * overwritten by a previous receive.
2496 */
06f976ca 2497 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2498
ebc73a67 2499 /* Get the (next) batch of reply messages. */
a5f5cb41 2500 err = netlink_rcv(nlh_ptr, answer);
19a26f82 2501 if (err < 0)
a5f5cb41 2502 return ret_set_errno(err, errno);
19a26f82
MK
2503
2504 recv_len = err;
2505 err = 0;
2506
ebc73a67 2507 /* Satisfy the typing for the netlink macros. */
06f976ca 2508 msg = answer->nlmsghdr;
19a26f82
MK
2509
2510 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2511 /* Stop reading if we see an error message. */
19a26f82 2512 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
2513 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2514 return ret_set_errno(errmsg->error, errno);
19a26f82
MK
2515 }
2516
ebc73a67 2517 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2518 if (msg->nlmsg_type == NLMSG_DONE) {
2519 readmore = 0;
2520 break;
2521 }
2522
a5f5cb41
CB
2523 if (msg->nlmsg_type != RTM_NEWADDR)
2524 return ret_errno(EINVAL);
19a26f82 2525
06f976ca
SZ
2526 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2527 if (ifa->ifa_index == ifindex) {
a5f5cb41
CB
2528 if (ifa_get_local_ip(family, msg, res) < 0)
2529 return ret_errno(EINVAL);
51e7a874 2530
ebc73a67 2531 /* Found a result, stop searching. */
19a26f82 2532 if (*res)
a5f5cb41 2533 return 0;
19a26f82
MK
2534 }
2535
ebc73a67
CB
2536 /* Keep reading more data from the socket if the last
2537 * message had the NLF_F_MULTI flag set.
2538 */
19a26f82
MK
2539 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2540
ebc73a67 2541 /* Look at the next message received in this buffer. */
19a26f82
MK
2542 msg = NLMSG_NEXT(msg, recv_len);
2543 }
2544 } while (readmore);
2545
6ce39620
CB
2546#pragma GCC diagnostic pop
2547
19a26f82 2548 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2549 * error.
2550 */
a5f5cb41 2551 return -1;
19a26f82
MK
2552}
2553
/* Look up an IPv6 address of the interface with index @ifindex via
 * ip_addr_get(); the result is stored in *res.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **slot = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, slot);
}
2558
/* Look up an IPv4 address of the interface with index @ifindex via
 * ip_addr_get(); the result is stored in *res.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **slot = (void **)res;

	return ip_addr_get(AF_INET, ifindex, slot);
}
2563
f8fee0e2
MK
2564static int ip_gateway_add(int family, int ifindex, void *gw)
2565{
a5f5cb41 2566 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2567 struct nl_handler nlh;
a5f5cb41
CB
2568 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2569 int addrlen, err;
06f976ca 2570 struct rtmsg *rt;
f8fee0e2 2571
ebc73a67
CB
2572 addrlen = family == AF_INET ? sizeof(struct in_addr)
2573 : sizeof(struct in6_addr);
f8fee0e2 2574
a5f5cb41 2575 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
f8fee0e2
MK
2576 if (err)
2577 return err;
2578
f8fee0e2
MK
2579 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2580 if (!nlmsg)
a5f5cb41 2581 return ret_errno(ENOMEM);
f8fee0e2 2582
06f976ca 2583 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2 2584 if (!answer)
a5f5cb41 2585 return ret_errno(ENOMEM);
f8fee0e2 2586
a5f5cb41 2587 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2588 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2589
2590 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b 2591 if (!rt)
a5f5cb41
CB
2592 return ret_errno(ENOMEM);
2593
06f976ca
SZ
2594 rt->rtm_family = family;
2595 rt->rtm_table = RT_TABLE_MAIN;
2596 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2597 rt->rtm_protocol = RTPROT_BOOT;
2598 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2599 /* "default" destination */
06f976ca 2600 rt->rtm_dst_len = 0;
f8fee0e2 2601
a2f9a670 2602 /* If gateway address not supplied, then a device route will be created instead */
a5f5cb41
CB
2603 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2604 return ret_errno(ENOMEM);
f8fee0e2
MK
2605
2606 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2607 * addresses for the gateway.
2608 */
f8fee0e2 2609 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
a5f5cb41 2610 return ret_errno(EINVAL);
f8fee0e2 2611
a5f5cb41 2612 return netlink_transaction(nlh_ptr, nlmsg, answer);
f8fee0e2
MK
2613}
2614
/* Add an IPv4 default route via @gw on the interface with index @ifindex. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET, ifindex, gateway);
}
2619
/* Add an IPv6 default route via @gw on the interface with index @ifindex. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET6, ifindex, gateway);
}
/*
 * Decide whether @bridge is to be treated as an openvswitch bridge.
 *
 * The test is indirect: it returns true exactly when stat() on
 * /sys/class/net/<bridge>/bridge fails with ENOENT. Any other outcome
 * (the directory exists, another stat() error, or a name too long for the
 * path buffer) yields false.
 */
bool is_ovs_bridge(const char *bridge)
{
	/* 22 == strlen("/sys/class/net/") + strlen("/bridge") */
	char brdirname[22 + IFNAMSIZ + 1] = {0};
	struct stat sb;
	int len;

	len = snprintf(brdirname, 22 + IFNAMSIZ + 1, "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= 22 + IFNAMSIZ + 1)
		return false;

	if (stat(brdirname, &sb) >= 0)
		return false;

	return errno == ENOENT;
}
2641
581c75e7
CB
/* Argument bundle handed to the ovs-vsctl exec callbacks through their
 * void * data parameter.
 */
struct ovs_veth_args {
	const char *bridge; /* passed to ovs-vsctl as the bridge argument */
	const char *nic;    /* passed to ovs-vsctl as the port argument */
};
2646
cb0dc11b
CB
2647/* Called from a background thread - when nic goes away, remove it from the
2648 * bridge.
c43cbc04 2649 */
581c75e7 2650static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2651{
581c75e7 2652 struct ovs_veth_args *args = data;
cb0dc11b 2653
9c66dc4f 2654 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2655 return -1;
c43cbc04
SH
2656}
2657
581c75e7 2658int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2659{
c43cbc04 2660 int ret;
419590da 2661 char cmd_output[PATH_MAX];
581c75e7 2662 struct ovs_veth_args args;
6ad22d06 2663
581c75e7
CB
2664 args.bridge = bridge;
2665 args.nic = nic;
2666 ret = run_command(cmd_output, sizeof(cmd_output),
2667 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2668 if (ret < 0)
2669 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2670
581c75e7
CB
2671 return 0;
2672}
ebc73a67 2673
581c75e7
CB
2674static int lxc_ovs_attach_bridge_exec(void *data)
2675{
2676 struct ovs_veth_args *args = data;
ebc73a67 2677
9c66dc4f 2678 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2679 return -1;
2680}
ebc73a67 2681
581c75e7
CB
2682static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2683{
2684 int ret;
419590da 2685 char cmd_output[PATH_MAX];
581c75e7 2686 struct ovs_veth_args args;
ebc73a67 2687
581c75e7
CB
2688 args.bridge = bridge;
2689 args.nic = nic;
2690 ret = run_command(cmd_output, sizeof(cmd_output),
2691 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2692 if (ret < 0)
2693 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2694
581c75e7 2695 return 0;
0d204771 2696}
0d204771 2697
581c75e7 2698int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2699{
ebc73a67 2700 int err, fd, index;
9de31d5a 2701 size_t retlen;
0ad19a3f 2702 struct ifreq ifr;
2703
dae3fdf6 2704 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2705 return -EINVAL;
0ad19a3f 2706
2707 index = if_nametoindex(ifname);
2708 if (!index)
3cfc0f3a 2709 return -EINVAL;
0ad19a3f 2710
0d204771 2711 if (is_ovs_bridge(bridge))
581c75e7 2712 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2713
ad9429e5 2714 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2715 if (fd < 0)
3cfc0f3a 2716 return -errno;
0ad19a3f 2717
9de31d5a 2718 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2719 if (retlen >= IFNAMSIZ) {
2720 close(fd);
9de31d5a 2721 return -E2BIG;
42cc4083 2722 }
9de31d5a 2723
ebc73a67 2724 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2725 ifr.ifr_ifindex = index;
7d163508 2726 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2727 close(fd);
3cfc0f3a
MN
2728 if (err)
2729 err = -errno;
0ad19a3f 2730
2731 return err;
2732}
72d0e1cb 2733
ebc73a67 2734static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2735 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2736 [LXC_NET_VETH] = "veth",
2737 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2738 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2739 [LXC_NET_PHYS] = "phys",
b343592b
BP
2740 [LXC_NET_VLAN] = "vlan",
2741 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2742};
2743
2744const char *lxc_net_type_to_str(int type)
2745{
2746 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2747 return NULL;
ebc73a67 2748
72d0e1cb
SG
2749 return lxc_network_types[type];
2750}
8befa924 2751
3646ffd9 2752static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2753
3646ffd9 2754char *lxc_ifname_alnum_case_sensitive(char *template)
a0265685 2755{
966e9f1f 2756 char name[IFNAMSIZ];
966e9f1f 2757 size_t i = 0;
280cc35f 2758#ifdef HAVE_RAND_R
2759 unsigned int seed;
2760
2761 seed = randseed(false);
2762#else
2763
2764 (void)randseed(true);
2765#endif
a0265685 2766
535e8859
CB
2767 if (strlen(template) >= IFNAMSIZ)
2768 return NULL;
2769
ebc73a67 2770 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2771 for (;;) {
966e9f1f 2772 name[0] = '\0';
94b1cade 2773 (void)strlcpy(name, template, IFNAMSIZ);
a0265685
SG
2774
2775 for (i = 0; i < strlen(name); i++) {
2776 if (name[i] == 'X') {
2777#ifdef HAVE_RAND_R
8523344a 2778 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2779#else
8523344a 2780 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2781#endif
2782 }
2783 }
2784
3a026996 2785 if (if_nametoindex(name) == 0)
a0265685 2786 break;
a0265685
SG
2787 }
2788
94b1cade
DJ
2789 (void)strlcpy(template, name, strlen(template) + 1);
2790
2791 return template;
a0265685
SG
2792}
2793
8befa924
SH
2794int setup_private_host_hw_addr(char *veth1)
2795{
ebc73a67 2796 int err, sockfd;
8befa924 2797 struct ifreq ifr;
8befa924 2798
ad9429e5 2799 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2800 if (sockfd < 0)
2801 return -errno;
2802
ebc73a67 2803 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
87c6e5db
DJ
2804 if (err < 0 || (size_t)err >= IFNAMSIZ) {
2805 close(sockfd);
ebc73a67 2806 return -E2BIG;
87c6e5db 2807 }
ebc73a67 2808
8befa924
SH
2809 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2810 if (err < 0) {
8befa924 2811 close(sockfd);
8befa924
SH
2812 return -errno;
2813 }
2814
2815 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2816 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924 2817 close(sockfd);
8befa924
SH
2818 if (err < 0)
2819 return -errno;
2820
2821 return 0;
2822}
811ef482
CB
2823
2824int lxc_find_gateway_addresses(struct lxc_handler *handler)
2825{
2826 struct lxc_list *network = &handler->conf->network;
2827 struct lxc_list *iterator;
2828 struct lxc_netdev *netdev;
2829 int link_index;
2830
2831 lxc_list_for_each(iterator, network) {
2832 netdev = iterator->elem;
2833
2834 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2835 continue;
2836
9c66dc4f
CB
2837 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2838 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2839
f2711167 2840 if (is_empty_string(netdev->link)) {
9c66dc4f 2841 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2842 }
2843
2844 link_index = if_nametoindex(netdev->link);
2845 if (!link_index)
2846 return -EINVAL;
2847
2848 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2849 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2850 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2851 }
2852
2853 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2854 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2855 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2856 }
2857 }
2858
2859 return 0;
2860}
2861
2862#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2863static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2864 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2865{
2866 int ret;
2867 pid_t child;
2868 int bytes, pipefd[2];
2869 char *token, *saveptr = NULL;
095ead80 2870 char netdev_link[IFNAMSIZ];
419590da 2871 char buffer[PATH_MAX] = {0};
94b1cade 2872 size_t retlen;
811ef482 2873
9c66dc4f
CB
2874 if (netdev->type != LXC_NET_VETH)
2875 return log_error_errno(-1, errno, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
2876
2877 ret = pipe(pipefd);
9c66dc4f
CB
2878 if (ret < 0)
2879 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2880
2881 child = fork();
2882 if (child < 0) {
811ef482
CB
2883 close(pipefd[0]);
2884 close(pipefd[1]);
9c66dc4f 2885 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2886 }
2887
2888 if (child == 0) {
8335fd40 2889 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2890
2891 close(pipefd[0]);
2892
2893 ret = dup2(pipefd[1], STDOUT_FILENO);
2894 if (ret >= 0)
2895 ret = dup2(pipefd[1], STDERR_FILENO);
2896 close(pipefd[1]);
2897 if (ret < 0) {
2898 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2899 _exit(EXIT_FAILURE);
811ef482
CB
2900 }
2901
f2711167 2902 if (!is_empty_string(netdev->link))
9de31d5a 2903 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2904 else
9de31d5a
CB
2905 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2906 if (retlen >= IFNAMSIZ) {
2907 SYSERROR("Invalid network device name");
2908 _exit(EXIT_FAILURE);
2909 }
811ef482 2910
8335fd40
CB
2911 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2912 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2913 _exit(EXIT_FAILURE);
8335fd40 2914 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2915
2916 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2917 lxcname, pidstr, netdev_link,
3473ca76
CB
2918 !is_empty_string(netdev->name) ? netdev->name : "(null)");
2919 if (!is_empty_string(netdev->name))
811ef482
CB
2920 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2921 lxcpath, lxcname, pidstr, "veth", netdev_link,
2922 netdev->name, (char *)NULL);
2923 else
2924 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2925 lxcpath, lxcname, pidstr, "veth", netdev_link,
2926 (char *)NULL);
2927 SYSERROR("Failed to execute lxc-user-nic");
78070056 2928 _exit(EXIT_FAILURE);
811ef482
CB
2929 }
2930
2931 /* close the write-end of the pipe */
2932 close(pipefd[1]);
2933
9c66dc4f 2934 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2935 if (bytes < 0) {
74c6e2b0 2936 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2937 close(pipefd[0]);
6b9f82a9
CB
2938 } else {
2939 buffer[bytes - 1] = '\0';
811ef482 2940 }
811ef482
CB
2941
2942 ret = wait_for_pid(child);
2943 close(pipefd[0]);
9c66dc4f
CB
2944 if (ret != 0 || bytes < 0)
2945 return log_error(-1, "lxc-user-nic failed to configure requested network: %s", buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2946 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2947
2948 /* netdev->name */
2949 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2950 if (!token)
2951 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2952
e389f2af
CB
2953 /*
2954 * lxc-user-nic will take care of proper network device naming. So
2955 * netdev->name and netdev->created_name need to be identical to not
2956 * trigger another rename later on.
2957 */
2958 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2959 if (retlen < IFNAMSIZ)
2960 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
9c66dc4f
CB
2961 if (retlen >= IFNAMSIZ)
2962 return log_error_errno(-1, E2BIG, "Container side veth device name returned by lxc-user-nic is too long");
811ef482 2963
74c6e2b0 2964 /* netdev->ifindex */
811ef482 2965 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2966 if (!token)
2967 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2968
74c6e2b0 2969 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f
CB
2970 if (ret < 0)
2971 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2972
74c6e2b0 2973 /* netdev->priv.veth_attr.veth1 */
811ef482 2974 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2975 if (!token)
2976 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2977
94b1cade 2978 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f
CB
2979 if (retlen >= IFNAMSIZ)
2980 return log_error_errno(-1, E2BIG, "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
2981
2982 /* netdev->priv.veth_attr.ifindex */
2983 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2984 if (!token)
2985 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
2986
2987 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f
CB
2988 if (ret < 0)
2989 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2990
4d781681 2991 if (netdev->upscript) {
2992 char *argv[] = {
2993 "veth",
2994 netdev->link,
2995 netdev->priv.veth_attr.veth1,
2996 NULL,
2997 };
2998
e389f2af
CB
2999 ret = run_script_argv(lxcname, hooks_version, "net",
3000 netdev->upscript, "up", argv);
4d781681 3001 if (ret < 0)
3002 return -1;
3003 }
3004
811ef482
CB
3005 return 0;
3006}
3007
f0ecc19d 3008static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
3009 struct lxc_netdev *netdev,
3010 const char *netns_path)
811ef482
CB
3011{
3012 int bytes, ret;
3013 pid_t child;
3014 int pipefd[2];
25619b99 3015 char buffer[PATH_MAX] = {};
811ef482 3016
9c66dc4f
CB
3017 if (netdev->type != LXC_NET_VETH)
3018 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
3019
3020 ret = pipe(pipefd);
9c66dc4f
CB
3021 if (ret < 0)
3022 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
3023
3024 child = fork();
3025 if (child < 0) {
811ef482
CB
3026 close(pipefd[0]);
3027 close(pipefd[1]);
9c66dc4f 3028 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
3029 }
3030
3031 if (child == 0) {
8843fde4 3032 char *hostveth;
811ef482
CB
3033
3034 close(pipefd[0]);
3035
3036 ret = dup2(pipefd[1], STDOUT_FILENO);
3037 if (ret >= 0)
3038 ret = dup2(pipefd[1], STDERR_FILENO);
3039 close(pipefd[1]);
3040 if (ret < 0) {
3041 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 3042 _exit(EXIT_FAILURE);
811ef482
CB
3043 }
3044
f2711167 3045 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3046 hostveth = netdev->priv.veth_attr.pair;
3047 else
3048 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3049 if (is_empty_string(hostveth)) {
74c6e2b0 3050 SYSERROR("Host side veth device name is missing");
a30b9023 3051 _exit(EXIT_FAILURE);
74c6e2b0
CB
3052 }
3053
f2711167
CB
3054 if (is_empty_string(netdev->link)) {
3055 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 3056 _exit(EXIT_FAILURE);
74c6e2b0 3057 }
811ef482 3058
811ef482 3059 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 3060 lxcname, netns_path, netdev->link, hostveth);
811ef482 3061 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
3062 lxcname, netns_path, "veth", netdev->link, hostveth,
3063 (char *)NULL);
811ef482 3064 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 3065 _exit(EXIT_FAILURE);
811ef482
CB
3066 }
3067
3068 close(pipefd[1]);
3069
9c66dc4f 3070 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
3071 if (bytes < 0) {
3072 SYSERROR("Failed to read from pipe file descriptor.");
3073 close(pipefd[0]);
6b9f82a9
CB
3074 } else {
3075 buffer[bytes - 1] = '\0';
811ef482 3076 }
811ef482 3077
6b9f82a9 3078 ret = wait_for_pid(child);
9c66dc4f
CB
3079 close_prot_errno_disarm(pipefd[0]);
3080 if (ret != 0 || bytes < 0)
3081 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
3082 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 3083
811ef482
CB
3084 return 0;
3085}
3086
59eac805 3087static bool lxc_delete_network_unpriv(struct lxc_handler *handler)
1bd8d726
CB
3088{
3089 int ret;
3090 struct lxc_list *iterator;
3091 struct lxc_list *network = &handler->conf->network;
3092 /* strlen("/proc/") = 6
3093 * +
8335fd40 3094 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
3095 * +
3096 * strlen("/fd/") = 4
3097 * +
8335fd40 3098 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
3099 * +
3100 * \0
3101 */
8335fd40 3102 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
3103
3104 *netns_path = '\0';
3105
9c66dc4f
CB
3106 if (handler->nsfd[LXC_NS_NET] < 0)
3107 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726
CB
3108
3109 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 3110 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
3111 if (ret < 0 || ret >= sizeof(netns_path))
3112 return false;
3113
3114 lxc_list_for_each(iterator, network) {
3115 char *hostveth = NULL;
3116 struct lxc_netdev *netdev = iterator->elem;
3117
3118 /* We can only delete devices whose ifindex we have. If we don't
3119 * have the index it means that we didn't create it.
3120 */
3121 if (!netdev->ifindex)
3122 continue;
3123
3124 if (netdev->type == LXC_NET_PHYS) {
3125 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3126 netdev->link);
3127 if (ret < 0)
9c66dc4f 3128 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
3129 netdev->ifindex, netdev->link);
3130 else
9c66dc4f 3131 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 3132 netdev->ifindex, netdev->link);
b3259dc6
TP
3133
3134 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3135 if (ret < 0)
3136 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3137 netdev->ifindex, netdev->link);
66a7c406 3138 goto clear_ifindices;
1bd8d726
CB
3139 }
3140
3141 ret = netdev_deconf[netdev->type](handler, netdev);
3142 if (ret < 0)
3143 WARN("Failed to deconfigure network device");
3144
3145 if (netdev->type != LXC_NET_VETH)
66a7c406 3146 goto clear_ifindices;
1bd8d726 3147
f2711167 3148 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 3149 goto clear_ifindices;
1bd8d726 3150
f2711167 3151 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3152 hostveth = netdev->priv.veth_attr.pair;
3153 else
3154 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3155 if (is_empty_string(hostveth))
66a7c406 3156 goto clear_ifindices;
8843fde4 3157
1bd8d726
CB
3158 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3159 handler->name, netdev,
3160 netns_path);
3161 if (ret < 0) {
9c66dc4f 3162 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 3163 goto clear_ifindices;
1bd8d726 3164 }
9c66dc4f 3165 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
3166
3167clear_ifindices:
0858c829
CB
3168 /*
3169 * We need to clear any ifindices we recorded so liblxc won't
3170 * have cached stale data which would cause it to fail on
3171 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3172 */
3173 netdev->ifindex = 0;
3174 if (netdev->type == LXC_NET_PHYS) {
3175 netdev->priv.phys_attr.ifindex = 0;
3176 } else if (netdev->type == LXC_NET_VETH) {
3177 netdev->priv.veth_attr.veth1[0] = '\0';
3178 netdev->priv.veth_attr.ifindex = 0;
3179 }
1bd8d726
CB
3180 }
3181
bb84beda 3182 return true;
1bd8d726
CB
3183}
3184
6509154d 3185static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
3186 struct lxc_list *cur, *next;
3187 struct lxc_inetdev *inet4dev;
3188 struct lxc_inet6dev *inet6dev;
3189 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3190 int err = 0;
5fe147e9
TP
3191 unsigned int lo_ifindex = 0, link_ifindex = 0;
3192
3193 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
3194 if (link_ifindex == 0)
3195 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 3196
6509154d 3197
3198 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3199 if (!lxc_list_empty(&netdev->ipv4)) {
3200 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
3201 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3202 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 3203 }
3204
3205 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3206 if (!lxc_list_empty(&netdev->ipv6)) {
3207 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
3208 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3209 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 3210
3211 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
3212 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3213 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 3214 }
3215
b670016a 3216 /* Perform IPVLAN specific checks. */
3217 if (netdev->type == LXC_NET_IPVLAN) {
3218 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
3219 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3220 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 3221
3222 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3223 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
3224 if (lo_ifindex == 0)
3225 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 3226 }
3227
6509154d 3228 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3229 inet4dev = cur->elem;
3230 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3231 return ret_set_errno(-1, -errno);
6509154d 3232
5fe147e9 3233 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3234 return ret_set_errno(-1, EINVAL);
b670016a 3235
3236 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3237 if (netdev->type == LXC_NET_IPVLAN) {
3238 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
3239 if (err < 0)
3240 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 3241 }
6509154d 3242 }
3243
3244 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3245 inet6dev = cur->elem;
3246 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3247 return ret_set_errno(-1, -errno);
6509154d 3248
5fe147e9 3249 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3250 return ret_set_errno(-1, EINVAL);
b670016a 3251
3252 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3253 if (netdev->type == LXC_NET_IPVLAN) {
3254 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
3255 if (err < 0)
3256 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3257 }
6509154d 3258 }
3259
3260 return 0;
3261}
3262
/*
 * Remove the l2proxy configuration of a single IPv4 address.
 *
 * When @lo_ifindex is non-zero the /32 route to that interface is deleted.
 * When @link is non-empty the neighbour proxy entry on @link is deleted.
 * Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success, -1 with errno set to EINVAL on failure.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	bool failed = false;
	char addrstr[INET_ADDRSTRLEN];

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* Drop the static route to the loopback device if one was requested. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
		failed = true;
	}

	/* Drop the neighbour proxy entry on the link if one was given. */
	if (!is_empty_string(link)) {
		unsigned int ifindex = if_nametoindex(link);

		if (ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3295
/*
 * Remove the l2proxy configuration of a single IPv6 address.
 *
 * When @lo_ifindex is non-zero the /128 route to that interface is deleted.
 * When @link is non-empty the neighbour proxy entry on @link is deleted.
 * Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success, -1 with errno set to EINVAL on failure.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	bool failed = false;
	char addrstr[INET6_ADDRSTRLEN];

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* Drop the static route to the loopback device if one was requested. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
		failed = true;
	}

	/* Drop the neighbour proxy entry on the link if one was given. */
	if (!is_empty_string(link)) {
		unsigned int ifindex = if_nametoindex(link);

		if (ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3330
6509154d 3331static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3332 unsigned int lo_ifindex = 0;
3333 unsigned int errCount = 0;
6509154d 3334 struct lxc_list *cur, *next;
3335 struct lxc_inetdev *inet4dev;
3336 struct lxc_inet6dev *inet6dev;
6509154d 3337
b670016a 3338 /* Perform IPVLAN specific checks. */
3339 if (netdev->type == LXC_NET_IPVLAN) {
3340 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3341 lo_ifindex = if_nametoindex(loop_device);
b670016a 3342 if (lo_ifindex == 0) {
3343 errCount++;
3ebffb98 3344 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3345 }
b670016a 3346 }
6509154d 3347
b670016a 3348 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3349 inet4dev = cur->elem;
3350 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3351 errCount++;
6509154d 3352 }
3353
3354 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3355 inet6dev = cur->elem;
b670016a 3356 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3357 errCount++;
6509154d 3358 }
3359
b670016a 3360 if (errCount > 0)
596a002c 3361 return ret_set_errno(-1, EINVAL);
6509154d 3362
3363 return 0;
3364}
3365
e389f2af 3366static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3367{
811ef482
CB
3368 struct lxc_list *iterator;
3369 struct lxc_list *network = &handler->conf->network;
3370
811ef482
CB
3371 lxc_list_for_each(iterator, network) {
3372 struct lxc_netdev *netdev = iterator->elem;
3373
9c66dc4f
CB
3374 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3375 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3376
6509154d 3377 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3378 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3379 if (lxc_setup_l2proxy(netdev))
3380 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3381 }
3382
9c66dc4f
CB
3383 if (netdev_conf[netdev->type](handler, netdev))
3384 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3385 }
3386
3387 return 0;
3388}
3389
e389f2af 3390int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3391{
e389f2af
CB
3392 pid_t pid = handler->pid;
3393 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3394 struct lxc_list *iterator;
3395
e0010464 3396 if (am_guest_unpriv())
74c6e2b0 3397 return 0;
811ef482
CB
3398
3399 lxc_list_for_each(iterator, network) {
3dd78294 3400 __do_free char *physname = NULL;
e389f2af 3401 int ret;
811ef482
CB
3402 struct lxc_netdev *netdev = iterator->elem;
3403
811ef482
CB
3404 if (!netdev->ifindex)
3405 continue;
3406
3dd78294
CB
3407 if (netdev->type == LXC_NET_PHYS)
3408 physname = is_wlan(netdev->link);
3409
3410 if (physname)
9f8cf6e1 3411 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, NULL);
3dd78294 3412 else
9f8cf6e1 3413 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
9c66dc4f
CB
3414 if (ret)
3415 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3416 netdev->created_name,
3417 netdev->ifindex, pid);
811ef482 3418
24190194
CB
3419 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3420 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3421 }
3422
3423 return 0;
3424}
3425
3c09b97c
CB
3426static int network_requires_advanced_setup(int type)
3427{
3428 if (type == LXC_NET_EMPTY)
3429 return false;
3430
3431 if (type == LXC_NET_NONE)
3432 return false;
3433
3434 return true;
3435}
3436
e389f2af 3437static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3438{
e389f2af
CB
3439 int hooks_version = handler->conf->hooks_version;
3440 const char *lxcname = handler->name;
3441 const char *lxcpath = handler->lxcpath;
3442 struct lxc_list *network = &handler->conf->network;
3443 pid_t pid = handler->pid;
74c6e2b0
CB
3444 struct lxc_list *iterator;
3445
74c6e2b0
CB
3446 lxc_list_for_each(iterator, network) {
3447 struct lxc_netdev *netdev = iterator->elem;
3448
3c09b97c 3449 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3450 continue;
3451
9c66dc4f
CB
3452 if (netdev->type != LXC_NET_VETH)
3453 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3454 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3455
3456 if (netdev->mtu)
3457 INFO("mtu ignored due to insufficient privilege");
3458
e389f2af
CB
3459 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3460 pid, hooks_version))
74c6e2b0
CB
3461 return -1;
3462 }
3463
3464 return 0;
3465}
3466
59eac805 3467static bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3468{
3469 int ret;
3470 struct lxc_list *iterator;
3471 struct lxc_list *network = &handler->conf->network;
1bd8d726 3472
811ef482
CB
3473 lxc_list_for_each(iterator, network) {
3474 char *hostveth = NULL;
3475 struct lxc_netdev *netdev = iterator->elem;
3476
3477 /* We can only delete devices whose ifindex we have. If we don't
3478 * have the index it means that we didn't create it.
3479 */
3480 if (!netdev->ifindex)
3481 continue;
3482
0104c121
CB
3483 /*
3484 * If the network device has been moved back from the
3485 * containers network namespace, update the ifindex.
3486 */
3487 netdev->ifindex = if_nametoindex(netdev->name);
3488
6509154d 3489 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3490 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3491 if (lxc_delete_l2proxy(netdev))
3492 WARN("Failed to delete all l2proxy config");
3493 /* Don't return, let the network be cleaned up as normal. */
3494 }
3495
811ef482
CB
3496 if (netdev->type == LXC_NET_PHYS) {
3497 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3498 if (ret < 0)
3499 WARN("Failed to rename interface with index %d "
b809f232
CB
3500 "from \"%s\" to its initial name \"%s\"",
3501 netdev->ifindex, netdev->name, netdev->link);
0b154989 3502 else {
29589196
CB
3503 TRACE("Renamed interface with index %d from "
3504 "\"%s\" to its initial name \"%s\"",
3505 netdev->ifindex, netdev->name,
3506 netdev->link);
0b154989
TP
3507
3508 /* Restore original MTU */
3509 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3510 if (ret < 0) {
3511 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3512 netdev->link, netdev->priv.phys_attr.mtu);
3513 } else {
3514 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3515 netdev->link, netdev->priv.phys_attr.mtu);
3516 }
3517 }
b3259dc6
TP
3518
3519 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3520 if (ret < 0)
3521 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3522 netdev->ifindex, netdev->link);
66a7c406 3523 goto clear_ifindices;
811ef482
CB
3524 }
3525
3526 ret = netdev_deconf[netdev->type](handler, netdev);
3527 if (ret < 0)
3528 WARN("Failed to deconfigure network device");
3529
811ef482 3530 if (netdev->type != LXC_NET_VETH)
66a7c406 3531 goto clear_ifindices;
811ef482 3532
811ef482
CB
3533 /* Explicitly delete host veth device to prevent lingering
3534 * devices. We had issues in LXD around this.
3535 */
f2711167 3536 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3537 hostveth = netdev->priv.veth_attr.pair;
3538 else
3539 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3540 if (is_empty_string(hostveth))
66a7c406 3541 goto clear_ifindices;
811ef482 3542
1ee56cff
CB
3543 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3544 ret = lxc_netdev_delete_by_name(hostveth);
3545 if (ret < 0)
3546 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3547
1ee56cff
CB
3548 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3549 } else if (!is_empty_string(netdev->link)) {
3550 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3551 if (ret < 0)
3552 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3553
1ee56cff
CB
3554 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3555 }
811ef482 3556
66a7c406 3557clear_ifindices:
ad2ddfcd 3558 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3559 * have cached stale data which would cause it to fail on reboot
3560 * we're we don't re-read the on-disk config file.
3561 */
3562 netdev->ifindex = 0;
3563 if (netdev->type == LXC_NET_PHYS) {
3564 netdev->priv.phys_attr.ifindex = 0;
3565 } else if (netdev->type == LXC_NET_VETH) {
3566 netdev->priv.veth_attr.veth1[0] = '\0';
3567 netdev->priv.veth_attr.ifindex = 0;
3568 }
811ef482
CB
3569 }
3570
bb84beda 3571 return true;
811ef482
CB
3572}
3573
3574int lxc_requests_empty_network(struct lxc_handler *handler)
3575{
3576 struct lxc_list *network = &handler->conf->network;
3577 struct lxc_list *iterator;
3578 bool found_none = false, found_nic = false;
3579
3580 if (lxc_list_empty(network))
3581 return 0;
3582
9c66dc4f 3583 lxc_list_for_each (iterator, network) {
811ef482
CB
3584 struct lxc_netdev *netdev = iterator->elem;
3585
3586 if (netdev->type == LXC_NET_NONE)
3587 found_none = true;
3588 else
3589 found_nic = true;
3590 }
9c66dc4f 3591
811ef482
CB
3592 if (found_none && !found_nic)
3593 return 1;
9c66dc4f 3594
811ef482
CB
3595 return 0;
3596}
3597
3598/* try to move physical nics to the init netns */
b809f232 3599int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3600{
9c66dc4f
CB
3601 __do_close int oldfd = -EBADF;
3602 int netnsfd = handler->nsfd[LXC_NS_NET];
3603 struct lxc_conf *conf = handler->conf;
811ef482 3604 int ret;
811ef482 3605 char ifname[IFNAMSIZ];
b809f232 3606 struct lxc_list *iterator;
811ef482 3607
04213960
TA
3608 /*
3609 * If we weren't asked to clone a new network namespace, there's
3610 * nothing to restore.
3611 */
3612 if (!(handler->ns_clone_flags & CLONE_NEWNET))
3613 return 0;
3614
b809f232
CB
3615 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3616 * the parent network namespace. We won't have this capability if we are
3617 * unprivileged.
3618 */
d0fbc7ba 3619 if (!handler->am_root)
b809f232 3620 return 0;
811ef482 3621
b809f232 3622 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3623
0037ab49 3624 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3625 if (oldfd < 0)
3626 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3627
b809f232 3628 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3629 if (ret < 0)
3630 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3631
b809f232
CB
3632 lxc_list_for_each(iterator, &conf->network) {
3633 struct lxc_netdev *netdev = iterator->elem;
811ef482 3634
b809f232
CB
3635 if (netdev->type != LXC_NET_PHYS)
3636 continue;
3637
3638 /* Retrieve the name of the interface in the container's network
3639 * namespace.
3640 */
3641 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3642 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3643 continue;
3644 }
b809f232 3645
0037ab49 3646 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3647 if (ret < 0)
9c66dc4f 3648 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3649 else
9c66dc4f 3650 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3651 }
811ef482 3652
b809f232 3653 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3654 if (ret < 0)
3655 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3656
3657 return 0;
811ef482
CB
3658}
3659
3660static int setup_hw_addr(char *hwaddr, const char *ifname)
3661{
9c66dc4f 3662 __do_close int fd = -EBADF;
811ef482
CB
3663 struct sockaddr sockaddr;
3664 struct ifreq ifr;
9c66dc4f 3665 int ret;
811ef482
CB
3666
3667 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3668 if (ret)
3669 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3670
3671 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3672 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3673 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3674
ad9429e5 3675 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3676 if (fd < 0)
3677 return -1;
3678
3679 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3680 if (ret)
6d1400b5 3681 SYSERROR("Failed to perform ioctl");
3682
9c66dc4f 3683 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3684
3685 return ret;
3686}
3687
3688static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3689{
3690 struct lxc_list *iterator;
3691 int err;
3692
3693 lxc_list_for_each(iterator, ip) {
3694 struct lxc_inetdev *inetdev = iterator->elem;
3695
3696 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3697 &inetdev->bcast, inetdev->prefix);
9c66dc4f
CB
3698 if (err)
3699 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3700 }
3701
3702 return 0;
3703}
3704
3705static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3706{
3707 struct lxc_list *iterator;
3708 int err;
3709
3710 lxc_list_for_each(iterator, ip) {
3711 struct lxc_inet6dev *inet6dev = iterator->elem;
3712
3713 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3714 &inet6dev->mcast, &inet6dev->acast,
3715 inet6dev->prefix);
9c66dc4f
CB
3716 if (err)
3717 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3718 }
3719
3720 return 0;
3721}
3722
8bf64b77 3723static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3724{
811ef482 3725 int err;
009d6127 3726 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3727
3728 /* empty network namespace */
8bf64b77
CB
3729 if (!netdev->ifindex && netdev->flags & IFF_UP) {
3730 err = lxc_netdev_up("lo");
9c66dc4f
CB
3731 if (err)
3732 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3733 }
3734
811ef482 3735 /* set a mac address */
9c66dc4f
CB
3736 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3737 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3738
3739 /* setup ipv4 addresses on the interface */
9c66dc4f
CB
3740 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex))
3741 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3742
3743 /* setup ipv6 addresses on the interface */
9c66dc4f
CB
3744 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex))
3745 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3746
3747 /* set the network device up */
3748 if (netdev->flags & IFF_UP) {
8bf64b77 3749 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3750 if (err)
3751 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3752
3753 /* the network is up, make the loopback up too */
3754 err = lxc_netdev_up("lo");
9c66dc4f
CB
3755 if (err)
3756 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3757 }
3758
811ef482 3759 /* setup ipv4 gateway on the interface */
a2f9a670 3760 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3761 if (!(netdev->flags & IFF_UP))
3762 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3763
9c66dc4f
CB
3764 if (lxc_list_empty(&netdev->ipv4))
3765 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3766
a2f9a670 3767 /* Setup device route if ipv4_gateway_dev is enabled */
3768 if (netdev->ipv4_gateway_dev) {
3769 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3770 if (err < 0)
3771 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3772 } else {
009d6127 3773 /* Check the gateway address is valid */
3774 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3775 return ret_set_errno(-1, errno);
009d6127 3776
3777 /* Try adding a default route to the gateway address */
811ef482 3778 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3779 if (err < 0) {
3780 /* If adding the default route fails, this could be because the
3781 * gateway address is in a different subnet to the container's address.
3782 * To work around this, we try adding a static device route to the
3783 * gateway address first, and then try again.
3784 */
a2f9a670 3785 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3786 if (err < 0)
3787 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3788
a2f9a670 3789 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3790 if (err < 0)
3791 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3792 }
3793 }
3794 }
3795
3796 /* setup ipv6 gateway on the interface */
a2f9a670 3797 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3798 if (!(netdev->flags & IFF_UP))
3799 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3800
9c66dc4f
CB
3801 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
3802 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3803
a2f9a670 3804 /* Setup device route if ipv6_gateway_dev is enabled */
3805 if (netdev->ipv6_gateway_dev) {
3806 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3807 if (err < 0)
3808 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3809 } else {
009d6127 3810 /* Check the gateway address is valid */
3811 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3812 return ret_set_errno(-1, errno);
009d6127 3813
3814 /* Try adding a default route to the gateway address */
811ef482 3815 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3816 if (err < 0) {
3817 /* If adding the default route fails, this could be because the
3818 * gateway address is in a different subnet to the container's address.
3819 * To work around this, we try adding a static device route to the
3820 * gateway address first, and then try again.
3821 */
a2f9a670 3822 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3823 if (err < 0)
3824 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3825
a2f9a670 3826 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3827 if (err < 0)
3828 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3829 }
3830 }
3831 }
3832
8bf64b77 3833 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3834
3835 return 0;
3836}
3837
3838int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3839 struct lxc_list *network)
3840{
3841 struct lxc_list *iterator;
811ef482 3842
8bf64b77 3843 lxc_list_for_each (iterator, network) {
e389f2af 3844 struct lxc_netdev *netdev = iterator->elem;
8bf64b77 3845 int ret;
811ef482 3846
8bf64b77
CB
3847 ret = netdev_ns_conf[netdev->type](netdev);
3848 if (!ret)
3849 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
3850 if (ret)
3851 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482
CB
3852 }
3853
3854 if (!lxc_list_empty(network))
e389f2af 3855 INFO("Network has been setup");
811ef482
CB
3856
3857 return 0;
3858}
7ab1ba02 3859
3c09b97c 3860int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
3861{
3862 struct lxc_list *iterator;
3863 struct lxc_list *network = &handler->conf->network;
3864 int data_sock = handler->data_sock[0];
3865
7ab1ba02
CB
3866 lxc_list_for_each(iterator, network) {
3867 int ret;
3868 struct lxc_netdev *netdev = iterator->elem;
3869
3c09b97c 3870 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3871 continue;
3872
7fbb15ec 3873 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3874 if (ret < 0)
7ab1ba02 3875 return -1;
e389f2af
CB
3876
3877 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3878 if (ret < 0)
3879 return -1;
3880
3881 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3882 }
3883
3884 return 0;
3885}
3886
3c09b97c 3887int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
3888{
3889 struct lxc_list *iterator;
3890 struct lxc_list *network = &handler->conf->network;
3891 int data_sock = handler->data_sock[1];
3892
7ab1ba02
CB
3893 lxc_list_for_each(iterator, network) {
3894 int ret;
3895 struct lxc_netdev *netdev = iterator->elem;
3896
3c09b97c 3897 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3898 continue;
3899
e3233f26 3900 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3901 if (ret < 0)
7ab1ba02 3902 return -1;
e389f2af
CB
3903
3904 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3905 if (ret < 0)
3906 return -1;
54256301 3907
e389f2af 3908 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3909 }
3910
3911 return 0;
3912}
a1ae535a
CB
3913
3914int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3915{
3916 struct lxc_list *iterator, *network;
3917 int data_sock = handler->data_sock[0];
3918
3919 if (!handler->am_root)
3920 return 0;
3921
3922 network = &handler->conf->network;
3923 lxc_list_for_each(iterator, network) {
3924 int ret;
3925 struct lxc_netdev *netdev = iterator->elem;
3926
3927 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3928 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3929 if (ret < 0)
7729f8e5 3930 return -1;
a1ae535a
CB
3931
3932 /* Send network device ifindex in the child's namespace to
3933 * parent.
3934 */
7fbb15ec 3935 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3936 if (ret < 0)
7729f8e5 3937 return -1;
a1ae535a
CB
3938 }
3939
e389f2af
CB
3940 if (!lxc_list_empty(network))
3941 TRACE("Sent network device names and ifindices to parent");
3942
a1ae535a 3943 return 0;
a1ae535a
CB
3944}
3945
3946int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3947{
3948 struct lxc_list *iterator, *network;
3949 int data_sock = handler->data_sock[1];
3950
3951 if (!handler->am_root)
3952 return 0;
3953
3954 network = &handler->conf->network;
3955 lxc_list_for_each(iterator, network) {
3956 int ret;
3957 struct lxc_netdev *netdev = iterator->elem;
3958
3959 /* Receive network device name in the child's namespace to
3960 * parent.
3961 */
e3233f26 3962 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3963 if (ret < 0)
7729f8e5 3964 return -1;
a1ae535a
CB
3965
3966 /* Receive network device ifindex in the child's namespace to
3967 * parent.
3968 */
e3233f26 3969 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3970 if (ret < 0)
7729f8e5 3971 return -1;
a1ae535a
CB
3972 }
3973
3974 return 0;
a1ae535a 3975}
bb84beda
CB
3976
3977void lxc_delete_network(struct lxc_handler *handler)
3978{
3979 bool bret;
3980
3981 if (handler->am_root)
3982 bret = lxc_delete_network_priv(handler);
3983 else
3984 bret = lxc_delete_network_unpriv(handler);
3985 if (!bret)
3986 DEBUG("Failed to delete network devices");
3987 else
3988 DEBUG("Deleted network devices");
3989}
1cd95214 3990
1cd95214
CB
3991int lxc_netns_set_nsid(int fd)
3992{
41a3300d 3993 int ret;
0ce60f0d
CB
3994 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3995 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3996 NLMSG_ALIGN(1024)];
1cd95214 3997 struct nl_handler nlh;
a5f5cb41 3998 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
0ce60f0d
CB
3999 struct nlmsghdr *hdr;
4000 struct rtgenmsg *msg;
9d036caa
CB
4001 const __s32 ns_id = -1;
4002 const __u32 netns_fd = fd;
1cd95214 4003
a5f5cb41 4004 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
1cd95214 4005 if (ret < 0)
41a3300d 4006 return -1;
1cd95214 4007
0ce60f0d 4008 memset(buf, 0, sizeof(buf));
6ce39620
CB
4009
4010#pragma GCC diagnostic push
4011#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
4012 hdr = (struct nlmsghdr *)buf;
4013 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4014#pragma GCC diagnostic pop
1cd95214 4015
0ce60f0d
CB
4016 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4017 hdr->nlmsg_type = RTM_NEWNSID;
4018 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4019 hdr->nlmsg_pid = 0;
4020 hdr->nlmsg_seq = RTM_NEWNSID;
4021 msg->rtgen_family = AF_UNSPEC;
1cd95214 4022
9d036caa
CB
4023 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4024 if (ret < 0)
a5f5cb41 4025 return ret_errno(ENOMEM);
9d036caa
CB
4026
4027 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4028 if (ret < 0)
a5f5cb41 4029 return ret_errno(ENOMEM);
1cd95214 4030
a5f5cb41 4031 return __netlink_transaction(nlh_ptr, hdr, hdr);
1cd95214 4032}
938980ba
CB
4033
4034static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
4035{
4036
4037 memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
4038
4039 while (RTA_OK(rta, len)) {
4040 unsigned short type = rta->rta_type;
4041
4042 if ((type <= max) && (!tb[type]))
4043 tb[type] = rta;
4044
6ce39620
CB
4045#pragma GCC diagnostic push
4046#pragma GCC diagnostic ignored "-Wcast-align"
938980ba 4047 rta = RTA_NEXT(rta, len);
6ce39620 4048#pragma GCC diagnostic pop
938980ba
CB
4049 }
4050
4051 return 0;
4052}
4053
4054static inline __s32 rta_getattr_s32(const struct rtattr *rta)
4055{
4056 return *(__s32 *)RTA_DATA(rta);
4057}
4058
/* Address of the first routing attribute following the rtgenmsg at @r. */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
4063
4064int lxc_netns_get_nsid(int fd)
4065{
a5f5cb41
CB
4066 struct nl_handler nlh;
4067 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
938980ba
CB
4068 int ret;
4069 ssize_t len;
4070 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
4071 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4072 NLMSG_ALIGN(1024)];
938980ba 4073 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
938980ba
CB
4074 struct nlmsghdr *hdr;
4075 struct rtgenmsg *msg;
938980ba
CB
4076 __u32 netns_fd = fd;
4077
a5f5cb41 4078 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
938980ba
CB
4079 if (ret < 0)
4080 return -1;
4081
4082 memset(buf, 0, sizeof(buf));
6ce39620
CB
4083
4084#pragma GCC diagnostic push
4085#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4086 hdr = (struct nlmsghdr *)buf;
4087 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4088#pragma GCC diagnostic pop
938980ba
CB
4089
4090 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4091 hdr->nlmsg_type = RTM_GETNSID;
4092 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4093 hdr->nlmsg_pid = 0;
4094 hdr->nlmsg_seq = RTM_GETNSID;
4095 msg->rtgen_family = AF_UNSPEC;
4096
9d036caa 4097 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
a5f5cb41
CB
4098 if (ret < 0)
4099 return ret_errno(ENOMEM);
938980ba 4100
a5f5cb41 4101 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
938980ba
CB
4102 if (ret < 0)
4103 return -1;
4104
4105 msg = NLMSG_DATA(hdr);
4106 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4107 if (len < 0)
a5f5cb41 4108 return ret_errno(EINVAL);
938980ba 4109
6ce39620
CB
4110#pragma GCC diagnostic push
4111#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4112 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4113 if (tb[__LXC_NETNSA_NSID])
4114 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4115#pragma GCC diagnostic pop
938980ba
CB
4116
4117 return -1;
4118}
e389f2af
CB
4119
4120int lxc_create_network(struct lxc_handler *handler)
4121{
4122 int ret;
4123
e389f2af
CB
4124 if (handler->am_root) {
4125 ret = lxc_create_network_priv(handler);
4126 if (ret)
4127 return -1;
4128
4129 return lxc_network_move_created_netdev_priv(handler);
4130 }
4131
4132 return lxc_create_network_unpriv(handler);
4133}