]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: Adds free_ovs_veth_vlan_args and allows trunks field to be freed
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
f40988c7 39#include "process_utils.h"
59524108 40#include "syscall_wrappers.h"
0d204771 41#include "utils.h"
0ad19a3f 42
9de31d5a
CB
43#ifndef HAVE_STRLCPY
44#include "include/strlcpy.h"
45#endif
46
/* Declare this file's log category via the project's lxc_log_define macro. */
lxc_log_define(network, lxc);

/* Callback that instantiates a network device given the handler and netdev. */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
/* Callback variant that only receives the netdev. */
typedef int (*instantiate_ns_cb)(struct lxc_netdev *);
/* Interface name "lo". */
static const char loop_device[] = "lo";
811ef482 52
b670016a 53static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 54{
d16bda44 55 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
8f82874c 56 struct nl_handler nlh;
d16bda44
CB
57 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
58 int addrlen, err;
8f82874c 59 struct rtmsg *rt;
8f82874c 60
61 addrlen = family == AF_INET ? sizeof(struct in_addr)
62 : sizeof(struct in6_addr);
63
d16bda44 64 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 65 if (err)
66 return err;
67
8f82874c 68 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
69 if (!nlmsg)
d16bda44 70 return -ENOMEM;
8f82874c 71
72 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
73 if (!answer)
a5f5cb41 74 return -ENOMEM;
8f82874c 75
76 nlmsg->nlmsghdr->nlmsg_flags =
77 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 78 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 79
80 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
81 if (!rt)
a5f5cb41 82 return -ENOMEM;
d16bda44 83
8f82874c 84 rt->rtm_family = family;
85 rt->rtm_table = RT_TABLE_MAIN;
86 rt->rtm_scope = RT_SCOPE_LINK;
87 rt->rtm_protocol = RTPROT_BOOT;
88 rt->rtm_type = RTN_UNICAST;
89 rt->rtm_dst_len = netmask;
90
8f82874c 91 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
92 return -EINVAL;
93
8f82874c 94 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
95 return -EINVAL;
96
97 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 98}
99
/* Add an IPv4 route for @dest/@netmask on the device @ifindex (RTM_NEWROUTE). */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
104
/* Add an IPv6 route for @dest/@netmask on the device @ifindex (RTM_NEWROUTE). */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
109
/* Remove the IPv4 route for @dest/@netmask on the device @ifindex (RTM_DELROUTE). */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
114
/* Remove the IPv6 route for @dest/@netmask on the device @ifindex (RTM_DELROUTE). */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
119
d4a7da46 120static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
121{
122 struct lxc_list *iterator;
123 int err;
124
125 lxc_list_for_each(iterator, ip) {
126 struct lxc_inetdev *inetdev = iterator->elem;
127
128 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
129 if (err)
130 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 131 }
132
133 return 0;
134}
135
136static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
137{
138 struct lxc_list *iterator;
139 int err;
140
141 lxc_list_for_each(iterator, ip) {
142 struct lxc_inet6dev *inet6dev = iterator->elem;
143
144 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
145 if (err)
146 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 147 }
148
149 return 0;
150}
151
6dfa9581
TP
152static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
153{
154 struct lxc_list *iterator;
155 int err;
156
157 lxc_list_for_each(iterator, ip) {
158 struct lxc_inetdev *inetdev = iterator->elem;
159
160 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
161
162 if (err)
9c66dc4f 163 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
164 }
165
166 return 0;
167}
168
169static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
170{
171 struct lxc_list *iterator;
172 int err;
173
174 lxc_list_for_each(iterator, ip) {
175 struct lxc_inet6dev *inet6dev = iterator->elem;
176
177 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
178 if (err)
9c66dc4f 179 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
180 }
181
182 return 0;
183}
184
5fe147e9 185static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 186{
d16bda44 187 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
5fe147e9 188 struct nl_handler nlh;
d16bda44
CB
189 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
190 int addrlen, err;
5fe147e9 191 struct ndmsg *rt;
6dfa9581 192
5fe147e9 193 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 194
d16bda44 195 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
196 if (err)
197 return err;
6dfa9581 198
5fe147e9
TP
199 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
200 if (!nlmsg)
d16bda44 201 return -ENOMEM;
6dfa9581 202
5fe147e9
TP
203 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
204 if (!answer)
d16bda44 205 return -ENOMEM;
6dfa9581 206
5fe147e9
TP
207 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
208 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 209
5fe147e9
TP
210 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
211 if (!rt)
d16bda44
CB
212 return -ENOMEM;
213
5fe147e9
TP
214 rt->ndm_ifindex = ifindex;
215 rt->ndm_flags = NTF_PROXY;
216 rt->ndm_type = NDA_DST;
217 rt->ndm_family = family;
6dfa9581 218
5fe147e9 219 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 220 return -EINVAL;
6dfa9581 221
d16bda44 222 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
223}
224
225static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
226{
227 int ret;
228 char path[PATH_MAX];
229 char buf[1] = "";
230
231 if (family != AF_INET && family != AF_INET6)
596a002c 232 return ret_set_errno(-1, EINVAL);
6dfa9581 233
9c66dc4f 234 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581
TP
235 family == AF_INET ? "ipv4" : "ipv6", ifname,
236 "forwarding");
9c66dc4f 237 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 238 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
239
240 return lxc_read_file_expect(path, buf, 1, "1");
241}
242
622f05c7
TP
/* Payload sent as the IFLA_BRIDGE_VLAN_INFO attribute by lxc_bridge_vlan(). */
struct bridge_vlan_info {
	__u16 flags; /* BRIDGE_VLAN_INFO_* bits, or 0. */
	__u16 vid;   /* VLAN ID. */
};
247
248static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
249{
250 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
251 struct nl_handler nlh;
252 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
253 int err;
254 struct ifinfomsg *ifi;
255 struct rtattr *nest;
256 unsigned short bridge_flags = 0;
257 struct bridge_vlan_info vlan_info;
258
259 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
260 if (err)
261 return err;
262
263 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
264 if (!nlmsg)
265 return ret_errno(ENOMEM);
266
267 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
268 if (!answer)
269 return ret_errno(ENOMEM);
270
271 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
272 nlmsg->nlmsghdr->nlmsg_type = operation;
273
274 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
275 if (!ifi)
276 return ret_errno(ENOMEM);
277 ifi->ifi_family = AF_BRIDGE;
278 ifi->ifi_index = ifindex;
279
280 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
281 if (!nest)
282 return ret_errno(ENOMEM);
283
284 bridge_flags |= BRIDGE_FLAGS_MASTER;
285 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
286 return ret_errno(ENOMEM);
287
288 vlan_info.vid = vlan_id;
289 vlan_info.flags = 0;
290 if (!tagged)
291 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
292
293 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
294 return ret_errno(ENOMEM);
295
296 nla_end_nested(nlmsg, nest);
297
298 return netlink_transaction(nlh_ptr, nlmsg, answer);
299}
300
/* Add @vlan_id to the bridge port @ifindex (RTM_SETLINK); see lxc_bridge_vlan() for @tagged. */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	const unsigned short operation = RTM_SETLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, tagged);
}
305
/* Remove @vlan_id from the bridge port @ifindex (RTM_DELLINK). */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	const unsigned short operation = RTM_DELLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, false);
}
310
311static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
312{
313 struct lxc_list *iterator;
314 int err;
315
316 lxc_list_for_each(iterator, vlan_ids) {
317 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
318
319 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
320 if (err)
321 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
322 }
323
324 return 0;
325}
326
33320936
TP
327static int validate_veth(struct lxc_netdev *netdev)
328{
329 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
330 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
331 if (netdev->priv.veth_attr.vlan_id_set)
332 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
333
334 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
335 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
336 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
337 }
338
339 if (netdev->priv.veth_attr.vlan_id_set) {
340 struct lxc_list *it;
341 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
342 unsigned short i = PTR_TO_USHORT(it->elem);
343 if (i == netdev->priv.veth_attr.vlan_id)
344 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
345 }
346 }
347
348 return 0;
349}
350
351static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
352{
353 int err, rc, veth1index;
354 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
355 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
356
357 /* Skip setup if no VLAN options are specified. */
358 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
359 return 0;
360
361 /* Check vlan filtering is enabled on parent bridge. */
362 rc = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
363 if (rc < 0 || (size_t)rc >= sizeof(path))
364 return -1;
365
366 rc = lxc_read_from_file(path, buf, sizeof(buf));
367 if (rc < 0)
368 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
369
370 buf[rc - 1] = '\0';
371
372 if (strcmp(buf, "1") != 0)
373 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
374
375 /* Get veth1 ifindex for use with netlink. */
376 veth1index = if_nametoindex(veth1);
377 if (!veth1index)
378 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
379
380 /* Configure untagged VLAN settings on bridge port if specified. */
381 if (netdev->priv.veth_attr.vlan_id_set) {
382 unsigned short default_pvid;
383
384 /* Get the bridge's default VLAN PVID. */
385 rc = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
386 if (rc < 0 || (size_t)rc >= sizeof(path))
387 return -1;
388
389 rc = lxc_read_from_file(path, buf, sizeof(buf));
390 if (rc < 0)
391 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
392
393 buf[rc - 1] = '\0';
394 err = get_u16(&default_pvid, buf, 0);
395 if (err)
396 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
397
398 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
399 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
400 err = lxc_bridge_vlan_del(veth1index, default_pvid);
401 if (err)
402 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
403 }
404
405 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
406 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
407 if (err)
408 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
409 }
410 }
411
412 /* Configure tagged VLAN settings on bridge port if specified. */
413 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
414 if (err)
415 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
416
417 return 0;
418}
419
8f7c3358
TP
/* Arguments handed to lxc_ovs_setup_bridge_vlan_exec() through its void pointer. */
struct ovs_veth_vlan_args {
	const char *nic;       /* Name of the port to configure. */
	const char *vlan_mode; /* Value for the port's vlan_mode. */
	short vlan_id;         /* PVID VLAN ID, sent as "tag=". */
	char *trunks;          /* Comma separated tagged VLAN IDs, sent as "trunks=". */
};
426
d2f8b272
TP
427static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args *args)
428{
429 free_disarm(args->trunks);
430}
8f7c3358
TP
431
432static int lxc_ovs_setup_bridge_vlan_exec(void *data)
433{
434 struct ovs_veth_vlan_args *args = data;
785e1540
TP
435 __do_free char *vlan_mode = NULL, *tag = NULL, *trunks = NULL;
436
437 if (!args->vlan_mode)
438 return ret_errno(EINVAL);
8f7c3358
TP
439
440 vlan_mode = must_concat(NULL, "vlan_mode=", args->vlan_mode, (char *)NULL);
441
785e1540 442 if (args->vlan_id > BRIDGE_VLAN_NONE) {
8f7c3358
TP
443 char buf[5];
444 int rc;
445
446 rc = snprintf(buf, sizeof(buf), "%u", args->vlan_id);
447 if (rc < 0 || (size_t)rc >= sizeof(buf))
448 return log_error_errno(-1, EINVAL, "Failed to parse ovs bridge vlan \"%u\"", args->vlan_id);
449
450 tag = must_concat(NULL, "tag=", buf, (char *)NULL);
451 }
452
453
785e1540 454 if (args->trunks)
8f7c3358
TP
455 trunks = must_concat(NULL, "trunks=", args->trunks, (char *)NULL);
456
457 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
785e1540 458 if (tag && trunks)
8f7c3358 459 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, trunks, (char *)NULL);
785e1540 460 else if (tag)
8f7c3358 461 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, (char *)NULL);
785e1540 462 else if (trunks)
8f7c3358
TP
463 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, trunks, (char *)NULL);
464 else
465 return -EINVAL;
466
467 return -errno;
468}
469
470static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
471{
472 int taggedLength = lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids);
473 struct ovs_veth_vlan_args args;
474 args.nic = veth1;
1ee07848
TP
475 args.vlan_mode = NULL;
476 args.vlan_id = BRIDGE_VLAN_NONE;
477 args.trunks = NULL;
8f7c3358
TP
478
479 /* Skip setup if no VLAN options are specified. */
480 if (!netdev->priv.veth_attr.vlan_id_set && taggedLength <= 0)
481 return 0;
482
483 /* Configure untagged VLAN settings on bridge port if specified. */
484 if (netdev->priv.veth_attr.vlan_id_set) {
485 if (netdev->priv.veth_attr.vlan_id == BRIDGE_VLAN_NONE && taggedLength <= 0)
486 return log_error_errno(-1, EINVAL, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
487
488 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
489 * Also set the vlan_mode=access, which will drop any tagged frames.
490 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
491 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
492 */
493 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
494 args.vlan_mode = "access";
495 args.vlan_id = netdev->priv.veth_attr.vlan_id;
496 }
497 }
498
499 if (taggedLength > 0) {
500 args.vlan_mode = "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
501
502 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
503 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
504 args.vlan_mode = "native-untagged";
505 }
506
507 struct lxc_list *iterator;
508 lxc_list_for_each(iterator, &netdev->priv.veth_attr.vlan_tagged_ids) {
509 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
510 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
511 int rc;
512
513 rc = snprintf(buf, sizeof(buf), "%u", vlan_id);
514 if (rc < 0 || (size_t)rc >= sizeof(buf))
515 return log_error_errno(-1, EINVAL, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id, veth1);
516
1ee07848
TP
517 if (args.trunks)
518 args.trunks = must_concat(NULL, args.trunks, buf, ",", (char *)NULL);
519 else
520 args.trunks = must_concat(NULL, buf, ",", (char *)NULL);
8f7c3358
TP
521 }
522 }
523
1ee07848 524 if (args.vlan_mode) {
8f7c3358
TP
525 int ret;
526 char cmd_output[PATH_MAX];
527
528 ret = run_command(cmd_output, sizeof(cmd_output), lxc_ovs_setup_bridge_vlan_exec, (void *)&args);
529 if (ret < 0)
530 return log_error_errno(-1, ret, "Failed to setup openvswitch vlan on port \"%s\": %s", args.nic, cmd_output);
531 }
532
533 return 0;
534}
535
811ef482
CB
536static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
537{
54256301 538 int err;
a00fbab5 539 unsigned int mtu = 1500;
811ef482
CB
540 char *veth1, *veth2;
541 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 542
33320936
TP
543 err = validate_veth(netdev);
544 if (err)
545 return err;
546
f2711167 547 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
548 veth1 = netdev->priv.veth_attr.pair;
549 if (handler->conf->reboot)
550 lxc_netdev_delete_by_name(veth1);
551 } else {
552 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
553 if (err < 0 || (size_t)err >= sizeof(veth1buf))
554 return -1;
555
3646ffd9 556 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
811ef482
CB
557 if (!veth1)
558 return -1;
559
560 /* store away for deconf */
561 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
562 }
563
d34212ad
CB
564 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
565 if (err < 0 || (size_t)err >= sizeof(veth2buf))
566 return -1;
567
3646ffd9 568 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
811ef482 569 if (!veth2)
54256301
CB
570 return -1;
571
a00fbab5
TP
572 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
573 if (netdev->mtu) {
574 if (lxc_safe_uint(netdev->mtu, &mtu))
575 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 576 } else if (!is_empty_string(netdev->link)) {
54256301 577 int ifindex_mtu;
811ef482 578
54256301
CB
579 ifindex_mtu = if_nametoindex(netdev->link);
580 if (ifindex_mtu) {
581 mtu = netdev_get_mtu(ifindex_mtu);
582 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
583 }
584 }
585
586 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
9c66dc4f
CB
587 if (err)
588 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 589
24190194
CB
590 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
591
811ef482
CB
592 /* changing the high byte of the mac address to 0xfe, the bridge interface
593 * will always keep the host's mac address and not take the mac address
594 * of a container */
595 err = setup_private_host_hw_addr(veth1);
596 if (err) {
6d1400b5 597 errno = -err;
598 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
599 goto out_delete;
600 }
601
8da62485
CB
602 /* Retrieve ifindex of the host's veth device. */
603 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
604 if (!netdev->priv.veth_attr.ifindex) {
605 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
606 goto out_delete;
607 }
608
811ef482
CB
609 if (mtu) {
610 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 611 if (err) {
6d1400b5 612 errno = -err;
54256301 613 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
614 goto out_delete;
615 }
616 }
617
f2711167 618 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
26da53c3
TP
619 if (!lxc_nic_exists(netdev->link)) {
620 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
621 goto out_delete;
622 }
623
811ef482
CB
624 err = lxc_bridge_attach(netdev->link, veth1);
625 if (err) {
6d1400b5 626 errno = -err;
26da53c3 627 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
811ef482
CB
628 goto out_delete;
629 }
630 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
33320936 631
38790036
TP
632 if (is_ovs_bridge(netdev->link)) {
633 err = setup_veth_ovs_bridge_vlan(veth1, netdev);
634 if (err) {
635 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1);
636 lxc_ovs_delete_port(netdev->link, veth1);
637 goto out_delete;
638 }
639 } else {
33320936
TP
640 err = setup_veth_native_bridge_vlan(veth1, netdev);
641 if (err) {
642 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
643 goto out_delete;
644 }
645 }
811ef482
CB
646 }
647
648 err = lxc_netdev_up(veth1);
649 if (err) {
6d1400b5 650 errno = -err;
651 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
652 goto out_delete;
653 }
654
d4a7da46 655 /* setup ipv4 routes on the host interface */
656 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
657 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
658 goto out_delete;
659 }
660
661 /* setup ipv6 routes on the host interface */
662 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
663 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
664 goto out_delete;
665 }
666
6dfa9581 667 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
668 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
669 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
670 appear to be added successfully but then do not appear in the proxy list. The length of time
671 slept doesn't appear to be important, only that the process sleeps for a short period of time.
672 */
673 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
674
6dfa9581
TP
675 if (netdev->ipv4_gateway) {
676 char bufinet4[INET_ADDRSTRLEN];
677 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 678 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
679 goto out_delete;
680 }
681
682 err = lxc_ip_forwarding_on(veth1, AF_INET);
683 if (err) {
9c66dc4f 684 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
685 goto out_delete;
686 }
687
5fe147e9 688 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 689 if (err) {
9c66dc4f 690 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
691 goto out_delete;
692 }
693 }
694
695 if (netdev->ipv6_gateway) {
696 char bufinet6[INET6_ADDRSTRLEN];
697
698 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 699 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
700 goto out_delete;
701 }
702
703 /* Check for sysctl net.ipv6.conf.all.forwarding=1
704 Kernel requires this to route any packets for IPv6.
705 */
706 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
707 if (err) {
9c66dc4f 708 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
709 goto out_delete;
710 }
711
712 err = lxc_ip_forwarding_on(veth1, AF_INET6);
713 if (err) {
9c66dc4f 714 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
715 goto out_delete;
716 }
717
718 err = lxc_neigh_proxy_on(veth1, AF_INET6);
719 if (err) {
9c66dc4f 720 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
721 goto out_delete;
722 }
723
5fe147e9 724 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 725 if (err) {
9c66dc4f 726 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
727 goto out_delete;
728 }
729 }
730
731 /* setup ipv4 address routes on the host interface */
732 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
733 if (err) {
9c66dc4f 734 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
735 goto out_delete;
736 }
737
738 /* setup ipv6 address routes on the host interface */
739 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
740 if (err) {
9c66dc4f 741 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
742 goto out_delete;
743 }
744 }
745
811ef482 746 if (netdev->upscript) {
14a7b0f9
CB
747 char *argv[] = {
748 "veth",
749 netdev->link,
990b9ac3 750 veth1,
14a7b0f9
CB
751 NULL,
752 };
753
754 err = run_script_argv(handler->name,
755 handler->conf->hooks_version, "net",
756 netdev->upscript, "up", argv);
757 if (err < 0)
811ef482
CB
758 goto out_delete;
759 }
760
54256301 761 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
762
763 return 0;
764
765out_delete:
54256301 766 lxc_netdev_delete_by_name(veth1);
811ef482
CB
767 return -1;
768}
769
770static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
771{
8021de25 772 char peer[IFNAMSIZ];
811ef482
CB
773 int err;
774
f2711167 775 if (is_empty_string(netdev->link)) {
811ef482
CB
776 ERROR("No link for macvlan network device specified");
777 return -1;
778 }
779
8021de25
CB
780 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
781 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
782 return -1;
783
3646ffd9 784 if (!lxc_ifname_alnum_case_sensitive(peer))
811ef482
CB
785 return -1;
786
787 err = lxc_macvlan_create(netdev->link, peer,
788 netdev->priv.macvlan_attr.mode);
789 if (err) {
6d1400b5 790 errno = -err;
791 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
792 peer, netdev->link);
966e9f1f 793 goto on_error;
811ef482
CB
794 }
795
9f8cf6e1
CB
796 strlcpy(netdev->created_name, peer, IFNAMSIZ);
797
811ef482
CB
798 netdev->ifindex = if_nametoindex(peer);
799 if (!netdev->ifindex) {
800 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 801 goto on_error;
811ef482
CB
802 }
803
3bef7b7b 804 if (netdev->mtu) {
54256301
CB
805 unsigned int mtu;
806
3bef7b7b
TP
807 err = lxc_safe_uint(netdev->mtu, &mtu);
808 if (err < 0) {
809 errno = -err;
810 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
811 goto on_error;
812 }
813
814 err = lxc_netdev_set_mtu(peer, mtu);
815 if (err < 0) {
816 errno = -err;
817 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
818 goto on_error;
819 }
820 }
821
811ef482 822 if (netdev->upscript) {
14a7b0f9
CB
823 char *argv[] = {
824 "macvlan",
825 netdev->link,
826 NULL,
827 };
828
829 err = run_script_argv(handler->name,
830 handler->conf->hooks_version, "net",
831 netdev->upscript, "up", argv);
832 if (err < 0)
966e9f1f 833 goto on_error;
811ef482
CB
834 }
835
836 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
837 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
838
839 return 0;
966e9f1f
CB
840
841on_error:
811ef482 842 lxc_netdev_delete_by_name(peer);
811ef482
CB
843 return -1;
844}
845
c9f52382 846static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
847{
d16bda44
CB
848 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
849 struct nl_handler nlh;
850 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 851 int err, index, len;
852 struct ifinfomsg *ifi;
c9f52382 853 struct rtattr *nest, *nest2;
c9f52382 854
855 len = strlen(master);
856 if (len == 1 || len >= IFNAMSIZ)
d16bda44 857 return ret_errno(EINVAL);
c9f52382 858
859 len = strlen(name);
860 if (len == 1 || len >= IFNAMSIZ)
d16bda44 861 return ret_errno(EINVAL);
c9f52382 862
863 index = if_nametoindex(master);
864 if (!index)
d16bda44 865 return ret_errno(EINVAL);
c9f52382 866
d16bda44 867 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 868 if (err)
df62850d 869 return err;
c9f52382 870
c9f52382 871 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
872 if (!nlmsg)
d16bda44 873 return ret_errno(ENOMEM);
c9f52382 874
875 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
876 if (!answer)
d16bda44 877 return ret_errno(ENOMEM);
c9f52382 878
879 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
880 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
881
882 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
883 if (!ifi)
884 return ret_errno(ENOMEM);
c9f52382 885 ifi->ifi_family = AF_UNSPEC;
886
c9f52382 887 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
888 if (!nest)
d16bda44 889 return ret_errno(EPROTO);
c9f52382 890
891 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 892 return ret_errno(EPROTO);
c9f52382 893
5755765e
KT
894 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
895 if (!nest2)
896 return ret_errno(EPROTO);
897
3a934e2e 898 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
5755765e
KT
899 return ret_errno(EPROTO);
900
cf88a827
TP
901 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
902 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
903 * according to ipvlan docs.
5755765e 904 */
cf88a827 905 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
5755765e 906 return ret_errno(EPROTO);
c9f52382 907
5755765e 908 nla_end_nested(nlmsg, nest2);
c9f52382 909 nla_end_nested(nlmsg, nest);
910
911 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 912 return ret_errno(EPROTO);
c9f52382 913
914 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
915 return ret_errno(EPROTO);
916
917 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 918}
919
920static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
921{
dd119206 922 char peer[IFNAMSIZ];
c9f52382 923 int err;
924
f2711167 925 if (is_empty_string(netdev->link)) {
c9f52382 926 ERROR("No link for ipvlan network device specified");
927 return -1;
928 }
929
dd119206
CB
930 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
931 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 932 return -1;
933
3646ffd9 934 if (!lxc_ifname_alnum_case_sensitive(peer))
c9f52382 935 return -1;
936
dd119206
CB
937 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
938 netdev->priv.ipvlan_attr.isolation);
c9f52382 939 if (err) {
dd119206
CB
940 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
941 peer, netdev->link);
c9f52382 942 goto on_error;
943 }
944
e7fdd504
CB
945 strlcpy(netdev->created_name, peer, IFNAMSIZ);
946
c9f52382 947 netdev->ifindex = if_nametoindex(peer);
948 if (!netdev->ifindex) {
949 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
950 goto on_error;
951 }
952
006e135e 953 if (netdev->mtu) {
54256301
CB
954 unsigned int mtu;
955
006e135e 956 err = lxc_safe_uint(netdev->mtu, &mtu);
957 if (err < 0) {
958 errno = -err;
54256301 959 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 960 goto on_error;
961 }
962
963 err = lxc_netdev_set_mtu(peer, mtu);
964 if (err < 0) {
965 errno = -err;
54256301 966 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 967 goto on_error;
968 }
969 }
970
c9f52382 971 if (netdev->upscript) {
972 char *argv[] = {
973 "ipvlan",
974 netdev->link,
975 NULL,
976 };
977
dd119206
CB
978 err = run_script_argv(handler->name, handler->conf->hooks_version,
979 "net", netdev->upscript, "up", argv);
c9f52382 980 if (err < 0)
981 goto on_error;
982 }
983
dd119206
CB
984 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
985 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 986
987 return 0;
988
989on_error:
990 lxc_netdev_delete_by_name(peer);
991 return -1;
992}
993
811ef482
CB
994static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
995{
996 char peer[IFNAMSIZ];
997 int err;
998 static uint16_t vlan_cntr = 0;
811ef482 999
f2711167 1000 if (is_empty_string(netdev->link)) {
811ef482
CB
1001 ERROR("No link for vlan network device specified");
1002 return -1;
1003 }
1004
d4d68410
CB
1005 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
1006 netdev->priv.vlan_attr.vid, vlan_cntr++);
811ef482
CB
1007 if (err < 0 || (size_t)err >= sizeof(peer))
1008 return -1;
1009
1010 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
1011 if (err) {
6d1400b5 1012 errno = -err;
1013 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1014 peer, netdev->link);
811ef482
CB
1015 return -1;
1016 }
1017
83530dba
CB
1018 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1019
811ef482
CB
1020 netdev->ifindex = if_nametoindex(peer);
1021 if (!netdev->ifindex) {
1022 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 1023 goto on_error;
1024 }
1025
1026 if (netdev->mtu) {
54256301
CB
1027 unsigned int mtu;
1028
3e2a7b08 1029 err = lxc_safe_uint(netdev->mtu, &mtu);
1030 if (err < 0) {
1031 errno = -err;
54256301 1032 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1033 goto on_error;
1034 }
1035
1036 err = lxc_netdev_set_mtu(peer, mtu);
54256301 1037 if (err < 0) {
3e2a7b08 1038 errno = -err;
54256301 1039 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1040 goto on_error;
1041 }
811ef482
CB
1042 }
1043
3a73d9f1 1044 if (netdev->upscript) {
1045 char *argv[] = {
1046 "vlan",
1047 netdev->link,
1048 NULL,
1049 };
1050
d4d68410
CB
1051 err = run_script_argv(handler->name, handler->conf->hooks_version,
1052 "net", netdev->upscript, "up", argv);
19abca58 1053 if (err < 0) {
3e2a7b08 1054 goto on_error;
19abca58 1055 }
3a73d9f1 1056 }
1057
d4d68410
CB
1058 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
1059 netdev->ifindex);
811ef482
CB
1060
1061 return 0;
3e2a7b08 1062
1063on_error:
1064 lxc_netdev_delete_by_name(peer);
1065 return -1;
811ef482
CB
1066}
1067
1068static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1069{
0b154989 1070 int err, mtu_orig = 0;
14a7b0f9 1071
9c66dc4f
CB
1072 if (is_empty_string(netdev->link))
1073 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 1074
75b074ee
CB
1075 /*
1076 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
1077 * network namespace because we need it to move the device from the
1078 * host's network namespace to the container's network namespace later
1079 * on.
1080 * Note that netdev->link will contain the name of the physical network
1081 * device in the host's namespace.
1082 */
811ef482 1083 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
1084 if (!netdev->ifindex)
1085 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 1086
61302ef7 1087 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 1088 if (is_empty_string(netdev->name))
8bf64b77 1089 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 1090
75b074ee
CB
1091 /*
1092 * Store the ifindex of the host's network device in the host's
790255cf
CB
1093 * namespace.
1094 */
1095 netdev->priv.phys_attr.ifindex = netdev->ifindex;
1096
75b074ee
CB
1097 /*
1098 * Get original device MTU setting and store for restoration after
1099 * container shutdown.
1100 */
0b154989 1101 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
1102 if (mtu_orig < 0)
1103 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
1104
1105 netdev->priv.phys_attr.mtu = mtu_orig;
1106
3bef7b7b 1107 if (netdev->mtu) {
54256301
CB
1108 unsigned int mtu;
1109
3bef7b7b 1110 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
1111 if (err < 0)
1112 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 1113
3bef7b7b 1114 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
1115 if (err < 0)
1116 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
1117 }
1118
1119 if (netdev->upscript) {
1120 char *argv[] = {
1121 "phys",
1122 netdev->link,
1123 NULL,
1124 };
1125
75b074ee
CB
1126 err = run_script_argv(handler->name, handler->conf->hooks_version,
1127 "net", netdev->upscript, "up", argv);
9c66dc4f 1128 if (err < 0)
3bef7b7b 1129 return -1;
3bef7b7b
TP
1130 }
1131
75b074ee
CB
1132 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
1133 netdev->ifindex);
811ef482
CB
1134
1135 return 0;
1136}
1137
1138static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1139{
14a7b0f9
CB
1140 int ret;
1141 char *argv[] = {
1142 "empty",
1143 NULL,
1144 };
1145
811ef482 1146 netdev->ifindex = 0;
14a7b0f9
CB
1147 if (!netdev->upscript)
1148 return 0;
1149
1150 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1151 "net", netdev->upscript, "up", argv);
1152 if (ret < 0)
1153 return -1;
1154
811ef482
CB
1155 return 0;
1156}
1157
1158static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
1159{
1160 netdev->ifindex = 0;
1161 return 0;
1162}
1163
1164static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
1165 [LXC_NET_VETH] = instantiate_veth,
1166 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 1167 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
1168 [LXC_NET_VLAN] = instantiate_vlan,
1169 [LXC_NET_PHYS] = instantiate_phys,
1170 [LXC_NET_EMPTY] = instantiate_empty,
1171 [LXC_NET_NONE] = instantiate_none,
1172};
1173
9d0406c7 1174static int __instantiate_ns_common(struct lxc_netdev *netdev)
8bf64b77
CB
1175{
1176 char current_ifname[IFNAMSIZ];
1177
1178 netdev->ifindex = if_nametoindex(netdev->created_name);
1179 if (!netdev->ifindex)
1180 return log_error_errno(-1,
1181 errno, "Failed to retrieve ifindex for network device with name %s",
1182 netdev->created_name);
1183
3473ca76 1184 if (is_empty_string(netdev->name))
8bf64b77
CB
1185 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1186
1187 if (strcmp(netdev->created_name, netdev->name) != 0) {
1188 int ret;
1189
1190 ret = lxc_netdev_rename_by_name(netdev->created_name, netdev->name);
1191 if (ret)
9c66dc4f 1192 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
8bf64b77
CB
1193 netdev->created_name,
1194 netdev->name);
1195
1196 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->created_name, netdev->name);
1197 }
1198
1199 /*
1200 * Re-read the name of the interface because its name has changed and
1201 * would be automatically allocated by the system
1202 */
1203 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 1204 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
1205
1206 /*
1207 * Now update the recorded name of the network device to reflect the
1208 * name of the network device in the child's network namespace. We will
1209 * later on send this information back to the parent.
1210 */
1211 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
1212
1213 return 0;
1214}
1215
/* veth devices need no type-specific work here; use the common path. */
static int instantiate_ns_veth(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1221
/* macvlan devices need no type-specific work here; use the common path. */
static int instantiate_ns_macvlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1226
/* ipvlan devices need no type-specific work here; use the common path. */
static int instantiate_ns_ipvlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1231
/* vlan devices need no type-specific work here; use the common path. */
static int instantiate_ns_vlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1236
/* phys devices need no type-specific work here; use the common path. */
static int instantiate_ns_phys(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1241
/* "empty" networks have no device to set up; always succeeds. */
static int instantiate_ns_empty(struct lxc_netdev *netdev)
{
	return 0;
}
1246
/* "none" networks have no device to set up; always succeeds. */
static int instantiate_ns_none(struct lxc_netdev *netdev)
{
	return 0;
}
1251
1252static instantiate_ns_cb netdev_ns_conf[LXC_NET_MAXCONFTYPE + 1] = {
1253 [LXC_NET_VETH] = instantiate_ns_veth,
1254 [LXC_NET_MACVLAN] = instantiate_ns_macvlan,
1255 [LXC_NET_IPVLAN] = instantiate_ns_ipvlan,
1256 [LXC_NET_VLAN] = instantiate_ns_vlan,
1257 [LXC_NET_PHYS] = instantiate_ns_phys,
1258 [LXC_NET_EMPTY] = instantiate_ns_empty,
1259 [LXC_NET_NONE] = instantiate_ns_none,
1260};
1261
811ef482
CB
1262static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
1263{
14a7b0f9
CB
1264 int ret;
1265 char *argv[] = {
1266 "veth",
1267 netdev->link,
1268 NULL,
1269 NULL,
1270 };
1271
1272 if (!netdev->downscript)
1273 return 0;
811ef482 1274
f2711167 1275 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 1276 argv[2] = netdev->priv.veth_attr.pair;
811ef482 1277 else
14a7b0f9
CB
1278 argv[2] = netdev->priv.veth_attr.veth1;
1279
1280 ret = run_script_argv(handler->name,
1281 handler->conf->hooks_version, "net",
1282 netdev->downscript, "down", argv);
1283 if (ret < 0)
1284 return -1;
811ef482 1285
811ef482
CB
1286 return 0;
1287}
1288
1289static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1290{
14a7b0f9
CB
1291 int ret;
1292 char *argv[] = {
1293 "macvlan",
1294 netdev->link,
1295 NULL,
1296 };
1297
1298 if (!netdev->downscript)
1299 return 0;
1300
1301 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1302 "net", netdev->downscript, "down", argv);
1303 if (ret < 0)
1304 return -1;
811ef482 1305
811ef482
CB
1306 return 0;
1307}
1308
c9f52382 1309static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1310{
1311 int ret;
1312 char *argv[] = {
1313 "ipvlan",
1314 netdev->link,
1315 NULL,
1316 };
1317
1318 if (!netdev->downscript)
1319 return 0;
1320
1321 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1322 "net", netdev->downscript, "down", argv);
1323 if (ret < 0)
1324 return -1;
1325
1326 return 0;
1327}
1328
811ef482
CB
1329static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1330{
3a73d9f1 1331 int ret;
1332 char *argv[] = {
1333 "vlan",
1334 netdev->link,
1335 NULL,
1336 };
1337
1338 if (!netdev->downscript)
1339 return 0;
1340
1341 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1342 "net", netdev->downscript, "down", argv);
1343 if (ret < 0)
1344 return -1;
1345
811ef482
CB
1346 return 0;
1347}
1348
1349static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1350{
14a7b0f9
CB
1351 int ret;
1352 char *argv[] = {
1353 "phys",
1354 netdev->link,
1355 NULL,
1356 };
1357
1358 if (!netdev->downscript)
1359 return 0;
1360
1361 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1362 "net", netdev->downscript, "down", argv);
1363 if (ret < 0)
1364 return -1;
811ef482 1365
811ef482
CB
1366 return 0;
1367}
1368
1369static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1370{
14a7b0f9
CB
1371 int ret;
1372 char *argv[] = {
1373 "empty",
1374 NULL,
1375 };
1376
1377 if (!netdev->downscript)
1378 return 0;
1379
1380 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1381 "net", netdev->downscript, "down", argv);
1382 if (ret < 0)
1383 return -1;
811ef482 1384
811ef482
CB
1385 return 0;
1386}
1387
/* "none" networks have nothing to shut down; always succeeds. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
1392
1393static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1394 [LXC_NET_VETH] = shutdown_veth,
1395 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 1396 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
1397 [LXC_NET_VLAN] = shutdown_vlan,
1398 [LXC_NET_PHYS] = shutdown_phys,
1399 [LXC_NET_EMPTY] = shutdown_empty,
1400 [LXC_NET_NONE] = shutdown_none,
1401};
1402
0037ab49
TP
1403static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1404{
d16bda44 1405 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0037ab49 1406 struct nl_handler nlh;
d16bda44
CB
1407 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1408 int err;
0037ab49 1409 struct ifinfomsg *ifi;
0037ab49 1410
d16bda44 1411 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1412 if (err)
1413 return err;
1414
0037ab49
TP
1415 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1416 if (!nlmsg)
d16bda44 1417 return ret_errno(ENOMEM);
0037ab49
TP
1418
1419 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1420 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1421
1422 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1423 if (!ifi)
d16bda44
CB
1424 return ret_errno(ENOMEM);
1425
0037ab49
TP
1426 ifi->ifi_family = AF_UNSPEC;
1427 ifi->ifi_index = ifindex;
1428
1429 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1430 return ret_errno(ENOMEM);
0037ab49 1431
3473ca76 1432 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1433 return ret_errno(ENOMEM);
0037ab49 1434
d16bda44 1435 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1436}
1437
ebc73a67 1438int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1439{
d16bda44 1440 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0ad19a3f 1441 struct nl_handler nlh;
d16bda44
CB
1442 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1443 int err;
06f976ca 1444 struct ifinfomsg *ifi;
0ad19a3f 1445
d16bda44 1446 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1447 if (err)
1448 return err;
0ad19a3f 1449
0ad19a3f 1450 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1451 if (!nlmsg)
d16bda44 1452 return ret_errno(ENOMEM);
0ad19a3f 1453
ebc73a67 1454 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1455 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1456
1457 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1458 if (!ifi)
d16bda44
CB
1459 return ret_errno(ENOMEM);
1460
06f976ca
SZ
1461 ifi->ifi_family = AF_UNSPEC;
1462 ifi->ifi_index = ifindex;
0ad19a3f 1463
1464 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1465 return ret_errno(ENOMEM);
0ad19a3f 1466
3473ca76 1467 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1468 return ret_errno(ENOMEM);
8d357196 1469
d16bda44 1470 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1471}
1472
ebc73a67
CB
1473/* If we are asked to move a wireless interface, then we must actually move its
1474 * phyN device. Detect that condition and return the physname here. The physname
1475 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1476 */
1477#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1478char *is_wlan(const char *ifname)
e5848d39 1479{
4110345b
CB
1480 __do_fclose FILE *f = NULL;
1481 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1482 int i, ret;
e5848d39 1483 long physlen;
ebc73a67 1484 size_t len;
e5848d39 1485
ebc73a67 1486 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1487 path = must_realloc(NULL, len + 1);
e5848d39 1488 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 1489 if (ret < 0 || (size_t)ret >= len)
4110345b 1490 return NULL;
ebc73a67 1491
4110345b 1492 f = fopen(path, "re");
ebc73a67 1493 if (!f)
4110345b 1494 return NULL;
ebc73a67 1495
1a0e70ac 1496 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1497 fseek(f, 0, SEEK_END);
1498 physlen = ftell(f);
1499 fseek(f, 0, SEEK_SET);
4110345b
CB
1500 if (physlen < 0)
1501 return NULL;
ebc73a67
CB
1502
1503 physname = malloc(physlen + 1);
4110345b
CB
1504 if (!physname)
1505 return NULL;
ebc73a67
CB
1506
1507 memset(physname, 0, physlen + 1);
e5848d39 1508 ret = fread(physname, 1, physlen, f);
e5848d39 1509 if (ret < 0)
4110345b 1510 return NULL;
e5848d39 1511
ebc73a67 1512 for (i = 0; i < physlen; i++) {
e5848d39
SH
1513 if (physname[i] == '\n')
1514 physname[i] = '\0';
ebc73a67 1515
e5848d39
SH
1516 if (physname[i] == '\0')
1517 break;
1518 }
1519
4110345b 1520 return move_ptr(physname);
e5848d39
SH
1521}
1522
ebc73a67
CB
/*
 * Rename network device @old to @new from a forked child that first switches
 * into the "net" namespace of process @pid.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() for the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on. It must never return into the caller: doing so
	 * would leave a second process running the parent's code path.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	/*
	 * Map the result onto a plain exit status instead of passing a
	 * possibly negative value to _exit().
	 */
	_exit(lxc_netdev_rename_by_name(old, new) ? EXIT_FAILURE : EXIT_SUCCESS);
}
1540
e4103cf6 1541int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1542 const char *newname)
e5848d39 1543{
3dd78294 1544 __do_free char *cmd = NULL;
ebc73a67 1545 pid_t fpid;
e5848d39
SH
1546
1547 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1548 * However, IIUC this involves a bit more complicated work to talk to
1549 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1550 */
1551 cmd = on_path("iw", NULL);
9c66dc4f 1552 if (!cmd)
3dd78294 1553 return -1;
e5848d39
SH
1554
1555 fpid = fork();
1556 if (fpid < 0)
3dd78294 1557 return -1;
ebc73a67 1558
e5848d39
SH
1559 if (fpid == 0) {
1560 char pidstr[30];
1561 sprintf(pidstr, "%d", pid);
9c66dc4f 1562 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1563 _exit(EXIT_FAILURE);
e5848d39 1564 }
ebc73a67 1565
e5848d39 1566 if (wait_for_pid(fpid))
3dd78294 1567 return -1;
e5848d39 1568
e5848d39 1569 if (newname)
3dd78294 1570 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1571
3dd78294 1572 return 0;
e5848d39
SH
1573}
1574
8d357196 1575int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1576{
3dd78294 1577 __do_free char *physname = NULL;
8befa924
SH
1578 int index;
1579
8befa924
SH
1580 if (!ifname)
1581 return -EINVAL;
1582
32571606 1583 index = if_nametoindex(ifname);
49428bf3
DY
1584 if (!index)
1585 return -EINVAL;
32571606 1586
ebc73a67
CB
1587 physname = is_wlan(ifname);
1588 if (physname)
e5848d39
SH
1589 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1590
8d357196 1591 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1592}
1593
b84f58b9 1594int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1595{
d16bda44
CB
1596 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1597 struct nl_handler nlh;
1598 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1599 int err;
ebc73a67 1600 struct ifinfomsg *ifi;
0ad19a3f 1601
d16bda44 1602 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1603 if (err)
1604 return err;
0ad19a3f 1605
0ad19a3f 1606 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1607 if (!nlmsg)
d16bda44 1608 return ret_errno(ENOMEM);
0ad19a3f 1609
06f976ca 1610 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1611 if (!answer)
d16bda44 1612 return ret_errno(ENOMEM);
0ad19a3f 1613
ebc73a67 1614 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1615 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1616
1617 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1618 if (!ifi)
d16bda44
CB
1619 return ret_errno(ENOMEM);
1620
06f976ca
SZ
1621 ifi->ifi_family = AF_UNSPEC;
1622 ifi->ifi_index = ifindex;
0ad19a3f 1623
d16bda44 1624 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1625}
1626
b84f58b9
DL
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL if no ifindex can be found for @name, otherwise the result
 * of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (!ifindex)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1637
1638int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1639{
d16bda44
CB
1640 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1641 struct nl_handler nlh;
1642 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1643 int err, len;
06f976ca 1644 struct ifinfomsg *ifi;
b9a5bb58 1645
d16bda44 1646 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1647 if (err)
1648 return err;
b9a5bb58 1649
b84f58b9 1650 len = strlen(newname);
d16bda44
CB
1651 if (len == 1 || len >= IFNAMSIZ)
1652 return ret_errno(EINVAL);
b84f58b9 1653
b9a5bb58
DL
1654 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1655 if (!nlmsg)
d16bda44 1656 return ret_errno(ENOMEM);
b9a5bb58 1657
06f976ca 1658 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1659 if (!answer)
d16bda44 1660 return ret_errno(ENOMEM);
b9a5bb58 1661
ebc73a67 1662 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1663 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1664
1665 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1666 if (!ifi)
d16bda44
CB
1667 return ret_errno(ENOMEM);
1668
06f976ca
SZ
1669 ifi->ifi_family = AF_UNSPEC;
1670 ifi->ifi_index = ifindex;
b84f58b9
DL
1671
1672 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1673 return ret_errno(ENOMEM);
b9a5bb58 1674
d16bda44 1675 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1676}
1677
b84f58b9
DL
1678int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1679{
1680 int len, index;
1681
1682 len = strlen(oldname);
dae3fdf6 1683 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1684 return -EINVAL;
1685
1686 index = if_nametoindex(oldname);
1687 if (!index)
1688 return -EINVAL;
1689
1690 return lxc_netdev_rename_by_index(index, newname);
1691}
1692
8befa924 1693int netdev_set_flag(const char *name, int flag)
0ad19a3f 1694{
d16bda44
CB
1695 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1696 struct nl_handler nlh;
1697 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1698 int err, index, len;
06f976ca 1699 struct ifinfomsg *ifi;
0ad19a3f 1700
d16bda44 1701 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1702 if (err)
1703 return err;
0ad19a3f 1704
1705 len = strlen(name);
dae3fdf6 1706 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1707 return ret_errno(EINVAL);
0ad19a3f 1708
1709 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1710 if (!nlmsg)
d16bda44 1711 return ret_errno(ENOMEM);
0ad19a3f 1712
06f976ca 1713 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1714 if (!answer)
d16bda44 1715 return ret_errno(ENOMEM);
0ad19a3f 1716
1717 index = if_nametoindex(name);
1718 if (!index)
d16bda44 1719 return ret_errno(EINVAL);
0ad19a3f 1720
ebc73a67 1721 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1722 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1723
1724 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1725 if (!ifi)
1726 return ret_errno(ENOMEM);
1727
06f976ca
SZ
1728 ifi->ifi_family = AF_UNSPEC;
1729 ifi->ifi_index = index;
1730 ifi->ifi_change |= IFF_UP;
1731 ifi->ifi_flags |= flag;
0ad19a3f 1732
d16bda44 1733 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1734}
1735
ebc73a67 1736int netdev_get_flag(const char *name, int *flag)
efa1cf45 1737{
d16bda44
CB
1738 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1739 struct nl_handler nlh;
1740 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1741 int err, index, len;
a4318300 1742 struct ifinfomsg *ifi;
efa1cf45
DY
1743
1744 if (!name)
d16bda44 1745 return ret_errno(EINVAL);
efa1cf45 1746
d16bda44 1747 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1748 if (err)
1749 return err;
1750
efa1cf45
DY
1751 len = strlen(name);
1752 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1753 return ret_errno(EINVAL);
efa1cf45 1754
efa1cf45
DY
1755 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1756 if (!nlmsg)
d16bda44 1757 return ret_errno(ENOMEM);
efa1cf45 1758
06f976ca 1759 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1760 if (!answer)
d16bda44 1761 return ret_errno(ENOMEM);
efa1cf45 1762
efa1cf45
DY
1763 index = if_nametoindex(name);
1764 if (!index)
d16bda44 1765 return ret_errno(EINVAL);
efa1cf45 1766
06f976ca
SZ
1767 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1768 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1769
1770 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1771 if (!ifi)
1772 return ret_errno(ENOMEM);
1773
06f976ca
SZ
1774 ifi->ifi_family = AF_UNSPEC;
1775 ifi->ifi_index = index;
efa1cf45 1776
d16bda44 1777 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1778 if (err)
d16bda44 1779 return ret_set_errno(-1, errno);
efa1cf45 1780
06f976ca 1781 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1782
1783 *flag = ifi->ifi_flags;
efa1cf45
DY
1784 return err;
1785}
1786
1787/*
1788 * \brief Check a interface is up or not.
1789 *
1790 * \param name: name for the interface.
1791 *
1792 * \return int.
1793 * 0 means interface is down.
1794 * 1 means interface is up.
1795 * Others means error happened, and ret-value is the error number.
1796 */
ebc73a67 1797int lxc_netdev_isup(const char *name)
efa1cf45 1798{
ebc73a67 1799 int err, flag;
efa1cf45
DY
1800
1801 err = netdev_get_flag(name, &flag);
1802 if (err)
ebc73a67
CB
1803 return err;
1804
efa1cf45
DY
1805 if (flag & IFF_UP)
1806 return 1;
ebc73a67 1807
efa1cf45 1808 return 0;
efa1cf45
DY
1809}
1810
0130df54
SH
1811int netdev_get_mtu(int ifindex)
1812{
a5f5cb41 1813 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54 1814 struct nl_handler nlh;
a5f5cb41
CB
1815 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1816 int readmore = 0, recv_len = 0;
1817 int answer_len, err, res;
06f976ca 1818 struct ifinfomsg *ifi;
0130df54 1819 struct nlmsghdr *msg;
0130df54 1820
a5f5cb41 1821 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0130df54
SH
1822 if (err)
1823 return err;
1824
0130df54
SH
1825 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1826 if (!nlmsg)
a5f5cb41 1827 return ret_errno(ENOMEM);
0130df54 1828
06f976ca 1829 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54 1830 if (!answer)
a5f5cb41 1831 return ret_errno(ENOMEM);
0130df54
SH
1832
1833 /* Save the answer buffer length, since it will be overwritten
1834 * on the first receive (and we might need to receive more than
ebc73a67
CB
1835 * once.
1836 */
06f976ca
SZ
1837 answer_len = answer->nlmsghdr->nlmsg_len;
1838
ebc73a67 1839 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1840 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1841
06f976ca 1842 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1843 if (!ifi)
a5f5cb41
CB
1844 return ret_errno(ENOMEM);
1845
06f976ca 1846 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1847
1848 /* Send the request for addresses, which returns all addresses
1849 * on all interfaces. */
a5f5cb41 1850 err = netlink_send(nlh_ptr, nlmsg);
0130df54 1851 if (err < 0)
a5f5cb41 1852 return ret_set_errno(-1, errno);
0130df54 1853
6ce39620
CB
1854#pragma GCC diagnostic push
1855#pragma GCC diagnostic ignored "-Wcast-align"
1856
0130df54
SH
1857 do {
1858 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1859 * overwritten by a previous receive.
1860 */
06f976ca 1861 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1862
1863 /* Get the (next) batch of reply messages */
a5f5cb41 1864 err = netlink_rcv(nlh_ptr, answer);
0130df54 1865 if (err < 0)
a5f5cb41 1866 return ret_set_errno(-1, errno);
0130df54
SH
1867
1868 recv_len = err;
0130df54
SH
1869
1870 /* Satisfy the typing for the netlink macros */
06f976ca 1871 msg = answer->nlmsghdr;
0130df54
SH
1872
1873 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1874 /* Stop reading if we see an error message */
1875 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
1876 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1877 return ret_set_errno(errmsg->error, errno);
0130df54
SH
1878 }
1879
1880 /* Stop reading if we see a NLMSG_DONE message */
1881 if (msg->nlmsg_type == NLMSG_DONE) {
1882 readmore = 0;
1883 break;
1884 }
1885
06f976ca 1886 ifi = NLMSG_DATA(msg);
0130df54
SH
1887 if (ifi->ifi_index == ifindex) {
1888 struct rtattr *rta = IFLA_RTA(ifi);
a5f5cb41
CB
1889 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1890
0130df54 1891 res = 0;
ebc73a67 1892 while (RTA_OK(rta, attr_len)) {
9c66dc4f 1893 /*
a5f5cb41 1894 * Found a local address for the
ebc73a67
CB
1895 * requested interface, return it.
1896 */
0130df54 1897 if (rta->rta_type == IFLA_MTU) {
a5f5cb41
CB
1898 memcpy(&res, RTA_DATA(rta), sizeof(int));
1899 return res;
0130df54 1900 }
a5f5cb41 1901
0130df54
SH
1902 rta = RTA_NEXT(rta, attr_len);
1903 }
0130df54
SH
1904 }
1905
ebc73a67
CB
1906 /* Keep reading more data from the socket if the last
1907 * message had the NLF_F_MULTI flag set.
1908 */
0130df54
SH
1909 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1910
ebc73a67 1911 /* Look at the next message received in this buffer. */
0130df54
SH
1912 msg = NLMSG_NEXT(msg, recv_len);
1913 }
1914 } while (readmore);
1915
6ce39620
CB
1916#pragma GCC diagnostic pop
1917
ebc73a67 1918 /* If we end up here, we didn't find any result, so signal an error. */
a5f5cb41 1919 return -1;
0130df54
SH
1920}
1921
d472214b 1922int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1923{
a5f5cb41
CB
1924 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1925 struct nl_handler nlh;
1926 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
54256301 1927 int err, len;
06f976ca 1928 struct ifinfomsg *ifi;
75d09f83 1929
a5f5cb41 1930 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1931 if (err)
1932 return err;
75d09f83
DL
1933
1934 len = strlen(name);
dae3fdf6 1935 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1936 return ret_errno(EINVAL);
75d09f83
DL
1937
1938 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1939 if (!nlmsg)
a5f5cb41 1940 return ret_errno(ENOMEM);
75d09f83 1941
06f976ca 1942 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83 1943 if (!answer)
a5f5cb41 1944 return ret_errno(ENOMEM);
75d09f83 1945
ebc73a67 1946 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1947 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1948
1949 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
1950 if (!ifi)
1951 return ret_errno(ENOMEM);
1952
06f976ca 1953 ifi->ifi_family = AF_UNSPEC;
54256301
CB
1954
1955 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 1956 return ret_errno(ENOMEM);
75d09f83
DL
1957
1958 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 1959 return ret_errno(ENOMEM);
75d09f83 1960
a5f5cb41 1961 return netlink_transaction(nlh_ptr, nlmsg, answer);
75d09f83
DL
1962}
1963
d472214b 1964int lxc_netdev_up(const char *name)
0ad19a3f 1965{
d472214b 1966 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1967}
1968
/* Take down network device @name by passing a flag of 0 to netdev_set_flag(). */
int lxc_netdev_down(const char *name)
{
	const int flag = 0;

	return netdev_set_flag(name, flag);
}
1973
54256301 1974int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 1975{
a5f5cb41
CB
1976 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1977 struct nl_handler nlh;
1978 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1979 int err, len;
06f976ca 1980 struct ifinfomsg *ifi;
0ad19a3f 1981 struct rtattr *nest1, *nest2, *nest3;
0ad19a3f 1982
a5f5cb41 1983 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1984 if (err)
1985 return err;
0ad19a3f 1986
1987 len = strlen(name1);
dae3fdf6 1988 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1989 return ret_errno(EINVAL);
0ad19a3f 1990
1991 len = strlen(name2);
dae3fdf6 1992 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1993 return ret_errno(EINVAL);
0ad19a3f 1994
1995 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1996 if (!nlmsg)
a5f5cb41 1997 return ret_errno(ENOMEM);
0ad19a3f 1998
06f976ca 1999 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2000 if (!answer)
a5f5cb41 2001 return ret_errno(ENOMEM);
0ad19a3f 2002
a5f5cb41 2003 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2004 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2005
2006 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 2007 if (!ifi)
a5f5cb41
CB
2008 return ret_errno(ENOMEM);
2009
06f976ca 2010 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2011
79e68309 2012 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2013 if (!nest1)
a5f5cb41 2014 return ret_errno(EINVAL);
0ad19a3f 2015
2016 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
a5f5cb41 2017 return ret_errno(ENOMEM);
0ad19a3f 2018
2019 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2020 if (!nest2)
a5f5cb41 2021 return ret_errno(ENOMEM);
0ad19a3f 2022
2023 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
2024 if (!nest3)
a5f5cb41 2025 return ret_errno(ENOMEM);
0ad19a3f 2026
06f976ca 2027 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2028 if (!ifi)
2029 return ret_errno(ENOMEM);
0ad19a3f 2030
2031 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
a5f5cb41 2032 return ret_errno(ENOMEM);
0ad19a3f 2033
54256301 2034 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2035 return ret_errno(ENOMEM);
54256301
CB
2036
2037 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
a5f5cb41 2038 return ret_errno(ENOMEM);
54256301 2039
0ad19a3f 2040 nla_end_nested(nlmsg, nest3);
0ad19a3f 2041 nla_end_nested(nlmsg, nest2);
0ad19a3f 2042 nla_end_nested(nlmsg, nest1);
2043
2044 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
a5f5cb41 2045 return ret_errno(ENOMEM);
0ad19a3f 2046
a5f5cb41 2047 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2048}
2049
ebc73a67 2050/* TODO: merge with lxc_macvlan_create */
7c11d57a 2051int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 2052{
a5f5cb41
CB
2053 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2054 struct nl_handler nlh;
2055 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2056 int err, len, lindex;
06f976ca 2057 struct ifinfomsg *ifi;
26c39028 2058 struct rtattr *nest, *nest2;
26c39028 2059
a5f5cb41 2060 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2061 if (err)
2062 return err;
26c39028
JHS
2063
2064 len = strlen(master);
dae3fdf6 2065 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2066 return ret_errno(EINVAL);
26c39028
JHS
2067
2068 len = strlen(name);
dae3fdf6 2069 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2070 return ret_errno(EINVAL);
26c39028
JHS
2071
2072 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2073 if (!nlmsg)
a5f5cb41 2074 return ret_errno(ENOMEM);
26c39028 2075
06f976ca 2076 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028 2077 if (!answer)
a5f5cb41 2078 return ret_errno(ENOMEM);
26c39028
JHS
2079
2080 lindex = if_nametoindex(master);
2081 if (!lindex)
a5f5cb41 2082 return ret_errno(EINVAL);
26c39028 2083
a5f5cb41 2084 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2085 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2086
2087 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2088 if (!ifi)
2089 return ret_errno(ENOMEM);
2090
06f976ca 2091 ifi->ifi_family = AF_UNSPEC;
26c39028 2092
79e68309 2093 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028 2094 if (!nest)
a5f5cb41 2095 return ret_errno(ENOMEM);
26c39028
JHS
2096
2097 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
a5f5cb41 2098 return ret_errno(ENOMEM);
26c39028
JHS
2099
2100 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2101 if (!nest2)
a5f5cb41 2102 return ret_errno(ENOMEM);
e892973e 2103
26c39028 2104 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
a5f5cb41 2105 return ret_errno(ENOMEM);
e892973e 2106
26c39028 2107 nla_end_nested(nlmsg, nest2);
26c39028
JHS
2108 nla_end_nested(nlmsg, nest);
2109
2110 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
a5f5cb41 2111 return ret_errno(ENOMEM);
26c39028
JHS
2112
2113 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41
CB
2114 return ret_errno(ENOMEM);
2115
2116 return netlink_transaction(nlh_ptr, nlmsg, answer);
26c39028
JHS
2117}
2118
e892973e 2119int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 2120{
a5f5cb41
CB
2121 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2122 struct nl_handler nlh;
2123 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2124 int err, index, len;
06f976ca 2125 struct ifinfomsg *ifi;
e892973e 2126 struct rtattr *nest, *nest2;
0ad19a3f 2127
a5f5cb41 2128 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2129 if (err)
2130 return err;
0ad19a3f 2131
2132 len = strlen(master);
dae3fdf6 2133 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2134 return ret_errno(EINVAL);
0ad19a3f 2135
2136 len = strlen(name);
dae3fdf6 2137 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2138 return ret_errno(EINVAL);
0ad19a3f 2139
2140 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2141 if (!nlmsg)
a5f5cb41 2142 return ret_errno(ENOMEM);
0ad19a3f 2143
06f976ca 2144 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2145 if (!answer)
a5f5cb41 2146 return ret_errno(ENOMEM);
0ad19a3f 2147
2148 index = if_nametoindex(master);
2149 if (!index)
a5f5cb41 2150 return ret_errno(EINVAL);
0ad19a3f 2151
a5f5cb41 2152 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2153 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2154
2155 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2156 if (!ifi)
2157 return ret_errno(ENOMEM);
2158
06f976ca 2159 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2160
79e68309 2161 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2162 if (!nest)
a5f5cb41 2163 return ret_errno(ENOMEM);
0ad19a3f 2164
2165 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
a5f5cb41 2166 return ret_errno(ENOMEM);
0ad19a3f 2167
e892973e
DL
2168 if (mode) {
2169 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2170 if (!nest2)
a5f5cb41 2171 return ret_errno(ENOMEM);
e892973e
DL
2172
2173 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
a5f5cb41 2174 return ret_errno(ENOMEM);
e892973e
DL
2175
2176 nla_end_nested(nlmsg, nest2);
2177 }
2178
0ad19a3f 2179 nla_end_nested(nlmsg, nest);
2180
2181 if (nla_put_u32(nlmsg, IFLA_LINK, index))
a5f5cb41 2182 return ret_errno(ENOMEM);
0ad19a3f 2183
2184 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2185 return ret_errno(ENOMEM);
0ad19a3f 2186
a5f5cb41 2187 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2188}
2189
/*
 * Write the string @value (without its terminating NUL) to the file @path.
 *
 * Returns 0 on success, or -errno if the file cannot be opened for writing
 * or lxc_write_nointr() reports a failure.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int fd, ret = 0;

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -errno;

	/* Capture errno before close() gets a chance to touch it. */
	if (lxc_write_nointr(fd, value, strlen(value)) < 0)
		ret = -errno;

	close(fd);

	return ret;
}
2205
6dfa9581 2206static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 2207{
2208 int ret;
2209 char path[PATH_MAX];
6509154d 2210
2211 if (family != AF_INET && family != AF_INET6)
6dfa9581 2212 return -EINVAL;
6509154d 2213
9c66dc4f 2214 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581 2215 family == AF_INET ? "ipv4" : "ipv6", ifname, "forwarding");
9c66dc4f 2216 if (ret < 0 || (size_t)ret >= sizeof(path))
6dfa9581 2217 return -E2BIG;
6509154d 2218
6dfa9581
TP
2219 return proc_sys_net_write(path, flag ? "1" : "0");
2220}
2221
/* Enable forwarding on interface @name for @family via ip_forwarding_set(). */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
2226
/* Disable forwarding on interface @name for @family via ip_forwarding_set(). */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int enable = 0;

	return ip_forwarding_set(name, family, enable);
}
2231
0ad19a3f 2232static int neigh_proxy_set(const char *ifname, int family, int flag)
2233{
9ba8130c 2234 int ret;
419590da 2235 char path[PATH_MAX];
0ad19a3f 2236
2237 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 2238 return -EINVAL;
0ad19a3f 2239
9c66dc4f 2240 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
2241 family == AF_INET ? "ipv4" : "ipv6", ifname,
2242 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 2243 if (ret < 0 || (size_t)ret >= sizeof(path))
9ba8130c 2244 return -E2BIG;
0ad19a3f 2245
ebc73a67 2246 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 2247}
2248
6509154d 2249static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2250{
2251 int ret;
2252 char path[PATH_MAX];
2253 char buf[1] = "";
2254
2255 if (family != AF_INET && family != AF_INET6)
596a002c 2256 return ret_set_errno(-1, EINVAL);
6509154d 2257
9c66dc4f 2258 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6509154d 2259 family == AF_INET ? "ipv4" : "ipv6", ifname,
2260 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 2261 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 2262 return ret_set_errno(-1, E2BIG);
6509154d 2263
2264 return lxc_read_file_expect(path, buf, 1, "1");
2265}
2266
/* Enable neighbour proxying on interface @name for @family via neigh_proxy_set(). */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2271
/* Disable neighbour proxying on interface @name for @family via neigh_proxy_set(). */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
2276
/*
 * Return the numeric value (0-15) of the hexadecimal digit @c, or -1 if @c is
 * not a hexadecimal digit.
 */
static int mac_hex_value(char c)
{
	/* <ctype.h> functions must not be handed a negative plain char. */
	if (isdigit((unsigned char)c))
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * lxc_convert_mac - parse a textual MAC address into a sockaddr
 * @macaddr:  NUL-terminated string of ':' separated hex octets; each octet
 *            may be written with one or two hex digits
 * @sockaddr: receives ARPHRD_ETHER in sa_family and the parsed octets in
 *            sa_data
 *
 * At most ETH_ALEN octets are consumed. Parsing stops at the end of the
 * string, so a shorter address only fills the leading bytes of sa_data.
 *
 * Returns 0 on success and -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		unsigned int val;
		int digit;
		char c;

		/* High nibble: must be a hex digit. */
		digit = mac_hex_value(*macaddr++);
		if (digit < 0)
			return -EINVAL;
		val = (unsigned int)digit << 4;

		/*
		 * Low nibble: either a second hex digit, or a separator/end
		 * of string, in which case the octet had a single digit.
		 */
		c = *macaddr;
		digit = mac_hex_value(c);
		if (digit >= 0)
			val |= (unsigned int)digit;
		else if (c == ':' || c == '\0')
			val >>= 4;
		else
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator following a two digit octet. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2321
ebc73a67
CB
2322static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2323 void *acast, int prefix)
0ad19a3f 2324{
a5f5cb41
CB
2325 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2326 struct nl_handler nlh;
2327 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2328 int addrlen, err;
06f976ca 2329 struct ifaddrmsg *ifa;
0ad19a3f 2330
ebc73a67
CB
2331 addrlen = family == AF_INET ? sizeof(struct in_addr)
2332 : sizeof(struct in6_addr);
4bf1968d 2333
a5f5cb41 2334 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2335 if (err)
2336 return err;
0ad19a3f 2337
0ad19a3f 2338 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2339 if (!nlmsg)
a5f5cb41 2340 return ret_errno(ENOMEM);
0ad19a3f 2341
06f976ca 2342 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2343 if (!answer)
a5f5cb41 2344 return ret_errno(ENOMEM);
0ad19a3f 2345
a5f5cb41 2346 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2347 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2348
2349 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2350 if (!ifa)
a5f5cb41
CB
2351 return ret_errno(ENOMEM);
2352
06f976ca
SZ
2353 ifa->ifa_prefixlen = prefix;
2354 ifa->ifa_index = ifindex;
2355 ifa->ifa_family = family;
2356 ifa->ifa_scope = 0;
acf47e1b 2357
4bf1968d 2358 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
a5f5cb41 2359 return ret_errno(EINVAL);
0ad19a3f 2360
4bf1968d 2361 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
a5f5cb41 2362 return ret_errno(EINVAL);
0ad19a3f 2363
d8948a52 2364 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
a5f5cb41 2365 return ret_errno(EINVAL);
1f1b18e7 2366
ebc73a67 2367 /* TODO: multicast, anycast with ipv6 */
79881dc6
DL
2368 if (family == AF_INET6 &&
2369 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2370 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
a5f5cb41 2371 return ret_errno(EPROTONOSUPPORT);
0ad19a3f 2372
a5f5cb41 2373 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2374}
2375
/*
 * Add IPv6 address @addr/@prefix to the interface with index @ifindex.
 * Forwards to ip_addr_add() with AF_INET6; @mcast is passed in the broadcast
 * slot and @acast in the anycast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
2382
ebc73a67
CB
/*
 * Add IPv4 address @addr/@prefix with broadcast address @bcast to the
 * interface with index @ifindex. Forwards to ip_addr_add() with AF_INET and
 * no anycast address.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
2388
ebc73a67
CB
/* Find an IFA_LOCAL (or IFA_ADDRESS if no IFA_LOCAL is present) address from
 * the given RTM_NEWADDR message. Allocates memory for the address and stores
 * that pointer in *res (so res should be an in_addr** or in6_addr**).
 *
 * If *res is already non-NULL the existing buffer is reused instead of
 * allocating a new one.
 *
 * Returns 0 when the message was processed (including the cases where its
 * family differs from @family or where it carries no matching attribute) and
 * -1 when an attribute has an unexpected payload size or malloc() fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addr_info = NLMSG_DATA(msg);
	int attr_len = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *rta;
	int addrlen;

	if (addr_info->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk all rtattr's carried by this message. */
	for (rta = IFA_RTA(addr_info); RTA_OK(rta, attr_len);
	     rta = RTA_NEXT(rta, attr_len)) {
		if (rta->rta_type != IFA_LOCAL && rta->rta_type != IFA_ADDRESS)
			continue;

		/* Sanity check. The family check above should make sure the
		 * address length is correct, but check here just in case.
		 */
		if (RTA_PAYLOAD(rta) != addrlen)
			return -1;

		/* An IFA_ADDRESS seen earlier may already have allocated the
		 * buffer; an IFA_LOCAL then simply overwrites its content.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(rta), addrlen);

		/* IFA_LOCAL is the preferred answer, no need to look further. */
		if (rta->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2442
19a26f82
MK
2443static int ip_addr_get(int family, int ifindex, void **res)
2444{
a5f5cb41
CB
2445 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2446 struct nl_handler nlh;
2447 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2448 int answer_len, err;
06f976ca 2449 struct ifaddrmsg *ifa;
19a26f82 2450 struct nlmsghdr *msg;
ebc73a67 2451 int readmore = 0, recv_len = 0;
19a26f82 2452
a5f5cb41 2453 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
19a26f82
MK
2454 if (err)
2455 return err;
2456
19a26f82
MK
2457 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2458 if (!nlmsg)
a5f5cb41 2459 return ret_errno(ENOMEM);
19a26f82 2460
06f976ca 2461 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82 2462 if (!answer)
a5f5cb41 2463 return ret_errno(ENOMEM);
19a26f82 2464
ebc73a67
CB
2465 /* Save the answer buffer length, since it will be overwritten on the
2466 * first receive (and we might need to receive more than once).
2467 */
06f976ca
SZ
2468 answer_len = answer->nlmsghdr->nlmsg_len;
2469
ebc73a67 2470 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2471 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2472
06f976ca 2473 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b 2474 if (!ifa)
a5f5cb41
CB
2475 return ret_errno(ENOMEM);
2476
06f976ca 2477 ifa->ifa_family = family;
19a26f82 2478
ebc73a67
CB
2479 /* Send the request for addresses, which returns all addresses on all
2480 * interfaces.
2481 */
a5f5cb41 2482 err = netlink_send(nlh_ptr, nlmsg);
19a26f82 2483 if (err < 0)
a5f5cb41 2484 return ret_set_errno(err, errno);
19a26f82 2485
6ce39620
CB
2486#pragma GCC diagnostic push
2487#pragma GCC diagnostic ignored "-Wcast-align"
2488
19a26f82
MK
2489 do {
2490 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2491 * overwritten by a previous receive.
2492 */
06f976ca 2493 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2494
ebc73a67 2495 /* Get the (next) batch of reply messages. */
a5f5cb41 2496 err = netlink_rcv(nlh_ptr, answer);
19a26f82 2497 if (err < 0)
a5f5cb41 2498 return ret_set_errno(err, errno);
19a26f82
MK
2499
2500 recv_len = err;
2501 err = 0;
2502
ebc73a67 2503 /* Satisfy the typing for the netlink macros. */
06f976ca 2504 msg = answer->nlmsghdr;
19a26f82
MK
2505
2506 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2507 /* Stop reading if we see an error message. */
19a26f82 2508 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
2509 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2510 return ret_set_errno(errmsg->error, errno);
19a26f82
MK
2511 }
2512
ebc73a67 2513 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2514 if (msg->nlmsg_type == NLMSG_DONE) {
2515 readmore = 0;
2516 break;
2517 }
2518
a5f5cb41
CB
2519 if (msg->nlmsg_type != RTM_NEWADDR)
2520 return ret_errno(EINVAL);
19a26f82 2521
06f976ca
SZ
2522 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2523 if (ifa->ifa_index == ifindex) {
a5f5cb41
CB
2524 if (ifa_get_local_ip(family, msg, res) < 0)
2525 return ret_errno(EINVAL);
51e7a874 2526
ebc73a67 2527 /* Found a result, stop searching. */
19a26f82 2528 if (*res)
a5f5cb41 2529 return 0;
19a26f82
MK
2530 }
2531
ebc73a67
CB
2532 /* Keep reading more data from the socket if the last
2533 * message had the NLF_F_MULTI flag set.
2534 */
19a26f82
MK
2535 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2536
ebc73a67 2537 /* Look at the next message received in this buffer. */
19a26f82
MK
2538 msg = NLMSG_NEXT(msg, recv_len);
2539 }
2540 } while (readmore);
2541
6ce39620
CB
2542#pragma GCC diagnostic pop
2543
19a26f82 2544 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2545 * error.
2546 */
a5f5cb41 2547 return -1;
19a26f82
MK
2548}
2549
/*
 * Look up an IPv6 address of the interface with index @ifindex and store it
 * in *@res. Forwards to ip_addr_get() with AF_INET6.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2554
/*
 * Look up an IPv4 address of the interface with index @ifindex and store it
 * in *@res. Forwards to ip_addr_get() with AF_INET.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2559
f8fee0e2
MK
2560static int ip_gateway_add(int family, int ifindex, void *gw)
2561{
a5f5cb41 2562 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2563 struct nl_handler nlh;
a5f5cb41
CB
2564 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2565 int addrlen, err;
06f976ca 2566 struct rtmsg *rt;
f8fee0e2 2567
ebc73a67
CB
2568 addrlen = family == AF_INET ? sizeof(struct in_addr)
2569 : sizeof(struct in6_addr);
f8fee0e2 2570
a5f5cb41 2571 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
f8fee0e2
MK
2572 if (err)
2573 return err;
2574
f8fee0e2
MK
2575 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2576 if (!nlmsg)
a5f5cb41 2577 return ret_errno(ENOMEM);
f8fee0e2 2578
06f976ca 2579 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2 2580 if (!answer)
a5f5cb41 2581 return ret_errno(ENOMEM);
f8fee0e2 2582
a5f5cb41 2583 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2584 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2585
2586 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b 2587 if (!rt)
a5f5cb41
CB
2588 return ret_errno(ENOMEM);
2589
06f976ca
SZ
2590 rt->rtm_family = family;
2591 rt->rtm_table = RT_TABLE_MAIN;
2592 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2593 rt->rtm_protocol = RTPROT_BOOT;
2594 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2595 /* "default" destination */
06f976ca 2596 rt->rtm_dst_len = 0;
f8fee0e2 2597
a2f9a670 2598 /* If gateway address not supplied, then a device route will be created instead */
a5f5cb41
CB
2599 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2600 return ret_errno(ENOMEM);
f8fee0e2
MK
2601
2602 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2603 * addresses for the gateway.
2604 */
f8fee0e2 2605 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
a5f5cb41 2606 return ret_errno(EINVAL);
f8fee0e2 2607
a5f5cb41 2608 return netlink_transaction(nlh_ptr, nlmsg, answer);
f8fee0e2
MK
2609}
2610
/*
 * Install an IPv4 default route through @gw on the interface with index
 * @ifindex. Forwards to ip_gateway_add() with AF_INET.
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2615
/*
 * Install an IPv6 default route through @gw on the interface with index
 * @ifindex. Forwards to ip_gateway_add() with AF_INET6.
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
/*
 * is_ovs_bridge - guess whether @bridge is an openvswitch bridge
 * @bridge: name of the bridge device
 *
 * The decision is based solely on /sys/class/net/@bridge/bridge: when stat()
 * on that path fails with ENOENT the device is treated as an openvswitch
 * bridge.
 *
 * Returns true in that case. Returns false if the path exists, if stat()
 * fails for another reason, or if the path does not fit into the buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	/* 22 == strlen("/sys/class/net/") + strlen("/bridge") */
	char brdirname[22 + IFNAMSIZ + 1] = {0};
	struct stat sb;
	int len;

	len = snprintf(brdirname, sizeof(brdirname), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(brdirname))
		return false;

	return stat(brdirname, &sb) < 0 && errno == ENOENT;
}
2637
581c75e7
CB
/* Arguments handed to the ovs-vsctl exec callbacks through a void pointer. */
struct ovs_veth_args {
	/* Name of the openvswitch bridge. */
	const char *bridge;
	/* Name of the port (network device) to add or delete. */
	const char *nic;
};
2642
cb0dc11b
CB
2643/* Called from a background thread - when nic goes away, remove it from the
2644 * bridge.
c43cbc04 2645 */
581c75e7 2646static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2647{
581c75e7 2648 struct ovs_veth_args *args = data;
cb0dc11b 2649
9c66dc4f 2650 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2651 return -1;
c43cbc04
SH
2652}
2653
581c75e7 2654int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2655{
c43cbc04 2656 int ret;
419590da 2657 char cmd_output[PATH_MAX];
581c75e7 2658 struct ovs_veth_args args;
6ad22d06 2659
581c75e7
CB
2660 args.bridge = bridge;
2661 args.nic = nic;
2662 ret = run_command(cmd_output, sizeof(cmd_output),
2663 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2664 if (ret < 0)
2665 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2666
581c75e7
CB
2667 return 0;
2668}
ebc73a67 2669
581c75e7
CB
2670static int lxc_ovs_attach_bridge_exec(void *data)
2671{
2672 struct ovs_veth_args *args = data;
ebc73a67 2673
9c66dc4f 2674 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2675 return -1;
2676}
ebc73a67 2677
581c75e7
CB
2678static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2679{
2680 int ret;
419590da 2681 char cmd_output[PATH_MAX];
581c75e7 2682 struct ovs_veth_args args;
ebc73a67 2683
581c75e7
CB
2684 args.bridge = bridge;
2685 args.nic = nic;
2686 ret = run_command(cmd_output, sizeof(cmd_output),
2687 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2688 if (ret < 0)
2689 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2690
581c75e7 2691 return 0;
0d204771 2692}
0d204771 2693
581c75e7 2694int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2695{
ebc73a67 2696 int err, fd, index;
9de31d5a 2697 size_t retlen;
0ad19a3f 2698 struct ifreq ifr;
2699
dae3fdf6 2700 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2701 return -EINVAL;
0ad19a3f 2702
2703 index = if_nametoindex(ifname);
2704 if (!index)
3cfc0f3a 2705 return -EINVAL;
0ad19a3f 2706
0d204771 2707 if (is_ovs_bridge(bridge))
581c75e7 2708 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2709
ad9429e5 2710 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2711 if (fd < 0)
3cfc0f3a 2712 return -errno;
0ad19a3f 2713
9de31d5a 2714 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2715 if (retlen >= IFNAMSIZ) {
2716 close(fd);
9de31d5a 2717 return -E2BIG;
42cc4083 2718 }
9de31d5a 2719
ebc73a67 2720 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2721 ifr.ifr_ifindex = index;
7d163508 2722 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2723 close(fd);
3cfc0f3a
MN
2724 if (err)
2725 err = -errno;
0ad19a3f 2726
2727 return err;
2728}
72d0e1cb 2729
ebc73a67 2730static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2731 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2732 [LXC_NET_VETH] = "veth",
2733 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2734 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2735 [LXC_NET_PHYS] = "phys",
b343592b
BP
2736 [LXC_NET_VLAN] = "vlan",
2737 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2738};
2739
2740const char *lxc_net_type_to_str(int type)
2741{
2742 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2743 return NULL;
ebc73a67 2744
72d0e1cb
SG
2745 return lxc_network_types[type];
2746}
8befa924 2747
3646ffd9 2748static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2749
3646ffd9 2750char *lxc_ifname_alnum_case_sensitive(char *template)
a0265685 2751{
2d7bf744 2752 int ret;
b1e44ed1 2753 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2754 char name[IFNAMSIZ];
2755 bool exists = false;
2756 size_t i = 0;
280cc35f 2757#ifdef HAVE_RAND_R
2758 unsigned int seed;
2759
2760 seed = randseed(false);
2761#else
2762
2763 (void)randseed(true);
2764#endif
a0265685 2765
535e8859
CB
2766 if (strlen(template) >= IFNAMSIZ)
2767 return NULL;
2768
ebc73a67 2769 /* Get all the network interfaces. */
b1e44ed1 2770 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
9c66dc4f
CB
2771 if (ret < 0)
2772 return log_error_errno(NULL, errno, "Failed to get network interfaces");
a0265685 2773
ebc73a67 2774 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2775 for (;;) {
966e9f1f 2776 name[0] = '\0';
94b1cade 2777 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2778
966e9f1f 2779 exists = false;
280cc35f 2780
a0265685
SG
2781 for (i = 0; i < strlen(name); i++) {
2782 if (name[i] == 'X') {
2783#ifdef HAVE_RAND_R
8523344a 2784 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2785#else
8523344a 2786 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2787#endif
2788 }
2789 }
2790
2791 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2792 if (!strcmp(ifa->ifa_name, name)) {
2793 exists = true;
a0265685
SG
2794 break;
2795 }
2796 }
2797
966e9f1f 2798 if (!exists)
a0265685 2799 break;
a0265685
SG
2800 }
2801
b1e44ed1 2802 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2803 (void)strlcpy(template, name, strlen(template) + 1);
2804
2805 return template;
a0265685
SG
2806}
2807
8befa924
SH
2808int setup_private_host_hw_addr(char *veth1)
2809{
ebc73a67 2810 int err, sockfd;
8befa924 2811 struct ifreq ifr;
8befa924 2812
ad9429e5 2813 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
8befa924
SH
2814 if (sockfd < 0)
2815 return -errno;
2816
ebc73a67 2817 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
87c6e5db
DJ
2818 if (err < 0 || (size_t)err >= IFNAMSIZ) {
2819 close(sockfd);
ebc73a67 2820 return -E2BIG;
87c6e5db 2821 }
ebc73a67 2822
8befa924
SH
2823 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2824 if (err < 0) {
8befa924 2825 close(sockfd);
8befa924
SH
2826 return -errno;
2827 }
2828
2829 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2830 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
8befa924 2831 close(sockfd);
8befa924
SH
2832 if (err < 0)
2833 return -errno;
2834
2835 return 0;
2836}
811ef482
CB
2837
2838int lxc_find_gateway_addresses(struct lxc_handler *handler)
2839{
2840 struct lxc_list *network = &handler->conf->network;
2841 struct lxc_list *iterator;
2842 struct lxc_netdev *netdev;
2843 int link_index;
2844
2845 lxc_list_for_each(iterator, network) {
2846 netdev = iterator->elem;
2847
2848 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2849 continue;
2850
9c66dc4f
CB
2851 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2852 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2853
f2711167 2854 if (is_empty_string(netdev->link)) {
9c66dc4f 2855 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2856 }
2857
2858 link_index = if_nametoindex(netdev->link);
2859 if (!link_index)
2860 return -EINVAL;
2861
2862 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2863 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2864 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2865 }
2866
2867 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2868 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2869 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2870 }
2871 }
2872
2873 return 0;
2874}
2875
2876#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2877static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2878 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2879{
2880 int ret;
2881 pid_t child;
2882 int bytes, pipefd[2];
2883 char *token, *saveptr = NULL;
095ead80 2884 char netdev_link[IFNAMSIZ];
419590da 2885 char buffer[PATH_MAX] = {0};
94b1cade 2886 size_t retlen;
811ef482 2887
9c66dc4f
CB
2888 if (netdev->type != LXC_NET_VETH)
2889 return log_error_errno(-1, errno, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
2890
2891 ret = pipe(pipefd);
9c66dc4f
CB
2892 if (ret < 0)
2893 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2894
2895 child = fork();
2896 if (child < 0) {
811ef482
CB
2897 close(pipefd[0]);
2898 close(pipefd[1]);
9c66dc4f 2899 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2900 }
2901
2902 if (child == 0) {
8335fd40 2903 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2904
2905 close(pipefd[0]);
2906
2907 ret = dup2(pipefd[1], STDOUT_FILENO);
2908 if (ret >= 0)
2909 ret = dup2(pipefd[1], STDERR_FILENO);
2910 close(pipefd[1]);
2911 if (ret < 0) {
2912 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2913 _exit(EXIT_FAILURE);
811ef482
CB
2914 }
2915
f2711167 2916 if (!is_empty_string(netdev->link))
9de31d5a 2917 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2918 else
9de31d5a
CB
2919 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2920 if (retlen >= IFNAMSIZ) {
2921 SYSERROR("Invalid network device name");
2922 _exit(EXIT_FAILURE);
2923 }
811ef482 2924
8335fd40
CB
2925 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2926 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2927 _exit(EXIT_FAILURE);
8335fd40 2928 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2929
2930 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2931 lxcname, pidstr, netdev_link,
3473ca76
CB
2932 !is_empty_string(netdev->name) ? netdev->name : "(null)");
2933 if (!is_empty_string(netdev->name))
811ef482
CB
2934 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2935 lxcpath, lxcname, pidstr, "veth", netdev_link,
2936 netdev->name, (char *)NULL);
2937 else
2938 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2939 lxcpath, lxcname, pidstr, "veth", netdev_link,
2940 (char *)NULL);
2941 SYSERROR("Failed to execute lxc-user-nic");
78070056 2942 _exit(EXIT_FAILURE);
811ef482
CB
2943 }
2944
2945 /* close the write-end of the pipe */
2946 close(pipefd[1]);
2947
9c66dc4f 2948 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2949 if (bytes < 0) {
74c6e2b0 2950 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2951 close(pipefd[0]);
6b9f82a9
CB
2952 } else {
2953 buffer[bytes - 1] = '\0';
811ef482 2954 }
811ef482
CB
2955
2956 ret = wait_for_pid(child);
2957 close(pipefd[0]);
9c66dc4f
CB
2958 if (ret != 0 || bytes < 0)
2959 return log_error(-1, "lxc-user-nic failed to configure requested network: %s", buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2960 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2961
2962 /* netdev->name */
2963 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2964 if (!token)
2965 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2966
e389f2af
CB
2967 /*
2968 * lxc-user-nic will take care of proper network device naming. So
2969 * netdev->name and netdev->created_name need to be identical to not
2970 * trigger another rename later on.
2971 */
2972 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2973 if (retlen < IFNAMSIZ)
2974 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
9c66dc4f
CB
2975 if (retlen >= IFNAMSIZ)
2976 return log_error_errno(-1, E2BIG, "Container side veth device name returned by lxc-user-nic is too long");
811ef482 2977
74c6e2b0 2978 /* netdev->ifindex */
811ef482 2979 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2980 if (!token)
2981 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2982
74c6e2b0 2983 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f
CB
2984 if (ret < 0)
2985 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2986
74c6e2b0 2987 /* netdev->priv.veth_attr.veth1 */
811ef482 2988 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2989 if (!token)
2990 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2991
94b1cade 2992 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f
CB
2993 if (retlen >= IFNAMSIZ)
2994 return log_error_errno(-1, E2BIG, "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
2995
2996 /* netdev->priv.veth_attr.ifindex */
2997 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2998 if (!token)
2999 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
3000
3001 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f
CB
3002 if (ret < 0)
3003 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 3004
4d781681 3005 if (netdev->upscript) {
3006 char *argv[] = {
3007 "veth",
3008 netdev->link,
3009 netdev->priv.veth_attr.veth1,
3010 NULL,
3011 };
3012
e389f2af
CB
3013 ret = run_script_argv(lxcname, hooks_version, "net",
3014 netdev->upscript, "up", argv);
4d781681 3015 if (ret < 0)
3016 return -1;
3017 }
3018
811ef482
CB
3019 return 0;
3020}
3021
f0ecc19d 3022static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
3023 struct lxc_netdev *netdev,
3024 const char *netns_path)
811ef482
CB
3025{
3026 int bytes, ret;
3027 pid_t child;
3028 int pipefd[2];
419590da 3029 char buffer[PATH_MAX] = {0};
811ef482 3030
9c66dc4f
CB
3031 if (netdev->type != LXC_NET_VETH)
3032 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
3033
3034 ret = pipe(pipefd);
9c66dc4f
CB
3035 if (ret < 0)
3036 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
3037
3038 child = fork();
3039 if (child < 0) {
811ef482
CB
3040 close(pipefd[0]);
3041 close(pipefd[1]);
9c66dc4f 3042 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
3043 }
3044
3045 if (child == 0) {
8843fde4 3046 char *hostveth;
811ef482
CB
3047
3048 close(pipefd[0]);
3049
3050 ret = dup2(pipefd[1], STDOUT_FILENO);
3051 if (ret >= 0)
3052 ret = dup2(pipefd[1], STDERR_FILENO);
3053 close(pipefd[1]);
3054 if (ret < 0) {
3055 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 3056 _exit(EXIT_FAILURE);
811ef482
CB
3057 }
3058
f2711167 3059 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3060 hostveth = netdev->priv.veth_attr.pair;
3061 else
3062 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3063 if (is_empty_string(hostveth)) {
74c6e2b0 3064 SYSERROR("Host side veth device name is missing");
a30b9023 3065 _exit(EXIT_FAILURE);
74c6e2b0
CB
3066 }
3067
f2711167
CB
3068 if (is_empty_string(netdev->link)) {
3069 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 3070 _exit(EXIT_FAILURE);
74c6e2b0 3071 }
811ef482 3072
811ef482 3073 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 3074 lxcname, netns_path, netdev->link, hostveth);
811ef482 3075 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
3076 lxcname, netns_path, "veth", netdev->link, hostveth,
3077 (char *)NULL);
811ef482 3078 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 3079 _exit(EXIT_FAILURE);
811ef482
CB
3080 }
3081
3082 close(pipefd[1]);
3083
9c66dc4f 3084 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
3085 if (bytes < 0) {
3086 SYSERROR("Failed to read from pipe file descriptor.");
3087 close(pipefd[0]);
6b9f82a9
CB
3088 } else {
3089 buffer[bytes - 1] = '\0';
811ef482 3090 }
811ef482 3091
6b9f82a9 3092 ret = wait_for_pid(child);
9c66dc4f
CB
3093 close_prot_errno_disarm(pipefd[0]);
3094 if (ret != 0 || bytes < 0)
3095 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
3096 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 3097
811ef482
CB
3098 return 0;
3099}
3100
1bd8d726
CB
3101bool lxc_delete_network_unpriv(struct lxc_handler *handler)
3102{
3103 int ret;
3104 struct lxc_list *iterator;
3105 struct lxc_list *network = &handler->conf->network;
3106 /* strlen("/proc/") = 6
3107 * +
8335fd40 3108 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
3109 * +
3110 * strlen("/fd/") = 4
3111 * +
8335fd40 3112 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
3113 * +
3114 * \0
3115 */
8335fd40 3116 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
3117
3118 *netns_path = '\0';
3119
9c66dc4f
CB
3120 if (handler->nsfd[LXC_NS_NET] < 0)
3121 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726
CB
3122
3123 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 3124 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
3125 if (ret < 0 || ret >= sizeof(netns_path))
3126 return false;
3127
3128 lxc_list_for_each(iterator, network) {
3129 char *hostveth = NULL;
3130 struct lxc_netdev *netdev = iterator->elem;
3131
3132 /* We can only delete devices whose ifindex we have. If we don't
3133 * have the index it means that we didn't create it.
3134 */
3135 if (!netdev->ifindex)
3136 continue;
3137
3138 if (netdev->type == LXC_NET_PHYS) {
3139 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3140 netdev->link);
3141 if (ret < 0)
9c66dc4f 3142 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
3143 netdev->ifindex, netdev->link);
3144 else
9c66dc4f 3145 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 3146 netdev->ifindex, netdev->link);
b3259dc6
TP
3147
3148 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3149 if (ret < 0)
3150 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3151 netdev->ifindex, netdev->link);
66a7c406 3152 goto clear_ifindices;
1bd8d726
CB
3153 }
3154
3155 ret = netdev_deconf[netdev->type](handler, netdev);
3156 if (ret < 0)
3157 WARN("Failed to deconfigure network device");
3158
3159 if (netdev->type != LXC_NET_VETH)
66a7c406 3160 goto clear_ifindices;
1bd8d726 3161
f2711167 3162 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 3163 goto clear_ifindices;
1bd8d726 3164
f2711167 3165 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3166 hostveth = netdev->priv.veth_attr.pair;
3167 else
3168 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3169 if (is_empty_string(hostveth))
66a7c406 3170 goto clear_ifindices;
8843fde4 3171
1bd8d726
CB
3172 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3173 handler->name, netdev,
3174 netns_path);
3175 if (ret < 0) {
9c66dc4f 3176 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 3177 goto clear_ifindices;
1bd8d726 3178 }
9c66dc4f 3179 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
3180
3181clear_ifindices:
0858c829
CB
3182 /*
3183 * We need to clear any ifindices we recorded so liblxc won't
3184 * have cached stale data which would cause it to fail on
3185 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3186 */
3187 netdev->ifindex = 0;
3188 if (netdev->type == LXC_NET_PHYS) {
3189 netdev->priv.phys_attr.ifindex = 0;
3190 } else if (netdev->type == LXC_NET_VETH) {
3191 netdev->priv.veth_attr.veth1[0] = '\0';
3192 netdev->priv.veth_attr.ifindex = 0;
3193 }
1bd8d726
CB
3194 }
3195
bb84beda 3196 return true;
1bd8d726
CB
3197}
3198
6509154d 3199static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
3200 struct lxc_list *cur, *next;
3201 struct lxc_inetdev *inet4dev;
3202 struct lxc_inet6dev *inet6dev;
3203 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3204 int err = 0;
5fe147e9
TP
3205 unsigned int lo_ifindex = 0, link_ifindex = 0;
3206
3207 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
3208 if (link_ifindex == 0)
3209 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 3210
6509154d 3211
3212 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3213 if (!lxc_list_empty(&netdev->ipv4)) {
3214 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
3215 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3216 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 3217 }
3218
3219 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3220 if (!lxc_list_empty(&netdev->ipv6)) {
3221 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
3222 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3223 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 3224
3225 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
3226 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3227 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 3228 }
3229
b670016a 3230 /* Perform IPVLAN specific checks. */
3231 if (netdev->type == LXC_NET_IPVLAN) {
3232 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
3233 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3234 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 3235
3236 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3237 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
3238 if (lo_ifindex == 0)
3239 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 3240 }
3241
6509154d 3242 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3243 inet4dev = cur->elem;
3244 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3245 return ret_set_errno(-1, -errno);
6509154d 3246
5fe147e9 3247 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3248 return ret_set_errno(-1, EINVAL);
b670016a 3249
3250 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3251 if (netdev->type == LXC_NET_IPVLAN) {
3252 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
3253 if (err < 0)
3254 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 3255 }
6509154d 3256 }
3257
3258 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3259 inet6dev = cur->elem;
3260 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3261 return ret_set_errno(-1, -errno);
6509154d 3262
5fe147e9 3263 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3264 return ret_set_errno(-1, EINVAL);
b670016a 3265
3266 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3267 if (netdev->type == LXC_NET_IPVLAN) {
3268 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
3269 if (err < 0)
3270 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3271 }
6509154d 3272 }
3273
3274 return 0;
3275}
3276
9c66dc4f
CB
/*
 * Remove the l2proxy state for a single IPv4 address.
 *
 * @ip:         address to clean up.
 * @link:       if non-empty, the neighbour proxy entry for @ip is removed
 *              from this interface.
 * @lo_ifindex: if greater than zero, the /32 route to @ip on the interface
 *              with this index is removed.
 *
 * Returns 0 on success, -1 with errno set to EINVAL if any step failed.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addr_str[INET_ADDRSTRLEN];
	unsigned int ifindex = 0;
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addr_str, sizeof(addr_str)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* Drop the static route to the loopback device if one was requested. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addr_str, lo_ifindex);
		failed = true;
	}

	/* Drop the neighbour proxy entry on the link if one was given. */
	if (!is_empty_string(link)) {
		ifindex = if_nametoindex(link);
		if (ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3309
9c66dc4f
CB
/*
 * Remove the l2proxy state for a single IPv6 address.
 *
 * @ip:         address to clean up.
 * @link:       if non-empty, the neighbour proxy entry for @ip is removed
 *              from this interface.
 * @lo_ifindex: if greater than zero, the /128 route to @ip on the interface
 *              with this index is removed.
 *
 * Returns 0 on success, -1 with errno set to EINVAL if any step failed.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addr_str[INET6_ADDRSTRLEN];
	unsigned int ifindex = 0;
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addr_str, sizeof(addr_str)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* Drop the static route to the loopback device if one was requested. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addr_str, lo_ifindex);
		failed = true;
	}

	/* Drop the neighbour proxy entry on the link if one was given. */
	if (!is_empty_string(link)) {
		ifindex = if_nametoindex(link);
		if (ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3344
6509154d 3345static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3346 unsigned int lo_ifindex = 0;
3347 unsigned int errCount = 0;
6509154d 3348 struct lxc_list *cur, *next;
3349 struct lxc_inetdev *inet4dev;
3350 struct lxc_inet6dev *inet6dev;
6509154d 3351
b670016a 3352 /* Perform IPVLAN specific checks. */
3353 if (netdev->type == LXC_NET_IPVLAN) {
3354 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3355 lo_ifindex = if_nametoindex(loop_device);
b670016a 3356 if (lo_ifindex == 0) {
3357 errCount++;
3ebffb98 3358 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3359 }
b670016a 3360 }
6509154d 3361
b670016a 3362 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3363 inet4dev = cur->elem;
3364 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3365 errCount++;
6509154d 3366 }
3367
3368 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3369 inet6dev = cur->elem;
b670016a 3370 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3371 errCount++;
6509154d 3372 }
3373
b670016a 3374 if (errCount > 0)
596a002c 3375 return ret_set_errno(-1, EINVAL);
6509154d 3376
3377 return 0;
3378}
3379
e389f2af 3380static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3381{
811ef482
CB
3382 struct lxc_list *iterator;
3383 struct lxc_list *network = &handler->conf->network;
3384
811ef482
CB
3385 lxc_list_for_each(iterator, network) {
3386 struct lxc_netdev *netdev = iterator->elem;
3387
9c66dc4f
CB
3388 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3389 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3390
6509154d 3391 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3392 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3393 if (lxc_setup_l2proxy(netdev))
3394 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3395 }
3396
9c66dc4f
CB
3397 if (netdev_conf[netdev->type](handler, netdev))
3398 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3399 }
3400
3401 return 0;
3402}
3403
e389f2af 3404int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3405{
e389f2af
CB
3406 pid_t pid = handler->pid;
3407 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3408 struct lxc_list *iterator;
3409
e0010464 3410 if (am_guest_unpriv())
74c6e2b0 3411 return 0;
811ef482
CB
3412
3413 lxc_list_for_each(iterator, network) {
3dd78294 3414 __do_free char *physname = NULL;
e389f2af 3415 int ret;
811ef482
CB
3416 struct lxc_netdev *netdev = iterator->elem;
3417
811ef482
CB
3418 if (!netdev->ifindex)
3419 continue;
3420
3dd78294
CB
3421 if (netdev->type == LXC_NET_PHYS)
3422 physname = is_wlan(netdev->link);
3423
3424 if (physname)
9f8cf6e1 3425 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, NULL);
3dd78294 3426 else
9f8cf6e1 3427 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
9c66dc4f
CB
3428 if (ret)
3429 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3430 netdev->created_name,
3431 netdev->ifindex, pid);
811ef482 3432
24190194
CB
3433 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3434 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3435 }
3436
3437 return 0;
3438}
3439
3c09b97c
CB
3440static int network_requires_advanced_setup(int type)
3441{
3442 if (type == LXC_NET_EMPTY)
3443 return false;
3444
3445 if (type == LXC_NET_NONE)
3446 return false;
3447
3448 return true;
3449}
3450
e389f2af 3451static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3452{
e389f2af
CB
3453 int hooks_version = handler->conf->hooks_version;
3454 const char *lxcname = handler->name;
3455 const char *lxcpath = handler->lxcpath;
3456 struct lxc_list *network = &handler->conf->network;
3457 pid_t pid = handler->pid;
74c6e2b0
CB
3458 struct lxc_list *iterator;
3459
74c6e2b0
CB
3460 lxc_list_for_each(iterator, network) {
3461 struct lxc_netdev *netdev = iterator->elem;
3462
3c09b97c 3463 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3464 continue;
3465
9c66dc4f
CB
3466 if (netdev->type != LXC_NET_VETH)
3467 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3468 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3469
3470 if (netdev->mtu)
3471 INFO("mtu ignored due to insufficient privilege");
3472
e389f2af
CB
3473 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3474 pid, hooks_version))
74c6e2b0
CB
3475 return -1;
3476 }
3477
3478 return 0;
3479}
3480
1bd8d726 3481bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3482{
3483 int ret;
3484 struct lxc_list *iterator;
3485 struct lxc_list *network = &handler->conf->network;
1bd8d726 3486
811ef482
CB
3487 lxc_list_for_each(iterator, network) {
3488 char *hostveth = NULL;
3489 struct lxc_netdev *netdev = iterator->elem;
3490
3491 /* We can only delete devices whose ifindex we have. If we don't
3492 * have the index it means that we didn't create it.
3493 */
3494 if (!netdev->ifindex)
3495 continue;
3496
0104c121
CB
3497 /*
3498 * If the network device has been moved back from the
3499 * containers network namespace, update the ifindex.
3500 */
3501 netdev->ifindex = if_nametoindex(netdev->name);
3502
6509154d 3503 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3504 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3505 if (lxc_delete_l2proxy(netdev))
3506 WARN("Failed to delete all l2proxy config");
3507 /* Don't return, let the network be cleaned up as normal. */
3508 }
3509
811ef482
CB
3510 if (netdev->type == LXC_NET_PHYS) {
3511 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3512 if (ret < 0)
3513 WARN("Failed to rename interface with index %d "
b809f232
CB
3514 "from \"%s\" to its initial name \"%s\"",
3515 netdev->ifindex, netdev->name, netdev->link);
0b154989 3516 else {
29589196
CB
3517 TRACE("Renamed interface with index %d from "
3518 "\"%s\" to its initial name \"%s\"",
3519 netdev->ifindex, netdev->name,
3520 netdev->link);
0b154989
TP
3521
3522 /* Restore original MTU */
3523 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3524 if (ret < 0) {
3525 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3526 netdev->link, netdev->priv.phys_attr.mtu);
3527 } else {
3528 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3529 netdev->link, netdev->priv.phys_attr.mtu);
3530 }
3531 }
b3259dc6
TP
3532
3533 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3534 if (ret < 0)
3535 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3536 netdev->ifindex, netdev->link);
66a7c406 3537 goto clear_ifindices;
811ef482
CB
3538 }
3539
3540 ret = netdev_deconf[netdev->type](handler, netdev);
3541 if (ret < 0)
3542 WARN("Failed to deconfigure network device");
3543
811ef482 3544 if (netdev->type != LXC_NET_VETH)
66a7c406 3545 goto clear_ifindices;
811ef482 3546
811ef482
CB
3547 /* Explicitly delete host veth device to prevent lingering
3548 * devices. We had issues in LXD around this.
3549 */
f2711167 3550 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3551 hostveth = netdev->priv.veth_attr.pair;
3552 else
3553 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3554 if (is_empty_string(hostveth))
66a7c406 3555 goto clear_ifindices;
811ef482 3556
1ee56cff
CB
3557 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3558 ret = lxc_netdev_delete_by_name(hostveth);
3559 if (ret < 0)
3560 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3561
1ee56cff
CB
3562 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3563 } else if (!is_empty_string(netdev->link)) {
3564 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3565 if (ret < 0)
3566 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3567
1ee56cff
CB
3568 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3569 }
811ef482 3570
66a7c406 3571clear_ifindices:
ad2ddfcd 3572 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3573 * have cached stale data which would cause it to fail on reboot
3574 * we're we don't re-read the on-disk config file.
3575 */
3576 netdev->ifindex = 0;
3577 if (netdev->type == LXC_NET_PHYS) {
3578 netdev->priv.phys_attr.ifindex = 0;
3579 } else if (netdev->type == LXC_NET_VETH) {
3580 netdev->priv.veth_attr.veth1[0] = '\0';
3581 netdev->priv.veth_attr.ifindex = 0;
3582 }
811ef482
CB
3583 }
3584
bb84beda 3585 return true;
811ef482
CB
3586}
3587
3588int lxc_requests_empty_network(struct lxc_handler *handler)
3589{
3590 struct lxc_list *network = &handler->conf->network;
3591 struct lxc_list *iterator;
3592 bool found_none = false, found_nic = false;
3593
3594 if (lxc_list_empty(network))
3595 return 0;
3596
9c66dc4f 3597 lxc_list_for_each (iterator, network) {
811ef482
CB
3598 struct lxc_netdev *netdev = iterator->elem;
3599
3600 if (netdev->type == LXC_NET_NONE)
3601 found_none = true;
3602 else
3603 found_nic = true;
3604 }
9c66dc4f 3605
811ef482
CB
3606 if (found_none && !found_nic)
3607 return 1;
9c66dc4f 3608
811ef482
CB
3609 return 0;
3610}
3611
3612/* try to move physical nics to the init netns */
b809f232 3613int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3614{
9c66dc4f
CB
3615 __do_close int oldfd = -EBADF;
3616 int netnsfd = handler->nsfd[LXC_NS_NET];
3617 struct lxc_conf *conf = handler->conf;
811ef482 3618 int ret;
811ef482 3619 char ifname[IFNAMSIZ];
b809f232 3620 struct lxc_list *iterator;
811ef482 3621
b809f232
CB
3622 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3623 * the parent network namespace. We won't have this capability if we are
3624 * unprivileged.
3625 */
d0fbc7ba 3626 if (!handler->am_root)
b809f232 3627 return 0;
811ef482 3628
b809f232 3629 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3630
0037ab49 3631 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3632 if (oldfd < 0)
3633 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3634
b809f232 3635 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3636 if (ret < 0)
3637 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3638
b809f232
CB
3639 lxc_list_for_each(iterator, &conf->network) {
3640 struct lxc_netdev *netdev = iterator->elem;
811ef482 3641
b809f232
CB
3642 if (netdev->type != LXC_NET_PHYS)
3643 continue;
3644
3645 /* Retrieve the name of the interface in the container's network
3646 * namespace.
3647 */
3648 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3649 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3650 continue;
3651 }
b809f232 3652
0037ab49 3653 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3654 if (ret < 0)
9c66dc4f 3655 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3656 else
9c66dc4f 3657 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3658 }
811ef482 3659
b809f232 3660 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3661 if (ret < 0)
3662 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3663
3664 return 0;
811ef482
CB
3665}
3666
3667static int setup_hw_addr(char *hwaddr, const char *ifname)
3668{
9c66dc4f 3669 __do_close int fd = -EBADF;
811ef482
CB
3670 struct sockaddr sockaddr;
3671 struct ifreq ifr;
9c66dc4f 3672 int ret;
811ef482
CB
3673
3674 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3675 if (ret)
3676 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3677
3678 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3679 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3680 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3681
ad9429e5 3682 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3683 if (fd < 0)
3684 return -1;
3685
3686 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3687 if (ret)
6d1400b5 3688 SYSERROR("Failed to perform ioctl");
3689
9c66dc4f 3690 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3691
3692 return ret;
3693}
3694
3695static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3696{
3697 struct lxc_list *iterator;
3698 int err;
3699
3700 lxc_list_for_each(iterator, ip) {
3701 struct lxc_inetdev *inetdev = iterator->elem;
3702
3703 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3704 &inetdev->bcast, inetdev->prefix);
9c66dc4f
CB
3705 if (err)
3706 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3707 }
3708
3709 return 0;
3710}
3711
3712static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3713{
3714 struct lxc_list *iterator;
3715 int err;
3716
3717 lxc_list_for_each(iterator, ip) {
3718 struct lxc_inet6dev *inet6dev = iterator->elem;
3719
3720 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3721 &inet6dev->mcast, &inet6dev->acast,
3722 inet6dev->prefix);
9c66dc4f
CB
3723 if (err)
3724 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3725 }
3726
3727 return 0;
3728}
3729
8bf64b77 3730static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3731{
811ef482 3732 int err;
009d6127 3733 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3734
3735 /* empty network namespace */
8bf64b77
CB
3736 if (!netdev->ifindex && netdev->flags & IFF_UP) {
3737 err = lxc_netdev_up("lo");
9c66dc4f
CB
3738 if (err)
3739 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3740 }
3741
811ef482 3742 /* set a mac address */
9c66dc4f
CB
3743 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3744 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3745
3746 /* setup ipv4 addresses on the interface */
9c66dc4f
CB
3747 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex))
3748 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3749
3750 /* setup ipv6 addresses on the interface */
9c66dc4f
CB
3751 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex))
3752 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3753
3754 /* set the network device up */
3755 if (netdev->flags & IFF_UP) {
8bf64b77 3756 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3757 if (err)
3758 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3759
3760 /* the network is up, make the loopback up too */
3761 err = lxc_netdev_up("lo");
9c66dc4f
CB
3762 if (err)
3763 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3764 }
3765
811ef482 3766 /* setup ipv4 gateway on the interface */
a2f9a670 3767 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3768 if (!(netdev->flags & IFF_UP))
3769 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3770
9c66dc4f
CB
3771 if (lxc_list_empty(&netdev->ipv4))
3772 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3773
a2f9a670 3774 /* Setup device route if ipv4_gateway_dev is enabled */
3775 if (netdev->ipv4_gateway_dev) {
3776 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3777 if (err < 0)
3778 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3779 } else {
009d6127 3780 /* Check the gateway address is valid */
3781 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3782 return ret_set_errno(-1, errno);
009d6127 3783
3784 /* Try adding a default route to the gateway address */
811ef482 3785 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3786 if (err < 0) {
3787 /* If adding the default route fails, this could be because the
3788 * gateway address is in a different subnet to the container's address.
3789 * To work around this, we try adding a static device route to the
3790 * gateway address first, and then try again.
3791 */
a2f9a670 3792 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3793 if (err < 0)
3794 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3795
a2f9a670 3796 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3797 if (err < 0)
3798 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3799 }
3800 }
3801 }
3802
3803 /* setup ipv6 gateway on the interface */
a2f9a670 3804 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3805 if (!(netdev->flags & IFF_UP))
3806 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3807
9c66dc4f
CB
3808 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
3809 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3810
a2f9a670 3811 /* Setup device route if ipv6_gateway_dev is enabled */
3812 if (netdev->ipv6_gateway_dev) {
3813 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3814 if (err < 0)
3815 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3816 } else {
009d6127 3817 /* Check the gateway address is valid */
3818 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3819 return ret_set_errno(-1, errno);
009d6127 3820
3821 /* Try adding a default route to the gateway address */
811ef482 3822 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3823 if (err < 0) {
3824 /* If adding the default route fails, this could be because the
3825 * gateway address is in a different subnet to the container's address.
3826 * To work around this, we try adding a static device route to the
3827 * gateway address first, and then try again.
3828 */
a2f9a670 3829 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3830 if (err < 0)
3831 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3832
a2f9a670 3833 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3834 if (err < 0)
3835 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3836 }
3837 }
3838 }
3839
8bf64b77 3840 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3841
3842 return 0;
3843}
3844
3845int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3846 struct lxc_list *network)
3847{
3848 struct lxc_list *iterator;
811ef482 3849
8bf64b77 3850 lxc_list_for_each (iterator, network) {
e389f2af 3851 struct lxc_netdev *netdev = iterator->elem;
8bf64b77 3852 int ret;
811ef482 3853
8bf64b77
CB
3854 ret = netdev_ns_conf[netdev->type](netdev);
3855 if (!ret)
3856 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
3857 if (ret)
3858 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482
CB
3859 }
3860
3861 if (!lxc_list_empty(network))
e389f2af 3862 INFO("Network has been setup");
811ef482
CB
3863
3864 return 0;
3865}
7ab1ba02 3866
3c09b97c 3867int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
3868{
3869 struct lxc_list *iterator;
3870 struct lxc_list *network = &handler->conf->network;
3871 int data_sock = handler->data_sock[0];
3872
7ab1ba02
CB
3873 lxc_list_for_each(iterator, network) {
3874 int ret;
3875 struct lxc_netdev *netdev = iterator->elem;
3876
3c09b97c 3877 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3878 continue;
3879
7fbb15ec 3880 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3881 if (ret < 0)
7ab1ba02 3882 return -1;
e389f2af
CB
3883
3884 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3885 if (ret < 0)
3886 return -1;
3887
3888 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3889 }
3890
3891 return 0;
3892}
3893
3c09b97c 3894int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
3895{
3896 struct lxc_list *iterator;
3897 struct lxc_list *network = &handler->conf->network;
3898 int data_sock = handler->data_sock[1];
3899
7ab1ba02
CB
3900 lxc_list_for_each(iterator, network) {
3901 int ret;
3902 struct lxc_netdev *netdev = iterator->elem;
3903
3c09b97c 3904 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3905 continue;
3906
e3233f26 3907 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3908 if (ret < 0)
7ab1ba02 3909 return -1;
e389f2af
CB
3910
3911 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3912 if (ret < 0)
3913 return -1;
54256301 3914
e389f2af 3915 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3916 }
3917
3918 return 0;
3919}
a1ae535a
CB
3920
3921int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3922{
3923 struct lxc_list *iterator, *network;
3924 int data_sock = handler->data_sock[0];
3925
3926 if (!handler->am_root)
3927 return 0;
3928
3929 network = &handler->conf->network;
3930 lxc_list_for_each(iterator, network) {
3931 int ret;
3932 struct lxc_netdev *netdev = iterator->elem;
3933
3934 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3935 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3936 if (ret < 0)
7729f8e5 3937 return -1;
a1ae535a
CB
3938
3939 /* Send network device ifindex in the child's namespace to
3940 * parent.
3941 */
7fbb15ec 3942 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3943 if (ret < 0)
7729f8e5 3944 return -1;
a1ae535a
CB
3945 }
3946
e389f2af
CB
3947 if (!lxc_list_empty(network))
3948 TRACE("Sent network device names and ifindices to parent");
3949
a1ae535a 3950 return 0;
a1ae535a
CB
3951}
3952
3953int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3954{
3955 struct lxc_list *iterator, *network;
3956 int data_sock = handler->data_sock[1];
3957
3958 if (!handler->am_root)
3959 return 0;
3960
3961 network = &handler->conf->network;
3962 lxc_list_for_each(iterator, network) {
3963 int ret;
3964 struct lxc_netdev *netdev = iterator->elem;
3965
3966 /* Receive network device name in the child's namespace to
3967 * parent.
3968 */
e3233f26 3969 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3970 if (ret < 0)
7729f8e5 3971 return -1;
a1ae535a
CB
3972
3973 /* Receive network device ifindex in the child's namespace to
3974 * parent.
3975 */
e3233f26 3976 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3977 if (ret < 0)
7729f8e5 3978 return -1;
a1ae535a
CB
3979 }
3980
3981 return 0;
a1ae535a 3982}
bb84beda
CB
3983
3984void lxc_delete_network(struct lxc_handler *handler)
3985{
3986 bool bret;
3987
3988 if (handler->am_root)
3989 bret = lxc_delete_network_priv(handler);
3990 else
3991 bret = lxc_delete_network_unpriv(handler);
3992 if (!bret)
3993 DEBUG("Failed to delete network devices");
3994 else
3995 DEBUG("Deleted network devices");
3996}
1cd95214 3997
1cd95214
CB
3998int lxc_netns_set_nsid(int fd)
3999{
41a3300d 4000 int ret;
0ce60f0d
CB
4001 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
4002 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4003 NLMSG_ALIGN(1024)];
1cd95214 4004 struct nl_handler nlh;
a5f5cb41 4005 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
0ce60f0d
CB
4006 struct nlmsghdr *hdr;
4007 struct rtgenmsg *msg;
9d036caa
CB
4008 const __s32 ns_id = -1;
4009 const __u32 netns_fd = fd;
1cd95214 4010
a5f5cb41 4011 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
1cd95214 4012 if (ret < 0)
41a3300d 4013 return -1;
1cd95214 4014
0ce60f0d 4015 memset(buf, 0, sizeof(buf));
6ce39620
CB
4016
4017#pragma GCC diagnostic push
4018#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
4019 hdr = (struct nlmsghdr *)buf;
4020 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4021#pragma GCC diagnostic pop
1cd95214 4022
0ce60f0d
CB
4023 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4024 hdr->nlmsg_type = RTM_NEWNSID;
4025 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4026 hdr->nlmsg_pid = 0;
4027 hdr->nlmsg_seq = RTM_NEWNSID;
4028 msg->rtgen_family = AF_UNSPEC;
1cd95214 4029
9d036caa
CB
4030 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4031 if (ret < 0)
a5f5cb41 4032 return ret_errno(ENOMEM);
9d036caa
CB
4033
4034 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4035 if (ret < 0)
a5f5cb41 4036 return ret_errno(ENOMEM);
1cd95214 4037
a5f5cb41 4038 return __netlink_transaction(nlh_ptr, hdr, hdr);
1cd95214 4039}
938980ba
CB
4040
/*
 * Index the routing attributes found in the @len bytes starting at @rta.
 *
 * @tb must have room for @max + 1 entries; it is cleared first. Afterwards
 * tb[type] points at the first attribute of that type, or is NULL if none was
 * seen. Attributes whose type is greater than @max are ignored, as are
 * repeated occurrences of a type.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	/* RTA_NEXT() also reduces len by the size of the attribute it steps over. */
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short type = rta->rta_type;

		if (type > max || tb[type])
			continue;

		tb[type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
4060
/* Read the payload of @rta as a signed 32-bit integer. */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	const __s32 *payload = RTA_DATA(rta);

	return *payload;
}
4065
/*
 * Given a pointer @r to a struct rtgenmsg, yield a pointer to the first
 * routing attribute, which starts NLMSG_ALIGN(sizeof(struct rtgenmsg)) bytes
 * after it. Only defined if no included header already provides it.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
4070
4071int lxc_netns_get_nsid(int fd)
4072{
a5f5cb41
CB
4073 struct nl_handler nlh;
4074 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
938980ba
CB
4075 int ret;
4076 ssize_t len;
4077 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
4078 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4079 NLMSG_ALIGN(1024)];
938980ba 4080 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
938980ba
CB
4081 struct nlmsghdr *hdr;
4082 struct rtgenmsg *msg;
938980ba
CB
4083 __u32 netns_fd = fd;
4084
a5f5cb41 4085 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
938980ba
CB
4086 if (ret < 0)
4087 return -1;
4088
4089 memset(buf, 0, sizeof(buf));
6ce39620
CB
4090
4091#pragma GCC diagnostic push
4092#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4093 hdr = (struct nlmsghdr *)buf;
4094 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4095#pragma GCC diagnostic pop
938980ba
CB
4096
4097 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4098 hdr->nlmsg_type = RTM_GETNSID;
4099 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4100 hdr->nlmsg_pid = 0;
4101 hdr->nlmsg_seq = RTM_GETNSID;
4102 msg->rtgen_family = AF_UNSPEC;
4103
9d036caa 4104 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
a5f5cb41
CB
4105 if (ret < 0)
4106 return ret_errno(ENOMEM);
938980ba 4107
a5f5cb41 4108 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
938980ba
CB
4109 if (ret < 0)
4110 return -1;
4111
4112 msg = NLMSG_DATA(hdr);
4113 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4114 if (len < 0)
a5f5cb41 4115 return ret_errno(EINVAL);
938980ba 4116
6ce39620
CB
4117#pragma GCC diagnostic push
4118#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4119 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4120 if (tb[__LXC_NETNSA_NSID])
4121 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4122#pragma GCC diagnostic pop
938980ba
CB
4123
4124 return -1;
4125}
e389f2af
CB
4126
4127int lxc_create_network(struct lxc_handler *handler)
4128{
4129 int ret;
4130
e389f2af
CB
4131 if (handler->am_root) {
4132 ret = lxc_create_network_priv(handler);
4133 if (ret)
4134 return -1;
4135
4136 return lxc_network_move_created_netdev_priv(handler);
4137 }
4138
4139 return lxc_create_network_unpriv(handler);
4140}