]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: Adds OVS VLAN setup functions
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
f40988c7 39#include "process_utils.h"
59524108 40#include "syscall_wrappers.h"
0d204771 41#include "utils.h"
0ad19a3f 42
9de31d5a
CB
43#ifndef HAVE_STRLCPY
44#include "include/strlcpy.h"
45#endif
46
ac2cecc4 47lxc_log_define(network, lxc);
f8fee0e2 48
811ef482 49typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
8bf64b77 50typedef int (*instantiate_ns_cb)(struct lxc_netdev *);
3ebffb98 51static const char loop_device[] = "lo";
811ef482 52
b670016a 53static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 54{
d16bda44 55 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
8f82874c 56 struct nl_handler nlh;
d16bda44
CB
57 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
58 int addrlen, err;
8f82874c 59 struct rtmsg *rt;
8f82874c 60
61 addrlen = family == AF_INET ? sizeof(struct in_addr)
62 : sizeof(struct in6_addr);
63
d16bda44 64 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 65 if (err)
66 return err;
67
8f82874c 68 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
69 if (!nlmsg)
d16bda44 70 return -ENOMEM;
8f82874c 71
72 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
73 if (!answer)
a5f5cb41 74 return -ENOMEM;
8f82874c 75
76 nlmsg->nlmsghdr->nlmsg_flags =
77 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 78 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 79
80 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
81 if (!rt)
a5f5cb41 82 return -ENOMEM;
d16bda44 83
8f82874c 84 rt->rtm_family = family;
85 rt->rtm_table = RT_TABLE_MAIN;
86 rt->rtm_scope = RT_SCOPE_LINK;
87 rt->rtm_protocol = RTPROT_BOOT;
88 rt->rtm_type = RTN_UNICAST;
89 rt->rtm_dst_len = netmask;
90
8f82874c 91 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
92 return -EINVAL;
93
8f82874c 94 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
95 return -EINVAL;
96
97 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 98}
99
/* Add an IPv4 route to @dest/@netmask via the device with index @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest,
			     unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_NEWROUTE, AF_INET, ifindex, dest, netmask);
}

/* Add an IPv6 route to @dest/@netmask via the device with index @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest,
			     unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_NEWROUTE, AF_INET6, ifindex, dest, netmask);
}

/* Delete the IPv4 route to @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest,
			     unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_DELROUTE, AF_INET, ifindex, dest, netmask);
}

/* Delete the IPv6 route to @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest,
			     unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_DELROUTE, AF_INET6, ifindex, dest, netmask);
}
119
d4a7da46 120static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
121{
122 struct lxc_list *iterator;
123 int err;
124
125 lxc_list_for_each(iterator, ip) {
126 struct lxc_inetdev *inetdev = iterator->elem;
127
128 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
129 if (err)
130 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 131 }
132
133 return 0;
134}
135
136static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
137{
138 struct lxc_list *iterator;
139 int err;
140
141 lxc_list_for_each(iterator, ip) {
142 struct lxc_inet6dev *inet6dev = iterator->elem;
143
144 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
145 if (err)
146 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 147 }
148
149 return 0;
150}
151
6dfa9581
TP
152static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
153{
154 struct lxc_list *iterator;
155 int err;
156
157 lxc_list_for_each(iterator, ip) {
158 struct lxc_inetdev *inetdev = iterator->elem;
159
160 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
161
162 if (err)
9c66dc4f 163 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
164 }
165
166 return 0;
167}
168
169static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
170{
171 struct lxc_list *iterator;
172 int err;
173
174 lxc_list_for_each(iterator, ip) {
175 struct lxc_inet6dev *inet6dev = iterator->elem;
176
177 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
178 if (err)
9c66dc4f 179 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
180 }
181
182 return 0;
183}
184
/*
 * Argument bundle holding an IP address string and a device name.
 * NOTE(review): no user of this struct is visible in this part of the file;
 * presumably it is handed to an exec-style callback - confirm against callers.
 */
struct ip_proxy_args {
	const char *ip;  /* address, as a string */
	const char *dev; /* network device name */
};
189
5fe147e9 190static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 191{
d16bda44 192 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
5fe147e9 193 struct nl_handler nlh;
d16bda44
CB
194 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
195 int addrlen, err;
5fe147e9 196 struct ndmsg *rt;
6dfa9581 197
5fe147e9 198 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 199
d16bda44 200 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
201 if (err)
202 return err;
6dfa9581 203
5fe147e9
TP
204 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
205 if (!nlmsg)
d16bda44 206 return -ENOMEM;
6dfa9581 207
5fe147e9
TP
208 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
209 if (!answer)
d16bda44 210 return -ENOMEM;
6dfa9581 211
5fe147e9
TP
212 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
213 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 214
5fe147e9
TP
215 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
216 if (!rt)
d16bda44
CB
217 return -ENOMEM;
218
5fe147e9
TP
219 rt->ndm_ifindex = ifindex;
220 rt->ndm_flags = NTF_PROXY;
221 rt->ndm_type = NDA_DST;
222 rt->ndm_family = family;
6dfa9581 223
5fe147e9 224 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 225 return -EINVAL;
6dfa9581 226
d16bda44 227 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
228}
229
230static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
231{
232 int ret;
233 char path[PATH_MAX];
234 char buf[1] = "";
235
236 if (family != AF_INET && family != AF_INET6)
596a002c 237 return ret_set_errno(-1, EINVAL);
6dfa9581 238
9c66dc4f 239 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581
TP
240 family == AF_INET ? "ipv4" : "ipv6", ifname,
241 "forwarding");
9c66dc4f 242 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 243 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
244
245 return lxc_read_file_expect(path, buf, 1, "1");
246}
247
/*
 * Payload sent as the IFLA_BRIDGE_VLAN_INFO attribute by lxc_bridge_vlan():
 * a set of BRIDGE_VLAN_INFO_* flags followed by the VLAN ID. Field order is
 * part of the wire layout and must not change.
 */
struct bridge_vlan_info {
	__u16 flags; /* BRIDGE_VLAN_INFO_* bits, 0 for a plain tagged VLAN */
	__u16 vid;   /* VLAN ID */
};
252
253static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
254{
255 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
256 struct nl_handler nlh;
257 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
258 int err;
259 struct ifinfomsg *ifi;
260 struct rtattr *nest;
261 unsigned short bridge_flags = 0;
262 struct bridge_vlan_info vlan_info;
263
264 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
265 if (err)
266 return err;
267
268 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
269 if (!nlmsg)
270 return ret_errno(ENOMEM);
271
272 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
273 if (!answer)
274 return ret_errno(ENOMEM);
275
276 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
277 nlmsg->nlmsghdr->nlmsg_type = operation;
278
279 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
280 if (!ifi)
281 return ret_errno(ENOMEM);
282 ifi->ifi_family = AF_BRIDGE;
283 ifi->ifi_index = ifindex;
284
285 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
286 if (!nest)
287 return ret_errno(ENOMEM);
288
289 bridge_flags |= BRIDGE_FLAGS_MASTER;
290 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
291 return ret_errno(ENOMEM);
292
293 vlan_info.vid = vlan_id;
294 vlan_info.flags = 0;
295 if (!tagged)
296 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
297
298 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
299 return ret_errno(ENOMEM);
300
301 nla_end_nested(nlmsg, nest);
302
303 return netlink_transaction(nlh_ptr, nlmsg, answer);
304}
305
/* Add VLAN @vlan_id (tagged or untagged/PVID) to the bridge port @ifindex. */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id,
			       bool tagged)
{
	return lxc_bridge_vlan(ifindex, RTM_SETLINK, vlan_id, tagged);
}

/* Remove VLAN @vlan_id from the bridge port @ifindex. */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	return lxc_bridge_vlan(ifindex, RTM_DELLINK, vlan_id, false);
}
315
316static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
317{
318 struct lxc_list *iterator;
319 int err;
320
321 lxc_list_for_each(iterator, vlan_ids) {
322 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
323
324 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
325 if (err)
326 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
327 }
328
329 return 0;
330}
331
33320936
TP
332static int validate_veth(struct lxc_netdev *netdev)
333{
334 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
335 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
336 if (netdev->priv.veth_attr.vlan_id_set)
337 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
338
339 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
340 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
341 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
342 }
343
344 if (netdev->priv.veth_attr.vlan_id_set) {
345 struct lxc_list *it;
346 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
347 unsigned short i = PTR_TO_USHORT(it->elem);
348 if (i == netdev->priv.veth_attr.vlan_id)
349 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
350 }
351 }
352
353 return 0;
354}
355
356static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
357{
358 int err, rc, veth1index;
359 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
360 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
361
362 /* Skip setup if no VLAN options are specified. */
363 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
364 return 0;
365
366 /* Check vlan filtering is enabled on parent bridge. */
367 rc = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
368 if (rc < 0 || (size_t)rc >= sizeof(path))
369 return -1;
370
371 rc = lxc_read_from_file(path, buf, sizeof(buf));
372 if (rc < 0)
373 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
374
375 buf[rc - 1] = '\0';
376
377 if (strcmp(buf, "1") != 0)
378 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
379
380 /* Get veth1 ifindex for use with netlink. */
381 veth1index = if_nametoindex(veth1);
382 if (!veth1index)
383 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
384
385 /* Configure untagged VLAN settings on bridge port if specified. */
386 if (netdev->priv.veth_attr.vlan_id_set) {
387 unsigned short default_pvid;
388
389 /* Get the bridge's default VLAN PVID. */
390 rc = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
391 if (rc < 0 || (size_t)rc >= sizeof(path))
392 return -1;
393
394 rc = lxc_read_from_file(path, buf, sizeof(buf));
395 if (rc < 0)
396 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
397
398 buf[rc - 1] = '\0';
399 err = get_u16(&default_pvid, buf, 0);
400 if (err)
401 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
402
403 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
404 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
405 err = lxc_bridge_vlan_del(veth1index, default_pvid);
406 if (err)
407 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
408 }
409
410 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
411 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
412 if (err)
413 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
414 }
415 }
416
417 /* Configure tagged VLAN settings on bridge port if specified. */
418 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
419 if (err)
420 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
421
422 return 0;
423}
424
/*
 * Arguments handed to lxc_ovs_setup_bridge_vlan_exec() to configure the VLAN
 * settings of one openvswitch port.
 */
struct ovs_veth_vlan_args {
	const char *nic;       /* Name of the port to configure. */
	const char *vlan_mode; /* Port VLAN mode. */
	short vlan_id;         /* PVID VLAN ID; negative when no tag is to be set. */
	const char *trunks;    /* Comma delimited list of tagged VLAN IDs; "" for none. */
};
431
432
433static int lxc_ovs_setup_bridge_vlan_exec(void *data)
434{
435 struct ovs_veth_vlan_args *args = data;
436 const char *vlan_mode = "", *tag = "", *trunks = "";
437
438 vlan_mode = must_concat(NULL, "vlan_mode=", args->vlan_mode, (char *)NULL);
439
440 if (args->vlan_id >= 0) {
441 char buf[5];
442 int rc;
443
444 rc = snprintf(buf, sizeof(buf), "%u", args->vlan_id);
445 if (rc < 0 || (size_t)rc >= sizeof(buf))
446 return log_error_errno(-1, EINVAL, "Failed to parse ovs bridge vlan \"%u\"", args->vlan_id);
447
448 tag = must_concat(NULL, "tag=", buf, (char *)NULL);
449 }
450
451
452 if (strcmp(args->trunks, "") != 0)
453 trunks = must_concat(NULL, "trunks=", args->trunks, (char *)NULL);
454
455 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
456 if (strcmp(tag, "") != 0 && strcmp(trunks, "") != 0)
457 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, trunks, (char *)NULL);
458 else if (strcmp(tag, "") != 0)
459 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, (char *)NULL);
460 else if (strcmp(trunks, "") != 0)
461 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, trunks, (char *)NULL);
462 else
463 return -EINVAL;
464
465 return -errno;
466}
467
468static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
469{
470 int taggedLength = lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids);
471 struct ovs_veth_vlan_args args;
472 args.nic = veth1;
473 args.vlan_mode = "";
474 args.vlan_id = -1;
475 args.trunks = "";
476
477 /* Skip setup if no VLAN options are specified. */
478 if (!netdev->priv.veth_attr.vlan_id_set && taggedLength <= 0)
479 return 0;
480
481 /* Configure untagged VLAN settings on bridge port if specified. */
482 if (netdev->priv.veth_attr.vlan_id_set) {
483 if (netdev->priv.veth_attr.vlan_id == BRIDGE_VLAN_NONE && taggedLength <= 0)
484 return log_error_errno(-1, EINVAL, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
485
486 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
487 * Also set the vlan_mode=access, which will drop any tagged frames.
488 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
489 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
490 */
491 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
492 args.vlan_mode = "access";
493 args.vlan_id = netdev->priv.veth_attr.vlan_id;
494 }
495 }
496
497 if (taggedLength > 0) {
498 args.vlan_mode = "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
499
500 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
501 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
502 args.vlan_mode = "native-untagged";
503 }
504
505 struct lxc_list *iterator;
506 lxc_list_for_each(iterator, &netdev->priv.veth_attr.vlan_tagged_ids) {
507 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
508 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
509 int rc;
510
511 rc = snprintf(buf, sizeof(buf), "%u", vlan_id);
512 if (rc < 0 || (size_t)rc >= sizeof(buf))
513 return log_error_errno(-1, EINVAL, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id, veth1);
514
515 args.trunks = must_concat(NULL, args.trunks, buf, ",", (char *)NULL);
516 }
517 }
518
519 if (strcmp(args.vlan_mode, "") != 0) {
520 int ret;
521 char cmd_output[PATH_MAX];
522
523 ret = run_command(cmd_output, sizeof(cmd_output), lxc_ovs_setup_bridge_vlan_exec, (void *)&args);
524 if (ret < 0)
525 return log_error_errno(-1, ret, "Failed to setup openvswitch vlan on port \"%s\": %s", args.nic, cmd_output);
526 }
527
528 return 0;
529}
530
811ef482
CB
531static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
532{
54256301 533 int err;
a00fbab5 534 unsigned int mtu = 1500;
811ef482
CB
535 char *veth1, *veth2;
536 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 537
33320936
TP
538 err = validate_veth(netdev);
539 if (err)
540 return err;
541
f2711167 542 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
543 veth1 = netdev->priv.veth_attr.pair;
544 if (handler->conf->reboot)
545 lxc_netdev_delete_by_name(veth1);
546 } else {
547 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
548 if (err < 0 || (size_t)err >= sizeof(veth1buf))
549 return -1;
550
3646ffd9 551 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
811ef482
CB
552 if (!veth1)
553 return -1;
554
555 /* store away for deconf */
556 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
557 }
558
d34212ad
CB
559 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
560 if (err < 0 || (size_t)err >= sizeof(veth2buf))
561 return -1;
562
3646ffd9 563 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
811ef482 564 if (!veth2)
54256301
CB
565 return -1;
566
a00fbab5
TP
567 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
568 if (netdev->mtu) {
569 if (lxc_safe_uint(netdev->mtu, &mtu))
570 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 571 } else if (!is_empty_string(netdev->link)) {
54256301 572 int ifindex_mtu;
811ef482 573
54256301
CB
574 ifindex_mtu = if_nametoindex(netdev->link);
575 if (ifindex_mtu) {
576 mtu = netdev_get_mtu(ifindex_mtu);
577 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
578 }
579 }
580
581 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
9c66dc4f
CB
582 if (err)
583 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 584
24190194
CB
585 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
586
811ef482
CB
587 /* changing the high byte of the mac address to 0xfe, the bridge interface
588 * will always keep the host's mac address and not take the mac address
589 * of a container */
590 err = setup_private_host_hw_addr(veth1);
591 if (err) {
6d1400b5 592 errno = -err;
593 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
594 goto out_delete;
595 }
596
8da62485
CB
597 /* Retrieve ifindex of the host's veth device. */
598 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
599 if (!netdev->priv.veth_attr.ifindex) {
600 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
601 goto out_delete;
602 }
603
811ef482
CB
604 if (mtu) {
605 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 606 if (err) {
6d1400b5 607 errno = -err;
54256301 608 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
609 goto out_delete;
610 }
611 }
612
f2711167 613 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
26da53c3
TP
614 if (!lxc_nic_exists(netdev->link)) {
615 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
616 goto out_delete;
617 }
618
811ef482
CB
619 err = lxc_bridge_attach(netdev->link, veth1);
620 if (err) {
6d1400b5 621 errno = -err;
26da53c3 622 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
811ef482
CB
623 goto out_delete;
624 }
625 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
33320936
TP
626
627 if (!is_ovs_bridge(netdev->link)) {
628 err = setup_veth_native_bridge_vlan(veth1, netdev);
629 if (err) {
630 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
631 goto out_delete;
632 }
633 }
811ef482
CB
634 }
635
636 err = lxc_netdev_up(veth1);
637 if (err) {
6d1400b5 638 errno = -err;
639 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
640 goto out_delete;
641 }
642
d4a7da46 643 /* setup ipv4 routes on the host interface */
644 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
645 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
646 goto out_delete;
647 }
648
649 /* setup ipv6 routes on the host interface */
650 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
651 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
652 goto out_delete;
653 }
654
6dfa9581 655 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
656 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
657 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
658 appear to be added successfully but then do not appear in the proxy list. The length of time
659 slept doesn't appear to be important, only that the process sleeps for a short period of time.
660 */
661 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
662
6dfa9581
TP
663 if (netdev->ipv4_gateway) {
664 char bufinet4[INET_ADDRSTRLEN];
665 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 666 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
667 goto out_delete;
668 }
669
670 err = lxc_ip_forwarding_on(veth1, AF_INET);
671 if (err) {
9c66dc4f 672 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
673 goto out_delete;
674 }
675
5fe147e9 676 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 677 if (err) {
9c66dc4f 678 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
679 goto out_delete;
680 }
681 }
682
683 if (netdev->ipv6_gateway) {
684 char bufinet6[INET6_ADDRSTRLEN];
685
686 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 687 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
688 goto out_delete;
689 }
690
691 /* Check for sysctl net.ipv6.conf.all.forwarding=1
692 Kernel requires this to route any packets for IPv6.
693 */
694 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
695 if (err) {
9c66dc4f 696 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
697 goto out_delete;
698 }
699
700 err = lxc_ip_forwarding_on(veth1, AF_INET6);
701 if (err) {
9c66dc4f 702 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
703 goto out_delete;
704 }
705
706 err = lxc_neigh_proxy_on(veth1, AF_INET6);
707 if (err) {
9c66dc4f 708 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
709 goto out_delete;
710 }
711
5fe147e9 712 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 713 if (err) {
9c66dc4f 714 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
715 goto out_delete;
716 }
717 }
718
719 /* setup ipv4 address routes on the host interface */
720 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
721 if (err) {
9c66dc4f 722 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
723 goto out_delete;
724 }
725
726 /* setup ipv6 address routes on the host interface */
727 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
728 if (err) {
9c66dc4f 729 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
730 goto out_delete;
731 }
732 }
733
811ef482 734 if (netdev->upscript) {
14a7b0f9
CB
735 char *argv[] = {
736 "veth",
737 netdev->link,
990b9ac3 738 veth1,
14a7b0f9
CB
739 NULL,
740 };
741
742 err = run_script_argv(handler->name,
743 handler->conf->hooks_version, "net",
744 netdev->upscript, "up", argv);
745 if (err < 0)
811ef482
CB
746 goto out_delete;
747 }
748
54256301 749 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
750
751 return 0;
752
753out_delete:
54256301 754 lxc_netdev_delete_by_name(veth1);
811ef482
CB
755 return -1;
756}
757
758static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
759{
8021de25 760 char peer[IFNAMSIZ];
811ef482
CB
761 int err;
762
f2711167 763 if (is_empty_string(netdev->link)) {
811ef482
CB
764 ERROR("No link for macvlan network device specified");
765 return -1;
766 }
767
8021de25
CB
768 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
769 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
770 return -1;
771
3646ffd9 772 if (!lxc_ifname_alnum_case_sensitive(peer))
811ef482
CB
773 return -1;
774
775 err = lxc_macvlan_create(netdev->link, peer,
776 netdev->priv.macvlan_attr.mode);
777 if (err) {
6d1400b5 778 errno = -err;
779 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
780 peer, netdev->link);
966e9f1f 781 goto on_error;
811ef482
CB
782 }
783
9f8cf6e1
CB
784 strlcpy(netdev->created_name, peer, IFNAMSIZ);
785
811ef482
CB
786 netdev->ifindex = if_nametoindex(peer);
787 if (!netdev->ifindex) {
788 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 789 goto on_error;
811ef482
CB
790 }
791
3bef7b7b 792 if (netdev->mtu) {
54256301
CB
793 unsigned int mtu;
794
3bef7b7b
TP
795 err = lxc_safe_uint(netdev->mtu, &mtu);
796 if (err < 0) {
797 errno = -err;
798 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
799 goto on_error;
800 }
801
802 err = lxc_netdev_set_mtu(peer, mtu);
803 if (err < 0) {
804 errno = -err;
805 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
806 goto on_error;
807 }
808 }
809
811ef482 810 if (netdev->upscript) {
14a7b0f9
CB
811 char *argv[] = {
812 "macvlan",
813 netdev->link,
814 NULL,
815 };
816
817 err = run_script_argv(handler->name,
818 handler->conf->hooks_version, "net",
819 netdev->upscript, "up", argv);
820 if (err < 0)
966e9f1f 821 goto on_error;
811ef482
CB
822 }
823
824 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
825 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
826
827 return 0;
966e9f1f
CB
828
829on_error:
811ef482 830 lxc_netdev_delete_by_name(peer);
811ef482
CB
831 return -1;
832}
833
c9f52382 834static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
835{
d16bda44
CB
836 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
837 struct nl_handler nlh;
838 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 839 int err, index, len;
840 struct ifinfomsg *ifi;
c9f52382 841 struct rtattr *nest, *nest2;
c9f52382 842
843 len = strlen(master);
844 if (len == 1 || len >= IFNAMSIZ)
d16bda44 845 return ret_errno(EINVAL);
c9f52382 846
847 len = strlen(name);
848 if (len == 1 || len >= IFNAMSIZ)
d16bda44 849 return ret_errno(EINVAL);
c9f52382 850
851 index = if_nametoindex(master);
852 if (!index)
d16bda44 853 return ret_errno(EINVAL);
c9f52382 854
d16bda44 855 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 856 if (err)
df62850d 857 return err;
c9f52382 858
c9f52382 859 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
860 if (!nlmsg)
d16bda44 861 return ret_errno(ENOMEM);
c9f52382 862
863 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
864 if (!answer)
d16bda44 865 return ret_errno(ENOMEM);
c9f52382 866
867 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
868 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
869
870 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
871 if (!ifi)
872 return ret_errno(ENOMEM);
c9f52382 873 ifi->ifi_family = AF_UNSPEC;
874
c9f52382 875 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
876 if (!nest)
d16bda44 877 return ret_errno(EPROTO);
c9f52382 878
879 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 880 return ret_errno(EPROTO);
c9f52382 881
5755765e
KT
882 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
883 if (!nest2)
884 return ret_errno(EPROTO);
885
3a934e2e 886 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
5755765e
KT
887 return ret_errno(EPROTO);
888
cf88a827
TP
889 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
890 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
891 * according to ipvlan docs.
5755765e 892 */
cf88a827 893 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
5755765e 894 return ret_errno(EPROTO);
c9f52382 895
5755765e 896 nla_end_nested(nlmsg, nest2);
c9f52382 897 nla_end_nested(nlmsg, nest);
898
899 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 900 return ret_errno(EPROTO);
c9f52382 901
902 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
903 return ret_errno(EPROTO);
904
905 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 906}
907
908static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
909{
dd119206 910 char peer[IFNAMSIZ];
c9f52382 911 int err;
912
f2711167 913 if (is_empty_string(netdev->link)) {
c9f52382 914 ERROR("No link for ipvlan network device specified");
915 return -1;
916 }
917
dd119206
CB
918 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
919 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 920 return -1;
921
3646ffd9 922 if (!lxc_ifname_alnum_case_sensitive(peer))
c9f52382 923 return -1;
924
dd119206
CB
925 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
926 netdev->priv.ipvlan_attr.isolation);
c9f52382 927 if (err) {
dd119206
CB
928 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
929 peer, netdev->link);
c9f52382 930 goto on_error;
931 }
932
e7fdd504
CB
933 strlcpy(netdev->created_name, peer, IFNAMSIZ);
934
c9f52382 935 netdev->ifindex = if_nametoindex(peer);
936 if (!netdev->ifindex) {
937 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
938 goto on_error;
939 }
940
006e135e 941 if (netdev->mtu) {
54256301
CB
942 unsigned int mtu;
943
006e135e 944 err = lxc_safe_uint(netdev->mtu, &mtu);
945 if (err < 0) {
946 errno = -err;
54256301 947 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 948 goto on_error;
949 }
950
951 err = lxc_netdev_set_mtu(peer, mtu);
952 if (err < 0) {
953 errno = -err;
54256301 954 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 955 goto on_error;
956 }
957 }
958
c9f52382 959 if (netdev->upscript) {
960 char *argv[] = {
961 "ipvlan",
962 netdev->link,
963 NULL,
964 };
965
dd119206
CB
966 err = run_script_argv(handler->name, handler->conf->hooks_version,
967 "net", netdev->upscript, "up", argv);
c9f52382 968 if (err < 0)
969 goto on_error;
970 }
971
dd119206
CB
972 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
973 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 974
975 return 0;
976
977on_error:
978 lxc_netdev_delete_by_name(peer);
979 return -1;
980}
981
811ef482
CB
982static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
983{
984 char peer[IFNAMSIZ];
985 int err;
986 static uint16_t vlan_cntr = 0;
811ef482 987
f2711167 988 if (is_empty_string(netdev->link)) {
811ef482
CB
989 ERROR("No link for vlan network device specified");
990 return -1;
991 }
992
d4d68410
CB
993 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
994 netdev->priv.vlan_attr.vid, vlan_cntr++);
811ef482
CB
995 if (err < 0 || (size_t)err >= sizeof(peer))
996 return -1;
997
998 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
999 if (err) {
6d1400b5 1000 errno = -err;
1001 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1002 peer, netdev->link);
811ef482
CB
1003 return -1;
1004 }
1005
83530dba
CB
1006 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1007
811ef482
CB
1008 netdev->ifindex = if_nametoindex(peer);
1009 if (!netdev->ifindex) {
1010 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 1011 goto on_error;
1012 }
1013
1014 if (netdev->mtu) {
54256301
CB
1015 unsigned int mtu;
1016
3e2a7b08 1017 err = lxc_safe_uint(netdev->mtu, &mtu);
1018 if (err < 0) {
1019 errno = -err;
54256301 1020 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1021 goto on_error;
1022 }
1023
1024 err = lxc_netdev_set_mtu(peer, mtu);
54256301 1025 if (err < 0) {
3e2a7b08 1026 errno = -err;
54256301 1027 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 1028 goto on_error;
1029 }
811ef482
CB
1030 }
1031
3a73d9f1 1032 if (netdev->upscript) {
1033 char *argv[] = {
1034 "vlan",
1035 netdev->link,
1036 NULL,
1037 };
1038
d4d68410
CB
1039 err = run_script_argv(handler->name, handler->conf->hooks_version,
1040 "net", netdev->upscript, "up", argv);
19abca58 1041 if (err < 0) {
3e2a7b08 1042 goto on_error;
19abca58 1043 }
3a73d9f1 1044 }
1045
d4d68410
CB
1046 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
1047 netdev->ifindex);
811ef482
CB
1048
1049 return 0;
3e2a7b08 1050
1051on_error:
1052 lxc_netdev_delete_by_name(peer);
1053 return -1;
811ef482
CB
1054}
1055
1056static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1057{
0b154989 1058 int err, mtu_orig = 0;
14a7b0f9 1059
9c66dc4f
CB
1060 if (is_empty_string(netdev->link))
1061 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 1062
75b074ee
CB
1063 /*
1064 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
1065 * network namespace because we need it to move the device from the
1066 * host's network namespace to the container's network namespace later
1067 * on.
1068 * Note that netdev->link will contain the name of the physical network
1069 * device in the host's namespace.
1070 */
811ef482 1071 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
1072 if (!netdev->ifindex)
1073 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 1074
61302ef7 1075 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 1076 if (is_empty_string(netdev->name))
8bf64b77 1077 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 1078
75b074ee
CB
1079 /*
1080 * Store the ifindex of the host's network device in the host's
790255cf
CB
1081 * namespace.
1082 */
1083 netdev->priv.phys_attr.ifindex = netdev->ifindex;
1084
75b074ee
CB
1085 /*
1086 * Get original device MTU setting and store for restoration after
1087 * container shutdown.
1088 */
0b154989 1089 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
1090 if (mtu_orig < 0)
1091 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
1092
1093 netdev->priv.phys_attr.mtu = mtu_orig;
1094
3bef7b7b 1095 if (netdev->mtu) {
54256301
CB
1096 unsigned int mtu;
1097
3bef7b7b 1098 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
1099 if (err < 0)
1100 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 1101
3bef7b7b 1102 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
1103 if (err < 0)
1104 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
1105 }
1106
1107 if (netdev->upscript) {
1108 char *argv[] = {
1109 "phys",
1110 netdev->link,
1111 NULL,
1112 };
1113
75b074ee
CB
1114 err = run_script_argv(handler->name, handler->conf->hooks_version,
1115 "net", netdev->upscript, "up", argv);
9c66dc4f 1116 if (err < 0)
3bef7b7b 1117 return -1;
3bef7b7b
TP
1118 }
1119
75b074ee
CB
1120 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
1121 netdev->ifindex);
811ef482
CB
1122
1123 return 0;
1124}
1125
1126static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1127{
14a7b0f9
CB
1128 int ret;
1129 char *argv[] = {
1130 "empty",
1131 NULL,
1132 };
1133
811ef482 1134 netdev->ifindex = 0;
14a7b0f9
CB
1135 if (!netdev->upscript)
1136 return 0;
1137
1138 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1139 "net", netdev->upscript, "up", argv);
1140 if (ret < 0)
1141 return -1;
1142
811ef482
CB
1143 return 0;
1144}
1145
1146static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
1147{
1148 netdev->ifindex = 0;
1149 return 0;
1150}
1151
1152static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
1153 [LXC_NET_VETH] = instantiate_veth,
1154 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 1155 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
1156 [LXC_NET_VLAN] = instantiate_vlan,
1157 [LXC_NET_PHYS] = instantiate_phys,
1158 [LXC_NET_EMPTY] = instantiate_empty,
1159 [LXC_NET_NONE] = instantiate_none,
1160};
1161
9d0406c7 1162static int __instantiate_ns_common(struct lxc_netdev *netdev)
8bf64b77
CB
1163{
1164 char current_ifname[IFNAMSIZ];
1165
1166 netdev->ifindex = if_nametoindex(netdev->created_name);
1167 if (!netdev->ifindex)
1168 return log_error_errno(-1,
1169 errno, "Failed to retrieve ifindex for network device with name %s",
1170 netdev->created_name);
1171
3473ca76 1172 if (is_empty_string(netdev->name))
8bf64b77
CB
1173 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1174
1175 if (strcmp(netdev->created_name, netdev->name) != 0) {
1176 int ret;
1177
1178 ret = lxc_netdev_rename_by_name(netdev->created_name, netdev->name);
1179 if (ret)
9c66dc4f 1180 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
8bf64b77
CB
1181 netdev->created_name,
1182 netdev->name);
1183
1184 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->created_name, netdev->name);
1185 }
1186
1187 /*
1188 * Re-read the name of the interface because its name has changed and
1189 * would be automatically allocated by the system
1190 */
1191 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 1192 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
1193
1194 /*
1195 * Now update the recorded name of the network device to reflect the
1196 * name of the network device in the child's network namespace. We will
1197 * later on send this information back to the parent.
1198 */
1199 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
1200
1201 return 0;
1202}
1203
717f77f2 1204static int instantiate_ns_veth(struct lxc_netdev *netdev)
8bf64b77 1205{
8bf64b77 1206
9d0406c7 1207 return __instantiate_ns_common(netdev);
8bf64b77
CB
1208}
1209
1210static int instantiate_ns_macvlan(struct lxc_netdev *netdev)
1211{
9d0406c7 1212 return __instantiate_ns_common(netdev);
8bf64b77
CB
1213}
1214
1215static int instantiate_ns_ipvlan(struct lxc_netdev *netdev)
1216{
9d0406c7 1217 return __instantiate_ns_common(netdev);
8bf64b77
CB
1218}
1219
1220static int instantiate_ns_vlan(struct lxc_netdev *netdev)
1221{
9d0406c7 1222 return __instantiate_ns_common(netdev);
8bf64b77
CB
1223}
1224
1225static int instantiate_ns_phys(struct lxc_netdev *netdev)
1226{
9d0406c7 1227 return __instantiate_ns_common(netdev);
8bf64b77
CB
1228}
1229
1230static int instantiate_ns_empty(struct lxc_netdev *netdev)
1231{
1232 return 0;
1233}
1234
1235static int instantiate_ns_none(struct lxc_netdev *netdev)
1236{
1237 return 0;
1238}
1239
1240static instantiate_ns_cb netdev_ns_conf[LXC_NET_MAXCONFTYPE + 1] = {
1241 [LXC_NET_VETH] = instantiate_ns_veth,
1242 [LXC_NET_MACVLAN] = instantiate_ns_macvlan,
1243 [LXC_NET_IPVLAN] = instantiate_ns_ipvlan,
1244 [LXC_NET_VLAN] = instantiate_ns_vlan,
1245 [LXC_NET_PHYS] = instantiate_ns_phys,
1246 [LXC_NET_EMPTY] = instantiate_ns_empty,
1247 [LXC_NET_NONE] = instantiate_ns_none,
1248};
1249
811ef482
CB
1250static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
1251{
14a7b0f9
CB
1252 int ret;
1253 char *argv[] = {
1254 "veth",
1255 netdev->link,
1256 NULL,
1257 NULL,
1258 };
1259
1260 if (!netdev->downscript)
1261 return 0;
811ef482 1262
f2711167 1263 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 1264 argv[2] = netdev->priv.veth_attr.pair;
811ef482 1265 else
14a7b0f9
CB
1266 argv[2] = netdev->priv.veth_attr.veth1;
1267
1268 ret = run_script_argv(handler->name,
1269 handler->conf->hooks_version, "net",
1270 netdev->downscript, "down", argv);
1271 if (ret < 0)
1272 return -1;
811ef482 1273
811ef482
CB
1274 return 0;
1275}
1276
1277static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1278{
14a7b0f9
CB
1279 int ret;
1280 char *argv[] = {
1281 "macvlan",
1282 netdev->link,
1283 NULL,
1284 };
1285
1286 if (!netdev->downscript)
1287 return 0;
1288
1289 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1290 "net", netdev->downscript, "down", argv);
1291 if (ret < 0)
1292 return -1;
811ef482 1293
811ef482
CB
1294 return 0;
1295}
1296
c9f52382 1297static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1298{
1299 int ret;
1300 char *argv[] = {
1301 "ipvlan",
1302 netdev->link,
1303 NULL,
1304 };
1305
1306 if (!netdev->downscript)
1307 return 0;
1308
1309 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1310 "net", netdev->downscript, "down", argv);
1311 if (ret < 0)
1312 return -1;
1313
1314 return 0;
1315}
1316
811ef482
CB
1317static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1318{
3a73d9f1 1319 int ret;
1320 char *argv[] = {
1321 "vlan",
1322 netdev->link,
1323 NULL,
1324 };
1325
1326 if (!netdev->downscript)
1327 return 0;
1328
1329 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1330 "net", netdev->downscript, "down", argv);
1331 if (ret < 0)
1332 return -1;
1333
811ef482
CB
1334 return 0;
1335}
1336
1337static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1338{
14a7b0f9
CB
1339 int ret;
1340 char *argv[] = {
1341 "phys",
1342 netdev->link,
1343 NULL,
1344 };
1345
1346 if (!netdev->downscript)
1347 return 0;
1348
1349 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1350 "net", netdev->downscript, "down", argv);
1351 if (ret < 0)
1352 return -1;
811ef482 1353
811ef482
CB
1354 return 0;
1355}
1356
1357static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1358{
14a7b0f9
CB
1359 int ret;
1360 char *argv[] = {
1361 "empty",
1362 NULL,
1363 };
1364
1365 if (!netdev->downscript)
1366 return 0;
1367
1368 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1369 "net", netdev->downscript, "down", argv);
1370 if (ret < 0)
1371 return -1;
811ef482 1372
811ef482
CB
1373 return 0;
1374}
1375
/* "none" network type: nothing to shut down. Always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
1380
1381static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1382 [LXC_NET_VETH] = shutdown_veth,
1383 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 1384 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
1385 [LXC_NET_VLAN] = shutdown_vlan,
1386 [LXC_NET_PHYS] = shutdown_phys,
1387 [LXC_NET_EMPTY] = shutdown_empty,
1388 [LXC_NET_NONE] = shutdown_none,
1389};
1390
0037ab49
TP
1391static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1392{
d16bda44 1393 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0037ab49 1394 struct nl_handler nlh;
d16bda44
CB
1395 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1396 int err;
0037ab49 1397 struct ifinfomsg *ifi;
0037ab49 1398
d16bda44 1399 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1400 if (err)
1401 return err;
1402
0037ab49
TP
1403 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1404 if (!nlmsg)
d16bda44 1405 return ret_errno(ENOMEM);
0037ab49
TP
1406
1407 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1408 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1409
1410 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1411 if (!ifi)
d16bda44
CB
1412 return ret_errno(ENOMEM);
1413
0037ab49
TP
1414 ifi->ifi_family = AF_UNSPEC;
1415 ifi->ifi_index = ifindex;
1416
1417 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1418 return ret_errno(ENOMEM);
0037ab49 1419
3473ca76 1420 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1421 return ret_errno(ENOMEM);
0037ab49 1422
d16bda44 1423 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1424}
1425
ebc73a67 1426int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1427{
d16bda44 1428 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0ad19a3f 1429 struct nl_handler nlh;
d16bda44
CB
1430 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1431 int err;
06f976ca 1432 struct ifinfomsg *ifi;
0ad19a3f 1433
d16bda44 1434 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1435 if (err)
1436 return err;
0ad19a3f 1437
0ad19a3f 1438 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1439 if (!nlmsg)
d16bda44 1440 return ret_errno(ENOMEM);
0ad19a3f 1441
ebc73a67 1442 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1443 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1444
1445 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1446 if (!ifi)
d16bda44
CB
1447 return ret_errno(ENOMEM);
1448
06f976ca
SZ
1449 ifi->ifi_family = AF_UNSPEC;
1450 ifi->ifi_index = ifindex;
0ad19a3f 1451
1452 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1453 return ret_errno(ENOMEM);
0ad19a3f 1454
3473ca76 1455 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1456 return ret_errno(ENOMEM);
8d357196 1457
d16bda44 1458 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1459}
1460
ebc73a67
CB
1461/* If we are asked to move a wireless interface, then we must actually move its
1462 * phyN device. Detect that condition and return the physname here. The physname
1463 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1464 */
1465#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1466char *is_wlan(const char *ifname)
e5848d39 1467{
4110345b
CB
1468 __do_fclose FILE *f = NULL;
1469 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1470 int i, ret;
e5848d39 1471 long physlen;
ebc73a67 1472 size_t len;
e5848d39 1473
ebc73a67 1474 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1475 path = must_realloc(NULL, len + 1);
e5848d39 1476 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 1477 if (ret < 0 || (size_t)ret >= len)
4110345b 1478 return NULL;
ebc73a67 1479
4110345b 1480 f = fopen(path, "re");
ebc73a67 1481 if (!f)
4110345b 1482 return NULL;
ebc73a67 1483
1a0e70ac 1484 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1485 fseek(f, 0, SEEK_END);
1486 physlen = ftell(f);
1487 fseek(f, 0, SEEK_SET);
4110345b
CB
1488 if (physlen < 0)
1489 return NULL;
ebc73a67
CB
1490
1491 physname = malloc(physlen + 1);
4110345b
CB
1492 if (!physname)
1493 return NULL;
ebc73a67
CB
1494
1495 memset(physname, 0, physlen + 1);
e5848d39 1496 ret = fread(physname, 1, physlen, f);
e5848d39 1497 if (ret < 0)
4110345b 1498 return NULL;
e5848d39 1499
ebc73a67 1500 for (i = 0; i < physlen; i++) {
e5848d39
SH
1501 if (physname[i] == '\n')
1502 physname[i] = '\0';
ebc73a67 1503
e5848d39
SH
1504 if (physname[i] == '\0')
1505 break;
1506 }
1507
4110345b 1508 return move_ptr(physname);
e5848d39
SH
1509}
1510
/*
 * Rename network device @old to @new inside the network namespace of @pid.
 *
 * A child process is forked which switches into the target network namespace
 * and performs the rename; the parent waits for it.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child: it must never return into the caller's code, otherwise two
	 * processes would continue on the parent's code path. Exit on every
	 * path instead.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1528
e4103cf6 1529int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1530 const char *newname)
e5848d39 1531{
3dd78294 1532 __do_free char *cmd = NULL;
ebc73a67 1533 pid_t fpid;
e5848d39
SH
1534
1535 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1536 * However, IIUC this involves a bit more complicated work to talk to
1537 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1538 */
1539 cmd = on_path("iw", NULL);
9c66dc4f 1540 if (!cmd)
3dd78294 1541 return -1;
e5848d39
SH
1542
1543 fpid = fork();
1544 if (fpid < 0)
3dd78294 1545 return -1;
ebc73a67 1546
e5848d39
SH
1547 if (fpid == 0) {
1548 char pidstr[30];
1549 sprintf(pidstr, "%d", pid);
9c66dc4f 1550 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1551 _exit(EXIT_FAILURE);
e5848d39 1552 }
ebc73a67 1553
e5848d39 1554 if (wait_for_pid(fpid))
3dd78294 1555 return -1;
e5848d39 1556
e5848d39 1557 if (newname)
3dd78294 1558 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1559
3dd78294 1560 return 0;
e5848d39
SH
1561}
1562
8d357196 1563int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1564{
3dd78294 1565 __do_free char *physname = NULL;
8befa924
SH
1566 int index;
1567
8befa924
SH
1568 if (!ifname)
1569 return -EINVAL;
1570
32571606 1571 index = if_nametoindex(ifname);
49428bf3
DY
1572 if (!index)
1573 return -EINVAL;
32571606 1574
ebc73a67
CB
1575 physname = is_wlan(ifname);
1576 if (physname)
e5848d39
SH
1577 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1578
8d357196 1579 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1580}
1581
b84f58b9 1582int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1583{
d16bda44
CB
1584 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1585 struct nl_handler nlh;
1586 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1587 int err;
ebc73a67 1588 struct ifinfomsg *ifi;
0ad19a3f 1589
d16bda44 1590 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1591 if (err)
1592 return err;
0ad19a3f 1593
0ad19a3f 1594 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1595 if (!nlmsg)
d16bda44 1596 return ret_errno(ENOMEM);
0ad19a3f 1597
06f976ca 1598 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1599 if (!answer)
d16bda44 1600 return ret_errno(ENOMEM);
0ad19a3f 1601
ebc73a67 1602 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1603 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1604
1605 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1606 if (!ifi)
d16bda44
CB
1607 return ret_errno(ENOMEM);
1608
06f976ca
SZ
1609 ifi->ifi_family = AF_UNSPEC;
1610 ifi->ifi_index = ifindex;
0ad19a3f 1611
d16bda44 1612 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1613}
1614
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL when @name has no ifindex, otherwise the result of
 * lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1625
1626int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1627{
d16bda44
CB
1628 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1629 struct nl_handler nlh;
1630 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1631 int err, len;
06f976ca 1632 struct ifinfomsg *ifi;
b9a5bb58 1633
d16bda44 1634 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1635 if (err)
1636 return err;
b9a5bb58 1637
b84f58b9 1638 len = strlen(newname);
d16bda44
CB
1639 if (len == 1 || len >= IFNAMSIZ)
1640 return ret_errno(EINVAL);
b84f58b9 1641
b9a5bb58
DL
1642 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1643 if (!nlmsg)
d16bda44 1644 return ret_errno(ENOMEM);
b9a5bb58 1645
06f976ca 1646 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1647 if (!answer)
d16bda44 1648 return ret_errno(ENOMEM);
b9a5bb58 1649
ebc73a67 1650 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1651 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1652
1653 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1654 if (!ifi)
d16bda44
CB
1655 return ret_errno(ENOMEM);
1656
06f976ca
SZ
1657 ifi->ifi_family = AF_UNSPEC;
1658 ifi->ifi_index = ifindex;
b84f58b9
DL
1659
1660 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1661 return ret_errno(ENOMEM);
b9a5bb58 1662
d16bda44 1663 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1664}
1665
b84f58b9
DL
1666int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1667{
1668 int len, index;
1669
1670 len = strlen(oldname);
dae3fdf6 1671 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1672 return -EINVAL;
1673
1674 index = if_nametoindex(oldname);
1675 if (!index)
1676 return -EINVAL;
1677
1678 return lxc_netdev_rename_by_index(index, newname);
1679}
1680
8befa924 1681int netdev_set_flag(const char *name, int flag)
0ad19a3f 1682{
d16bda44
CB
1683 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1684 struct nl_handler nlh;
1685 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1686 int err, index, len;
06f976ca 1687 struct ifinfomsg *ifi;
0ad19a3f 1688
d16bda44 1689 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1690 if (err)
1691 return err;
0ad19a3f 1692
1693 len = strlen(name);
dae3fdf6 1694 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1695 return ret_errno(EINVAL);
0ad19a3f 1696
1697 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1698 if (!nlmsg)
d16bda44 1699 return ret_errno(ENOMEM);
0ad19a3f 1700
06f976ca 1701 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1702 if (!answer)
d16bda44 1703 return ret_errno(ENOMEM);
0ad19a3f 1704
1705 index = if_nametoindex(name);
1706 if (!index)
d16bda44 1707 return ret_errno(EINVAL);
0ad19a3f 1708
ebc73a67 1709 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1710 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1711
1712 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1713 if (!ifi)
1714 return ret_errno(ENOMEM);
1715
06f976ca
SZ
1716 ifi->ifi_family = AF_UNSPEC;
1717 ifi->ifi_index = index;
1718 ifi->ifi_change |= IFF_UP;
1719 ifi->ifi_flags |= flag;
0ad19a3f 1720
d16bda44 1721 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1722}
1723
ebc73a67 1724int netdev_get_flag(const char *name, int *flag)
efa1cf45 1725{
d16bda44
CB
1726 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1727 struct nl_handler nlh;
1728 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1729 int err, index, len;
a4318300 1730 struct ifinfomsg *ifi;
efa1cf45
DY
1731
1732 if (!name)
d16bda44 1733 return ret_errno(EINVAL);
efa1cf45 1734
d16bda44 1735 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1736 if (err)
1737 return err;
1738
efa1cf45
DY
1739 len = strlen(name);
1740 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1741 return ret_errno(EINVAL);
efa1cf45 1742
efa1cf45
DY
1743 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1744 if (!nlmsg)
d16bda44 1745 return ret_errno(ENOMEM);
efa1cf45 1746
06f976ca 1747 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1748 if (!answer)
d16bda44 1749 return ret_errno(ENOMEM);
efa1cf45 1750
efa1cf45
DY
1751 index = if_nametoindex(name);
1752 if (!index)
d16bda44 1753 return ret_errno(EINVAL);
efa1cf45 1754
06f976ca
SZ
1755 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1756 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1757
1758 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1759 if (!ifi)
1760 return ret_errno(ENOMEM);
1761
06f976ca
SZ
1762 ifi->ifi_family = AF_UNSPEC;
1763 ifi->ifi_index = index;
efa1cf45 1764
d16bda44 1765 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1766 if (err)
d16bda44 1767 return ret_set_errno(-1, errno);
efa1cf45 1768
06f976ca 1769 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1770
1771 *flag = ifi->ifi_flags;
efa1cf45
DY
1772 return err;
1773}
1774
1775/*
1776 * \brief Check a interface is up or not.
1777 *
1778 * \param name: name for the interface.
1779 *
1780 * \return int.
1781 * 0 means interface is down.
1782 * 1 means interface is up.
1783 * Others means error happened, and ret-value is the error number.
1784 */
ebc73a67 1785int lxc_netdev_isup(const char *name)
efa1cf45 1786{
ebc73a67 1787 int err, flag;
efa1cf45
DY
1788
1789 err = netdev_get_flag(name, &flag);
1790 if (err)
ebc73a67
CB
1791 return err;
1792
efa1cf45
DY
1793 if (flag & IFF_UP)
1794 return 1;
ebc73a67 1795
efa1cf45 1796 return 0;
efa1cf45
DY
1797}
1798
0130df54
SH
1799int netdev_get_mtu(int ifindex)
1800{
a5f5cb41 1801 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54 1802 struct nl_handler nlh;
a5f5cb41
CB
1803 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1804 int readmore = 0, recv_len = 0;
1805 int answer_len, err, res;
06f976ca 1806 struct ifinfomsg *ifi;
0130df54 1807 struct nlmsghdr *msg;
0130df54 1808
a5f5cb41 1809 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0130df54
SH
1810 if (err)
1811 return err;
1812
0130df54
SH
1813 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1814 if (!nlmsg)
a5f5cb41 1815 return ret_errno(ENOMEM);
0130df54 1816
06f976ca 1817 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54 1818 if (!answer)
a5f5cb41 1819 return ret_errno(ENOMEM);
0130df54
SH
1820
1821 /* Save the answer buffer length, since it will be overwritten
1822 * on the first receive (and we might need to receive more than
ebc73a67
CB
1823 * once.
1824 */
06f976ca
SZ
1825 answer_len = answer->nlmsghdr->nlmsg_len;
1826
ebc73a67 1827 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1828 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1829
06f976ca 1830 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1831 if (!ifi)
a5f5cb41
CB
1832 return ret_errno(ENOMEM);
1833
06f976ca 1834 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1835
1836 /* Send the request for addresses, which returns all addresses
1837 * on all interfaces. */
a5f5cb41 1838 err = netlink_send(nlh_ptr, nlmsg);
0130df54 1839 if (err < 0)
a5f5cb41 1840 return ret_set_errno(-1, errno);
0130df54 1841
6ce39620
CB
1842#pragma GCC diagnostic push
1843#pragma GCC diagnostic ignored "-Wcast-align"
1844
0130df54
SH
1845 do {
1846 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1847 * overwritten by a previous receive.
1848 */
06f976ca 1849 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1850
1851 /* Get the (next) batch of reply messages */
a5f5cb41 1852 err = netlink_rcv(nlh_ptr, answer);
0130df54 1853 if (err < 0)
a5f5cb41 1854 return ret_set_errno(-1, errno);
0130df54
SH
1855
1856 recv_len = err;
0130df54
SH
1857
1858 /* Satisfy the typing for the netlink macros */
06f976ca 1859 msg = answer->nlmsghdr;
0130df54
SH
1860
1861 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1862 /* Stop reading if we see an error message */
1863 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
1864 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1865 return ret_set_errno(errmsg->error, errno);
0130df54
SH
1866 }
1867
1868 /* Stop reading if we see a NLMSG_DONE message */
1869 if (msg->nlmsg_type == NLMSG_DONE) {
1870 readmore = 0;
1871 break;
1872 }
1873
06f976ca 1874 ifi = NLMSG_DATA(msg);
0130df54
SH
1875 if (ifi->ifi_index == ifindex) {
1876 struct rtattr *rta = IFLA_RTA(ifi);
a5f5cb41
CB
1877 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1878
0130df54 1879 res = 0;
ebc73a67 1880 while (RTA_OK(rta, attr_len)) {
9c66dc4f 1881 /*
a5f5cb41 1882 * Found a local address for the
ebc73a67
CB
1883 * requested interface, return it.
1884 */
0130df54 1885 if (rta->rta_type == IFLA_MTU) {
a5f5cb41
CB
1886 memcpy(&res, RTA_DATA(rta), sizeof(int));
1887 return res;
0130df54 1888 }
a5f5cb41 1889
0130df54
SH
1890 rta = RTA_NEXT(rta, attr_len);
1891 }
0130df54
SH
1892 }
1893
ebc73a67
CB
1894 /* Keep reading more data from the socket if the last
1895 * message had the NLF_F_MULTI flag set.
1896 */
0130df54
SH
1897 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1898
ebc73a67 1899 /* Look at the next message received in this buffer. */
0130df54
SH
1900 msg = NLMSG_NEXT(msg, recv_len);
1901 }
1902 } while (readmore);
1903
6ce39620
CB
1904#pragma GCC diagnostic pop
1905
ebc73a67 1906 /* If we end up here, we didn't find any result, so signal an error. */
a5f5cb41 1907 return -1;
0130df54
SH
1908}
1909
d472214b 1910int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1911{
a5f5cb41
CB
1912 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1913 struct nl_handler nlh;
1914 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
54256301 1915 int err, len;
06f976ca 1916 struct ifinfomsg *ifi;
75d09f83 1917
a5f5cb41 1918 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1919 if (err)
1920 return err;
75d09f83
DL
1921
1922 len = strlen(name);
dae3fdf6 1923 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1924 return ret_errno(EINVAL);
75d09f83
DL
1925
1926 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1927 if (!nlmsg)
a5f5cb41 1928 return ret_errno(ENOMEM);
75d09f83 1929
06f976ca 1930 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83 1931 if (!answer)
a5f5cb41 1932 return ret_errno(ENOMEM);
75d09f83 1933
ebc73a67 1934 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1935 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1936
1937 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
1938 if (!ifi)
1939 return ret_errno(ENOMEM);
1940
06f976ca 1941 ifi->ifi_family = AF_UNSPEC;
54256301
CB
1942
1943 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 1944 return ret_errno(ENOMEM);
75d09f83
DL
1945
1946 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 1947 return ret_errno(ENOMEM);
75d09f83 1948
a5f5cb41 1949 return netlink_transaction(nlh_ptr, nlmsg, answer);
75d09f83
DL
1950}
1951
/* Bring the network device @name up by passing IFF_UP to netdev_set_flag(). */
int lxc_netdev_up(const char *name)
{
	const int flag = IFF_UP;

	return netdev_set_flag(name, flag);
}
1956
/* Take the network device @name down by passing an empty flag set to netdev_set_flag(). */
int lxc_netdev_down(const char *name)
{
	const int flag = 0;

	return netdev_set_flag(name, flag);
}
1961
54256301 1962int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 1963{
a5f5cb41
CB
1964 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1965 struct nl_handler nlh;
1966 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1967 int err, len;
06f976ca 1968 struct ifinfomsg *ifi;
0ad19a3f 1969 struct rtattr *nest1, *nest2, *nest3;
0ad19a3f 1970
a5f5cb41 1971 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1972 if (err)
1973 return err;
0ad19a3f 1974
1975 len = strlen(name1);
dae3fdf6 1976 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1977 return ret_errno(EINVAL);
0ad19a3f 1978
1979 len = strlen(name2);
dae3fdf6 1980 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1981 return ret_errno(EINVAL);
0ad19a3f 1982
1983 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1984 if (!nlmsg)
a5f5cb41 1985 return ret_errno(ENOMEM);
0ad19a3f 1986
06f976ca 1987 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1988 if (!answer)
a5f5cb41 1989 return ret_errno(ENOMEM);
0ad19a3f 1990
a5f5cb41 1991 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1992 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1993
1994 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1995 if (!ifi)
a5f5cb41
CB
1996 return ret_errno(ENOMEM);
1997
06f976ca 1998 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1999
79e68309 2000 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2001 if (!nest1)
a5f5cb41 2002 return ret_errno(EINVAL);
0ad19a3f 2003
2004 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
a5f5cb41 2005 return ret_errno(ENOMEM);
0ad19a3f 2006
2007 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2008 if (!nest2)
a5f5cb41 2009 return ret_errno(ENOMEM);
0ad19a3f 2010
2011 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
2012 if (!nest3)
a5f5cb41 2013 return ret_errno(ENOMEM);
0ad19a3f 2014
06f976ca 2015 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2016 if (!ifi)
2017 return ret_errno(ENOMEM);
0ad19a3f 2018
2019 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
a5f5cb41 2020 return ret_errno(ENOMEM);
0ad19a3f 2021
54256301 2022 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 2023 return ret_errno(ENOMEM);
54256301
CB
2024
2025 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
a5f5cb41 2026 return ret_errno(ENOMEM);
54256301 2027
0ad19a3f 2028 nla_end_nested(nlmsg, nest3);
0ad19a3f 2029 nla_end_nested(nlmsg, nest2);
0ad19a3f 2030 nla_end_nested(nlmsg, nest1);
2031
2032 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
a5f5cb41 2033 return ret_errno(ENOMEM);
0ad19a3f 2034
a5f5cb41 2035 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2036}
2037
ebc73a67 2038/* TODO: merge with lxc_macvlan_create */
7c11d57a 2039int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 2040{
a5f5cb41
CB
2041 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2042 struct nl_handler nlh;
2043 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2044 int err, len, lindex;
06f976ca 2045 struct ifinfomsg *ifi;
26c39028 2046 struct rtattr *nest, *nest2;
26c39028 2047
a5f5cb41 2048 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2049 if (err)
2050 return err;
26c39028
JHS
2051
2052 len = strlen(master);
dae3fdf6 2053 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2054 return ret_errno(EINVAL);
26c39028
JHS
2055
2056 len = strlen(name);
dae3fdf6 2057 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2058 return ret_errno(EINVAL);
26c39028
JHS
2059
2060 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2061 if (!nlmsg)
a5f5cb41 2062 return ret_errno(ENOMEM);
26c39028 2063
06f976ca 2064 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028 2065 if (!answer)
a5f5cb41 2066 return ret_errno(ENOMEM);
26c39028
JHS
2067
2068 lindex = if_nametoindex(master);
2069 if (!lindex)
a5f5cb41 2070 return ret_errno(EINVAL);
26c39028 2071
a5f5cb41 2072 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2073 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2074
2075 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2076 if (!ifi)
2077 return ret_errno(ENOMEM);
2078
06f976ca 2079 ifi->ifi_family = AF_UNSPEC;
26c39028 2080
79e68309 2081 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028 2082 if (!nest)
a5f5cb41 2083 return ret_errno(ENOMEM);
26c39028
JHS
2084
2085 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
a5f5cb41 2086 return ret_errno(ENOMEM);
26c39028
JHS
2087
2088 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2089 if (!nest2)
a5f5cb41 2090 return ret_errno(ENOMEM);
e892973e 2091
26c39028 2092 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
a5f5cb41 2093 return ret_errno(ENOMEM);
e892973e 2094
26c39028 2095 nla_end_nested(nlmsg, nest2);
26c39028
JHS
2096 nla_end_nested(nlmsg, nest);
2097
2098 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
a5f5cb41 2099 return ret_errno(ENOMEM);
26c39028
JHS
2100
2101 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41
CB
2102 return ret_errno(ENOMEM);
2103
2104 return netlink_transaction(nlh_ptr, nlmsg, answer);
26c39028
JHS
2105}
2106
e892973e 2107int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 2108{
a5f5cb41
CB
2109 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2110 struct nl_handler nlh;
2111 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2112 int err, index, len;
06f976ca 2113 struct ifinfomsg *ifi;
e892973e 2114 struct rtattr *nest, *nest2;
0ad19a3f 2115
a5f5cb41 2116 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2117 if (err)
2118 return err;
0ad19a3f 2119
2120 len = strlen(master);
dae3fdf6 2121 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2122 return ret_errno(EINVAL);
0ad19a3f 2123
2124 len = strlen(name);
dae3fdf6 2125 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2126 return ret_errno(EINVAL);
0ad19a3f 2127
2128 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2129 if (!nlmsg)
a5f5cb41 2130 return ret_errno(ENOMEM);
0ad19a3f 2131
06f976ca 2132 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2133 if (!answer)
a5f5cb41 2134 return ret_errno(ENOMEM);
0ad19a3f 2135
2136 index = if_nametoindex(master);
2137 if (!index)
a5f5cb41 2138 return ret_errno(EINVAL);
0ad19a3f 2139
a5f5cb41 2140 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2141 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2142
2143 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2144 if (!ifi)
2145 return ret_errno(ENOMEM);
2146
06f976ca 2147 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2148
79e68309 2149 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2150 if (!nest)
a5f5cb41 2151 return ret_errno(ENOMEM);
0ad19a3f 2152
2153 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
a5f5cb41 2154 return ret_errno(ENOMEM);
0ad19a3f 2155
e892973e
DL
2156 if (mode) {
2157 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2158 if (!nest2)
a5f5cb41 2159 return ret_errno(ENOMEM);
e892973e
DL
2160
2161 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
a5f5cb41 2162 return ret_errno(ENOMEM);
e892973e
DL
2163
2164 nla_end_nested(nlmsg, nest2);
2165 }
2166
0ad19a3f 2167 nla_end_nested(nlmsg, nest);
2168
2169 if (nla_put_u32(nlmsg, IFLA_LINK, index))
a5f5cb41 2170 return ret_errno(ENOMEM);
0ad19a3f 2171
2172 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2173 return ret_errno(ENOMEM);
0ad19a3f 2174
a5f5cb41 2175 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2176}
2177
/*
 * Write the string @value to the file at @path.
 *
 * Returns 0 on success, or -errno if the file cannot be opened for writing
 * or lxc_write_nointr() reports a failure.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	/* Capture errno before close() has a chance to touch it. */
	ret = (lxc_write_nointr(fd, value, strlen(value)) < 0) ? -errno : 0;

	close(fd);
	return ret;
}
2193
6dfa9581 2194static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 2195{
2196 int ret;
2197 char path[PATH_MAX];
6509154d 2198
2199 if (family != AF_INET && family != AF_INET6)
6dfa9581 2200 return -EINVAL;
6509154d 2201
9c66dc4f 2202 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581 2203 family == AF_INET ? "ipv4" : "ipv6", ifname, "forwarding");
9c66dc4f 2204 if (ret < 0 || (size_t)ret >= sizeof(path))
6dfa9581 2205 return -E2BIG;
6509154d 2206
6dfa9581
TP
2207 return proc_sys_net_write(path, flag ? "1" : "0");
2208}
2209
/* Enable IP forwarding of the given address @family on interface @name. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
2214
/* Disable IP forwarding of the given address @family on interface @name. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int enable = 0;

	return ip_forwarding_set(name, family, enable);
}
2219
0ad19a3f 2220static int neigh_proxy_set(const char *ifname, int family, int flag)
2221{
9ba8130c 2222 int ret;
419590da 2223 char path[PATH_MAX];
0ad19a3f 2224
2225 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 2226 return -EINVAL;
0ad19a3f 2227
9c66dc4f 2228 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
2229 family == AF_INET ? "ipv4" : "ipv6", ifname,
2230 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 2231 if (ret < 0 || (size_t)ret >= sizeof(path))
9ba8130c 2232 return -E2BIG;
0ad19a3f 2233
ebc73a67 2234 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 2235}
2236
6509154d 2237static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2238{
2239 int ret;
2240 char path[PATH_MAX];
2241 char buf[1] = "";
2242
2243 if (family != AF_INET && family != AF_INET6)
596a002c 2244 return ret_set_errno(-1, EINVAL);
6509154d 2245
9c66dc4f 2246 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6509154d 2247 family == AF_INET ? "ipv4" : "ipv6", ifname,
2248 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 2249 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 2250 return ret_set_errno(-1, E2BIG);
6509154d 2251
2252 return lxc_read_file_expect(path, buf, 1, "1");
2253}
2254
/* Enable neighbour proxying (proxy_arp / proxy_ndp) for @family on interface @name. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2259
/* Disable neighbour proxying (proxy_arp / proxy_ndp) for @family on interface @name. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
2264
/*
 * Return the value (0-15) of the hexadecimal digit @c, or -1 if @c is not a
 * hexadecimal digit. Uses plain range checks so that it is well defined for
 * every char value, including negative ones (unlike isdigit() on a char).
 */
static int mac_hex_digit(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address @macaddr into @sockaddr.
 *
 * @sockaddr->sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are
 * stored in @sockaddr->sa_data. Each octet consists of one or two
 * hexadecimal digits (either case); octets are separated by ':'. Parsing
 * stops at the end of the string or after ETH_ALEN octets, whichever comes
 * first.
 *
 * Returns 0 on success and -EINVAL as soon as an unexpected character is
 * encountered (octets parsed before that point have already been stored).
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned int val;
		int hi, lo;
		char c;

		/* The first character of an octet must be a hex digit. */
		hi = mac_hex_digit(*macaddr++);
		if (hi < 0)
			return -EINVAL;

		/* The second one is either a hex digit or ends the octet. */
		c = *macaddr;
		lo = mac_hex_digit(c);
		if (lo >= 0)
			val = ((unsigned int)hi << 4) | (unsigned int)lo;
		else if (c == ':' || c == '\0')
			val = (unsigned int)hi;
		else
			return -EINVAL;

		/* Consume the second character unless it is the terminator. */
		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator following a two-digit octet. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2309
ebc73a67
CB
2310static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2311 void *acast, int prefix)
0ad19a3f 2312{
a5f5cb41
CB
2313 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2314 struct nl_handler nlh;
2315 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2316 int addrlen, err;
06f976ca 2317 struct ifaddrmsg *ifa;
0ad19a3f 2318
ebc73a67
CB
2319 addrlen = family == AF_INET ? sizeof(struct in_addr)
2320 : sizeof(struct in6_addr);
4bf1968d 2321
a5f5cb41 2322 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2323 if (err)
2324 return err;
0ad19a3f 2325
0ad19a3f 2326 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2327 if (!nlmsg)
a5f5cb41 2328 return ret_errno(ENOMEM);
0ad19a3f 2329
06f976ca 2330 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2331 if (!answer)
a5f5cb41 2332 return ret_errno(ENOMEM);
0ad19a3f 2333
a5f5cb41 2334 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2335 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2336
2337 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2338 if (!ifa)
a5f5cb41
CB
2339 return ret_errno(ENOMEM);
2340
06f976ca
SZ
2341 ifa->ifa_prefixlen = prefix;
2342 ifa->ifa_index = ifindex;
2343 ifa->ifa_family = family;
2344 ifa->ifa_scope = 0;
acf47e1b 2345
4bf1968d 2346 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
a5f5cb41 2347 return ret_errno(EINVAL);
0ad19a3f 2348
4bf1968d 2349 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
a5f5cb41 2350 return ret_errno(EINVAL);
0ad19a3f 2351
d8948a52 2352 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
a5f5cb41 2353 return ret_errno(EINVAL);
1f1b18e7 2354
ebc73a67 2355 /* TODO: multicast, anycast with ipv6 */
79881dc6
DL
2356 if (family == AF_INET6 &&
2357 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2358 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
a5f5cb41 2359 return ret_errno(EPROTONOSUPPORT);
0ad19a3f 2360
a5f5cb41 2361 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2362}
2363
/*
 * Add the IPv6 address @addr/@prefix to the interface with index @ifindex.
 * @mcast is forwarded in ip_addr_add()'s broadcast slot, @acast in its
 * anycast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
2370
ebc73a67
CB
/*
 * Add the IPv4 address @addr/@prefix with broadcast address @bcast to the
 * interface with index @ifindex. No anycast address is passed on.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
2376
ebc73a67
CB
/* Extract an address of the requested family from the RTM_NEWADDR message
 * @msg. IFA_LOCAL is preferred; an IFA_ADDRESS seen earlier in the message is
 * used if no IFA_LOCAL follows. The address is copied into *res, which is
 * allocated here if it is still NULL (so res should be an in_addr** or
 * in6_addr**).
 *
 * Returns 0 on success, including when the message belongs to a different
 * family or carries no address (*res is then left untouched), and -1 if an
 * attribute has an unexpected length or the allocation fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *attr;
	int addrlen;

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk every rtattr attached to this message. */
	for (attr = IFA_RTA(ifa); RTA_OK(attr, remaining); attr = RTA_NEXT(attr, remaining)) {
		int is_local = (attr->rta_type == IFA_LOCAL);

		if (!is_local && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * length, but verify it before copying anyway.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* Reuse the buffer if an IFA_ADDRESS was stored before; it
		 * is simply overwritten by a later IFA_LOCAL.
		 */
		if (*res == NULL) {
			*res = malloc(addrlen);
			if (*res == NULL)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is the best answer there is, stop looking. */
		if (is_local)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2430
19a26f82
MK
2431static int ip_addr_get(int family, int ifindex, void **res)
2432{
a5f5cb41
CB
2433 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2434 struct nl_handler nlh;
2435 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2436 int answer_len, err;
06f976ca 2437 struct ifaddrmsg *ifa;
19a26f82 2438 struct nlmsghdr *msg;
ebc73a67 2439 int readmore = 0, recv_len = 0;
19a26f82 2440
a5f5cb41 2441 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
19a26f82
MK
2442 if (err)
2443 return err;
2444
19a26f82
MK
2445 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2446 if (!nlmsg)
a5f5cb41 2447 return ret_errno(ENOMEM);
19a26f82 2448
06f976ca 2449 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82 2450 if (!answer)
a5f5cb41 2451 return ret_errno(ENOMEM);
19a26f82 2452
ebc73a67
CB
2453 /* Save the answer buffer length, since it will be overwritten on the
2454 * first receive (and we might need to receive more than once).
2455 */
06f976ca
SZ
2456 answer_len = answer->nlmsghdr->nlmsg_len;
2457
ebc73a67 2458 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2459 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2460
06f976ca 2461 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b 2462 if (!ifa)
a5f5cb41
CB
2463 return ret_errno(ENOMEM);
2464
06f976ca 2465 ifa->ifa_family = family;
19a26f82 2466
ebc73a67
CB
2467 /* Send the request for addresses, which returns all addresses on all
2468 * interfaces.
2469 */
a5f5cb41 2470 err = netlink_send(nlh_ptr, nlmsg);
19a26f82 2471 if (err < 0)
a5f5cb41 2472 return ret_set_errno(err, errno);
19a26f82 2473
6ce39620
CB
2474#pragma GCC diagnostic push
2475#pragma GCC diagnostic ignored "-Wcast-align"
2476
19a26f82
MK
2477 do {
2478 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2479 * overwritten by a previous receive.
2480 */
06f976ca 2481 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2482
ebc73a67 2483 /* Get the (next) batch of reply messages. */
a5f5cb41 2484 err = netlink_rcv(nlh_ptr, answer);
19a26f82 2485 if (err < 0)
a5f5cb41 2486 return ret_set_errno(err, errno);
19a26f82
MK
2487
2488 recv_len = err;
2489 err = 0;
2490
ebc73a67 2491 /* Satisfy the typing for the netlink macros. */
06f976ca 2492 msg = answer->nlmsghdr;
19a26f82
MK
2493
2494 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2495 /* Stop reading if we see an error message. */
19a26f82 2496 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
2497 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2498 return ret_set_errno(errmsg->error, errno);
19a26f82
MK
2499 }
2500
ebc73a67 2501 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2502 if (msg->nlmsg_type == NLMSG_DONE) {
2503 readmore = 0;
2504 break;
2505 }
2506
a5f5cb41
CB
2507 if (msg->nlmsg_type != RTM_NEWADDR)
2508 return ret_errno(EINVAL);
19a26f82 2509
06f976ca
SZ
2510 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2511 if (ifa->ifa_index == ifindex) {
a5f5cb41
CB
2512 if (ifa_get_local_ip(family, msg, res) < 0)
2513 return ret_errno(EINVAL);
51e7a874 2514
ebc73a67 2515 /* Found a result, stop searching. */
19a26f82 2516 if (*res)
a5f5cb41 2517 return 0;
19a26f82
MK
2518 }
2519
ebc73a67
CB
2520 /* Keep reading more data from the socket if the last
2521 * message had the NLF_F_MULTI flag set.
2522 */
19a26f82
MK
2523 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2524
ebc73a67 2525 /* Look at the next message received in this buffer. */
19a26f82
MK
2526 msg = NLMSG_NEXT(msg, recv_len);
2527 }
2528 } while (readmore);
2529
6ce39620
CB
2530#pragma GCC diagnostic pop
2531
19a26f82 2532 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2533 * error.
2534 */
a5f5cb41 2535 return -1;
19a26f82
MK
2536}
2537
/* Look up an IPv6 address on the interface with index @ifindex; see ip_addr_get(). */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2542
/* Look up an IPv4 address on the interface with index @ifindex; see ip_addr_get(). */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2547
f8fee0e2
MK
2548static int ip_gateway_add(int family, int ifindex, void *gw)
2549{
a5f5cb41 2550 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2551 struct nl_handler nlh;
a5f5cb41
CB
2552 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2553 int addrlen, err;
06f976ca 2554 struct rtmsg *rt;
f8fee0e2 2555
ebc73a67
CB
2556 addrlen = family == AF_INET ? sizeof(struct in_addr)
2557 : sizeof(struct in6_addr);
f8fee0e2 2558
a5f5cb41 2559 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
f8fee0e2
MK
2560 if (err)
2561 return err;
2562
f8fee0e2
MK
2563 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2564 if (!nlmsg)
a5f5cb41 2565 return ret_errno(ENOMEM);
f8fee0e2 2566
06f976ca 2567 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2 2568 if (!answer)
a5f5cb41 2569 return ret_errno(ENOMEM);
f8fee0e2 2570
a5f5cb41 2571 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2572 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2573
2574 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b 2575 if (!rt)
a5f5cb41
CB
2576 return ret_errno(ENOMEM);
2577
06f976ca
SZ
2578 rt->rtm_family = family;
2579 rt->rtm_table = RT_TABLE_MAIN;
2580 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2581 rt->rtm_protocol = RTPROT_BOOT;
2582 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2583 /* "default" destination */
06f976ca 2584 rt->rtm_dst_len = 0;
f8fee0e2 2585
a2f9a670 2586 /* If gateway address not supplied, then a device route will be created instead */
a5f5cb41
CB
2587 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2588 return ret_errno(ENOMEM);
f8fee0e2
MK
2589
2590 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2591 * addresses for the gateway.
2592 */
f8fee0e2 2593 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
a5f5cb41 2594 return ret_errno(EINVAL);
f8fee0e2 2595
a5f5cb41 2596 return netlink_transaction(nlh_ptr, nlmsg, answer);
f8fee0e2
MK
2597}
2598
/* Add an IPv4 default route via @gw on the interface with index @ifindex. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2603
/* Add an IPv6 default route via @gw on the interface with index @ifindex. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
/*
 * Report whether @bridge lacks a /sys/class/net/<bridge>/bridge entry.
 *
 * Returns true only when stat() on that path fails with ENOENT. Returns
 * false when the entry exists, when stat() fails for any other reason, or
 * when @bridge is too long for the path buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	char path[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge", bridge);
	if (len < 0 || (size_t)len >= sizeof(path))
		return false;

	return stat(path, &st) < 0 && errno == ENOENT;
}
2625
581c75e7
CB
/* Argument bundle handed to the ovs-vsctl exec callbacks via run_command(). */
struct ovs_veth_args {
	const char *bridge; /* name of the openvswitch bridge */
	const char *nic;    /* name of the port to add or delete */
};
2630
cb0dc11b
CB
2631/* Called from a background thread - when nic goes away, remove it from the
2632 * bridge.
c43cbc04 2633 */
581c75e7 2634static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2635{
581c75e7 2636 struct ovs_veth_args *args = data;
cb0dc11b 2637
9c66dc4f 2638 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2639 return -1;
c43cbc04
SH
2640}
2641
581c75e7 2642int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2643{
c43cbc04 2644 int ret;
419590da 2645 char cmd_output[PATH_MAX];
581c75e7 2646 struct ovs_veth_args args;
6ad22d06 2647
581c75e7
CB
2648 args.bridge = bridge;
2649 args.nic = nic;
2650 ret = run_command(cmd_output, sizeof(cmd_output),
2651 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2652 if (ret < 0)
2653 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2654
581c75e7
CB
2655 return 0;
2656}
ebc73a67 2657
581c75e7
CB
2658static int lxc_ovs_attach_bridge_exec(void *data)
2659{
2660 struct ovs_veth_args *args = data;
ebc73a67 2661
9c66dc4f 2662 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2663 return -1;
2664}
ebc73a67 2665
581c75e7
CB
2666static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2667{
2668 int ret;
419590da 2669 char cmd_output[PATH_MAX];
581c75e7 2670 struct ovs_veth_args args;
ebc73a67 2671
581c75e7
CB
2672 args.bridge = bridge;
2673 args.nic = nic;
2674 ret = run_command(cmd_output, sizeof(cmd_output),
2675 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2676 if (ret < 0)
2677 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2678
581c75e7 2679 return 0;
0d204771 2680}
0d204771 2681
/*
 * Attach the interface @ifname to the bridge @bridge.
 *
 * If is_ovs_bridge() reports @bridge as an openvswitch bridge the work is
 * delegated to lxc_ovs_attach_bridge(); otherwise the SIOCBRADDIF ioctl is
 * used.
 *
 * Returns 0 on success, -EINVAL if @ifname is too long or unknown, -E2BIG
 * if @bridge does not fit into an interface name, -errno if the socket or
 * the ioctl fails, or the result of lxc_ovs_attach_bridge().
 */
int lxc_bridge_attach(const char *bridge, const char *ifname)
{
	int err, fd, index, saved_errno;
	size_t retlen;
	struct ifreq ifr;

	if (strlen(ifname) >= IFNAMSIZ)
		return -EINVAL;

	index = if_nametoindex(ifname);
	if (!index)
		return -EINVAL;

	if (is_ovs_bridge(bridge))
		return lxc_ovs_attach_bridge(bridge, ifname);

	fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
	if (fd < 0)
		return -errno;

	/* Do not hand uninitialised stack bytes to the kernel. */
	memset(&ifr, 0, sizeof(ifr));

	retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
	if (retlen >= IFNAMSIZ) {
		close(fd);
		return -E2BIG;
	}

	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
	ifr.ifr_ifindex = index;
	err = ioctl(fd, SIOCBRADDIF, &ifr);
	/* close() may overwrite errno, so save the ioctl error first. */
	saved_errno = errno;
	close(fd);
	if (err)
		return -saved_errno;

	return 0;
}
72d0e1cb 2717
ebc73a67 2718static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2719 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2720 [LXC_NET_VETH] = "veth",
2721 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2722 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2723 [LXC_NET_PHYS] = "phys",
b343592b
BP
2724 [LXC_NET_VLAN] = "vlan",
2725 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2726};
2727
2728const char *lxc_net_type_to_str(int type)
2729{
2730 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2731 return NULL;
ebc73a67 2732
72d0e1cb
SG
2733 return lxc_network_types[type];
2734}
8befa924 2735
3646ffd9 2736static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2737
3646ffd9 2738char *lxc_ifname_alnum_case_sensitive(char *template)
a0265685 2739{
2d7bf744 2740 int ret;
b1e44ed1 2741 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2742 char name[IFNAMSIZ];
2743 bool exists = false;
2744 size_t i = 0;
280cc35f 2745#ifdef HAVE_RAND_R
2746 unsigned int seed;
2747
2748 seed = randseed(false);
2749#else
2750
2751 (void)randseed(true);
2752#endif
a0265685 2753
535e8859
CB
2754 if (strlen(template) >= IFNAMSIZ)
2755 return NULL;
2756
ebc73a67 2757 /* Get all the network interfaces. */
b1e44ed1 2758 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
9c66dc4f
CB
2759 if (ret < 0)
2760 return log_error_errno(NULL, errno, "Failed to get network interfaces");
a0265685 2761
ebc73a67 2762 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2763 for (;;) {
966e9f1f 2764 name[0] = '\0';
94b1cade 2765 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2766
966e9f1f 2767 exists = false;
280cc35f 2768
a0265685
SG
2769 for (i = 0; i < strlen(name); i++) {
2770 if (name[i] == 'X') {
2771#ifdef HAVE_RAND_R
8523344a 2772 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2773#else
8523344a 2774 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2775#endif
2776 }
2777 }
2778
2779 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2780 if (!strcmp(ifa->ifa_name, name)) {
2781 exists = true;
a0265685
SG
2782 break;
2783 }
2784 }
2785
966e9f1f 2786 if (!exists)
a0265685 2787 break;
a0265685
SG
2788 }
2789
b1e44ed1 2790 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2791 (void)strlcpy(template, name, strlen(template) + 1);
2792
2793 return template;
a0265685
SG
2794}
2795
8befa924
SH
/*
 * Rewrite the first octet of the hardware address of @veth1 to 0xfe.
 *
 * The current address is read with SIOCGIFHWADDR, its first byte is
 * replaced and the result is written back with SIOCSIFHWADDR.
 *
 * Returns 0 on success, -E2BIG if @veth1 does not fit into an interface
 * name, or -errno of the failing socket()/ioctl() call.
 */
int setup_private_host_hw_addr(char *veth1)
{
	int err, saved_errno, sockfd;
	struct ifreq ifr;

	sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sockfd < 0)
		return -errno;

	/* Do not hand uninitialised stack bytes to the kernel. */
	memset(&ifr, 0, sizeof(ifr));

	err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (err < 0 || (size_t)err >= IFNAMSIZ) {
		close(sockfd);
		return -E2BIG;
	}

	err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (err < 0) {
		/* close() may overwrite errno, so save the ioctl error first. */
		saved_errno = errno;
		close(sockfd);
		return -saved_errno;
	}

	ifr.ifr_hwaddr.sa_data[0] = (char)0xfe;
	err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	saved_errno = errno;
	close(sockfd);
	if (err < 0)
		return -saved_errno;

	return 0;
}
811ef482
CB
2825
2826int lxc_find_gateway_addresses(struct lxc_handler *handler)
2827{
2828 struct lxc_list *network = &handler->conf->network;
2829 struct lxc_list *iterator;
2830 struct lxc_netdev *netdev;
2831 int link_index;
2832
2833 lxc_list_for_each(iterator, network) {
2834 netdev = iterator->elem;
2835
2836 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2837 continue;
2838
9c66dc4f
CB
2839 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2840 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2841
f2711167 2842 if (is_empty_string(netdev->link)) {
9c66dc4f 2843 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2844 }
2845
2846 link_index = if_nametoindex(netdev->link);
2847 if (!link_index)
2848 return -EINVAL;
2849
2850 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2851 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2852 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2853 }
2854
2855 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2856 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2857 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2858 }
2859 }
2860
2861 return 0;
2862}
2863
2864#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2865static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2866 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2867{
2868 int ret;
2869 pid_t child;
2870 int bytes, pipefd[2];
2871 char *token, *saveptr = NULL;
095ead80 2872 char netdev_link[IFNAMSIZ];
419590da 2873 char buffer[PATH_MAX] = {0};
94b1cade 2874 size_t retlen;
811ef482 2875
9c66dc4f
CB
2876 if (netdev->type != LXC_NET_VETH)
2877 return log_error_errno(-1, errno, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
2878
2879 ret = pipe(pipefd);
9c66dc4f
CB
2880 if (ret < 0)
2881 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2882
2883 child = fork();
2884 if (child < 0) {
811ef482
CB
2885 close(pipefd[0]);
2886 close(pipefd[1]);
9c66dc4f 2887 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2888 }
2889
2890 if (child == 0) {
8335fd40 2891 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2892
2893 close(pipefd[0]);
2894
2895 ret = dup2(pipefd[1], STDOUT_FILENO);
2896 if (ret >= 0)
2897 ret = dup2(pipefd[1], STDERR_FILENO);
2898 close(pipefd[1]);
2899 if (ret < 0) {
2900 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2901 _exit(EXIT_FAILURE);
811ef482
CB
2902 }
2903
f2711167 2904 if (!is_empty_string(netdev->link))
9de31d5a 2905 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2906 else
9de31d5a
CB
2907 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2908 if (retlen >= IFNAMSIZ) {
2909 SYSERROR("Invalid network device name");
2910 _exit(EXIT_FAILURE);
2911 }
811ef482 2912
8335fd40
CB
2913 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2914 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2915 _exit(EXIT_FAILURE);
8335fd40 2916 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2917
2918 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2919 lxcname, pidstr, netdev_link,
3473ca76
CB
2920 !is_empty_string(netdev->name) ? netdev->name : "(null)");
2921 if (!is_empty_string(netdev->name))
811ef482
CB
2922 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2923 lxcpath, lxcname, pidstr, "veth", netdev_link,
2924 netdev->name, (char *)NULL);
2925 else
2926 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2927 lxcpath, lxcname, pidstr, "veth", netdev_link,
2928 (char *)NULL);
2929 SYSERROR("Failed to execute lxc-user-nic");
78070056 2930 _exit(EXIT_FAILURE);
811ef482
CB
2931 }
2932
2933 /* close the write-end of the pipe */
2934 close(pipefd[1]);
2935
9c66dc4f 2936 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2937 if (bytes < 0) {
74c6e2b0 2938 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2939 close(pipefd[0]);
6b9f82a9
CB
2940 } else {
2941 buffer[bytes - 1] = '\0';
811ef482 2942 }
811ef482
CB
2943
2944 ret = wait_for_pid(child);
2945 close(pipefd[0]);
9c66dc4f
CB
2946 if (ret != 0 || bytes < 0)
2947 return log_error(-1, "lxc-user-nic failed to configure requested network: %s", buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2948 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2949
2950 /* netdev->name */
2951 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2952 if (!token)
2953 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2954
e389f2af
CB
2955 /*
2956 * lxc-user-nic will take care of proper network device naming. So
2957 * netdev->name and netdev->created_name need to be identical to not
2958 * trigger another rename later on.
2959 */
2960 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2961 if (retlen < IFNAMSIZ)
2962 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
9c66dc4f
CB
2963 if (retlen >= IFNAMSIZ)
2964 return log_error_errno(-1, E2BIG, "Container side veth device name returned by lxc-user-nic is too long");
811ef482 2965
74c6e2b0 2966 /* netdev->ifindex */
811ef482 2967 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2968 if (!token)
2969 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2970
74c6e2b0 2971 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f
CB
2972 if (ret < 0)
2973 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2974
74c6e2b0 2975 /* netdev->priv.veth_attr.veth1 */
811ef482 2976 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2977 if (!token)
2978 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2979
94b1cade 2980 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f
CB
2981 if (retlen >= IFNAMSIZ)
2982 return log_error_errno(-1, E2BIG, "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
2983
2984 /* netdev->priv.veth_attr.ifindex */
2985 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2986 if (!token)
2987 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
2988
2989 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f
CB
2990 if (ret < 0)
2991 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2992
4d781681 2993 if (netdev->upscript) {
2994 char *argv[] = {
2995 "veth",
2996 netdev->link,
2997 netdev->priv.veth_attr.veth1,
2998 NULL,
2999 };
3000
e389f2af
CB
3001 ret = run_script_argv(lxcname, hooks_version, "net",
3002 netdev->upscript, "up", argv);
4d781681 3003 if (ret < 0)
3004 return -1;
3005 }
3006
811ef482
CB
3007 return 0;
3008}
3009
f0ecc19d 3010static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
3011 struct lxc_netdev *netdev,
3012 const char *netns_path)
811ef482
CB
3013{
3014 int bytes, ret;
3015 pid_t child;
3016 int pipefd[2];
419590da 3017 char buffer[PATH_MAX] = {0};
811ef482 3018
9c66dc4f
CB
3019 if (netdev->type != LXC_NET_VETH)
3020 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
3021
3022 ret = pipe(pipefd);
9c66dc4f
CB
3023 if (ret < 0)
3024 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
3025
3026 child = fork();
3027 if (child < 0) {
811ef482
CB
3028 close(pipefd[0]);
3029 close(pipefd[1]);
9c66dc4f 3030 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
3031 }
3032
3033 if (child == 0) {
8843fde4 3034 char *hostveth;
811ef482
CB
3035
3036 close(pipefd[0]);
3037
3038 ret = dup2(pipefd[1], STDOUT_FILENO);
3039 if (ret >= 0)
3040 ret = dup2(pipefd[1], STDERR_FILENO);
3041 close(pipefd[1]);
3042 if (ret < 0) {
3043 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 3044 _exit(EXIT_FAILURE);
811ef482
CB
3045 }
3046
f2711167 3047 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3048 hostveth = netdev->priv.veth_attr.pair;
3049 else
3050 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3051 if (is_empty_string(hostveth)) {
74c6e2b0 3052 SYSERROR("Host side veth device name is missing");
a30b9023 3053 _exit(EXIT_FAILURE);
74c6e2b0
CB
3054 }
3055
f2711167
CB
3056 if (is_empty_string(netdev->link)) {
3057 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 3058 _exit(EXIT_FAILURE);
74c6e2b0 3059 }
811ef482 3060
811ef482 3061 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 3062 lxcname, netns_path, netdev->link, hostveth);
811ef482 3063 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
3064 lxcname, netns_path, "veth", netdev->link, hostveth,
3065 (char *)NULL);
811ef482 3066 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 3067 _exit(EXIT_FAILURE);
811ef482
CB
3068 }
3069
3070 close(pipefd[1]);
3071
9c66dc4f 3072 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
3073 if (bytes < 0) {
3074 SYSERROR("Failed to read from pipe file descriptor.");
3075 close(pipefd[0]);
6b9f82a9
CB
3076 } else {
3077 buffer[bytes - 1] = '\0';
811ef482 3078 }
811ef482 3079
6b9f82a9 3080 ret = wait_for_pid(child);
9c66dc4f
CB
3081 close_prot_errno_disarm(pipefd[0]);
3082 if (ret != 0 || bytes < 0)
3083 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
3084 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 3085
811ef482
CB
3086 return 0;
3087}
3088
1bd8d726
CB
3089bool lxc_delete_network_unpriv(struct lxc_handler *handler)
3090{
3091 int ret;
3092 struct lxc_list *iterator;
3093 struct lxc_list *network = &handler->conf->network;
3094 /* strlen("/proc/") = 6
3095 * +
8335fd40 3096 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
3097 * +
3098 * strlen("/fd/") = 4
3099 * +
8335fd40 3100 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
3101 * +
3102 * \0
3103 */
8335fd40 3104 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
3105
3106 *netns_path = '\0';
3107
9c66dc4f
CB
3108 if (handler->nsfd[LXC_NS_NET] < 0)
3109 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726
CB
3110
3111 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 3112 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
3113 if (ret < 0 || ret >= sizeof(netns_path))
3114 return false;
3115
3116 lxc_list_for_each(iterator, network) {
3117 char *hostveth = NULL;
3118 struct lxc_netdev *netdev = iterator->elem;
3119
3120 /* We can only delete devices whose ifindex we have. If we don't
3121 * have the index it means that we didn't create it.
3122 */
3123 if (!netdev->ifindex)
3124 continue;
3125
3126 if (netdev->type == LXC_NET_PHYS) {
3127 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3128 netdev->link);
3129 if (ret < 0)
9c66dc4f 3130 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
3131 netdev->ifindex, netdev->link);
3132 else
9c66dc4f 3133 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 3134 netdev->ifindex, netdev->link);
b3259dc6
TP
3135
3136 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3137 if (ret < 0)
3138 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3139 netdev->ifindex, netdev->link);
66a7c406 3140 goto clear_ifindices;
1bd8d726
CB
3141 }
3142
3143 ret = netdev_deconf[netdev->type](handler, netdev);
3144 if (ret < 0)
3145 WARN("Failed to deconfigure network device");
3146
3147 if (netdev->type != LXC_NET_VETH)
66a7c406 3148 goto clear_ifindices;
1bd8d726 3149
f2711167 3150 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 3151 goto clear_ifindices;
1bd8d726 3152
f2711167 3153 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3154 hostveth = netdev->priv.veth_attr.pair;
3155 else
3156 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3157 if (is_empty_string(hostveth))
66a7c406 3158 goto clear_ifindices;
8843fde4 3159
1bd8d726
CB
3160 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3161 handler->name, netdev,
3162 netns_path);
3163 if (ret < 0) {
9c66dc4f 3164 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 3165 goto clear_ifindices;
1bd8d726 3166 }
9c66dc4f 3167 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
3168
3169clear_ifindices:
0858c829
CB
3170 /*
3171 * We need to clear any ifindices we recorded so liblxc won't
3172 * have cached stale data which would cause it to fail on
3173 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3174 */
3175 netdev->ifindex = 0;
3176 if (netdev->type == LXC_NET_PHYS) {
3177 netdev->priv.phys_attr.ifindex = 0;
3178 } else if (netdev->type == LXC_NET_VETH) {
3179 netdev->priv.veth_attr.veth1[0] = '\0';
3180 netdev->priv.veth_attr.ifindex = 0;
3181 }
1bd8d726
CB
3182 }
3183
bb84beda 3184 return true;
1bd8d726
CB
3185}
3186
6509154d 3187static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
3188 struct lxc_list *cur, *next;
3189 struct lxc_inetdev *inet4dev;
3190 struct lxc_inet6dev *inet6dev;
3191 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3192 int err = 0;
5fe147e9
TP
3193 unsigned int lo_ifindex = 0, link_ifindex = 0;
3194
3195 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
3196 if (link_ifindex == 0)
3197 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 3198
6509154d 3199
3200 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3201 if (!lxc_list_empty(&netdev->ipv4)) {
3202 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
3203 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3204 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 3205 }
3206
3207 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3208 if (!lxc_list_empty(&netdev->ipv6)) {
3209 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
3210 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3211 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 3212
3213 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
3214 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3215 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 3216 }
3217
b670016a 3218 /* Perform IPVLAN specific checks. */
3219 if (netdev->type == LXC_NET_IPVLAN) {
3220 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
3221 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3222 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 3223
3224 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3225 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
3226 if (lo_ifindex == 0)
3227 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 3228 }
3229
6509154d 3230 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3231 inet4dev = cur->elem;
3232 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3233 return ret_set_errno(-1, -errno);
6509154d 3234
5fe147e9 3235 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3236 return ret_set_errno(-1, EINVAL);
b670016a 3237
3238 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3239 if (netdev->type == LXC_NET_IPVLAN) {
3240 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
3241 if (err < 0)
3242 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 3243 }
6509154d 3244 }
3245
3246 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3247 inet6dev = cur->elem;
3248 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3249 return ret_set_errno(-1, -errno);
6509154d 3250
5fe147e9 3251 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3252 return ret_set_errno(-1, EINVAL);
b670016a 3253
3254 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3255 if (netdev->type == LXC_NET_IPVLAN) {
3256 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
3257 if (err < 0)
3258 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3259 }
6509154d 3260 }
3261
3262 return 0;
3263}
3264
9c66dc4f
CB
/*
 * Remove the l2proxy state for one IPv4 address.
 *
 * If @lo_ifindex is non-zero the /32 route for @ip on that interface is
 * deleted. If @link is a non-empty name the neighbour proxy entry for @ip on
 * that link is deleted. Both removals are attempted even if the first fails.
 *
 * Returns 0 on success, -1 if anything failed.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET_ADDRSTRLEN];
	unsigned int ifindex = 0;
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* Drop the static route to the lo device when a loopback ifindex was given. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
		failed = true;
	}

	/* Drop the IP neigh proxy entry for this IP when a link was given. */
	if (!is_empty_string(link)) {
		ifindex = if_nametoindex(link);
		if (ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3297
9c66dc4f
CB
/*
 * Remove the l2proxy state for one IPv6 address.
 *
 * If @lo_ifindex is non-zero the /128 route for @ip on that interface is
 * deleted. If @link is a non-empty name the neighbour proxy entry for @ip on
 * that link is deleted. Both removals are attempted even if the first fails.
 *
 * Returns 0 on success, -1 if anything failed.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET6_ADDRSTRLEN];
	unsigned int ifindex = 0;
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* Drop the static route to the lo device when a loopback ifindex was given. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
		failed = true;
	}

	/* Drop the IP neigh proxy entry for this IP when a link was given. */
	if (!is_empty_string(link)) {
		ifindex = if_nametoindex(link);
		if (ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3332
6509154d 3333static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3334 unsigned int lo_ifindex = 0;
3335 unsigned int errCount = 0;
6509154d 3336 struct lxc_list *cur, *next;
3337 struct lxc_inetdev *inet4dev;
3338 struct lxc_inet6dev *inet6dev;
6509154d 3339
b670016a 3340 /* Perform IPVLAN specific checks. */
3341 if (netdev->type == LXC_NET_IPVLAN) {
3342 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3343 lo_ifindex = if_nametoindex(loop_device);
b670016a 3344 if (lo_ifindex == 0) {
3345 errCount++;
3ebffb98 3346 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3347 }
b670016a 3348 }
6509154d 3349
b670016a 3350 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3351 inet4dev = cur->elem;
3352 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3353 errCount++;
6509154d 3354 }
3355
3356 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3357 inet6dev = cur->elem;
b670016a 3358 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3359 errCount++;
6509154d 3360 }
3361
b670016a 3362 if (errCount > 0)
596a002c 3363 return ret_set_errno(-1, EINVAL);
6509154d 3364
3365 return 0;
3366}
3367
e389f2af 3368static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3369{
811ef482
CB
3370 struct lxc_list *iterator;
3371 struct lxc_list *network = &handler->conf->network;
3372
811ef482
CB
3373 lxc_list_for_each(iterator, network) {
3374 struct lxc_netdev *netdev = iterator->elem;
3375
9c66dc4f
CB
3376 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3377 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3378
6509154d 3379 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3380 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3381 if (lxc_setup_l2proxy(netdev))
3382 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3383 }
3384
9c66dc4f
CB
3385 if (netdev_conf[netdev->type](handler, netdev))
3386 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3387 }
3388
3389 return 0;
3390}
3391
e389f2af 3392int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3393{
e389f2af
CB
3394 pid_t pid = handler->pid;
3395 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3396 struct lxc_list *iterator;
3397
e0010464 3398 if (am_guest_unpriv())
74c6e2b0 3399 return 0;
811ef482
CB
3400
3401 lxc_list_for_each(iterator, network) {
3dd78294 3402 __do_free char *physname = NULL;
e389f2af 3403 int ret;
811ef482
CB
3404 struct lxc_netdev *netdev = iterator->elem;
3405
811ef482
CB
3406 if (!netdev->ifindex)
3407 continue;
3408
3dd78294
CB
3409 if (netdev->type == LXC_NET_PHYS)
3410 physname = is_wlan(netdev->link);
3411
3412 if (physname)
9f8cf6e1 3413 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, NULL);
3dd78294 3414 else
9f8cf6e1 3415 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
9c66dc4f
CB
3416 if (ret)
3417 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3418 netdev->created_name,
3419 netdev->ifindex, pid);
811ef482 3420
24190194
CB
3421 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3422 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3423 }
3424
3425 return 0;
3426}
3427
3c09b97c
CB
3428static int network_requires_advanced_setup(int type)
3429{
3430 if (type == LXC_NET_EMPTY)
3431 return false;
3432
3433 if (type == LXC_NET_NONE)
3434 return false;
3435
3436 return true;
3437}
3438
e389f2af 3439static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3440{
e389f2af
CB
3441 int hooks_version = handler->conf->hooks_version;
3442 const char *lxcname = handler->name;
3443 const char *lxcpath = handler->lxcpath;
3444 struct lxc_list *network = &handler->conf->network;
3445 pid_t pid = handler->pid;
74c6e2b0
CB
3446 struct lxc_list *iterator;
3447
74c6e2b0
CB
3448 lxc_list_for_each(iterator, network) {
3449 struct lxc_netdev *netdev = iterator->elem;
3450
3c09b97c 3451 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3452 continue;
3453
9c66dc4f
CB
3454 if (netdev->type != LXC_NET_VETH)
3455 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3456 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3457
3458 if (netdev->mtu)
3459 INFO("mtu ignored due to insufficient privilege");
3460
e389f2af
CB
3461 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3462 pid, hooks_version))
74c6e2b0
CB
3463 return -1;
3464 }
3465
3466 return 0;
3467}
3468
1bd8d726 3469bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3470{
3471 int ret;
3472 struct lxc_list *iterator;
3473 struct lxc_list *network = &handler->conf->network;
1bd8d726 3474
811ef482
CB
3475 lxc_list_for_each(iterator, network) {
3476 char *hostveth = NULL;
3477 struct lxc_netdev *netdev = iterator->elem;
3478
3479 /* We can only delete devices whose ifindex we have. If we don't
3480 * have the index it means that we didn't create it.
3481 */
3482 if (!netdev->ifindex)
3483 continue;
3484
0104c121
CB
3485 /*
3486 * If the network device has been moved back from the
3487 * containers network namespace, update the ifindex.
3488 */
3489 netdev->ifindex = if_nametoindex(netdev->name);
3490
6509154d 3491 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3492 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3493 if (lxc_delete_l2proxy(netdev))
3494 WARN("Failed to delete all l2proxy config");
3495 /* Don't return, let the network be cleaned up as normal. */
3496 }
3497
811ef482
CB
3498 if (netdev->type == LXC_NET_PHYS) {
3499 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3500 if (ret < 0)
3501 WARN("Failed to rename interface with index %d "
b809f232
CB
3502 "from \"%s\" to its initial name \"%s\"",
3503 netdev->ifindex, netdev->name, netdev->link);
0b154989 3504 else {
29589196
CB
3505 TRACE("Renamed interface with index %d from "
3506 "\"%s\" to its initial name \"%s\"",
3507 netdev->ifindex, netdev->name,
3508 netdev->link);
0b154989
TP
3509
3510 /* Restore original MTU */
3511 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3512 if (ret < 0) {
3513 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3514 netdev->link, netdev->priv.phys_attr.mtu);
3515 } else {
3516 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3517 netdev->link, netdev->priv.phys_attr.mtu);
3518 }
3519 }
b3259dc6
TP
3520
3521 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3522 if (ret < 0)
3523 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3524 netdev->ifindex, netdev->link);
66a7c406 3525 goto clear_ifindices;
811ef482
CB
3526 }
3527
3528 ret = netdev_deconf[netdev->type](handler, netdev);
3529 if (ret < 0)
3530 WARN("Failed to deconfigure network device");
3531
811ef482 3532 if (netdev->type != LXC_NET_VETH)
66a7c406 3533 goto clear_ifindices;
811ef482 3534
811ef482
CB
3535 /* Explicitly delete host veth device to prevent lingering
3536 * devices. We had issues in LXD around this.
3537 */
f2711167 3538 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3539 hostveth = netdev->priv.veth_attr.pair;
3540 else
3541 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3542 if (is_empty_string(hostveth))
66a7c406 3543 goto clear_ifindices;
811ef482 3544
1ee56cff
CB
3545 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3546 ret = lxc_netdev_delete_by_name(hostveth);
3547 if (ret < 0)
3548 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3549
1ee56cff
CB
3550 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3551 } else if (!is_empty_string(netdev->link)) {
3552 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3553 if (ret < 0)
3554 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3555
1ee56cff
CB
3556 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3557 }
811ef482 3558
66a7c406 3559clear_ifindices:
ad2ddfcd 3560 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3561 * have cached stale data which would cause it to fail on reboot
3562 * we're we don't re-read the on-disk config file.
3563 */
3564 netdev->ifindex = 0;
3565 if (netdev->type == LXC_NET_PHYS) {
3566 netdev->priv.phys_attr.ifindex = 0;
3567 } else if (netdev->type == LXC_NET_VETH) {
3568 netdev->priv.veth_attr.veth1[0] = '\0';
3569 netdev->priv.veth_attr.ifindex = 0;
3570 }
811ef482
CB
3571 }
3572
bb84beda 3573 return true;
811ef482
CB
3574}
3575
3576int lxc_requests_empty_network(struct lxc_handler *handler)
3577{
3578 struct lxc_list *network = &handler->conf->network;
3579 struct lxc_list *iterator;
3580 bool found_none = false, found_nic = false;
3581
3582 if (lxc_list_empty(network))
3583 return 0;
3584
9c66dc4f 3585 lxc_list_for_each (iterator, network) {
811ef482
CB
3586 struct lxc_netdev *netdev = iterator->elem;
3587
3588 if (netdev->type == LXC_NET_NONE)
3589 found_none = true;
3590 else
3591 found_nic = true;
3592 }
9c66dc4f 3593
811ef482
CB
3594 if (found_none && !found_nic)
3595 return 1;
9c66dc4f 3596
811ef482
CB
3597 return 0;
3598}
3599
3600/* try to move physical nics to the init netns */
b809f232 3601int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3602{
9c66dc4f
CB
3603 __do_close int oldfd = -EBADF;
3604 int netnsfd = handler->nsfd[LXC_NS_NET];
3605 struct lxc_conf *conf = handler->conf;
811ef482 3606 int ret;
811ef482 3607 char ifname[IFNAMSIZ];
b809f232 3608 struct lxc_list *iterator;
811ef482 3609
b809f232
CB
3610 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3611 * the parent network namespace. We won't have this capability if we are
3612 * unprivileged.
3613 */
d0fbc7ba 3614 if (!handler->am_root)
b809f232 3615 return 0;
811ef482 3616
b809f232 3617 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3618
0037ab49 3619 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3620 if (oldfd < 0)
3621 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3622
b809f232 3623 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3624 if (ret < 0)
3625 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3626
b809f232
CB
3627 lxc_list_for_each(iterator, &conf->network) {
3628 struct lxc_netdev *netdev = iterator->elem;
811ef482 3629
b809f232
CB
3630 if (netdev->type != LXC_NET_PHYS)
3631 continue;
3632
3633 /* Retrieve the name of the interface in the container's network
3634 * namespace.
3635 */
3636 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3637 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3638 continue;
3639 }
b809f232 3640
0037ab49 3641 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3642 if (ret < 0)
9c66dc4f 3643 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3644 else
9c66dc4f 3645 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3646 }
811ef482 3647
b809f232 3648 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3649 if (ret < 0)
3650 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3651
3652 return 0;
811ef482
CB
3653}
3654
3655static int setup_hw_addr(char *hwaddr, const char *ifname)
3656{
9c66dc4f 3657 __do_close int fd = -EBADF;
811ef482
CB
3658 struct sockaddr sockaddr;
3659 struct ifreq ifr;
9c66dc4f 3660 int ret;
811ef482
CB
3661
3662 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3663 if (ret)
3664 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3665
3666 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3667 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3668 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3669
ad9429e5 3670 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3671 if (fd < 0)
3672 return -1;
3673
3674 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3675 if (ret)
6d1400b5 3676 SYSERROR("Failed to perform ioctl");
3677
9c66dc4f 3678 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3679
3680 return ret;
3681}
3682
3683static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3684{
3685 struct lxc_list *iterator;
3686 int err;
3687
3688 lxc_list_for_each(iterator, ip) {
3689 struct lxc_inetdev *inetdev = iterator->elem;
3690
3691 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3692 &inetdev->bcast, inetdev->prefix);
9c66dc4f
CB
3693 if (err)
3694 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3695 }
3696
3697 return 0;
3698}
3699
3700static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3701{
3702 struct lxc_list *iterator;
3703 int err;
3704
3705 lxc_list_for_each(iterator, ip) {
3706 struct lxc_inet6dev *inet6dev = iterator->elem;
3707
3708 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3709 &inet6dev->mcast, &inet6dev->acast,
3710 inet6dev->prefix);
9c66dc4f
CB
3711 if (err)
3712 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3713 }
3714
3715 return 0;
3716}
3717
8bf64b77 3718static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3719{
811ef482 3720 int err;
009d6127 3721 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3722
3723 /* empty network namespace */
8bf64b77
CB
3724 if (!netdev->ifindex && netdev->flags & IFF_UP) {
3725 err = lxc_netdev_up("lo");
9c66dc4f
CB
3726 if (err)
3727 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3728 }
3729
811ef482 3730 /* set a mac address */
9c66dc4f
CB
3731 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3732 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3733
3734 /* setup ipv4 addresses on the interface */
9c66dc4f
CB
3735 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex))
3736 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3737
3738 /* setup ipv6 addresses on the interface */
9c66dc4f
CB
3739 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex))
3740 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3741
3742 /* set the network device up */
3743 if (netdev->flags & IFF_UP) {
8bf64b77 3744 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3745 if (err)
3746 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3747
3748 /* the network is up, make the loopback up too */
3749 err = lxc_netdev_up("lo");
9c66dc4f
CB
3750 if (err)
3751 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3752 }
3753
811ef482 3754 /* setup ipv4 gateway on the interface */
a2f9a670 3755 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3756 if (!(netdev->flags & IFF_UP))
3757 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3758
9c66dc4f
CB
3759 if (lxc_list_empty(&netdev->ipv4))
3760 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3761
a2f9a670 3762 /* Setup device route if ipv4_gateway_dev is enabled */
3763 if (netdev->ipv4_gateway_dev) {
3764 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3765 if (err < 0)
3766 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3767 } else {
009d6127 3768 /* Check the gateway address is valid */
3769 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3770 return ret_set_errno(-1, errno);
009d6127 3771
3772 /* Try adding a default route to the gateway address */
811ef482 3773 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3774 if (err < 0) {
3775 /* If adding the default route fails, this could be because the
3776 * gateway address is in a different subnet to the container's address.
3777 * To work around this, we try adding a static device route to the
3778 * gateway address first, and then try again.
3779 */
a2f9a670 3780 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3781 if (err < 0)
3782 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3783
a2f9a670 3784 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3785 if (err < 0)
3786 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3787 }
3788 }
3789 }
3790
3791 /* setup ipv6 gateway on the interface */
a2f9a670 3792 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3793 if (!(netdev->flags & IFF_UP))
3794 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3795
9c66dc4f
CB
3796 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
3797 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3798
a2f9a670 3799 /* Setup device route if ipv6_gateway_dev is enabled */
3800 if (netdev->ipv6_gateway_dev) {
3801 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3802 if (err < 0)
3803 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3804 } else {
009d6127 3805 /* Check the gateway address is valid */
3806 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3807 return ret_set_errno(-1, errno);
009d6127 3808
3809 /* Try adding a default route to the gateway address */
811ef482 3810 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3811 if (err < 0) {
3812 /* If adding the default route fails, this could be because the
3813 * gateway address is in a different subnet to the container's address.
3814 * To work around this, we try adding a static device route to the
3815 * gateway address first, and then try again.
3816 */
a2f9a670 3817 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3818 if (err < 0)
3819 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3820
a2f9a670 3821 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3822 if (err < 0)
3823 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3824 }
3825 }
3826 }
3827
8bf64b77 3828 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3829
3830 return 0;
3831}
3832
3833int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3834 struct lxc_list *network)
3835{
3836 struct lxc_list *iterator;
811ef482 3837
8bf64b77 3838 lxc_list_for_each (iterator, network) {
e389f2af 3839 struct lxc_netdev *netdev = iterator->elem;
8bf64b77 3840 int ret;
811ef482 3841
8bf64b77
CB
3842 ret = netdev_ns_conf[netdev->type](netdev);
3843 if (!ret)
3844 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
3845 if (ret)
3846 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482
CB
3847 }
3848
3849 if (!lxc_list_empty(network))
e389f2af 3850 INFO("Network has been setup");
811ef482
CB
3851
3852 return 0;
3853}
7ab1ba02 3854
3c09b97c 3855int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
3856{
3857 struct lxc_list *iterator;
3858 struct lxc_list *network = &handler->conf->network;
3859 int data_sock = handler->data_sock[0];
3860
7ab1ba02
CB
3861 lxc_list_for_each(iterator, network) {
3862 int ret;
3863 struct lxc_netdev *netdev = iterator->elem;
3864
3c09b97c 3865 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3866 continue;
3867
7fbb15ec 3868 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3869 if (ret < 0)
7ab1ba02 3870 return -1;
e389f2af
CB
3871
3872 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3873 if (ret < 0)
3874 return -1;
3875
3876 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3877 }
3878
3879 return 0;
3880}
3881
3c09b97c 3882int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
3883{
3884 struct lxc_list *iterator;
3885 struct lxc_list *network = &handler->conf->network;
3886 int data_sock = handler->data_sock[1];
3887
7ab1ba02
CB
3888 lxc_list_for_each(iterator, network) {
3889 int ret;
3890 struct lxc_netdev *netdev = iterator->elem;
3891
3c09b97c 3892 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3893 continue;
3894
e3233f26 3895 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3896 if (ret < 0)
7ab1ba02 3897 return -1;
e389f2af
CB
3898
3899 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3900 if (ret < 0)
3901 return -1;
54256301 3902
e389f2af 3903 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3904 }
3905
3906 return 0;
3907}
a1ae535a
CB
3908
3909int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3910{
3911 struct lxc_list *iterator, *network;
3912 int data_sock = handler->data_sock[0];
3913
3914 if (!handler->am_root)
3915 return 0;
3916
3917 network = &handler->conf->network;
3918 lxc_list_for_each(iterator, network) {
3919 int ret;
3920 struct lxc_netdev *netdev = iterator->elem;
3921
3922 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3923 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3924 if (ret < 0)
7729f8e5 3925 return -1;
a1ae535a
CB
3926
3927 /* Send network device ifindex in the child's namespace to
3928 * parent.
3929 */
7fbb15ec 3930 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3931 if (ret < 0)
7729f8e5 3932 return -1;
a1ae535a
CB
3933 }
3934
e389f2af
CB
3935 if (!lxc_list_empty(network))
3936 TRACE("Sent network device names and ifindices to parent");
3937
a1ae535a 3938 return 0;
a1ae535a
CB
3939}
3940
3941int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3942{
3943 struct lxc_list *iterator, *network;
3944 int data_sock = handler->data_sock[1];
3945
3946 if (!handler->am_root)
3947 return 0;
3948
3949 network = &handler->conf->network;
3950 lxc_list_for_each(iterator, network) {
3951 int ret;
3952 struct lxc_netdev *netdev = iterator->elem;
3953
3954 /* Receive network device name in the child's namespace to
3955 * parent.
3956 */
e3233f26 3957 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3958 if (ret < 0)
7729f8e5 3959 return -1;
a1ae535a
CB
3960
3961 /* Receive network device ifindex in the child's namespace to
3962 * parent.
3963 */
e3233f26 3964 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3965 if (ret < 0)
7729f8e5 3966 return -1;
a1ae535a
CB
3967 }
3968
3969 return 0;
a1ae535a 3970}
bb84beda
CB
3971
3972void lxc_delete_network(struct lxc_handler *handler)
3973{
3974 bool bret;
3975
3976 if (handler->am_root)
3977 bret = lxc_delete_network_priv(handler);
3978 else
3979 bret = lxc_delete_network_unpriv(handler);
3980 if (!bret)
3981 DEBUG("Failed to delete network devices");
3982 else
3983 DEBUG("Deleted network devices");
3984}
1cd95214 3985
1cd95214
CB
3986int lxc_netns_set_nsid(int fd)
3987{
41a3300d 3988 int ret;
0ce60f0d
CB
3989 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3990 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3991 NLMSG_ALIGN(1024)];
1cd95214 3992 struct nl_handler nlh;
a5f5cb41 3993 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
0ce60f0d
CB
3994 struct nlmsghdr *hdr;
3995 struct rtgenmsg *msg;
9d036caa
CB
3996 const __s32 ns_id = -1;
3997 const __u32 netns_fd = fd;
1cd95214 3998
a5f5cb41 3999 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
1cd95214 4000 if (ret < 0)
41a3300d 4001 return -1;
1cd95214 4002
0ce60f0d 4003 memset(buf, 0, sizeof(buf));
6ce39620
CB
4004
4005#pragma GCC diagnostic push
4006#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
4007 hdr = (struct nlmsghdr *)buf;
4008 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4009#pragma GCC diagnostic pop
1cd95214 4010
0ce60f0d
CB
4011 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4012 hdr->nlmsg_type = RTM_NEWNSID;
4013 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4014 hdr->nlmsg_pid = 0;
4015 hdr->nlmsg_seq = RTM_NEWNSID;
4016 msg->rtgen_family = AF_UNSPEC;
1cd95214 4017
9d036caa
CB
4018 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4019 if (ret < 0)
a5f5cb41 4020 return ret_errno(ENOMEM);
9d036caa
CB
4021
4022 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4023 if (ret < 0)
a5f5cb41 4024 return ret_errno(ENOMEM);
1cd95214 4025
a5f5cb41 4026 return __netlink_transaction(nlh_ptr, hdr, hdr);
1cd95214 4027}
938980ba
CB
4028
/*
 * Index a sequence of route attributes by type.
 *
 * @tb must have room for @max + 1 entries and is cleared first. Walking the
 * @len bytes starting at @rta, the first attribute seen for each type in
 * [0, @max] is stored in tb[type]; later duplicates and attributes with a
 * type above @max are ignored.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short type = rta->rta_type;

		if (type > max || tb[type])
			continue;

		tb[type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
4048
/*
 * Return the payload of @rta interpreted as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() rather than read through a cast
 * pointer, so the read makes no alignment or aliasing assumption about the
 * attribute payload. The caller must ensure the payload holds at least
 * sizeof(__s32) bytes.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
4053
/*
 * First route attribute of a netns message: the address @r advanced past an
 * NLMSG_ALIGN-padded struct rtgenmsg. Only defined when the included headers
 * do not already provide it.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r)                                                           \
	((struct rtattr *)(((char *)(r)) +                                     \
			   NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
4058
4059int lxc_netns_get_nsid(int fd)
4060{
a5f5cb41
CB
4061 struct nl_handler nlh;
4062 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
938980ba
CB
4063 int ret;
4064 ssize_t len;
4065 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
4066 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4067 NLMSG_ALIGN(1024)];
938980ba 4068 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
938980ba
CB
4069 struct nlmsghdr *hdr;
4070 struct rtgenmsg *msg;
938980ba
CB
4071 __u32 netns_fd = fd;
4072
a5f5cb41 4073 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
938980ba
CB
4074 if (ret < 0)
4075 return -1;
4076
4077 memset(buf, 0, sizeof(buf));
6ce39620
CB
4078
4079#pragma GCC diagnostic push
4080#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4081 hdr = (struct nlmsghdr *)buf;
4082 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 4083#pragma GCC diagnostic pop
938980ba
CB
4084
4085 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4086 hdr->nlmsg_type = RTM_GETNSID;
4087 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4088 hdr->nlmsg_pid = 0;
4089 hdr->nlmsg_seq = RTM_GETNSID;
4090 msg->rtgen_family = AF_UNSPEC;
4091
9d036caa 4092 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
a5f5cb41
CB
4093 if (ret < 0)
4094 return ret_errno(ENOMEM);
938980ba 4095
a5f5cb41 4096 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
938980ba
CB
4097 if (ret < 0)
4098 return -1;
4099
4100 msg = NLMSG_DATA(hdr);
4101 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4102 if (len < 0)
a5f5cb41 4103 return ret_errno(EINVAL);
938980ba 4104
6ce39620
CB
4105#pragma GCC diagnostic push
4106#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4107 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4108 if (tb[__LXC_NETNSA_NSID])
4109 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4110#pragma GCC diagnostic pop
938980ba
CB
4111
4112 return -1;
4113}
e389f2af
CB
4114
4115int lxc_create_network(struct lxc_handler *handler)
4116{
4117 int ret;
4118
e389f2af
CB
4119 if (handler->am_root) {
4120 ret = lxc_create_network_priv(handler);
4121 if (ret)
4122 return -1;
4123
4124 return lxc_network_move_created_netdev_priv(handler);
4125 }
4126
4127 return lxc_create_network_unpriv(handler);
4128}