]> git.proxmox.com Git - mirror_lxc.git/blame - src/lxc/network.c
network: Updates netlink_open handling in lxc_ipvlan_create
[mirror_lxc.git] / src / lxc / network.c
CommitLineData
cc73685d 1/* SPDX-License-Identifier: LGPL-2.1+ */
cb0dc11b 2
d38dd64a
CB
3#ifndef _GNU_SOURCE
4#define _GNU_SOURCE 1
5#endif
6#include <arpa/inet.h>
cb0dc11b
CB
7#include <ctype.h>
8#include <errno.h>
9#include <fcntl.h>
0ad19a3f 10#include <linux/netlink.h>
11#include <linux/rtnetlink.h>
12#include <linux/sockios.h>
cb0dc11b
CB
13#include <net/ethernet.h>
14#include <net/if.h>
15#include <net/if_arp.h>
16#include <netinet/in.h>
d38dd64a
CB
17#include <stdio.h>
18#include <stdlib.h>
19#include <string.h>
cb0dc11b
CB
20#include <sys/inotify.h>
21#include <sys/ioctl.h>
22#include <sys/param.h>
23#include <sys/socket.h>
24#include <sys/stat.h>
25#include <sys/types.h>
d38dd64a
CB
26#include <time.h>
27#include <unistd.h>
f549edcc 28
d38dd64a 29#include "../include/netns_ifaddrs.h"
7ab1ba02 30#include "af_unix.h"
72d0e1cb 31#include "conf.h"
811ef482 32#include "config.h"
e3233f26 33#include "file_utils.h"
cb0dc11b 34#include "log.h"
8335fd40 35#include "macro.h"
95ea3d1f 36#include "memory_utils.h"
cb0dc11b
CB
37#include "network.h"
38#include "nl.h"
f40988c7 39#include "process_utils.h"
59524108 40#include "syscall_wrappers.h"
0d204771 41#include "utils.h"
0ad19a3f 42
9de31d5a
CB
43#ifndef HAVE_STRLCPY
44#include "include/strlcpy.h"
45#endif
46
/* Set up logging for this file under the names "network" and "lxc". */
lxc_log_define(network, lxc);

/*
 * Callback signature matching the instantiate_*() functions in this file:
 * they receive the container handler and the network device to set up.
 */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);

/* Callback signature that only receives the network device. */
typedef int (*instantiate_ns_cb)(struct lxc_netdev *);

/* Interface name "lo". */
static const char loop_device[] = "lo";
811ef482 52
b670016a 53static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
8f82874c 54{
d16bda44 55 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
8f82874c 56 struct nl_handler nlh;
d16bda44
CB
57 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
58 int addrlen, err;
8f82874c 59 struct rtmsg *rt;
8f82874c 60
61 addrlen = family == AF_INET ? sizeof(struct in_addr)
62 : sizeof(struct in6_addr);
63
d16bda44 64 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
8f82874c 65 if (err)
66 return err;
67
8f82874c 68 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
69 if (!nlmsg)
d16bda44 70 return -ENOMEM;
8f82874c 71
72 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
73 if (!answer)
a5f5cb41 74 return -ENOMEM;
8f82874c 75
76 nlmsg->nlmsghdr->nlmsg_flags =
77 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
b670016a 78 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
8f82874c 79
80 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
81 if (!rt)
a5f5cb41 82 return -ENOMEM;
d16bda44 83
8f82874c 84 rt->rtm_family = family;
85 rt->rtm_table = RT_TABLE_MAIN;
86 rt->rtm_scope = RT_SCOPE_LINK;
87 rt->rtm_protocol = RTPROT_BOOT;
88 rt->rtm_type = RTN_UNICAST;
89 rt->rtm_dst_len = netmask;
90
8f82874c 91 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
d16bda44
CB
92 return -EINVAL;
93
8f82874c 94 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
d16bda44
CB
95 return -EINVAL;
96
97 return netlink_transaction(nlh_ptr, nlmsg, answer);
8f82874c 98}
99
/* Add an IPv4 route for @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_NEWROUTE, AF_INET, ifindex, dest,
				 netmask);
}

/* Add an IPv6 route for @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_NEWROUTE, AF_INET6, ifindex, dest,
				 netmask);
}

/* Delete the IPv4 route for @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_DELROUTE, AF_INET, ifindex, dest,
				 netmask);
}

/* Delete the IPv6 route for @dest/@netmask on the device with index @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	return lxc_ip_route_dest(RTM_DELROUTE, AF_INET6, ifindex, dest,
				 netmask);
}
119
d4a7da46 120static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
121{
122 struct lxc_list *iterator;
123 int err;
124
125 lxc_list_for_each(iterator, ip) {
126 struct lxc_inetdev *inetdev = iterator->elem;
127
128 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
9c66dc4f
CB
129 if (err)
130 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
d4a7da46 131 }
132
133 return 0;
134}
135
136static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
137{
138 struct lxc_list *iterator;
139 int err;
140
141 lxc_list_for_each(iterator, ip) {
142 struct lxc_inet6dev *inet6dev = iterator->elem;
143
144 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
9c66dc4f
CB
145 if (err)
146 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
d4a7da46 147 }
148
149 return 0;
150}
151
6dfa9581
TP
152static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
153{
154 struct lxc_list *iterator;
155 int err;
156
157 lxc_list_for_each(iterator, ip) {
158 struct lxc_inetdev *inetdev = iterator->elem;
159
160 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
161
162 if (err)
9c66dc4f 163 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
164 }
165
166 return 0;
167}
168
169static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
170{
171 struct lxc_list *iterator;
172 int err;
173
174 lxc_list_for_each(iterator, ip) {
175 struct lxc_inet6dev *inet6dev = iterator->elem;
176
177 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
178 if (err)
9c66dc4f 179 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
6dfa9581
TP
180 }
181
182 return 0;
183}
184
/*
 * Pair of strings: an IP address and a device name.
 * NOTE(review): no user of this struct is visible in this part of the file.
 */
struct ip_proxy_args {
	const char *ip;		/* presumably the textual IP address - confirm */
	const char *dev;	/* presumably the network device name - confirm */
};
189
5fe147e9 190static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
6dfa9581 191{
d16bda44 192 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
5fe147e9 193 struct nl_handler nlh;
d16bda44
CB
194 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
195 int addrlen, err;
5fe147e9 196 struct ndmsg *rt;
6dfa9581 197
5fe147e9 198 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
6dfa9581 199
d16bda44 200 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
5fe147e9
TP
201 if (err)
202 return err;
6dfa9581 203
5fe147e9
TP
204 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
205 if (!nlmsg)
d16bda44 206 return -ENOMEM;
6dfa9581 207
5fe147e9
TP
208 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
209 if (!answer)
d16bda44 210 return -ENOMEM;
6dfa9581 211
5fe147e9
TP
212 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
213 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
6dfa9581 214
5fe147e9
TP
215 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
216 if (!rt)
d16bda44
CB
217 return -ENOMEM;
218
5fe147e9
TP
219 rt->ndm_ifindex = ifindex;
220 rt->ndm_flags = NTF_PROXY;
221 rt->ndm_type = NDA_DST;
222 rt->ndm_family = family;
6dfa9581 223
5fe147e9 224 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
d16bda44 225 return -EINVAL;
6dfa9581 226
d16bda44 227 return netlink_transaction(nlh_ptr, nlmsg, answer);
6dfa9581
TP
228}
229
230static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
231{
232 int ret;
233 char path[PATH_MAX];
234 char buf[1] = "";
235
236 if (family != AF_INET && family != AF_INET6)
596a002c 237 return ret_set_errno(-1, EINVAL);
6dfa9581 238
9c66dc4f 239 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581
TP
240 family == AF_INET ? "ipv4" : "ipv6", ifname,
241 "forwarding");
9c66dc4f 242 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 243 return ret_set_errno(-1, E2BIG);
6dfa9581
TP
244
245 return lxc_read_file_expect(path, buf, 1, "1");
246}
247
/*
 * Payload of the IFLA_BRIDGE_VLAN_INFO attribute as built by
 * lxc_bridge_vlan(): a flag word followed by the VLAN id.
 */
struct bridge_vlan_info {
	__u16 flags;	/* BRIDGE_VLAN_INFO_* bits */
	__u16 vid;	/* VLAN id */
};
252
253static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
254{
255 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
256 struct nl_handler nlh;
257 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
258 int err;
259 struct ifinfomsg *ifi;
260 struct rtattr *nest;
261 unsigned short bridge_flags = 0;
262 struct bridge_vlan_info vlan_info;
263
264 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
265 if (err)
266 return err;
267
268 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
269 if (!nlmsg)
270 return ret_errno(ENOMEM);
271
272 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
273 if (!answer)
274 return ret_errno(ENOMEM);
275
276 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
277 nlmsg->nlmsghdr->nlmsg_type = operation;
278
279 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
280 if (!ifi)
281 return ret_errno(ENOMEM);
282 ifi->ifi_family = AF_BRIDGE;
283 ifi->ifi_index = ifindex;
284
285 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
286 if (!nest)
287 return ret_errno(ENOMEM);
288
289 bridge_flags |= BRIDGE_FLAGS_MASTER;
290 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
291 return ret_errno(ENOMEM);
292
293 vlan_info.vid = vlan_id;
294 vlan_info.flags = 0;
295 if (!tagged)
296 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
297
298 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
299 return ret_errno(ENOMEM);
300
301 nla_end_nested(nlmsg, nest);
302
303 return netlink_transaction(nlh_ptr, nlmsg, answer);
304}
305
/*
 * Add VLAN @vlan_id to the device with index @ifindex via RTM_SETLINK;
 * @tagged selects tagged vs. untagged/PVID membership.
 */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	return lxc_bridge_vlan(ifindex, RTM_SETLINK, vlan_id,
			       tagged);
}

/* Remove VLAN @vlan_id from the device with index @ifindex via RTM_DELLINK. */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	return lxc_bridge_vlan(ifindex, RTM_DELLINK, vlan_id,
			       false);
}
315
316static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
317{
318 struct lxc_list *iterator;
319 int err;
320
321 lxc_list_for_each(iterator, vlan_ids) {
322 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
323
324 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
325 if (err)
326 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
327 }
328
329 return 0;
330}
331
33320936
TP
332static int validate_veth(struct lxc_netdev *netdev)
333{
334 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
335 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
336 if (netdev->priv.veth_attr.vlan_id_set)
337 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
338
339 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
340 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
341 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
342 }
343
344 if (netdev->priv.veth_attr.vlan_id_set) {
345 struct lxc_list *it;
346 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
347 unsigned short i = PTR_TO_USHORT(it->elem);
348 if (i == netdev->priv.veth_attr.vlan_id)
349 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
350 }
351 }
352
353 return 0;
354}
355
356static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
357{
358 int err, rc, veth1index;
359 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
360 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
361
362 /* Skip setup if no VLAN options are specified. */
363 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
364 return 0;
365
366 /* Check vlan filtering is enabled on parent bridge. */
367 rc = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
368 if (rc < 0 || (size_t)rc >= sizeof(path))
369 return -1;
370
371 rc = lxc_read_from_file(path, buf, sizeof(buf));
372 if (rc < 0)
373 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
374
375 buf[rc - 1] = '\0';
376
377 if (strcmp(buf, "1") != 0)
378 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
379
380 /* Get veth1 ifindex for use with netlink. */
381 veth1index = if_nametoindex(veth1);
382 if (!veth1index)
383 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
384
385 /* Configure untagged VLAN settings on bridge port if specified. */
386 if (netdev->priv.veth_attr.vlan_id_set) {
387 unsigned short default_pvid;
388
389 /* Get the bridge's default VLAN PVID. */
390 rc = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
391 if (rc < 0 || (size_t)rc >= sizeof(path))
392 return -1;
393
394 rc = lxc_read_from_file(path, buf, sizeof(buf));
395 if (rc < 0)
396 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
397
398 buf[rc - 1] = '\0';
399 err = get_u16(&default_pvid, buf, 0);
400 if (err)
401 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
402
403 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
404 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
405 err = lxc_bridge_vlan_del(veth1index, default_pvid);
406 if (err)
407 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
408 }
409
410 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
411 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
412 if (err)
413 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
414 }
415 }
416
417 /* Configure tagged VLAN settings on bridge port if specified. */
418 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
419 if (err)
420 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
421
422 return 0;
423}
424
811ef482
CB
425static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
426{
54256301 427 int err;
a00fbab5 428 unsigned int mtu = 1500;
811ef482
CB
429 char *veth1, *veth2;
430 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
811ef482 431
33320936
TP
432 err = validate_veth(netdev);
433 if (err)
434 return err;
435
f2711167 436 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
811ef482
CB
437 veth1 = netdev->priv.veth_attr.pair;
438 if (handler->conf->reboot)
439 lxc_netdev_delete_by_name(veth1);
440 } else {
441 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
442 if (err < 0 || (size_t)err >= sizeof(veth1buf))
443 return -1;
444
3646ffd9 445 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
811ef482
CB
446 if (!veth1)
447 return -1;
448
449 /* store away for deconf */
450 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
451 }
452
d34212ad
CB
453 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
454 if (err < 0 || (size_t)err >= sizeof(veth2buf))
455 return -1;
456
3646ffd9 457 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
811ef482 458 if (!veth2)
54256301
CB
459 return -1;
460
a00fbab5
TP
461 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
462 if (netdev->mtu) {
463 if (lxc_safe_uint(netdev->mtu, &mtu))
464 return log_error_errno(-1, errno, "Failed to parse mtu");
f2711167 465 } else if (!is_empty_string(netdev->link)) {
54256301 466 int ifindex_mtu;
811ef482 467
54256301
CB
468 ifindex_mtu = if_nametoindex(netdev->link);
469 if (ifindex_mtu) {
470 mtu = netdev_get_mtu(ifindex_mtu);
471 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
472 }
473 }
474
475 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
9c66dc4f
CB
476 if (err)
477 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
811ef482 478
24190194
CB
479 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
480
811ef482
CB
481 /* changing the high byte of the mac address to 0xfe, the bridge interface
482 * will always keep the host's mac address and not take the mac address
483 * of a container */
484 err = setup_private_host_hw_addr(veth1);
485 if (err) {
6d1400b5 486 errno = -err;
487 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
811ef482
CB
488 goto out_delete;
489 }
490
8da62485
CB
491 /* Retrieve ifindex of the host's veth device. */
492 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
493 if (!netdev->priv.veth_attr.ifindex) {
494 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
495 goto out_delete;
496 }
497
811ef482
CB
498 if (mtu) {
499 err = lxc_netdev_set_mtu(veth1, mtu);
811ef482 500 if (err) {
6d1400b5 501 errno = -err;
54256301 502 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
811ef482
CB
503 goto out_delete;
504 }
505 }
506
f2711167 507 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
26da53c3
TP
508 if (!lxc_nic_exists(netdev->link)) {
509 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
510 goto out_delete;
511 }
512
811ef482
CB
513 err = lxc_bridge_attach(netdev->link, veth1);
514 if (err) {
6d1400b5 515 errno = -err;
26da53c3 516 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
811ef482
CB
517 goto out_delete;
518 }
519 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
33320936
TP
520
521 if (!is_ovs_bridge(netdev->link)) {
522 err = setup_veth_native_bridge_vlan(veth1, netdev);
523 if (err) {
524 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
525 goto out_delete;
526 }
527 }
811ef482
CB
528 }
529
530 err = lxc_netdev_up(veth1);
531 if (err) {
6d1400b5 532 errno = -err;
533 SYSERROR("Failed to set \"%s\" up", veth1);
811ef482
CB
534 goto out_delete;
535 }
536
d4a7da46 537 /* setup ipv4 routes on the host interface */
538 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
539 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
540 goto out_delete;
541 }
542
543 /* setup ipv6 routes on the host interface */
544 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
545 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
546 goto out_delete;
547 }
548
6dfa9581 549 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
954e36b4
TP
550 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
551 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
552 appear to be added successfully but then do not appear in the proxy list. The length of time
553 slept doesn't appear to be important, only that the process sleeps for a short period of time.
554 */
555 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
556
6dfa9581
TP
557 if (netdev->ipv4_gateway) {
558 char bufinet4[INET_ADDRSTRLEN];
559 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
9c66dc4f 560 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
6dfa9581
TP
561 goto out_delete;
562 }
563
564 err = lxc_ip_forwarding_on(veth1, AF_INET);
565 if (err) {
9c66dc4f 566 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
6dfa9581
TP
567 goto out_delete;
568 }
569
5fe147e9 570 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
6dfa9581 571 if (err) {
9c66dc4f 572 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
6dfa9581
TP
573 goto out_delete;
574 }
575 }
576
577 if (netdev->ipv6_gateway) {
578 char bufinet6[INET6_ADDRSTRLEN];
579
580 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
9c66dc4f 581 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
6dfa9581
TP
582 goto out_delete;
583 }
584
585 /* Check for sysctl net.ipv6.conf.all.forwarding=1
586 Kernel requires this to route any packets for IPv6.
587 */
588 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
589 if (err) {
9c66dc4f 590 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
6dfa9581
TP
591 goto out_delete;
592 }
593
594 err = lxc_ip_forwarding_on(veth1, AF_INET6);
595 if (err) {
9c66dc4f 596 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
6dfa9581
TP
597 goto out_delete;
598 }
599
600 err = lxc_neigh_proxy_on(veth1, AF_INET6);
601 if (err) {
9c66dc4f 602 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
6dfa9581
TP
603 goto out_delete;
604 }
605
5fe147e9 606 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
6dfa9581 607 if (err) {
9c66dc4f 608 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
6dfa9581
TP
609 goto out_delete;
610 }
611 }
612
613 /* setup ipv4 address routes on the host interface */
614 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
615 if (err) {
9c66dc4f 616 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
617 goto out_delete;
618 }
619
620 /* setup ipv6 address routes on the host interface */
621 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
622 if (err) {
9c66dc4f 623 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
6dfa9581
TP
624 goto out_delete;
625 }
626 }
627
811ef482 628 if (netdev->upscript) {
14a7b0f9
CB
629 char *argv[] = {
630 "veth",
631 netdev->link,
990b9ac3 632 veth1,
14a7b0f9
CB
633 NULL,
634 };
635
636 err = run_script_argv(handler->name,
637 handler->conf->hooks_version, "net",
638 netdev->upscript, "up", argv);
639 if (err < 0)
811ef482
CB
640 goto out_delete;
641 }
642
54256301 643 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
811ef482
CB
644
645 return 0;
646
647out_delete:
54256301 648 lxc_netdev_delete_by_name(veth1);
811ef482
CB
649 return -1;
650}
651
652static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
653{
8021de25 654 char peer[IFNAMSIZ];
811ef482
CB
655 int err;
656
f2711167 657 if (is_empty_string(netdev->link)) {
811ef482
CB
658 ERROR("No link for macvlan network device specified");
659 return -1;
660 }
661
8021de25
CB
662 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
663 if (err < 0 || (size_t)err >= sizeof(peer))
811ef482
CB
664 return -1;
665
3646ffd9 666 if (!lxc_ifname_alnum_case_sensitive(peer))
811ef482
CB
667 return -1;
668
669 err = lxc_macvlan_create(netdev->link, peer,
670 netdev->priv.macvlan_attr.mode);
671 if (err) {
6d1400b5 672 errno = -err;
673 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
674 peer, netdev->link);
966e9f1f 675 goto on_error;
811ef482
CB
676 }
677
9f8cf6e1
CB
678 strlcpy(netdev->created_name, peer, IFNAMSIZ);
679
811ef482
CB
680 netdev->ifindex = if_nametoindex(peer);
681 if (!netdev->ifindex) {
682 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
966e9f1f 683 goto on_error;
811ef482
CB
684 }
685
3bef7b7b 686 if (netdev->mtu) {
54256301
CB
687 unsigned int mtu;
688
3bef7b7b
TP
689 err = lxc_safe_uint(netdev->mtu, &mtu);
690 if (err < 0) {
691 errno = -err;
692 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
693 goto on_error;
694 }
695
696 err = lxc_netdev_set_mtu(peer, mtu);
697 if (err < 0) {
698 errno = -err;
699 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
700 goto on_error;
701 }
702 }
703
811ef482 704 if (netdev->upscript) {
14a7b0f9
CB
705 char *argv[] = {
706 "macvlan",
707 netdev->link,
708 NULL,
709 };
710
711 err = run_script_argv(handler->name,
712 handler->conf->hooks_version, "net",
713 netdev->upscript, "up", argv);
714 if (err < 0)
966e9f1f 715 goto on_error;
811ef482
CB
716 }
717
718 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
719 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
720
721 return 0;
966e9f1f
CB
722
723on_error:
811ef482 724 lxc_netdev_delete_by_name(peer);
811ef482
CB
725 return -1;
726}
727
c9f52382 728static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
729{
d16bda44
CB
730 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
731 struct nl_handler nlh;
732 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
c9f52382 733 int err, index, len;
734 struct ifinfomsg *ifi;
c9f52382 735 struct rtattr *nest, *nest2;
c9f52382 736
737 len = strlen(master);
738 if (len == 1 || len >= IFNAMSIZ)
d16bda44 739 return ret_errno(EINVAL);
c9f52382 740
741 len = strlen(name);
742 if (len == 1 || len >= IFNAMSIZ)
d16bda44 743 return ret_errno(EINVAL);
c9f52382 744
745 index = if_nametoindex(master);
746 if (!index)
d16bda44 747 return ret_errno(EINVAL);
c9f52382 748
d16bda44 749 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
c9f52382 750 if (err)
df62850d 751 return err;
c9f52382 752
c9f52382 753 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
754 if (!nlmsg)
d16bda44 755 return ret_errno(ENOMEM);
c9f52382 756
757 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
758 if (!answer)
d16bda44 759 return ret_errno(ENOMEM);
c9f52382 760
761 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
762 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
763
764 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
765 if (!ifi)
766 return ret_errno(ENOMEM);
c9f52382 767 ifi->ifi_family = AF_UNSPEC;
768
c9f52382 769 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
770 if (!nest)
d16bda44 771 return ret_errno(EPROTO);
c9f52382 772
773 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
d16bda44 774 return ret_errno(EPROTO);
c9f52382 775
5755765e
KT
776 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
777 if (!nest2)
778 return ret_errno(EPROTO);
779
3a934e2e 780 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
5755765e
KT
781 return ret_errno(EPROTO);
782
cf88a827
TP
783 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
784 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
785 * according to ipvlan docs.
5755765e 786 */
cf88a827 787 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
5755765e 788 return ret_errno(EPROTO);
c9f52382 789
5755765e 790 nla_end_nested(nlmsg, nest2);
c9f52382 791 nla_end_nested(nlmsg, nest);
792
793 if (nla_put_u32(nlmsg, IFLA_LINK, index))
d16bda44 794 return ret_errno(EPROTO);
c9f52382 795
796 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
d16bda44
CB
797 return ret_errno(EPROTO);
798
799 return netlink_transaction(nlh_ptr, nlmsg, answer);
c9f52382 800}
801
802static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
803{
dd119206 804 char peer[IFNAMSIZ];
c9f52382 805 int err;
806
f2711167 807 if (is_empty_string(netdev->link)) {
c9f52382 808 ERROR("No link for ipvlan network device specified");
809 return -1;
810 }
811
dd119206
CB
812 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
813 if (err < 0 || (size_t)err >= sizeof(peer))
c9f52382 814 return -1;
815
3646ffd9 816 if (!lxc_ifname_alnum_case_sensitive(peer))
c9f52382 817 return -1;
818
dd119206
CB
819 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
820 netdev->priv.ipvlan_attr.isolation);
c9f52382 821 if (err) {
dd119206
CB
822 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
823 peer, netdev->link);
c9f52382 824 goto on_error;
825 }
826
e7fdd504
CB
827 strlcpy(netdev->created_name, peer, IFNAMSIZ);
828
c9f52382 829 netdev->ifindex = if_nametoindex(peer);
830 if (!netdev->ifindex) {
831 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
832 goto on_error;
833 }
834
006e135e 835 if (netdev->mtu) {
54256301
CB
836 unsigned int mtu;
837
006e135e 838 err = lxc_safe_uint(netdev->mtu, &mtu);
839 if (err < 0) {
840 errno = -err;
54256301 841 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 842 goto on_error;
843 }
844
845 err = lxc_netdev_set_mtu(peer, mtu);
846 if (err < 0) {
847 errno = -err;
54256301 848 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
006e135e 849 goto on_error;
850 }
851 }
852
c9f52382 853 if (netdev->upscript) {
854 char *argv[] = {
855 "ipvlan",
856 netdev->link,
857 NULL,
858 };
859
dd119206
CB
860 err = run_script_argv(handler->name, handler->conf->hooks_version,
861 "net", netdev->upscript, "up", argv);
c9f52382 862 if (err < 0)
863 goto on_error;
864 }
865
dd119206
CB
866 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
867 netdev->ifindex, netdev->priv.macvlan_attr.mode);
c9f52382 868
869 return 0;
870
871on_error:
872 lxc_netdev_delete_by_name(peer);
873 return -1;
874}
875
811ef482
CB
876static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
877{
878 char peer[IFNAMSIZ];
879 int err;
880 static uint16_t vlan_cntr = 0;
811ef482 881
f2711167 882 if (is_empty_string(netdev->link)) {
811ef482
CB
883 ERROR("No link for vlan network device specified");
884 return -1;
885 }
886
d4d68410
CB
887 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
888 netdev->priv.vlan_attr.vid, vlan_cntr++);
811ef482
CB
889 if (err < 0 || (size_t)err >= sizeof(peer))
890 return -1;
891
892 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
893 if (err) {
6d1400b5 894 errno = -err;
895 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
896 peer, netdev->link);
811ef482
CB
897 return -1;
898 }
899
83530dba
CB
900 strlcpy(netdev->created_name, peer, IFNAMSIZ);
901
811ef482
CB
902 netdev->ifindex = if_nametoindex(peer);
903 if (!netdev->ifindex) {
904 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
3e2a7b08 905 goto on_error;
906 }
907
908 if (netdev->mtu) {
54256301
CB
909 unsigned int mtu;
910
3e2a7b08 911 err = lxc_safe_uint(netdev->mtu, &mtu);
912 if (err < 0) {
913 errno = -err;
54256301 914 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 915 goto on_error;
916 }
917
918 err = lxc_netdev_set_mtu(peer, mtu);
54256301 919 if (err < 0) {
3e2a7b08 920 errno = -err;
54256301 921 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
3e2a7b08 922 goto on_error;
923 }
811ef482
CB
924 }
925
3a73d9f1 926 if (netdev->upscript) {
927 char *argv[] = {
928 "vlan",
929 netdev->link,
930 NULL,
931 };
932
d4d68410
CB
933 err = run_script_argv(handler->name, handler->conf->hooks_version,
934 "net", netdev->upscript, "up", argv);
19abca58 935 if (err < 0) {
3e2a7b08 936 goto on_error;
19abca58 937 }
3a73d9f1 938 }
939
d4d68410
CB
940 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
941 netdev->ifindex);
811ef482
CB
942
943 return 0;
3e2a7b08 944
945on_error:
946 lxc_netdev_delete_by_name(peer);
947 return -1;
811ef482
CB
948}
949
950static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
951{
0b154989 952 int err, mtu_orig = 0;
14a7b0f9 953
9c66dc4f
CB
954 if (is_empty_string(netdev->link))
955 return log_error_errno(-1, errno, "No link for physical interface specified");
811ef482 956
75b074ee
CB
957 /*
958 * Note that we're retrieving the container's ifindex in the host's
790255cf
CB
959 * network namespace because we need it to move the device from the
960 * host's network namespace to the container's network namespace later
961 * on.
962 * Note that netdev->link will contain the name of the physical network
963 * device in the host's namespace.
964 */
811ef482 965 netdev->ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
966 if (!netdev->ifindex)
967 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
811ef482 968
61302ef7 969 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
3473ca76 970 if (is_empty_string(netdev->name))
8bf64b77 971 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
61302ef7 972
75b074ee
CB
973 /*
974 * Store the ifindex of the host's network device in the host's
790255cf
CB
975 * namespace.
976 */
977 netdev->priv.phys_attr.ifindex = netdev->ifindex;
978
75b074ee
CB
979 /*
980 * Get original device MTU setting and store for restoration after
981 * container shutdown.
982 */
0b154989 983 mtu_orig = netdev_get_mtu(netdev->ifindex);
9c66dc4f
CB
984 if (mtu_orig < 0)
985 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
0b154989
TP
986
987 netdev->priv.phys_attr.mtu = mtu_orig;
988
3bef7b7b 989 if (netdev->mtu) {
54256301
CB
990 unsigned int mtu;
991
3bef7b7b 992 err = lxc_safe_uint(netdev->mtu, &mtu);
9c66dc4f
CB
993 if (err < 0)
994 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
14a7b0f9 995
3bef7b7b 996 err = lxc_netdev_set_mtu(netdev->link, mtu);
9c66dc4f
CB
997 if (err < 0)
998 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
3bef7b7b
TP
999 }
1000
1001 if (netdev->upscript) {
1002 char *argv[] = {
1003 "phys",
1004 netdev->link,
1005 NULL,
1006 };
1007
75b074ee
CB
1008 err = run_script_argv(handler->name, handler->conf->hooks_version,
1009 "net", netdev->upscript, "up", argv);
9c66dc4f 1010 if (err < 0)
3bef7b7b 1011 return -1;
3bef7b7b
TP
1012 }
1013
75b074ee
CB
1014 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
1015 netdev->ifindex);
811ef482
CB
1016
1017 return 0;
1018}
1019
1020static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1021{
14a7b0f9
CB
1022 int ret;
1023 char *argv[] = {
1024 "empty",
1025 NULL,
1026 };
1027
811ef482 1028 netdev->ifindex = 0;
14a7b0f9
CB
1029 if (!netdev->upscript)
1030 return 0;
1031
1032 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1033 "net", netdev->upscript, "up", argv);
1034 if (ret < 0)
1035 return -1;
1036
811ef482
CB
1037 return 0;
1038}
1039
1040static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
1041{
1042 netdev->ifindex = 0;
1043 return 0;
1044}
1045
1046static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
1047 [LXC_NET_VETH] = instantiate_veth,
1048 [LXC_NET_MACVLAN] = instantiate_macvlan,
c9f52382 1049 [LXC_NET_IPVLAN] = instantiate_ipvlan,
811ef482
CB
1050 [LXC_NET_VLAN] = instantiate_vlan,
1051 [LXC_NET_PHYS] = instantiate_phys,
1052 [LXC_NET_EMPTY] = instantiate_empty,
1053 [LXC_NET_NONE] = instantiate_none,
1054};
1055
9d0406c7 1056static int __instantiate_ns_common(struct lxc_netdev *netdev)
8bf64b77
CB
1057{
1058 char current_ifname[IFNAMSIZ];
1059
1060 netdev->ifindex = if_nametoindex(netdev->created_name);
1061 if (!netdev->ifindex)
1062 return log_error_errno(-1,
1063 errno, "Failed to retrieve ifindex for network device with name %s",
1064 netdev->created_name);
1065
3473ca76 1066 if (is_empty_string(netdev->name))
8bf64b77
CB
1067 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1068
1069 if (strcmp(netdev->created_name, netdev->name) != 0) {
1070 int ret;
1071
1072 ret = lxc_netdev_rename_by_name(netdev->created_name, netdev->name);
1073 if (ret)
9c66dc4f 1074 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
8bf64b77
CB
1075 netdev->created_name,
1076 netdev->name);
1077
1078 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->created_name, netdev->name);
1079 }
1080
1081 /*
1082 * Re-read the name of the interface because its name has changed and
1083 * would be automatically allocated by the system
1084 */
1085 if (!if_indextoname(netdev->ifindex, current_ifname))
9c66dc4f 1086 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
8bf64b77
CB
1087
1088 /*
1089 * Now update the recorded name of the network device to reflect the
1090 * name of the network device in the child's network namespace. We will
1091 * later on send this information back to the parent.
1092 */
1093 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
1094
1095 return 0;
1096}
1097
/* Namespace-side setup for veth devices; only the common steps are needed. */
static int instantiate_ns_veth(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1103
/* Namespace-side setup for macvlan devices; only the common steps are needed. */
static int instantiate_ns_macvlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1108
/* Namespace-side setup for ipvlan devices; only the common steps are needed. */
static int instantiate_ns_ipvlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1113
/* Namespace-side setup for vlan devices; only the common steps are needed. */
static int instantiate_ns_vlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1118
/* Namespace-side setup for physical devices; only the common steps are needed. */
static int instantiate_ns_phys(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
1123
/* Empty networks have no device, so there is nothing to set up. */
static int instantiate_ns_empty(struct lxc_netdev *netdev)
{
	return 0;
}
1128
/* "none" networks have no device, so there is nothing to set up. */
static int instantiate_ns_none(struct lxc_netdev *netdev)
{
	return 0;
}
1133
1134static instantiate_ns_cb netdev_ns_conf[LXC_NET_MAXCONFTYPE + 1] = {
1135 [LXC_NET_VETH] = instantiate_ns_veth,
1136 [LXC_NET_MACVLAN] = instantiate_ns_macvlan,
1137 [LXC_NET_IPVLAN] = instantiate_ns_ipvlan,
1138 [LXC_NET_VLAN] = instantiate_ns_vlan,
1139 [LXC_NET_PHYS] = instantiate_ns_phys,
1140 [LXC_NET_EMPTY] = instantiate_ns_empty,
1141 [LXC_NET_NONE] = instantiate_ns_none,
1142};
1143
811ef482
CB
1144static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
1145{
14a7b0f9
CB
1146 int ret;
1147 char *argv[] = {
1148 "veth",
1149 netdev->link,
1150 NULL,
1151 NULL,
1152 };
1153
1154 if (!netdev->downscript)
1155 return 0;
811ef482 1156
f2711167 1157 if (!is_empty_string(netdev->priv.veth_attr.pair))
14a7b0f9 1158 argv[2] = netdev->priv.veth_attr.pair;
811ef482 1159 else
14a7b0f9
CB
1160 argv[2] = netdev->priv.veth_attr.veth1;
1161
1162 ret = run_script_argv(handler->name,
1163 handler->conf->hooks_version, "net",
1164 netdev->downscript, "down", argv);
1165 if (ret < 0)
1166 return -1;
811ef482 1167
811ef482
CB
1168 return 0;
1169}
1170
1171static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1172{
14a7b0f9
CB
1173 int ret;
1174 char *argv[] = {
1175 "macvlan",
1176 netdev->link,
1177 NULL,
1178 };
1179
1180 if (!netdev->downscript)
1181 return 0;
1182
1183 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1184 "net", netdev->downscript, "down", argv);
1185 if (ret < 0)
1186 return -1;
811ef482 1187
811ef482
CB
1188 return 0;
1189}
1190
c9f52382 1191static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1192{
1193 int ret;
1194 char *argv[] = {
1195 "ipvlan",
1196 netdev->link,
1197 NULL,
1198 };
1199
1200 if (!netdev->downscript)
1201 return 0;
1202
1203 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1204 "net", netdev->downscript, "down", argv);
1205 if (ret < 0)
1206 return -1;
1207
1208 return 0;
1209}
1210
811ef482
CB
1211static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1212{
3a73d9f1 1213 int ret;
1214 char *argv[] = {
1215 "vlan",
1216 netdev->link,
1217 NULL,
1218 };
1219
1220 if (!netdev->downscript)
1221 return 0;
1222
1223 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1224 "net", netdev->downscript, "down", argv);
1225 if (ret < 0)
1226 return -1;
1227
811ef482
CB
1228 return 0;
1229}
1230
1231static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1232{
14a7b0f9
CB
1233 int ret;
1234 char *argv[] = {
1235 "phys",
1236 netdev->link,
1237 NULL,
1238 };
1239
1240 if (!netdev->downscript)
1241 return 0;
1242
1243 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1244 "net", netdev->downscript, "down", argv);
1245 if (ret < 0)
1246 return -1;
811ef482 1247
811ef482
CB
1248 return 0;
1249}
1250
1251static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1252{
14a7b0f9
CB
1253 int ret;
1254 char *argv[] = {
1255 "empty",
1256 NULL,
1257 };
1258
1259 if (!netdev->downscript)
1260 return 0;
1261
1262 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1263 "net", netdev->downscript, "down", argv);
1264 if (ret < 0)
1265 return -1;
811ef482 1266
811ef482
CB
1267 return 0;
1268}
1269
/* "none" networks have nothing to tear down. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
1274
1275static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1276 [LXC_NET_VETH] = shutdown_veth,
1277 [LXC_NET_MACVLAN] = shutdown_macvlan,
c9f52382 1278 [LXC_NET_IPVLAN] = shutdown_ipvlan,
811ef482
CB
1279 [LXC_NET_VLAN] = shutdown_vlan,
1280 [LXC_NET_PHYS] = shutdown_phys,
1281 [LXC_NET_EMPTY] = shutdown_empty,
1282 [LXC_NET_NONE] = shutdown_none,
1283};
1284
0037ab49
TP
1285static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1286{
d16bda44 1287 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0037ab49 1288 struct nl_handler nlh;
d16bda44
CB
1289 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1290 int err;
0037ab49 1291 struct ifinfomsg *ifi;
0037ab49 1292
d16bda44 1293 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0037ab49
TP
1294 if (err)
1295 return err;
1296
0037ab49
TP
1297 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1298 if (!nlmsg)
d16bda44 1299 return ret_errno(ENOMEM);
0037ab49
TP
1300
1301 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1302 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1303
1304 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1305 if (!ifi)
d16bda44
CB
1306 return ret_errno(ENOMEM);
1307
0037ab49
TP
1308 ifi->ifi_family = AF_UNSPEC;
1309 ifi->ifi_index = ifindex;
1310
1311 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
d16bda44 1312 return ret_errno(ENOMEM);
0037ab49 1313
3473ca76 1314 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1315 return ret_errno(ENOMEM);
0037ab49 1316
d16bda44 1317 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0037ab49
TP
1318}
1319
ebc73a67 1320int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
0ad19a3f 1321{
d16bda44 1322 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
0ad19a3f 1323 struct nl_handler nlh;
d16bda44
CB
1324 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1325 int err;
06f976ca 1326 struct ifinfomsg *ifi;
0ad19a3f 1327
d16bda44 1328 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1329 if (err)
1330 return err;
0ad19a3f 1331
0ad19a3f 1332 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1333 if (!nlmsg)
d16bda44 1334 return ret_errno(ENOMEM);
0ad19a3f 1335
ebc73a67 1336 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1337 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1338
1339 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1340 if (!ifi)
d16bda44
CB
1341 return ret_errno(ENOMEM);
1342
06f976ca
SZ
1343 ifi->ifi_family = AF_UNSPEC;
1344 ifi->ifi_index = ifindex;
0ad19a3f 1345
1346 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
d16bda44 1347 return ret_errno(ENOMEM);
0ad19a3f 1348
3473ca76 1349 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
d16bda44 1350 return ret_errno(ENOMEM);
8d357196 1351
d16bda44 1352 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
0ad19a3f 1353}
1354
ebc73a67
CB
1355/* If we are asked to move a wireless interface, then we must actually move its
1356 * phyN device. Detect that condition and return the physname here. The physname
1357 * will be passed to lxc_netdev_move_wlan() which will free it when done.
e5848d39
SH
1358 */
1359#define PHYSNAME "/sys/class/net/%s/phy80211/name"
e4103cf6 1360char *is_wlan(const char *ifname)
e5848d39 1361{
4110345b
CB
1362 __do_fclose FILE *f = NULL;
1363 __do_free char *path = NULL, *physname = NULL;
ebc73a67 1364 int i, ret;
e5848d39 1365 long physlen;
ebc73a67 1366 size_t len;
e5848d39 1367
ebc73a67 1368 len = strlen(ifname) + strlen(PHYSNAME) - 1;
95ea3d1f 1369 path = must_realloc(NULL, len + 1);
e5848d39 1370 ret = snprintf(path, len, PHYSNAME, ifname);
ebc73a67 1371 if (ret < 0 || (size_t)ret >= len)
4110345b 1372 return NULL;
ebc73a67 1373
4110345b 1374 f = fopen(path, "re");
ebc73a67 1375 if (!f)
4110345b 1376 return NULL;
ebc73a67 1377
1a0e70ac 1378 /* Feh - sb.st_size is always 4096. */
e5848d39
SH
1379 fseek(f, 0, SEEK_END);
1380 physlen = ftell(f);
1381 fseek(f, 0, SEEK_SET);
4110345b
CB
1382 if (physlen < 0)
1383 return NULL;
ebc73a67
CB
1384
1385 physname = malloc(physlen + 1);
4110345b
CB
1386 if (!physname)
1387 return NULL;
ebc73a67
CB
1388
1389 memset(physname, 0, physlen + 1);
e5848d39 1390 ret = fread(physname, 1, physlen, f);
e5848d39 1391 if (ret < 0)
4110345b 1392 return NULL;
e5848d39 1393
ebc73a67 1394 for (i = 0; i < physlen; i++) {
e5848d39
SH
1395 if (physname[i] == '\n')
1396 physname[i] = '\0';
ebc73a67 1397
e5848d39
SH
1398 if (physname[i] == '\0')
1399 break;
1400 }
1401
4110345b 1402 return move_ptr(physname);
e5848d39
SH
1403}
1404
ebc73a67
CB
/*
 * Rename the network device old to new inside the network namespace of the
 * process pid.
 *
 * A child process is forked which switches to that network namespace and
 * performs the rename; the parent waits for it.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid().
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * We are the child from here on and must never return into the
	 * caller's code path: terminate with a failure status instead.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1422
e4103cf6 1423int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
ebc73a67 1424 const char *newname)
e5848d39 1425{
3dd78294 1426 __do_free char *cmd = NULL;
ebc73a67 1427 pid_t fpid;
e5848d39
SH
1428
1429 /* Move phyN into the container. TODO - do this using netlink.
ebc73a67
CB
1430 * However, IIUC this involves a bit more complicated work to talk to
1431 * the 80211 module, so for now just call out to iw.
e5848d39
SH
1432 */
1433 cmd = on_path("iw", NULL);
9c66dc4f 1434 if (!cmd)
3dd78294 1435 return -1;
e5848d39
SH
1436
1437 fpid = fork();
1438 if (fpid < 0)
3dd78294 1439 return -1;
ebc73a67 1440
e5848d39
SH
1441 if (fpid == 0) {
1442 char pidstr[30];
1443 sprintf(pidstr, "%d", pid);
9c66dc4f 1444 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
ebd582ae 1445 _exit(EXIT_FAILURE);
e5848d39 1446 }
ebc73a67 1447
e5848d39 1448 if (wait_for_pid(fpid))
3dd78294 1449 return -1;
e5848d39 1450
e5848d39 1451 if (newname)
3dd78294 1452 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
e5848d39 1453
3dd78294 1454 return 0;
e5848d39
SH
1455}
1456
8d357196 1457int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
8befa924 1458{
3dd78294 1459 __do_free char *physname = NULL;
8befa924
SH
1460 int index;
1461
8befa924
SH
1462 if (!ifname)
1463 return -EINVAL;
1464
32571606 1465 index = if_nametoindex(ifname);
49428bf3
DY
1466 if (!index)
1467 return -EINVAL;
32571606 1468
ebc73a67
CB
1469 physname = is_wlan(ifname);
1470 if (physname)
e5848d39
SH
1471 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1472
8d357196 1473 return lxc_netdev_move_by_index(index, pid, newname);
8befa924
SH
1474}
1475
b84f58b9 1476int lxc_netdev_delete_by_index(int ifindex)
0ad19a3f 1477{
d16bda44
CB
1478 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1479 struct nl_handler nlh;
1480 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
b84f58b9 1481 int err;
ebc73a67 1482 struct ifinfomsg *ifi;
0ad19a3f 1483
d16bda44 1484 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1485 if (err)
1486 return err;
0ad19a3f 1487
0ad19a3f 1488 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1489 if (!nlmsg)
d16bda44 1490 return ret_errno(ENOMEM);
0ad19a3f 1491
06f976ca 1492 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1493 if (!answer)
d16bda44 1494 return ret_errno(ENOMEM);
0ad19a3f 1495
ebc73a67 1496 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1497 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1498
1499 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1500 if (!ifi)
d16bda44
CB
1501 return ret_errno(ENOMEM);
1502
06f976ca
SZ
1503 ifi->ifi_family = AF_UNSPEC;
1504 ifi->ifi_index = ifindex;
0ad19a3f 1505
d16bda44 1506 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1507}
1508
b84f58b9
DL
/*
 * Delete the network device called name.
 *
 * Returns -EINVAL when the name cannot be resolved to an index, otherwise
 * the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	return ifindex ? lxc_netdev_delete_by_index(ifindex) : -EINVAL;
}
1519
1520int lxc_netdev_rename_by_index(int ifindex, const char *newname)
b9a5bb58 1521{
d16bda44
CB
1522 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1523 struct nl_handler nlh;
1524 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1525 int err, len;
06f976ca 1526 struct ifinfomsg *ifi;
b9a5bb58 1527
d16bda44 1528 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1529 if (err)
1530 return err;
b9a5bb58 1531
b84f58b9 1532 len = strlen(newname);
d16bda44
CB
1533 if (len == 1 || len >= IFNAMSIZ)
1534 return ret_errno(EINVAL);
b84f58b9 1535
b9a5bb58
DL
1536 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1537 if (!nlmsg)
d16bda44 1538 return ret_errno(ENOMEM);
b9a5bb58 1539
06f976ca 1540 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
b9a5bb58 1541 if (!answer)
d16bda44 1542 return ret_errno(ENOMEM);
b9a5bb58 1543
ebc73a67 1544 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
06f976ca
SZ
1545 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1546
1547 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1548 if (!ifi)
d16bda44
CB
1549 return ret_errno(ENOMEM);
1550
06f976ca
SZ
1551 ifi->ifi_family = AF_UNSPEC;
1552 ifi->ifi_index = ifindex;
b84f58b9
DL
1553
1554 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
d16bda44 1555 return ret_errno(ENOMEM);
b9a5bb58 1556
d16bda44 1557 return netlink_transaction(nlh_ptr, nlmsg, answer);
b9a5bb58
DL
1558}
1559
b84f58b9
DL
1560int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1561{
1562 int len, index;
1563
1564 len = strlen(oldname);
dae3fdf6 1565 if (len == 1 || len >= IFNAMSIZ)
b84f58b9
DL
1566 return -EINVAL;
1567
1568 index = if_nametoindex(oldname);
1569 if (!index)
1570 return -EINVAL;
1571
1572 return lxc_netdev_rename_by_index(index, newname);
1573}
1574
8befa924 1575int netdev_set_flag(const char *name, int flag)
0ad19a3f 1576{
d16bda44
CB
1577 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1578 struct nl_handler nlh;
1579 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1580 int err, index, len;
06f976ca 1581 struct ifinfomsg *ifi;
0ad19a3f 1582
d16bda44 1583 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1584 if (err)
1585 return err;
0ad19a3f 1586
1587 len = strlen(name);
dae3fdf6 1588 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1589 return ret_errno(EINVAL);
0ad19a3f 1590
1591 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1592 if (!nlmsg)
d16bda44 1593 return ret_errno(ENOMEM);
0ad19a3f 1594
06f976ca 1595 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1596 if (!answer)
d16bda44 1597 return ret_errno(ENOMEM);
0ad19a3f 1598
1599 index = if_nametoindex(name);
1600 if (!index)
d16bda44 1601 return ret_errno(EINVAL);
0ad19a3f 1602
ebc73a67 1603 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1604 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1605
1606 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1607 if (!ifi)
1608 return ret_errno(ENOMEM);
1609
06f976ca
SZ
1610 ifi->ifi_family = AF_UNSPEC;
1611 ifi->ifi_index = index;
1612 ifi->ifi_change |= IFF_UP;
1613 ifi->ifi_flags |= flag;
0ad19a3f 1614
d16bda44 1615 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1616}
1617
ebc73a67 1618int netdev_get_flag(const char *name, int *flag)
efa1cf45 1619{
d16bda44
CB
1620 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1621 struct nl_handler nlh;
1622 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1623 int err, index, len;
a4318300 1624 struct ifinfomsg *ifi;
efa1cf45
DY
1625
1626 if (!name)
d16bda44 1627 return ret_errno(EINVAL);
efa1cf45 1628
d16bda44 1629 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
efa1cf45
DY
1630 if (err)
1631 return err;
1632
efa1cf45
DY
1633 len = strlen(name);
1634 if (len == 1 || len >= IFNAMSIZ)
d16bda44 1635 return ret_errno(EINVAL);
efa1cf45 1636
efa1cf45
DY
1637 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1638 if (!nlmsg)
d16bda44 1639 return ret_errno(ENOMEM);
efa1cf45 1640
06f976ca 1641 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
efa1cf45 1642 if (!answer)
d16bda44 1643 return ret_errno(ENOMEM);
efa1cf45 1644
efa1cf45
DY
1645 index = if_nametoindex(name);
1646 if (!index)
d16bda44 1647 return ret_errno(EINVAL);
efa1cf45 1648
06f976ca
SZ
1649 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1650 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1651
1652 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
d16bda44
CB
1653 if (!ifi)
1654 return ret_errno(ENOMEM);
1655
06f976ca
SZ
1656 ifi->ifi_family = AF_UNSPEC;
1657 ifi->ifi_index = index;
efa1cf45 1658
d16bda44 1659 err = netlink_transaction(nlh_ptr, nlmsg, answer);
efa1cf45 1660 if (err)
d16bda44 1661 return ret_set_errno(-1, errno);
efa1cf45 1662
06f976ca 1663 ifi = NLMSG_DATA(answer->nlmsghdr);
efa1cf45
DY
1664
1665 *flag = ifi->ifi_flags;
efa1cf45
DY
1666 return err;
1667}
1668
1669/*
1670 * \brief Check a interface is up or not.
1671 *
1672 * \param name: name for the interface.
1673 *
1674 * \return int.
1675 * 0 means interface is down.
1676 * 1 means interface is up.
1677 * Others means error happened, and ret-value is the error number.
1678 */
ebc73a67 1679int lxc_netdev_isup(const char *name)
efa1cf45 1680{
ebc73a67 1681 int err, flag;
efa1cf45
DY
1682
1683 err = netdev_get_flag(name, &flag);
1684 if (err)
ebc73a67
CB
1685 return err;
1686
efa1cf45
DY
1687 if (flag & IFF_UP)
1688 return 1;
ebc73a67 1689
efa1cf45 1690 return 0;
efa1cf45
DY
1691}
1692
0130df54
SH
1693int netdev_get_mtu(int ifindex)
1694{
a5f5cb41 1695 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
0130df54 1696 struct nl_handler nlh;
a5f5cb41
CB
1697 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1698 int readmore = 0, recv_len = 0;
1699 int answer_len, err, res;
06f976ca 1700 struct ifinfomsg *ifi;
0130df54 1701 struct nlmsghdr *msg;
0130df54 1702
a5f5cb41 1703 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
0130df54
SH
1704 if (err)
1705 return err;
1706
0130df54
SH
1707 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1708 if (!nlmsg)
a5f5cb41 1709 return ret_errno(ENOMEM);
0130df54 1710
06f976ca 1711 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0130df54 1712 if (!answer)
a5f5cb41 1713 return ret_errno(ENOMEM);
0130df54
SH
1714
1715 /* Save the answer buffer length, since it will be overwritten
1716 * on the first receive (and we might need to receive more than
ebc73a67
CB
1717 * once.
1718 */
06f976ca
SZ
1719 answer_len = answer->nlmsghdr->nlmsg_len;
1720
ebc73a67 1721 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
06f976ca 1722 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
0130df54 1723
06f976ca 1724 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1725 if (!ifi)
a5f5cb41
CB
1726 return ret_errno(ENOMEM);
1727
06f976ca 1728 ifi->ifi_family = AF_UNSPEC;
0130df54
SH
1729
1730 /* Send the request for addresses, which returns all addresses
1731 * on all interfaces. */
a5f5cb41 1732 err = netlink_send(nlh_ptr, nlmsg);
0130df54 1733 if (err < 0)
a5f5cb41 1734 return ret_set_errno(-1, errno);
0130df54 1735
6ce39620
CB
1736#pragma GCC diagnostic push
1737#pragma GCC diagnostic ignored "-Wcast-align"
1738
0130df54
SH
1739 do {
1740 /* Restore the answer buffer length, it might have been
ebc73a67
CB
1741 * overwritten by a previous receive.
1742 */
06f976ca 1743 answer->nlmsghdr->nlmsg_len = answer_len;
0130df54
SH
1744
1745 /* Get the (next) batch of reply messages */
a5f5cb41 1746 err = netlink_rcv(nlh_ptr, answer);
0130df54 1747 if (err < 0)
a5f5cb41 1748 return ret_set_errno(-1, errno);
0130df54
SH
1749
1750 recv_len = err;
0130df54
SH
1751
1752 /* Satisfy the typing for the netlink macros */
06f976ca 1753 msg = answer->nlmsghdr;
0130df54
SH
1754
1755 while (NLMSG_OK(msg, recv_len)) {
0130df54
SH
1756 /* Stop reading if we see an error message */
1757 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
1758 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1759 return ret_set_errno(errmsg->error, errno);
0130df54
SH
1760 }
1761
1762 /* Stop reading if we see a NLMSG_DONE message */
1763 if (msg->nlmsg_type == NLMSG_DONE) {
1764 readmore = 0;
1765 break;
1766 }
1767
06f976ca 1768 ifi = NLMSG_DATA(msg);
0130df54
SH
1769 if (ifi->ifi_index == ifindex) {
1770 struct rtattr *rta = IFLA_RTA(ifi);
a5f5cb41
CB
1771 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1772
0130df54 1773 res = 0;
ebc73a67 1774 while (RTA_OK(rta, attr_len)) {
9c66dc4f 1775 /*
a5f5cb41 1776 * Found a local address for the
ebc73a67
CB
1777 * requested interface, return it.
1778 */
0130df54 1779 if (rta->rta_type == IFLA_MTU) {
a5f5cb41
CB
1780 memcpy(&res, RTA_DATA(rta), sizeof(int));
1781 return res;
0130df54 1782 }
a5f5cb41 1783
0130df54
SH
1784 rta = RTA_NEXT(rta, attr_len);
1785 }
0130df54
SH
1786 }
1787
ebc73a67
CB
1788 /* Keep reading more data from the socket if the last
1789 * message had the NLF_F_MULTI flag set.
1790 */
0130df54
SH
1791 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1792
ebc73a67 1793 /* Look at the next message received in this buffer. */
0130df54
SH
1794 msg = NLMSG_NEXT(msg, recv_len);
1795 }
1796 } while (readmore);
1797
6ce39620
CB
1798#pragma GCC diagnostic pop
1799
ebc73a67 1800 /* If we end up here, we didn't find any result, so signal an error. */
a5f5cb41 1801 return -1;
0130df54
SH
1802}
1803
d472214b 1804int lxc_netdev_set_mtu(const char *name, int mtu)
75d09f83 1805{
a5f5cb41
CB
1806 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1807 struct nl_handler nlh;
1808 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
54256301 1809 int err, len;
06f976ca 1810 struct ifinfomsg *ifi;
75d09f83 1811
a5f5cb41 1812 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1813 if (err)
1814 return err;
75d09f83
DL
1815
1816 len = strlen(name);
dae3fdf6 1817 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1818 return ret_errno(EINVAL);
75d09f83
DL
1819
1820 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1821 if (!nlmsg)
a5f5cb41 1822 return ret_errno(ENOMEM);
75d09f83 1823
06f976ca 1824 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
75d09f83 1825 if (!answer)
a5f5cb41 1826 return ret_errno(ENOMEM);
75d09f83 1827
ebc73a67 1828 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
06f976ca
SZ
1829 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1830
1831 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
1832 if (!ifi)
1833 return ret_errno(ENOMEM);
1834
06f976ca 1835 ifi->ifi_family = AF_UNSPEC;
54256301
CB
1836
1837 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 1838 return ret_errno(ENOMEM);
75d09f83
DL
1839
1840 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 1841 return ret_errno(ENOMEM);
75d09f83 1842
a5f5cb41 1843 return netlink_transaction(nlh_ptr, nlmsg, answer);
75d09f83
DL
1844}
1845
d472214b 1846int lxc_netdev_up(const char *name)
0ad19a3f 1847{
d472214b 1848 return netdev_set_flag(name, IFF_UP);
0ad19a3f 1849}
1850
/* Take the network device called name down by passing no flags to set. */
int lxc_netdev_down(const char *name)
{
	return netdev_set_flag(name, 0);
}
1855
54256301 1856int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
0ad19a3f 1857{
a5f5cb41
CB
1858 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1859 struct nl_handler nlh;
1860 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1861 int err, len;
06f976ca 1862 struct ifinfomsg *ifi;
0ad19a3f 1863 struct rtattr *nest1, *nest2, *nest3;
0ad19a3f 1864
a5f5cb41 1865 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1866 if (err)
1867 return err;
0ad19a3f 1868
1869 len = strlen(name1);
dae3fdf6 1870 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1871 return ret_errno(EINVAL);
0ad19a3f 1872
1873 len = strlen(name2);
dae3fdf6 1874 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1875 return ret_errno(EINVAL);
0ad19a3f 1876
1877 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1878 if (!nlmsg)
a5f5cb41 1879 return ret_errno(ENOMEM);
0ad19a3f 1880
06f976ca 1881 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 1882 if (!answer)
a5f5cb41 1883 return ret_errno(ENOMEM);
0ad19a3f 1884
a5f5cb41 1885 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1886 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1887
1888 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
25a9939b 1889 if (!ifi)
a5f5cb41
CB
1890 return ret_errno(ENOMEM);
1891
06f976ca 1892 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 1893
79e68309 1894 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 1895 if (!nest1)
a5f5cb41 1896 return ret_errno(EINVAL);
0ad19a3f 1897
1898 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
a5f5cb41 1899 return ret_errno(ENOMEM);
0ad19a3f 1900
1901 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1902 if (!nest2)
a5f5cb41 1903 return ret_errno(ENOMEM);
0ad19a3f 1904
1905 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1906 if (!nest3)
a5f5cb41 1907 return ret_errno(ENOMEM);
0ad19a3f 1908
06f976ca 1909 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
1910 if (!ifi)
1911 return ret_errno(ENOMEM);
0ad19a3f 1912
1913 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
a5f5cb41 1914 return ret_errno(ENOMEM);
0ad19a3f 1915
54256301 1916 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
a5f5cb41 1917 return ret_errno(ENOMEM);
54256301
CB
1918
1919 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
a5f5cb41 1920 return ret_errno(ENOMEM);
54256301 1921
0ad19a3f 1922 nla_end_nested(nlmsg, nest3);
0ad19a3f 1923 nla_end_nested(nlmsg, nest2);
0ad19a3f 1924 nla_end_nested(nlmsg, nest1);
1925
1926 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
a5f5cb41 1927 return ret_errno(ENOMEM);
0ad19a3f 1928
a5f5cb41 1929 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 1930}
1931
ebc73a67 1932/* TODO: merge with lxc_macvlan_create */
7c11d57a 1933int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
26c39028 1934{
a5f5cb41
CB
1935 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1936 struct nl_handler nlh;
1937 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 1938 int err, len, lindex;
06f976ca 1939 struct ifinfomsg *ifi;
26c39028 1940 struct rtattr *nest, *nest2;
26c39028 1941
a5f5cb41 1942 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
1943 if (err)
1944 return err;
26c39028
JHS
1945
1946 len = strlen(master);
dae3fdf6 1947 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1948 return ret_errno(EINVAL);
26c39028
JHS
1949
1950 len = strlen(name);
dae3fdf6 1951 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 1952 return ret_errno(EINVAL);
26c39028
JHS
1953
1954 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1955 if (!nlmsg)
a5f5cb41 1956 return ret_errno(ENOMEM);
26c39028 1957
06f976ca 1958 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
26c39028 1959 if (!answer)
a5f5cb41 1960 return ret_errno(ENOMEM);
26c39028
JHS
1961
1962 lindex = if_nametoindex(master);
1963 if (!lindex)
a5f5cb41 1964 return ret_errno(EINVAL);
26c39028 1965
a5f5cb41 1966 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
1967 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1968
1969 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
1970 if (!ifi)
1971 return ret_errno(ENOMEM);
1972
06f976ca 1973 ifi->ifi_family = AF_UNSPEC;
26c39028 1974
79e68309 1975 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
26c39028 1976 if (!nest)
a5f5cb41 1977 return ret_errno(ENOMEM);
26c39028
JHS
1978
1979 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
a5f5cb41 1980 return ret_errno(ENOMEM);
26c39028
JHS
1981
1982 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1983 if (!nest2)
a5f5cb41 1984 return ret_errno(ENOMEM);
e892973e 1985
26c39028 1986 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
a5f5cb41 1987 return ret_errno(ENOMEM);
e892973e 1988
26c39028 1989 nla_end_nested(nlmsg, nest2);
26c39028
JHS
1990 nla_end_nested(nlmsg, nest);
1991
1992 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
a5f5cb41 1993 return ret_errno(ENOMEM);
26c39028
JHS
1994
1995 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41
CB
1996 return ret_errno(ENOMEM);
1997
1998 return netlink_transaction(nlh_ptr, nlmsg, answer);
26c39028
JHS
1999}
2000
e892973e 2001int lxc_macvlan_create(const char *master, const char *name, int mode)
0ad19a3f 2002{
a5f5cb41
CB
2003 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2004 struct nl_handler nlh;
2005 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2006 int err, index, len;
06f976ca 2007 struct ifinfomsg *ifi;
e892973e 2008 struct rtattr *nest, *nest2;
0ad19a3f 2009
a5f5cb41 2010 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2011 if (err)
2012 return err;
0ad19a3f 2013
2014 len = strlen(master);
dae3fdf6 2015 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2016 return ret_errno(EINVAL);
0ad19a3f 2017
2018 len = strlen(name);
dae3fdf6 2019 if (len == 1 || len >= IFNAMSIZ)
a5f5cb41 2020 return ret_errno(EINVAL);
0ad19a3f 2021
2022 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2023 if (!nlmsg)
a5f5cb41 2024 return ret_errno(ENOMEM);
0ad19a3f 2025
06f976ca 2026 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2027 if (!answer)
a5f5cb41 2028 return ret_errno(ENOMEM);
0ad19a3f 2029
2030 index = if_nametoindex(master);
2031 if (!index)
a5f5cb41 2032 return ret_errno(EINVAL);
0ad19a3f 2033
a5f5cb41 2034 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
06f976ca
SZ
2035 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2036
2037 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
a5f5cb41
CB
2038 if (!ifi)
2039 return ret_errno(ENOMEM);
2040
06f976ca 2041 ifi->ifi_family = AF_UNSPEC;
0ad19a3f 2042
79e68309 2043 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
0ad19a3f 2044 if (!nest)
a5f5cb41 2045 return ret_errno(ENOMEM);
0ad19a3f 2046
2047 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
a5f5cb41 2048 return ret_errno(ENOMEM);
0ad19a3f 2049
e892973e
DL
2050 if (mode) {
2051 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2052 if (!nest2)
a5f5cb41 2053 return ret_errno(ENOMEM);
e892973e
DL
2054
2055 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
a5f5cb41 2056 return ret_errno(ENOMEM);
e892973e
DL
2057
2058 nla_end_nested(nlmsg, nest2);
2059 }
2060
0ad19a3f 2061 nla_end_nested(nlmsg, nest);
2062
2063 if (nla_put_u32(nlmsg, IFLA_LINK, index))
a5f5cb41 2064 return ret_errno(ENOMEM);
0ad19a3f 2065
2066 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
a5f5cb41 2067 return ret_errno(ENOMEM);
0ad19a3f 2068
a5f5cb41 2069 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2070}
2071
/* Write the string @value (without its terminating NUL) to the file @path.
 *
 * Returns 0 on success, or -errno captured right after the failing open() or
 * lxc_write_nointr() call.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int fd;

	fd = open(path, O_WRONLY);
	if (fd < 0)
		return -errno;

	/* Record the error before close() gets a chance to touch errno. */
	if (lxc_write_nointr(fd, value, strlen(value)) < 0)
		ret = -errno;

	close(fd);
	return ret;
}
2087
6dfa9581 2088static int ip_forwarding_set(const char *ifname, int family, int flag)
6509154d 2089{
2090 int ret;
2091 char path[PATH_MAX];
6509154d 2092
2093 if (family != AF_INET && family != AF_INET6)
6dfa9581 2094 return -EINVAL;
6509154d 2095
9c66dc4f 2096 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6dfa9581 2097 family == AF_INET ? "ipv4" : "ipv6", ifname, "forwarding");
9c66dc4f 2098 if (ret < 0 || (size_t)ret >= sizeof(path))
6dfa9581 2099 return -E2BIG;
6509154d 2100
6dfa9581
TP
2101 return proc_sys_net_write(path, flag ? "1" : "0");
2102}
2103
/* Enable forwarding for interface @name in address family @family. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}

/* Disable forwarding for interface @name in address family @family. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int disable = 0;

	return ip_forwarding_set(name, family, disable);
}
2113
0ad19a3f 2114static int neigh_proxy_set(const char *ifname, int family, int flag)
2115{
9ba8130c 2116 int ret;
419590da 2117 char path[PATH_MAX];
0ad19a3f 2118
2119 if (family != AF_INET && family != AF_INET6)
3cfc0f3a 2120 return -EINVAL;
0ad19a3f 2121
9c66dc4f 2122 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
ebc73a67
CB
2123 family == AF_INET ? "ipv4" : "ipv6", ifname,
2124 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 2125 if (ret < 0 || (size_t)ret >= sizeof(path))
9ba8130c 2126 return -E2BIG;
0ad19a3f 2127
ebc73a67 2128 return proc_sys_net_write(path, flag ? "1" : "0");
0ad19a3f 2129}
2130
6509154d 2131static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2132{
2133 int ret;
2134 char path[PATH_MAX];
2135 char buf[1] = "";
2136
2137 if (family != AF_INET && family != AF_INET6)
596a002c 2138 return ret_set_errno(-1, EINVAL);
6509154d 2139
9c66dc4f 2140 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
6509154d 2141 family == AF_INET ? "ipv4" : "ipv6", ifname,
2142 family == AF_INET ? "proxy_arp" : "proxy_ndp");
9c66dc4f 2143 if (ret < 0 || (size_t)ret >= sizeof(path))
596a002c 2144 return ret_set_errno(-1, E2BIG);
6509154d 2145
2146 return lxc_read_file_expect(path, buf, 1, "1");
2147}
2148
/* Enable neighbour proxying (proxy_arp/proxy_ndp) on interface @name. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}

/* Disable neighbour proxying (proxy_arp/proxy_ndp) on interface @name. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int disable = 0;

	return neigh_proxy_set(name, family, disable);
}
2158
/* Decode one ASCII hexadecimal digit.
 *
 * Returns its value (0..15) or -1 if @c is not a hex digit. Plain range
 * comparisons are used instead of <ctype.h> so that a negative char value is
 * never passed to a ctype function.
 */
static int mac_hex_nibble(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * @sockaddr->sa_family is set to ARPHRD_ETHER and up to ETH_ALEN octets are
 * stored in @sockaddr->sa_data. Each octet is one or two hex digits (either
 * case); octets are separated by ':'. Parsing stops at the end of the string
 * or once ETH_ALEN octets have been stored, whichever comes first.
 *
 * Returns 0 on success or -EINVAL on the first character that is neither a hex
 * digit nor an expected separator. On error @sockaddr may already have been
 * partially written.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data = (unsigned char *)sockaddr->sa_data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned int val;
		int hi, lo;
		char c;

		/* The first character of an octet must be a hex digit. */
		hi = mac_hex_nibble(*macaddr++);
		if (hi < 0)
			return -EINVAL;

		/* The second one is either the low nibble, or a separator /
		 * end of string, in which case the octet had a single digit.
		 */
		c = *macaddr;
		lo = mac_hex_nibble(c);
		if (lo >= 0)
			val = ((unsigned int)hi << 4) | (unsigned int)lo;
		else if (c == ':' || c == '\0')
			val = (unsigned int)hi;
		else
			return -EINVAL;

		/* Never step past the terminating NUL. */
		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0xff);
		i++;

		/* Skip the separator that follows a two-digit octet. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2203
ebc73a67
CB
2204static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2205 void *acast, int prefix)
0ad19a3f 2206{
a5f5cb41
CB
2207 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2208 struct nl_handler nlh;
2209 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2210 int addrlen, err;
06f976ca 2211 struct ifaddrmsg *ifa;
0ad19a3f 2212
ebc73a67
CB
2213 addrlen = family == AF_INET ? sizeof(struct in_addr)
2214 : sizeof(struct in6_addr);
4bf1968d 2215
a5f5cb41 2216 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
3cfc0f3a
MN
2217 if (err)
2218 return err;
0ad19a3f 2219
0ad19a3f 2220 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2221 if (!nlmsg)
a5f5cb41 2222 return ret_errno(ENOMEM);
0ad19a3f 2223
06f976ca 2224 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
0ad19a3f 2225 if (!answer)
a5f5cb41 2226 return ret_errno(ENOMEM);
0ad19a3f 2227
a5f5cb41 2228 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2229 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2230
2231 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
acf47e1b 2232 if (!ifa)
a5f5cb41
CB
2233 return ret_errno(ENOMEM);
2234
06f976ca
SZ
2235 ifa->ifa_prefixlen = prefix;
2236 ifa->ifa_index = ifindex;
2237 ifa->ifa_family = family;
2238 ifa->ifa_scope = 0;
acf47e1b 2239
4bf1968d 2240 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
a5f5cb41 2241 return ret_errno(EINVAL);
0ad19a3f 2242
4bf1968d 2243 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
a5f5cb41 2244 return ret_errno(EINVAL);
0ad19a3f 2245
d8948a52 2246 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
a5f5cb41 2247 return ret_errno(EINVAL);
1f1b18e7 2248
ebc73a67 2249 /* TODO: multicast, anycast with ipv6 */
79881dc6
DL
2250 if (family == AF_INET6 &&
2251 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2252 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
a5f5cb41 2253 return ret_errno(EPROTONOSUPPORT);
0ad19a3f 2254
a5f5cb41 2255 return netlink_transaction(nlh_ptr, nlmsg, answer);
0ad19a3f 2256}
2257
/* Add the IPv6 address @addr/@prefix to interface @ifindex. @mcast is passed
 * to ip_addr_add() in the broadcast slot and @acast in the anycast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	void *bcast_slot = mcast;

	return ip_addr_add(AF_INET6, ifindex, addr, bcast_slot, acast, prefix);
}

/* Add the IPv4 address @addr/@prefix with broadcast address @bcast to
 * interface @ifindex. No anycast address is passed for IPv4.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	void *no_anycast = NULL;

	return ip_addr_add(AF_INET, ifindex, addr, bcast, no_anycast, prefix);
}
2270
ebc73a67
CB
/* Find an IFA_LOCAL (or IFA_ADDRESS if no IFA_LOCAL is present) address from
 * the given RTM_NEWADDR message. Allocates memory for the address and stores
 * that pointer in *res (so res should be an in_addr** or in6_addr**).
 */
6ce39620
CB
2275#pragma GCC diagnostic push
2276#pragma GCC diagnostic ignored "-Wcast-align"
2277
ebc73a67
CB
/* Extract an address of family @family from the address message @msg.
 *
 * Walks the route attributes of @msg and copies the payload of every
 * IFA_ADDRESS/IFA_LOCAL attribute into *@res, stopping at the first IFA_LOCAL.
 * *@res is allocated with malloc() only when it is still NULL on the first
 * match; an already set *@res is overwritten in place.
 *
 * Returns 0 when the message was processed (including when its family does
 * not match or no address attribute was present, in which case *@res is left
 * untouched), and -1 on a payload of unexpected size or allocation failure.
 */
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *hdr = NLMSG_DATA(msg);
	struct rtattr *attr;
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	if (hdr->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	for (attr = IFA_RTA(hdr); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * payload size; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* An earlier IFA_ADDRESS may already have allocated the
		 * buffer; an IFA_LOCAL simply overwrites its content.
		 */
		if (*res == NULL) {
			*res = malloc(addrlen);
			if (*res == NULL)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is the preferred answer, nothing more to do. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
2322
6ce39620
CB
2323#pragma GCC diagnostic pop
2324
19a26f82
MK
2325static int ip_addr_get(int family, int ifindex, void **res)
2326{
a5f5cb41
CB
2327 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2328 struct nl_handler nlh;
2329 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
ebc73a67 2330 int answer_len, err;
06f976ca 2331 struct ifaddrmsg *ifa;
19a26f82 2332 struct nlmsghdr *msg;
ebc73a67 2333 int readmore = 0, recv_len = 0;
19a26f82 2334
a5f5cb41 2335 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
19a26f82
MK
2336 if (err)
2337 return err;
2338
19a26f82
MK
2339 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2340 if (!nlmsg)
a5f5cb41 2341 return ret_errno(ENOMEM);
19a26f82 2342
06f976ca 2343 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
19a26f82 2344 if (!answer)
a5f5cb41 2345 return ret_errno(ENOMEM);
19a26f82 2346
ebc73a67
CB
2347 /* Save the answer buffer length, since it will be overwritten on the
2348 * first receive (and we might need to receive more than once).
2349 */
06f976ca
SZ
2350 answer_len = answer->nlmsghdr->nlmsg_len;
2351
ebc73a67 2352 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
06f976ca 2353 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
19a26f82 2354
06f976ca 2355 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
25a9939b 2356 if (!ifa)
a5f5cb41
CB
2357 return ret_errno(ENOMEM);
2358
06f976ca 2359 ifa->ifa_family = family;
19a26f82 2360
ebc73a67
CB
2361 /* Send the request for addresses, which returns all addresses on all
2362 * interfaces.
2363 */
a5f5cb41 2364 err = netlink_send(nlh_ptr, nlmsg);
19a26f82 2365 if (err < 0)
a5f5cb41 2366 return ret_set_errno(err, errno);
19a26f82 2367
6ce39620
CB
2368#pragma GCC diagnostic push
2369#pragma GCC diagnostic ignored "-Wcast-align"
2370
19a26f82
MK
2371 do {
2372 /* Restore the answer buffer length, it might have been
ebc73a67
CB
2373 * overwritten by a previous receive.
2374 */
06f976ca 2375 answer->nlmsghdr->nlmsg_len = answer_len;
19a26f82 2376
ebc73a67 2377 /* Get the (next) batch of reply messages. */
a5f5cb41 2378 err = netlink_rcv(nlh_ptr, answer);
19a26f82 2379 if (err < 0)
a5f5cb41 2380 return ret_set_errno(err, errno);
19a26f82
MK
2381
2382 recv_len = err;
2383 err = 0;
2384
ebc73a67 2385 /* Satisfy the typing for the netlink macros. */
06f976ca 2386 msg = answer->nlmsghdr;
19a26f82
MK
2387
2388 while (NLMSG_OK(msg, recv_len)) {
ebc73a67 2389 /* Stop reading if we see an error message. */
19a26f82 2390 if (msg->nlmsg_type == NLMSG_ERROR) {
a5f5cb41
CB
2391 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2392 return ret_set_errno(errmsg->error, errno);
19a26f82
MK
2393 }
2394
ebc73a67 2395 /* Stop reading if we see a NLMSG_DONE message. */
19a26f82
MK
2396 if (msg->nlmsg_type == NLMSG_DONE) {
2397 readmore = 0;
2398 break;
2399 }
2400
a5f5cb41
CB
2401 if (msg->nlmsg_type != RTM_NEWADDR)
2402 return ret_errno(EINVAL);
19a26f82 2403
06f976ca
SZ
2404 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2405 if (ifa->ifa_index == ifindex) {
a5f5cb41
CB
2406 if (ifa_get_local_ip(family, msg, res) < 0)
2407 return ret_errno(EINVAL);
51e7a874 2408
ebc73a67 2409 /* Found a result, stop searching. */
19a26f82 2410 if (*res)
a5f5cb41 2411 return 0;
19a26f82
MK
2412 }
2413
ebc73a67
CB
2414 /* Keep reading more data from the socket if the last
2415 * message had the NLF_F_MULTI flag set.
2416 */
19a26f82
MK
2417 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2418
ebc73a67 2419 /* Look at the next message received in this buffer. */
19a26f82
MK
2420 msg = NLMSG_NEXT(msg, recv_len);
2421 }
2422 } while (readmore);
2423
6ce39620
CB
2424#pragma GCC diagnostic pop
2425
19a26f82 2426 /* If we end up here, we didn't find any result, so signal an
ebc73a67
CB
2427 * error.
2428 */
a5f5cb41 2429 return -1;
19a26f82
MK
2430}
2431
/* Look up an IPv6 address of interface @ifindex; see ip_addr_get(). */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}

/* Look up an IPv4 address of interface @ifindex; see ip_addr_get(). */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2441
f8fee0e2
MK
2442static int ip_gateway_add(int family, int ifindex, void *gw)
2443{
a5f5cb41 2444 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
f8fee0e2 2445 struct nl_handler nlh;
a5f5cb41
CB
2446 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2447 int addrlen, err;
06f976ca 2448 struct rtmsg *rt;
f8fee0e2 2449
ebc73a67
CB
2450 addrlen = family == AF_INET ? sizeof(struct in_addr)
2451 : sizeof(struct in6_addr);
f8fee0e2 2452
a5f5cb41 2453 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
f8fee0e2
MK
2454 if (err)
2455 return err;
2456
f8fee0e2
MK
2457 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2458 if (!nlmsg)
a5f5cb41 2459 return ret_errno(ENOMEM);
f8fee0e2 2460
06f976ca 2461 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
f8fee0e2 2462 if (!answer)
a5f5cb41 2463 return ret_errno(ENOMEM);
f8fee0e2 2464
a5f5cb41 2465 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
06f976ca
SZ
2466 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2467
2468 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
25a9939b 2469 if (!rt)
a5f5cb41
CB
2470 return ret_errno(ENOMEM);
2471
06f976ca
SZ
2472 rt->rtm_family = family;
2473 rt->rtm_table = RT_TABLE_MAIN;
2474 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2475 rt->rtm_protocol = RTPROT_BOOT;
2476 rt->rtm_type = RTN_UNICAST;
f8fee0e2 2477 /* "default" destination */
06f976ca 2478 rt->rtm_dst_len = 0;
f8fee0e2 2479
a2f9a670 2480 /* If gateway address not supplied, then a device route will be created instead */
a5f5cb41
CB
2481 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2482 return ret_errno(ENOMEM);
f8fee0e2
MK
2483
2484 /* Adding the interface index enables the use of link-local
ebc73a67
CB
2485 * addresses for the gateway.
2486 */
f8fee0e2 2487 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
a5f5cb41 2488 return ret_errno(EINVAL);
f8fee0e2 2489
a5f5cb41 2490 return netlink_transaction(nlh_ptr, nlmsg, answer);
f8fee0e2
MK
2491}
2492
/* Add an IPv4 default route via @gw on interface @ifindex. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET, ifindex, gateway);
}

/* Add an IPv6 default route via @gw on interface @ifindex. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET6, ifindex, gateway);
}
/* Decide whether @bridge is to be treated as an openvswitch bridge.
 *
 * Returns true exactly when stat() on /sys/class/net/@bridge/bridge fails
 * with ENOENT. Returns false if that path exists, if stat() fails for any
 * other reason, or if the path does not fit into the local buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	char brdirname[22 + IFNAMSIZ + 1] = {0};
	struct stat sb;
	int len;

	len = snprintf(brdirname, sizeof(brdirname), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(brdirname))
		return false;

	if (stat(brdirname, &sb) >= 0)
		return false;

	return errno == ENOENT;
}
2519
581c75e7
CB
/* Arguments handed to the ovs-vsctl exec callbacks through run_command(). */
struct ovs_veth_args {
	const char *bridge; /* passed to ovs-vsctl as the bridge argument */
	const char *nic;    /* passed to ovs-vsctl as the port argument */
};
2524
cb0dc11b
CB
2525/* Called from a background thread - when nic goes away, remove it from the
2526 * bridge.
c43cbc04 2527 */
581c75e7 2528static int lxc_ovs_delete_port_exec(void *data)
c43cbc04 2529{
581c75e7 2530 struct ovs_veth_args *args = data;
cb0dc11b 2531
9c66dc4f 2532 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
581c75e7 2533 return -1;
c43cbc04
SH
2534}
2535
581c75e7 2536int lxc_ovs_delete_port(const char *bridge, const char *nic)
0d204771 2537{
c43cbc04 2538 int ret;
419590da 2539 char cmd_output[PATH_MAX];
581c75e7 2540 struct ovs_veth_args args;
6ad22d06 2541
581c75e7
CB
2542 args.bridge = bridge;
2543 args.nic = nic;
2544 ret = run_command(cmd_output, sizeof(cmd_output),
2545 lxc_ovs_delete_port_exec, (void *)&args);
9c66dc4f
CB
2546 if (ret < 0)
2547 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2548
581c75e7
CB
2549 return 0;
2550}
ebc73a67 2551
581c75e7
CB
2552static int lxc_ovs_attach_bridge_exec(void *data)
2553{
2554 struct ovs_veth_args *args = data;
ebc73a67 2555
9c66dc4f 2556 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
581c75e7
CB
2557 return -1;
2558}
ebc73a67 2559
581c75e7
CB
2560static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2561{
2562 int ret;
419590da 2563 char cmd_output[PATH_MAX];
581c75e7 2564 struct ovs_veth_args args;
ebc73a67 2565
581c75e7
CB
2566 args.bridge = bridge;
2567 args.nic = nic;
2568 ret = run_command(cmd_output, sizeof(cmd_output),
2569 lxc_ovs_attach_bridge_exec, (void *)&args);
9c66dc4f
CB
2570 if (ret < 0)
2571 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
0d204771 2572
581c75e7 2573 return 0;
0d204771 2574}
0d204771 2575
581c75e7 2576int lxc_bridge_attach(const char *bridge, const char *ifname)
0ad19a3f 2577{
ebc73a67 2578 int err, fd, index;
9de31d5a 2579 size_t retlen;
0ad19a3f 2580 struct ifreq ifr;
2581
dae3fdf6 2582 if (strlen(ifname) >= IFNAMSIZ)
3cfc0f3a 2583 return -EINVAL;
0ad19a3f 2584
2585 index = if_nametoindex(ifname);
2586 if (!index)
3cfc0f3a 2587 return -EINVAL;
0ad19a3f 2588
0d204771 2589 if (is_ovs_bridge(bridge))
581c75e7 2590 return lxc_ovs_attach_bridge(bridge, ifname);
0d204771 2591
ad9429e5 2592 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
0ad19a3f 2593 if (fd < 0)
3cfc0f3a 2594 return -errno;
0ad19a3f 2595
9de31d5a 2596 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
42cc4083
CB
2597 if (retlen >= IFNAMSIZ) {
2598 close(fd);
9de31d5a 2599 return -E2BIG;
42cc4083 2600 }
9de31d5a 2601
ebc73a67 2602 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
0ad19a3f 2603 ifr.ifr_ifindex = index;
7d163508 2604 err = ioctl(fd, SIOCBRADDIF, &ifr);
0ad19a3f 2605 close(fd);
3cfc0f3a
MN
2606 if (err)
2607 err = -errno;
0ad19a3f 2608
2609 return err;
2610}
72d0e1cb 2611
ebc73a67 2612static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
b343592b 2613 [LXC_NET_EMPTY] = "empty",
72d0e1cb
SG
2614 [LXC_NET_VETH] = "veth",
2615 [LXC_NET_MACVLAN] = "macvlan",
c9f52382 2616 [LXC_NET_IPVLAN] = "ipvlan",
72d0e1cb 2617 [LXC_NET_PHYS] = "phys",
b343592b
BP
2618 [LXC_NET_VLAN] = "vlan",
2619 [LXC_NET_NONE] = "none",
72d0e1cb
SG
2620};
2621
2622const char *lxc_net_type_to_str(int type)
2623{
2624 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2625 return NULL;
ebc73a67 2626
72d0e1cb
SG
2627 return lxc_network_types[type];
2628}
8befa924 2629
3646ffd9 2630static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
a0265685 2631
3646ffd9 2632char *lxc_ifname_alnum_case_sensitive(char *template)
a0265685 2633{
2d7bf744 2634 int ret;
b1e44ed1 2635 struct netns_ifaddrs *ifa, *ifaddr;
966e9f1f
CB
2636 char name[IFNAMSIZ];
2637 bool exists = false;
2638 size_t i = 0;
280cc35f 2639#ifdef HAVE_RAND_R
2640 unsigned int seed;
2641
2642 seed = randseed(false);
2643#else
2644
2645 (void)randseed(true);
2646#endif
a0265685 2647
535e8859
CB
2648 if (strlen(template) >= IFNAMSIZ)
2649 return NULL;
2650
ebc73a67 2651 /* Get all the network interfaces. */
b1e44ed1 2652 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
9c66dc4f
CB
2653 if (ret < 0)
2654 return log_error_errno(NULL, errno, "Failed to get network interfaces");
a0265685 2655
ebc73a67 2656 /* Generate random names until we find one that doesn't exist. */
51a8a74c 2657 for (;;) {
966e9f1f 2658 name[0] = '\0';
94b1cade 2659 (void)strlcpy(name, template, IFNAMSIZ);
a0265685 2660
966e9f1f 2661 exists = false;
280cc35f 2662
a0265685
SG
2663 for (i = 0; i < strlen(name); i++) {
2664 if (name[i] == 'X') {
2665#ifdef HAVE_RAND_R
8523344a 2666 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
a0265685 2667#else
8523344a 2668 name[i] = padchar[rand() % strlen(padchar)];
a0265685
SG
2669#endif
2670 }
2671 }
2672
2673 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
966e9f1f
CB
2674 if (!strcmp(ifa->ifa_name, name)) {
2675 exists = true;
a0265685
SG
2676 break;
2677 }
2678 }
2679
966e9f1f 2680 if (!exists)
a0265685 2681 break;
a0265685
SG
2682 }
2683
b1e44ed1 2684 netns_freeifaddrs(ifaddr);
94b1cade
DJ
2685 (void)strlcpy(template, name, strlen(template) + 1);
2686
2687 return template;
a0265685
SG
2688}
2689
8befa924
SH
/* Rewrite the hardware address of interface @veth1 so that its first octet
 * is 0xfe; the remaining octets are kept as read from the kernel.
 *
 * Returns 0 on success, -E2BIG if @veth1 does not fit into an interface
 * name, or -errno of the failing socket()/ioctl() call.
 */
int setup_private_host_hw_addr(char *veth1)
{
	int err, sockfd;
	struct ifreq ifr;

	sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sockfd < 0)
		return -errno;

	err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (err < 0 || (size_t)err >= IFNAMSIZ) {
		close(sockfd);
		return -E2BIG;
	}

	/* Fetch the current address so only the first octet changes. */
	err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (err < 0) {
		/* Capture errno before close(), which may overwrite it. */
		err = -errno;
		close(sockfd);
		return err;
	}

	ifr.ifr_hwaddr.sa_data[0] = 0xfe;
	err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	/* Same here: record the ioctl() error before closing the socket. */
	if (err < 0)
		err = -errno;

	close(sockfd);

	return err < 0 ? err : 0;
}
811ef482
CB
2719
2720int lxc_find_gateway_addresses(struct lxc_handler *handler)
2721{
2722 struct lxc_list *network = &handler->conf->network;
2723 struct lxc_list *iterator;
2724 struct lxc_netdev *netdev;
2725 int link_index;
2726
2727 lxc_list_for_each(iterator, network) {
2728 netdev = iterator->elem;
2729
2730 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2731 continue;
2732
9c66dc4f
CB
2733 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2734 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
811ef482 2735
f2711167 2736 if (is_empty_string(netdev->link)) {
9c66dc4f 2737 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
811ef482
CB
2738 }
2739
2740 link_index = if_nametoindex(netdev->link);
2741 if (!link_index)
2742 return -EINVAL;
2743
2744 if (netdev->ipv4_gateway_auto) {
9c66dc4f
CB
2745 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2746 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2747 }
2748
2749 if (netdev->ipv6_gateway_auto) {
9c66dc4f
CB
2750 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2751 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
811ef482
CB
2752 }
2753 }
2754
2755 return 0;
2756}
2757
2758#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
f0ecc19d 2759static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
4d781681 2760 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
811ef482
CB
2761{
2762 int ret;
2763 pid_t child;
2764 int bytes, pipefd[2];
2765 char *token, *saveptr = NULL;
095ead80 2766 char netdev_link[IFNAMSIZ];
419590da 2767 char buffer[PATH_MAX] = {0};
94b1cade 2768 size_t retlen;
811ef482 2769
9c66dc4f
CB
2770 if (netdev->type != LXC_NET_VETH)
2771 return log_error_errno(-1, errno, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
2772
2773 ret = pipe(pipefd);
9c66dc4f
CB
2774 if (ret < 0)
2775 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2776
2777 child = fork();
2778 if (child < 0) {
811ef482
CB
2779 close(pipefd[0]);
2780 close(pipefd[1]);
9c66dc4f 2781 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2782 }
2783
2784 if (child == 0) {
8335fd40 2785 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
811ef482
CB
2786
2787 close(pipefd[0]);
2788
2789 ret = dup2(pipefd[1], STDOUT_FILENO);
2790 if (ret >= 0)
2791 ret = dup2(pipefd[1], STDERR_FILENO);
2792 close(pipefd[1]);
2793 if (ret < 0) {
2794 SYSERROR("Failed to duplicate std{err,out} file descriptor");
78070056 2795 _exit(EXIT_FAILURE);
811ef482
CB
2796 }
2797
f2711167 2798 if (!is_empty_string(netdev->link))
9de31d5a 2799 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
811ef482 2800 else
9de31d5a
CB
2801 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2802 if (retlen >= IFNAMSIZ) {
2803 SYSERROR("Invalid network device name");
2804 _exit(EXIT_FAILURE);
2805 }
811ef482 2806
8335fd40
CB
2807 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2808 if (ret < 0 || ret >= sizeof(pidstr))
78070056 2809 _exit(EXIT_FAILURE);
8335fd40 2810 pidstr[sizeof(pidstr) - 1] = '\0';
811ef482
CB
2811
2812 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2813 lxcname, pidstr, netdev_link,
3473ca76
CB
2814 !is_empty_string(netdev->name) ? netdev->name : "(null)");
2815 if (!is_empty_string(netdev->name))
811ef482
CB
2816 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2817 lxcpath, lxcname, pidstr, "veth", netdev_link,
2818 netdev->name, (char *)NULL);
2819 else
2820 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2821 lxcpath, lxcname, pidstr, "veth", netdev_link,
2822 (char *)NULL);
2823 SYSERROR("Failed to execute lxc-user-nic");
78070056 2824 _exit(EXIT_FAILURE);
811ef482
CB
2825 }
2826
2827 /* close the write-end of the pipe */
2828 close(pipefd[1]);
2829
9c66dc4f 2830 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482 2831 if (bytes < 0) {
74c6e2b0 2832 SYSERROR("Failed to read from pipe file descriptor");
811ef482 2833 close(pipefd[0]);
6b9f82a9
CB
2834 } else {
2835 buffer[bytes - 1] = '\0';
811ef482 2836 }
811ef482
CB
2837
2838 ret = wait_for_pid(child);
2839 close(pipefd[0]);
9c66dc4f
CB
2840 if (ret != 0 || bytes < 0)
2841 return log_error(-1, "lxc-user-nic failed to configure requested network: %s", buffer[0] != '\0' ? buffer : "(null)");
811ef482
CB
2842 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2843
2844 /* netdev->name */
2845 token = strtok_r(buffer, ":", &saveptr);
9c66dc4f
CB
2846 if (!token)
2847 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2848
e389f2af
CB
2849 /*
2850 * lxc-user-nic will take care of proper network device naming. So
2851 * netdev->name and netdev->created_name need to be identical to not
2852 * trigger another rename later on.
2853 */
2854 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2855 if (retlen < IFNAMSIZ)
2856 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
9c66dc4f
CB
2857 if (retlen >= IFNAMSIZ)
2858 return log_error_errno(-1, E2BIG, "Container side veth device name returned by lxc-user-nic is too long");
811ef482 2859
74c6e2b0 2860 /* netdev->ifindex */
811ef482 2861 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2862 if (!token)
2863 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2864
74c6e2b0 2865 ret = lxc_safe_int(token, &netdev->ifindex);
9c66dc4f
CB
2866 if (ret < 0)
2867 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2868
74c6e2b0 2869 /* netdev->priv.veth_attr.veth1 */
811ef482 2870 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2871 if (!token)
2872 return log_error(-1, "Failed to parse lxc-user-nic output");
811ef482 2873
94b1cade 2874 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
9c66dc4f
CB
2875 if (retlen >= IFNAMSIZ)
2876 return log_error_errno(-1, E2BIG, "Host side veth device name returned by lxc-user-nic is too long");
74c6e2b0
CB
2877
2878 /* netdev->priv.veth_attr.ifindex */
2879 token = strtok_r(NULL, ":", &saveptr);
9c66dc4f
CB
2880 if (!token)
2881 return log_error(-1, "Failed to parse lxc-user-nic output");
74c6e2b0
CB
2882
2883 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
9c66dc4f
CB
2884 if (ret < 0)
2885 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
811ef482 2886
4d781681 2887 if (netdev->upscript) {
2888 char *argv[] = {
2889 "veth",
2890 netdev->link,
2891 netdev->priv.veth_attr.veth1,
2892 NULL,
2893 };
2894
e389f2af
CB
2895 ret = run_script_argv(lxcname, hooks_version, "net",
2896 netdev->upscript, "up", argv);
4d781681 2897 if (ret < 0)
2898 return -1;
2899 }
2900
811ef482
CB
2901 return 0;
2902}
2903
f0ecc19d 2904static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
1bd8d726
CB
2905 struct lxc_netdev *netdev,
2906 const char *netns_path)
811ef482
CB
2907{
2908 int bytes, ret;
2909 pid_t child;
2910 int pipefd[2];
419590da 2911 char buffer[PATH_MAX] = {0};
811ef482 2912
9c66dc4f
CB
2913 if (netdev->type != LXC_NET_VETH)
2914 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
811ef482
CB
2915
2916 ret = pipe(pipefd);
9c66dc4f
CB
2917 if (ret < 0)
2918 return log_error_errno(-1, errno, "Failed to create pipe");
811ef482
CB
2919
2920 child = fork();
2921 if (child < 0) {
811ef482
CB
2922 close(pipefd[0]);
2923 close(pipefd[1]);
9c66dc4f 2924 return log_error_errno(-1, errno, "Failed to create new process");
811ef482
CB
2925 }
2926
2927 if (child == 0) {
8843fde4 2928 char *hostveth;
811ef482
CB
2929
2930 close(pipefd[0]);
2931
2932 ret = dup2(pipefd[1], STDOUT_FILENO);
2933 if (ret >= 0)
2934 ret = dup2(pipefd[1], STDERR_FILENO);
2935 close(pipefd[1]);
2936 if (ret < 0) {
2937 SYSERROR("Failed to duplicate std{err,out} file descriptor");
a30b9023 2938 _exit(EXIT_FAILURE);
811ef482
CB
2939 }
2940
f2711167 2941 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
2942 hostveth = netdev->priv.veth_attr.pair;
2943 else
2944 hostveth = netdev->priv.veth_attr.veth1;
f2711167 2945 if (is_empty_string(hostveth)) {
74c6e2b0 2946 SYSERROR("Host side veth device name is missing");
a30b9023 2947 _exit(EXIT_FAILURE);
74c6e2b0
CB
2948 }
2949
f2711167
CB
2950 if (is_empty_string(netdev->link)) {
2951 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
a30b9023 2952 _exit(EXIT_FAILURE);
74c6e2b0 2953 }
811ef482 2954
811ef482 2955 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
8843fde4 2956 lxcname, netns_path, netdev->link, hostveth);
811ef482 2957 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
8843fde4
CB
2958 lxcname, netns_path, "veth", netdev->link, hostveth,
2959 (char *)NULL);
811ef482 2960 SYSERROR("Failed to exec lxc-user-nic.");
a30b9023 2961 _exit(EXIT_FAILURE);
811ef482
CB
2962 }
2963
2964 close(pipefd[1]);
2965
9c66dc4f 2966 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
811ef482
CB
2967 if (bytes < 0) {
2968 SYSERROR("Failed to read from pipe file descriptor.");
2969 close(pipefd[0]);
6b9f82a9
CB
2970 } else {
2971 buffer[bytes - 1] = '\0';
811ef482 2972 }
811ef482 2973
6b9f82a9 2974 ret = wait_for_pid(child);
9c66dc4f
CB
2975 close_prot_errno_disarm(pipefd[0]);
2976 if (ret != 0 || bytes < 0)
2977 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
2978 !is_empty_string(buffer) ? buffer : "(null)");
811ef482 2979
811ef482
CB
2980 return 0;
2981}
2982
1bd8d726
CB
2983bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2984{
2985 int ret;
2986 struct lxc_list *iterator;
2987 struct lxc_list *network = &handler->conf->network;
2988 /* strlen("/proc/") = 6
2989 * +
8335fd40 2990 * INTTYPE_TO_STRLEN(pid_t)
1bd8d726
CB
2991 * +
2992 * strlen("/fd/") = 4
2993 * +
8335fd40 2994 * INTTYPE_TO_STRLEN(int)
1bd8d726
CB
2995 * +
2996 * \0
2997 */
8335fd40 2998 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
1bd8d726
CB
2999
3000 *netns_path = '\0';
3001
9c66dc4f
CB
3002 if (handler->nsfd[LXC_NS_NET] < 0)
3003 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
1bd8d726
CB
3004
3005 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
0059379f 3006 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
1bd8d726
CB
3007 if (ret < 0 || ret >= sizeof(netns_path))
3008 return false;
3009
3010 lxc_list_for_each(iterator, network) {
3011 char *hostveth = NULL;
3012 struct lxc_netdev *netdev = iterator->elem;
3013
3014 /* We can only delete devices whose ifindex we have. If we don't
3015 * have the index it means that we didn't create it.
3016 */
3017 if (!netdev->ifindex)
3018 continue;
3019
3020 if (netdev->type == LXC_NET_PHYS) {
3021 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3022 netdev->link);
3023 if (ret < 0)
9c66dc4f 3024 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
1bd8d726
CB
3025 netdev->ifindex, netdev->link);
3026 else
9c66dc4f 3027 TRACE("Renamed interface with index %d to its initial name \"%s\"",
1bd8d726 3028 netdev->ifindex, netdev->link);
b3259dc6
TP
3029
3030 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3031 if (ret < 0)
3032 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3033 netdev->ifindex, netdev->link);
66a7c406 3034 goto clear_ifindices;
1bd8d726
CB
3035 }
3036
3037 ret = netdev_deconf[netdev->type](handler, netdev);
3038 if (ret < 0)
3039 WARN("Failed to deconfigure network device");
3040
3041 if (netdev->type != LXC_NET_VETH)
66a7c406 3042 goto clear_ifindices;
1bd8d726 3043
f2711167 3044 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
66a7c406 3045 goto clear_ifindices;
1bd8d726 3046
f2711167 3047 if (!is_empty_string(netdev->priv.veth_attr.pair))
8843fde4
CB
3048 hostveth = netdev->priv.veth_attr.pair;
3049 else
3050 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3051 if (is_empty_string(hostveth))
66a7c406 3052 goto clear_ifindices;
8843fde4 3053
1bd8d726
CB
3054 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3055 handler->name, netdev,
3056 netns_path);
3057 if (ret < 0) {
9c66dc4f 3058 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
66a7c406 3059 goto clear_ifindices;
1bd8d726 3060 }
9c66dc4f 3061 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
66a7c406
CB
3062
3063clear_ifindices:
0858c829
CB
3064 /*
3065 * We need to clear any ifindices we recorded so liblxc won't
3066 * have cached stale data which would cause it to fail on
3067 * reboot where we don't re-read the on-disk config file.
66a7c406
CB
3068 */
3069 netdev->ifindex = 0;
3070 if (netdev->type == LXC_NET_PHYS) {
3071 netdev->priv.phys_attr.ifindex = 0;
3072 } else if (netdev->type == LXC_NET_VETH) {
3073 netdev->priv.veth_attr.veth1[0] = '\0';
3074 netdev->priv.veth_attr.ifindex = 0;
3075 }
1bd8d726
CB
3076 }
3077
bb84beda 3078 return true;
1bd8d726
CB
3079}
3080
6509154d 3081static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
3082 struct lxc_list *cur, *next;
3083 struct lxc_inetdev *inet4dev;
3084 struct lxc_inet6dev *inet6dev;
3085 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
b670016a 3086 int err = 0;
5fe147e9
TP
3087 unsigned int lo_ifindex = 0, link_ifindex = 0;
3088
3089 link_ifindex = if_nametoindex(netdev->link);
9c66dc4f
CB
3090 if (link_ifindex == 0)
3091 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
5fe147e9 3092
6509154d 3093
3094 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3095 if (!lxc_list_empty(&netdev->ipv4)) {
3096 /* Check for net.ipv4.conf.[link].forwarding=1 */
9c66dc4f
CB
3097 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3098 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
6509154d 3099 }
3100
3101 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3102 if (!lxc_list_empty(&netdev->ipv6)) {
3103 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
9c66dc4f
CB
3104 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3105 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
6509154d 3106
3107 /* Check for net.ipv6.conf.[link].forwarding=1 */
9c66dc4f
CB
3108 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3109 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
6509154d 3110 }
3111
b670016a 3112 /* Perform IPVLAN specific checks. */
3113 if (netdev->type == LXC_NET_IPVLAN) {
3114 /* Check mode is l3s as other modes do not work with l2proxy. */
9c66dc4f
CB
3115 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3116 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
b670016a 3117
3118 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3119 lo_ifindex = if_nametoindex(loop_device);
9c66dc4f
CB
3120 if (lo_ifindex == 0)
3121 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
b670016a 3122 }
3123
6509154d 3124 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3125 inet4dev = cur->elem;
3126 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
596a002c 3127 return ret_set_errno(-1, -errno);
6509154d 3128
5fe147e9 3129 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
596a002c 3130 return ret_set_errno(-1, EINVAL);
b670016a 3131
3132 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3133 if (netdev->type == LXC_NET_IPVLAN) {
3134 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
9c66dc4f
CB
3135 if (err < 0)
3136 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
b670016a 3137 }
6509154d 3138 }
3139
3140 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3141 inet6dev = cur->elem;
3142 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
596a002c 3143 return ret_set_errno(-1, -errno);
6509154d 3144
5fe147e9 3145 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
596a002c 3146 return ret_set_errno(-1, EINVAL);
b670016a 3147
3148 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3149 if (netdev->type == LXC_NET_IPVLAN) {
3150 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
9c66dc4f
CB
3151 if (err < 0)
3152 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
b670016a 3153 }
6509154d 3154 }
3155
3156 return 0;
3157}
3158
9c66dc4f
CB
/*
 * Remove the l2proxy state for a single IPv4 address.
 *
 * @ip:         address to clean up.
 * @link:       if non-empty, the device whose neighbour proxy entry for @ip
 *              is deleted.
 * @lo_ifindex: if non-zero, the loopback ifindex whose /32 route to @ip is
 *              deleted.
 *
 * Returns 0 on success, -1 with errno set to EINVAL on any failure.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addr_str[INET_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addr_str, sizeof(addr_str)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* Drop the static route to the loopback device, if one was requested. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addr_str, lo_ifindex);
	}

	/* Drop the neighbour proxy entry on the link, if a link was given. */
	if (!is_empty_string(link)) {
		unsigned int ifindex = if_nametoindex(link);

		if (ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3191
9c66dc4f
CB
/*
 * Remove the l2proxy state for a single IPv6 address.
 *
 * @ip:         address to clean up.
 * @link:       if non-empty, the device whose neighbour proxy entry for @ip
 *              is deleted.
 * @lo_ifindex: if non-zero, the loopback ifindex whose /128 route to @ip is
 *              deleted.
 *
 * Returns 0 on success, -1 with errno set to EINVAL on any failure.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addr_str[INET6_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addr_str, sizeof(addr_str)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* Drop the static route to the loopback device, if one was requested. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addr_str, lo_ifindex);
	}

	/* Drop the neighbour proxy entry on the link, if a link was given. */
	if (!is_empty_string(link)) {
		unsigned int ifindex = if_nametoindex(link);

		if (ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3226
6509154d 3227static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
b670016a 3228 unsigned int lo_ifindex = 0;
3229 unsigned int errCount = 0;
6509154d 3230 struct lxc_list *cur, *next;
3231 struct lxc_inetdev *inet4dev;
3232 struct lxc_inet6dev *inet6dev;
6509154d 3233
b670016a 3234 /* Perform IPVLAN specific checks. */
3235 if (netdev->type == LXC_NET_IPVLAN) {
3236 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3ebffb98 3237 lo_ifindex = if_nametoindex(loop_device);
b670016a 3238 if (lo_ifindex == 0) {
3239 errCount++;
3ebffb98 3240 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
6509154d 3241 }
b670016a 3242 }
6509154d 3243
b670016a 3244 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3245 inet4dev = cur->elem;
3246 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3247 errCount++;
6509154d 3248 }
3249
3250 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3251 inet6dev = cur->elem;
b670016a 3252 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3253 errCount++;
6509154d 3254 }
3255
b670016a 3256 if (errCount > 0)
596a002c 3257 return ret_set_errno(-1, EINVAL);
6509154d 3258
3259 return 0;
3260}
3261
e389f2af 3262static int lxc_create_network_priv(struct lxc_handler *handler)
811ef482 3263{
811ef482
CB
3264 struct lxc_list *iterator;
3265 struct lxc_list *network = &handler->conf->network;
3266
811ef482
CB
3267 lxc_list_for_each(iterator, network) {
3268 struct lxc_netdev *netdev = iterator->elem;
3269
9c66dc4f
CB
3270 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3271 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
811ef482 3272
6509154d 3273 /* Setup l2proxy entries if enabled and used with a link property */
f2711167 3274 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
9c66dc4f
CB
3275 if (lxc_setup_l2proxy(netdev))
3276 return log_error_errno(-1, errno, "Failed to setup l2proxy");
6509154d 3277 }
3278
9c66dc4f
CB
3279 if (netdev_conf[netdev->type](handler, netdev))
3280 return log_error_errno(-1, errno, "Failed to create network device");
811ef482
CB
3281 }
3282
3283 return 0;
3284}
3285
e389f2af 3286int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
811ef482 3287{
e389f2af
CB
3288 pid_t pid = handler->pid;
3289 struct lxc_list *network = &handler->conf->network;
811ef482
CB
3290 struct lxc_list *iterator;
3291
e0010464 3292 if (am_guest_unpriv())
74c6e2b0 3293 return 0;
811ef482
CB
3294
3295 lxc_list_for_each(iterator, network) {
3dd78294 3296 __do_free char *physname = NULL;
e389f2af 3297 int ret;
811ef482
CB
3298 struct lxc_netdev *netdev = iterator->elem;
3299
811ef482
CB
3300 if (!netdev->ifindex)
3301 continue;
3302
3dd78294
CB
3303 if (netdev->type == LXC_NET_PHYS)
3304 physname = is_wlan(netdev->link);
3305
3306 if (physname)
9f8cf6e1 3307 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, NULL);
3dd78294 3308 else
9f8cf6e1 3309 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
9c66dc4f
CB
3310 if (ret)
3311 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3312 netdev->created_name,
3313 netdev->ifindex, pid);
811ef482 3314
24190194
CB
3315 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3316 netdev->created_name, netdev->ifindex, pid);
811ef482
CB
3317 }
3318
3319 return 0;
3320}
3321
3c09b97c
CB
3322static int network_requires_advanced_setup(int type)
3323{
3324 if (type == LXC_NET_EMPTY)
3325 return false;
3326
3327 if (type == LXC_NET_NONE)
3328 return false;
3329
3330 return true;
3331}
3332
e389f2af 3333static int lxc_create_network_unpriv(struct lxc_handler *handler)
74c6e2b0 3334{
e389f2af
CB
3335 int hooks_version = handler->conf->hooks_version;
3336 const char *lxcname = handler->name;
3337 const char *lxcpath = handler->lxcpath;
3338 struct lxc_list *network = &handler->conf->network;
3339 pid_t pid = handler->pid;
74c6e2b0
CB
3340 struct lxc_list *iterator;
3341
74c6e2b0
CB
3342 lxc_list_for_each(iterator, network) {
3343 struct lxc_netdev *netdev = iterator->elem;
3344
3c09b97c 3345 if (!network_requires_advanced_setup(netdev->type))
74c6e2b0
CB
3346 continue;
3347
9c66dc4f
CB
3348 if (netdev->type != LXC_NET_VETH)
3349 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3350 lxc_net_type_to_str(netdev->type));
74c6e2b0
CB
3351
3352 if (netdev->mtu)
3353 INFO("mtu ignored due to insufficient privilege");
3354
e389f2af
CB
3355 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3356 pid, hooks_version))
74c6e2b0
CB
3357 return -1;
3358 }
3359
3360 return 0;
3361}
3362
1bd8d726 3363bool lxc_delete_network_priv(struct lxc_handler *handler)
811ef482
CB
3364{
3365 int ret;
3366 struct lxc_list *iterator;
3367 struct lxc_list *network = &handler->conf->network;
1bd8d726 3368
811ef482
CB
3369 lxc_list_for_each(iterator, network) {
3370 char *hostveth = NULL;
3371 struct lxc_netdev *netdev = iterator->elem;
3372
3373 /* We can only delete devices whose ifindex we have. If we don't
3374 * have the index it means that we didn't create it.
3375 */
3376 if (!netdev->ifindex)
3377 continue;
3378
0104c121
CB
3379 /*
3380 * If the network device has been moved back from the
3381 * containers network namespace, update the ifindex.
3382 */
3383 netdev->ifindex = if_nametoindex(netdev->name);
3384
6509154d 3385 /* Delete l2proxy entries if enabled and used with a link property */
f2711167 3386 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
6509154d 3387 if (lxc_delete_l2proxy(netdev))
3388 WARN("Failed to delete all l2proxy config");
3389 /* Don't return, let the network be cleaned up as normal. */
3390 }
3391
811ef482
CB
3392 if (netdev->type == LXC_NET_PHYS) {
3393 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3394 if (ret < 0)
3395 WARN("Failed to rename interface with index %d "
b809f232
CB
3396 "from \"%s\" to its initial name \"%s\"",
3397 netdev->ifindex, netdev->name, netdev->link);
0b154989 3398 else {
29589196
CB
3399 TRACE("Renamed interface with index %d from "
3400 "\"%s\" to its initial name \"%s\"",
3401 netdev->ifindex, netdev->name,
3402 netdev->link);
0b154989
TP
3403
3404 /* Restore original MTU */
3405 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3406 if (ret < 0) {
3407 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3408 netdev->link, netdev->priv.phys_attr.mtu);
3409 } else {
3410 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3411 netdev->link, netdev->priv.phys_attr.mtu);
3412 }
3413 }
b3259dc6
TP
3414
3415 ret = netdev_deconf[netdev->type](handler, netdev);
40a22043
CB
3416 if (ret < 0)
3417 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3418 netdev->ifindex, netdev->link);
66a7c406 3419 goto clear_ifindices;
811ef482
CB
3420 }
3421
3422 ret = netdev_deconf[netdev->type](handler, netdev);
3423 if (ret < 0)
3424 WARN("Failed to deconfigure network device");
3425
811ef482 3426 if (netdev->type != LXC_NET_VETH)
66a7c406 3427 goto clear_ifindices;
811ef482 3428
811ef482
CB
3429 /* Explicitly delete host veth device to prevent lingering
3430 * devices. We had issues in LXD around this.
3431 */
f2711167 3432 if (!is_empty_string(netdev->priv.veth_attr.pair))
811ef482
CB
3433 hostveth = netdev->priv.veth_attr.pair;
3434 else
3435 hostveth = netdev->priv.veth_attr.veth1;
f2711167 3436 if (is_empty_string(hostveth))
66a7c406 3437 goto clear_ifindices;
811ef482 3438
1ee56cff
CB
3439 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3440 ret = lxc_netdev_delete_by_name(hostveth);
3441 if (ret < 0)
3442 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
811ef482 3443
1ee56cff
CB
3444 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3445 } else if (!is_empty_string(netdev->link)) {
3446 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3447 if (ret < 0)
3448 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
811ef482 3449
1ee56cff
CB
3450 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3451 }
811ef482 3452
66a7c406 3453clear_ifindices:
ad2ddfcd 3454 /* We need to clear any ifindices we recorded so liblxc won't
66a7c406
CB
3455 * have cached stale data which would cause it to fail on reboot
3456 * we're we don't re-read the on-disk config file.
3457 */
3458 netdev->ifindex = 0;
3459 if (netdev->type == LXC_NET_PHYS) {
3460 netdev->priv.phys_attr.ifindex = 0;
3461 } else if (netdev->type == LXC_NET_VETH) {
3462 netdev->priv.veth_attr.veth1[0] = '\0';
3463 netdev->priv.veth_attr.ifindex = 0;
3464 }
811ef482
CB
3465 }
3466
bb84beda 3467 return true;
811ef482
CB
3468}
3469
3470int lxc_requests_empty_network(struct lxc_handler *handler)
3471{
3472 struct lxc_list *network = &handler->conf->network;
3473 struct lxc_list *iterator;
3474 bool found_none = false, found_nic = false;
3475
3476 if (lxc_list_empty(network))
3477 return 0;
3478
9c66dc4f 3479 lxc_list_for_each (iterator, network) {
811ef482
CB
3480 struct lxc_netdev *netdev = iterator->elem;
3481
3482 if (netdev->type == LXC_NET_NONE)
3483 found_none = true;
3484 else
3485 found_nic = true;
3486 }
9c66dc4f 3487
811ef482
CB
3488 if (found_none && !found_nic)
3489 return 1;
9c66dc4f 3490
811ef482
CB
3491 return 0;
3492}
3493
3494/* try to move physical nics to the init netns */
b809f232 3495int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
811ef482 3496{
9c66dc4f
CB
3497 __do_close int oldfd = -EBADF;
3498 int netnsfd = handler->nsfd[LXC_NS_NET];
3499 struct lxc_conf *conf = handler->conf;
811ef482 3500 int ret;
811ef482 3501 char ifname[IFNAMSIZ];
b809f232 3502 struct lxc_list *iterator;
811ef482 3503
b809f232
CB
3504 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3505 * the parent network namespace. We won't have this capability if we are
3506 * unprivileged.
3507 */
d0fbc7ba 3508 if (!handler->am_root)
b809f232 3509 return 0;
811ef482 3510
b809f232 3511 TRACE("Moving physical network devices back to parent network namespace");
811ef482 3512
0037ab49 3513 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
9c66dc4f
CB
3514 if (oldfd < 0)
3515 return log_error_errno(-1, errno, "Failed to preserve network namespace");
811ef482 3516
b809f232 3517 ret = setns(netnsfd, CLONE_NEWNET);
9c66dc4f
CB
3518 if (ret < 0)
3519 return log_error_errno(-1, errno, "Failed to enter network namespace");
811ef482 3520
b809f232
CB
3521 lxc_list_for_each(iterator, &conf->network) {
3522 struct lxc_netdev *netdev = iterator->elem;
811ef482 3523
b809f232
CB
3524 if (netdev->type != LXC_NET_PHYS)
3525 continue;
3526
3527 /* Retrieve the name of the interface in the container's network
3528 * namespace.
3529 */
3530 if (!if_indextoname(netdev->ifindex, ifname)) {
9c66dc4f 3531 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
811ef482
CB
3532 continue;
3533 }
b809f232 3534
0037ab49 3535 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
b809f232 3536 if (ret < 0)
9c66dc4f 3537 WARN("Error moving network device \"%s\" back to network namespace", ifname);
b809f232 3538 else
9c66dc4f 3539 TRACE("Moved network device \"%s\" back to network namespace", ifname);
811ef482 3540 }
811ef482 3541
b809f232 3542 ret = setns(oldfd, CLONE_NEWNET);
9c66dc4f
CB
3543 if (ret < 0)
3544 return log_error_errno(-1, errno, "Failed to enter network namespace");
b809f232
CB
3545
3546 return 0;
811ef482
CB
3547}
3548
3549static int setup_hw_addr(char *hwaddr, const char *ifname)
3550{
9c66dc4f 3551 __do_close int fd = -EBADF;
811ef482
CB
3552 struct sockaddr sockaddr;
3553 struct ifreq ifr;
9c66dc4f 3554 int ret;
811ef482
CB
3555
3556 ret = lxc_convert_mac(hwaddr, &sockaddr);
9c66dc4f
CB
3557 if (ret)
3558 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
811ef482
CB
3559
3560 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3561 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3562 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3563
ad9429e5 3564 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
811ef482
CB
3565 if (fd < 0)
3566 return -1;
3567
3568 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
811ef482 3569 if (ret)
6d1400b5 3570 SYSERROR("Failed to perform ioctl");
3571
9c66dc4f 3572 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
811ef482
CB
3573
3574 return ret;
3575}
3576
3577static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3578{
3579 struct lxc_list *iterator;
3580 int err;
3581
3582 lxc_list_for_each(iterator, ip) {
3583 struct lxc_inetdev *inetdev = iterator->elem;
3584
3585 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3586 &inetdev->bcast, inetdev->prefix);
9c66dc4f
CB
3587 if (err)
3588 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
811ef482
CB
3589 }
3590
3591 return 0;
3592}
3593
3594static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3595{
3596 struct lxc_list *iterator;
3597 int err;
3598
3599 lxc_list_for_each(iterator, ip) {
3600 struct lxc_inet6dev *inet6dev = iterator->elem;
3601
3602 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3603 &inet6dev->mcast, &inet6dev->acast,
3604 inet6dev->prefix);
9c66dc4f
CB
3605 if (err)
3606 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
811ef482
CB
3607 }
3608
3609 return 0;
3610}
3611
8bf64b77 3612static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
811ef482 3613{
811ef482 3614 int err;
009d6127 3615 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
811ef482
CB
3616
3617 /* empty network namespace */
8bf64b77
CB
3618 if (!netdev->ifindex && netdev->flags & IFF_UP) {
3619 err = lxc_netdev_up("lo");
9c66dc4f
CB
3620 if (err)
3621 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3622 }
3623
811ef482 3624 /* set a mac address */
9c66dc4f
CB
3625 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3626 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
811ef482
CB
3627
3628 /* setup ipv4 addresses on the interface */
9c66dc4f
CB
3629 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex))
3630 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
811ef482
CB
3631
3632 /* setup ipv6 addresses on the interface */
9c66dc4f
CB
3633 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex))
3634 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
811ef482
CB
3635
3636 /* set the network device up */
3637 if (netdev->flags & IFF_UP) {
8bf64b77 3638 err = lxc_netdev_up(netdev->name);
9c66dc4f
CB
3639 if (err)
3640 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
811ef482
CB
3641
3642 /* the network is up, make the loopback up too */
3643 err = lxc_netdev_up("lo");
9c66dc4f
CB
3644 if (err)
3645 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
811ef482
CB
3646 }
3647
811ef482 3648 /* setup ipv4 gateway on the interface */
a2f9a670 3649 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
9c66dc4f
CB
3650 if (!(netdev->flags & IFF_UP))
3651 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3652
9c66dc4f
CB
3653 if (lxc_list_empty(&netdev->ipv4))
3654 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3655
a2f9a670 3656 /* Setup device route if ipv4_gateway_dev is enabled */
3657 if (netdev->ipv4_gateway_dev) {
3658 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3659 if (err < 0)
3660 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
a2f9a670 3661 } else {
009d6127 3662 /* Check the gateway address is valid */
3663 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
596a002c 3664 return ret_set_errno(-1, errno);
009d6127 3665
3666 /* Try adding a default route to the gateway address */
811ef482 3667 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
009d6127 3668 if (err < 0) {
3669 /* If adding the default route fails, this could be because the
3670 * gateway address is in a different subnet to the container's address.
3671 * To work around this, we try adding a static device route to the
3672 * gateway address first, and then try again.
3673 */
a2f9a670 3674 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
9c66dc4f
CB
3675 if (err < 0)
3676 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
6d1400b5 3677
a2f9a670 3678 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
9c66dc4f
CB
3679 if (err < 0)
3680 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
811ef482
CB
3681 }
3682 }
3683 }
3684
3685 /* setup ipv6 gateway on the interface */
a2f9a670 3686 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
9c66dc4f
CB
3687 if (!(netdev->flags & IFF_UP))
3688 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
811ef482 3689
9c66dc4f
CB
3690 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
3691 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
811ef482 3692
a2f9a670 3693 /* Setup device route if ipv6_gateway_dev is enabled */
3694 if (netdev->ipv6_gateway_dev) {
3695 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
9c66dc4f
CB
3696 if (err < 0)
3697 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
a2f9a670 3698 } else {
009d6127 3699 /* Check the gateway address is valid */
3700 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
596a002c 3701 return ret_set_errno(-1, errno);
009d6127 3702
3703 /* Try adding a default route to the gateway address */
811ef482 3704 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
009d6127 3705 if (err < 0) {
3706 /* If adding the default route fails, this could be because the
3707 * gateway address is in a different subnet to the container's address.
3708 * To work around this, we try adding a static device route to the
3709 * gateway address first, and then try again.
3710 */
a2f9a670 3711 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
9c66dc4f
CB
3712 if (err < 0)
3713 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
6d1400b5 3714
a2f9a670 3715 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
9c66dc4f
CB
3716 if (err < 0)
3717 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
811ef482
CB
3718 }
3719 }
3720 }
3721
8bf64b77 3722 DEBUG("Network device \"%s\" has been setup", netdev->name);
811ef482
CB
3723
3724 return 0;
3725}
3726
3727int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3728 struct lxc_list *network)
3729{
3730 struct lxc_list *iterator;
811ef482 3731
8bf64b77 3732 lxc_list_for_each (iterator, network) {
e389f2af 3733 struct lxc_netdev *netdev = iterator->elem;
8bf64b77 3734 int ret;
811ef482 3735
8bf64b77
CB
3736 ret = netdev_ns_conf[netdev->type](netdev);
3737 if (!ret)
3738 ret = lxc_network_setup_in_child_namespaces_common(netdev);
9c66dc4f
CB
3739 if (ret)
3740 return log_error_errno(-1, errno, "Failed to setup netdev");
811ef482
CB
3741 }
3742
3743 if (!lxc_list_empty(network))
e389f2af 3744 INFO("Network has been setup");
811ef482
CB
3745
3746 return 0;
3747}
7ab1ba02 3748
3c09b97c 3749int lxc_network_send_to_child(struct lxc_handler *handler)
7ab1ba02
CB
3750{
3751 struct lxc_list *iterator;
3752 struct lxc_list *network = &handler->conf->network;
3753 int data_sock = handler->data_sock[0];
3754
7ab1ba02
CB
3755 lxc_list_for_each(iterator, network) {
3756 int ret;
3757 struct lxc_netdev *netdev = iterator->elem;
3758
3c09b97c 3759 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3760 continue;
3761
7fbb15ec 3762 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
7729f8e5 3763 if (ret < 0)
7ab1ba02 3764 return -1;
e389f2af
CB
3765
3766 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3767 if (ret < 0)
3768 return -1;
3769
3770 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
7ab1ba02
CB
3771 }
3772
3773 return 0;
3774}
3775
3c09b97c 3776int lxc_network_recv_from_parent(struct lxc_handler *handler)
7ab1ba02
CB
3777{
3778 struct lxc_list *iterator;
3779 struct lxc_list *network = &handler->conf->network;
3780 int data_sock = handler->data_sock[1];
3781
7ab1ba02
CB
3782 lxc_list_for_each(iterator, network) {
3783 int ret;
3784 struct lxc_netdev *netdev = iterator->elem;
3785
3c09b97c 3786 if (!network_requires_advanced_setup(netdev->type))
7ab1ba02
CB
3787 continue;
3788
e3233f26 3789 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
7729f8e5 3790 if (ret < 0)
7ab1ba02 3791 return -1;
e389f2af
CB
3792
3793 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3794 if (ret < 0)
3795 return -1;
54256301 3796
e389f2af 3797 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
7ab1ba02
CB
3798 }
3799
3800 return 0;
3801}
a1ae535a
CB
3802
3803int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3804{
3805 struct lxc_list *iterator, *network;
3806 int data_sock = handler->data_sock[0];
3807
3808 if (!handler->am_root)
3809 return 0;
3810
3811 network = &handler->conf->network;
3812 lxc_list_for_each(iterator, network) {
3813 int ret;
3814 struct lxc_netdev *netdev = iterator->elem;
3815
3816 /* Send network device name in the child's namespace to parent. */
7fbb15ec 3817 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
a1ae535a 3818 if (ret < 0)
7729f8e5 3819 return -1;
a1ae535a
CB
3820
3821 /* Send network device ifindex in the child's namespace to
3822 * parent.
3823 */
7fbb15ec 3824 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
a1ae535a 3825 if (ret < 0)
7729f8e5 3826 return -1;
a1ae535a
CB
3827 }
3828
e389f2af
CB
3829 if (!lxc_list_empty(network))
3830 TRACE("Sent network device names and ifindices to parent");
3831
a1ae535a 3832 return 0;
a1ae535a
CB
3833}
3834
3835int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3836{
3837 struct lxc_list *iterator, *network;
3838 int data_sock = handler->data_sock[1];
3839
3840 if (!handler->am_root)
3841 return 0;
3842
3843 network = &handler->conf->network;
3844 lxc_list_for_each(iterator, network) {
3845 int ret;
3846 struct lxc_netdev *netdev = iterator->elem;
3847
3848 /* Receive network device name in the child's namespace to
3849 * parent.
3850 */
e3233f26 3851 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
a1ae535a 3852 if (ret < 0)
7729f8e5 3853 return -1;
a1ae535a
CB
3854
3855 /* Receive network device ifindex in the child's namespace to
3856 * parent.
3857 */
e3233f26 3858 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
a1ae535a 3859 if (ret < 0)
7729f8e5 3860 return -1;
a1ae535a
CB
3861 }
3862
3863 return 0;
a1ae535a 3864}
bb84beda
CB
3865
3866void lxc_delete_network(struct lxc_handler *handler)
3867{
3868 bool bret;
3869
3870 if (handler->am_root)
3871 bret = lxc_delete_network_priv(handler);
3872 else
3873 bret = lxc_delete_network_unpriv(handler);
3874 if (!bret)
3875 DEBUG("Failed to delete network devices");
3876 else
3877 DEBUG("Deleted network devices");
3878}
1cd95214 3879
1cd95214
CB
3880int lxc_netns_set_nsid(int fd)
3881{
41a3300d 3882 int ret;
0ce60f0d
CB
3883 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3884 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3885 NLMSG_ALIGN(1024)];
1cd95214 3886 struct nl_handler nlh;
a5f5cb41 3887 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
0ce60f0d
CB
3888 struct nlmsghdr *hdr;
3889 struct rtgenmsg *msg;
9d036caa
CB
3890 const __s32 ns_id = -1;
3891 const __u32 netns_fd = fd;
1cd95214 3892
a5f5cb41 3893 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
1cd95214 3894 if (ret < 0)
41a3300d 3895 return -1;
1cd95214 3896
0ce60f0d 3897 memset(buf, 0, sizeof(buf));
6ce39620
CB
3898
3899#pragma GCC diagnostic push
3900#pragma GCC diagnostic ignored "-Wcast-align"
0ce60f0d
CB
3901 hdr = (struct nlmsghdr *)buf;
3902 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3903#pragma GCC diagnostic pop
1cd95214 3904
0ce60f0d
CB
3905 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3906 hdr->nlmsg_type = RTM_NEWNSID;
3907 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3908 hdr->nlmsg_pid = 0;
3909 hdr->nlmsg_seq = RTM_NEWNSID;
3910 msg->rtgen_family = AF_UNSPEC;
1cd95214 3911
9d036caa
CB
3912 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3913 if (ret < 0)
a5f5cb41 3914 return ret_errno(ENOMEM);
9d036caa
CB
3915
3916 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3917 if (ret < 0)
a5f5cb41 3918 return ret_errno(ENOMEM);
1cd95214 3919
a5f5cb41 3920 return __netlink_transaction(nlh_ptr, hdr, hdr);
1cd95214 3921}
938980ba
CB
3922
/*
 * Index a run of rtattr records by attribute type.
 *
 * @tb  : table with room for @max + 1 pointers; cleared on entry
 * @max : highest attribute type that is recorded
 * @rta : first attribute of the run
 * @len : number of bytes available starting at @rta
 *
 * Each slot tb[type] receives a pointer to the first attribute of that type;
 * later duplicates and attributes with a type above @max are ignored.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short type = rta->rta_type;

		/* Skip out-of-range types and keep the first occurrence only. */
		if (type > max || tb[type])
			continue;

		tb[type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
3942
/*
 * Return the payload of @rta interpreted as a signed 32-bit integer.
 *
 * The payload starts RTA_LENGTH(0) bytes after the attribute header, which
 * is the same address RTA_DATA(rta) yields. The bytes are copied out with
 * memcpy() instead of being read through a cast pointer, so the read does
 * not drop the const qualifier and does not depend on the payload's
 * alignment or on the effective type of the underlying buffer.
 *
 * The caller must ensure the attribute carries at least sizeof(__s32) bytes
 * of payload; no length check is done here.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, (const char *)rta + RTA_LENGTH(0), sizeof(value));

	return value;
}
3947
/*
 * NETNS_RTA(r): given a pointer to a struct rtgenmsg payload, yield a
 * pointer to the first rtattr, located NLMSG_ALIGN(sizeof(struct rtgenmsg))
 * bytes further on. Only defined here when not already provided.
 */
#if !defined(NETNS_RTA)
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3952
3953int lxc_netns_get_nsid(int fd)
3954{
a5f5cb41
CB
3955 struct nl_handler nlh;
3956 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
938980ba
CB
3957 int ret;
3958 ssize_t len;
3959 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
9d036caa
CB
3960 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3961 NLMSG_ALIGN(1024)];
938980ba 3962 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
938980ba
CB
3963 struct nlmsghdr *hdr;
3964 struct rtgenmsg *msg;
938980ba
CB
3965 __u32 netns_fd = fd;
3966
a5f5cb41 3967 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
938980ba
CB
3968 if (ret < 0)
3969 return -1;
3970
3971 memset(buf, 0, sizeof(buf));
6ce39620
CB
3972
3973#pragma GCC diagnostic push
3974#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
3975 hdr = (struct nlmsghdr *)buf;
3976 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
6ce39620 3977#pragma GCC diagnostic pop
938980ba
CB
3978
3979 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3980 hdr->nlmsg_type = RTM_GETNSID;
3981 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3982 hdr->nlmsg_pid = 0;
3983 hdr->nlmsg_seq = RTM_GETNSID;
3984 msg->rtgen_family = AF_UNSPEC;
3985
9d036caa 3986 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
a5f5cb41
CB
3987 if (ret < 0)
3988 return ret_errno(ENOMEM);
938980ba 3989
a5f5cb41 3990 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
938980ba
CB
3991 if (ret < 0)
3992 return -1;
3993
3994 msg = NLMSG_DATA(hdr);
3995 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
3996 if (len < 0)
a5f5cb41 3997 return ret_errno(EINVAL);
938980ba 3998
6ce39620
CB
3999#pragma GCC diagnostic push
4000#pragma GCC diagnostic ignored "-Wcast-align"
938980ba
CB
4001 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4002 if (tb[__LXC_NETNSA_NSID])
4003 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
6ce39620 4004#pragma GCC diagnostic pop
938980ba
CB
4005
4006 return -1;
4007}
e389f2af
CB
4008
4009int lxc_create_network(struct lxc_handler *handler)
4010{
4011 int ret;
4012
e389f2af
CB
4013 if (handler->am_root) {
4014 ret = lxc_create_network_priv(handler);
4015 if (ret)
4016 return -1;
4017
4018 return lxc_network_move_created_netdev_priv(handler);
4019 }
4020
4021 return lxc_create_network_unpriv(handler);
4022}