]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/network.c
github: Update for main branch
[mirror_lxc.git] / src / lxc / network.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include "config.h"
4
5 #include <arpa/inet.h>
6 #include <ctype.h>
7 #include <errno.h>
8 #include <fcntl.h>
9 #include <linux/netlink.h>
10 #include <linux/rtnetlink.h>
11 #include <linux/sockios.h>
12 #include <net/ethernet.h>
13 #include <net/if.h>
14 #include <net/if_arp.h>
15 #include <netinet/in.h>
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include <sys/inotify.h>
20 #include <sys/ioctl.h>
21 #include <sys/param.h>
22 #include <sys/socket.h>
23 #include <sys/stat.h>
24 #include <sys/types.h>
25 #include <time.h>
26 #include <unistd.h>
27
28 #include "netns_ifaddrs.h"
29 #include "af_unix.h"
30 #include "conf.h"
31 #include "file_utils.h"
32 #include "log.h"
33 #include "macro.h"
34 #include "memory_utils.h"
35 #include "network.h"
36 #include "nl.h"
37 #include "process_utils.h"
38 #include "string_utils.h"
39 #include "syscall_wrappers.h"
40 #include "utils.h"
41
42 #if !HAVE_STRLCPY
43 #include "strlcpy.h"
44 #endif
45
46 lxc_log_define(network, lxc);
47
/*
 * Callback signatures for per-type network device handling.
 *
 * NOTE(review): judging by the names, the "server" variants run on the host
 * side (they receive the container's handler) and the "container" variant runs
 * for the device alone; the dispatch tables using them are not visible here.
 */
typedef int (*netdev_configure_server_cb)(struct lxc_handler *handler, struct lxc_netdev *netdev);
typedef int (*netdev_configure_container_cb)(struct lxc_netdev *netdev);
typedef int (*netdev_shutdown_server_cb)(struct lxc_handler *handler, struct lxc_netdev *netdev);
51
52 const struct lxc_network_info {
53 const char *name;
54 const char template[IFNAMSIZ];
55 size_t template_len;
56 } lxc_network_info[LXC_NET_MAXCONFTYPE + 1] = {
57 [LXC_NET_EMPTY] = { "empty", "emptXXXXXX", STRLITERALLEN("emptXXXXXX") },
58 [LXC_NET_VETH] = { "veth", "vethXXXXXX", STRLITERALLEN("vethXXXXXX") },
59 [LXC_NET_MACVLAN] = { "macvlan", "macvXXXXXX", STRLITERALLEN("macvXXXXXX") },
60 [LXC_NET_IPVLAN] = { "ipvlan", "ipvlXXXXXX", STRLITERALLEN("ipvlXXXXXX") },
61 [LXC_NET_PHYS] = { "phys", "physXXXXXX", STRLITERALLEN("physXXXXXX") },
62 [LXC_NET_VLAN] = { "vlan", "vlanXXXXXX", STRLITERALLEN("vlanXXXXXX") },
63 [LXC_NET_NONE] = { "none", "noneXXXXXX", STRLITERALLEN("noneXXXXXX") },
64 [LXC_NET_MAXCONFTYPE] = { NULL, "", 0 }
65 };
66
67 const char *lxc_net_type_to_str(int type)
68 {
69 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
70 return NULL;
71
72 return lxc_network_info[type].name;
73 }
74
75 static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
76
77 char *lxc_ifname_alnum_case_sensitive(char *template)
78 {
79 char name[IFNAMSIZ];
80 size_t i = 0;
81 #ifdef HAVE_RAND_R
82 unsigned int seed;
83
84 seed = randseed(false);
85 #else
86
87 (void)randseed(true);
88 #endif
89
90 if (strlen(template) >= IFNAMSIZ)
91 return NULL;
92
93 /* Generate random names until we find one that doesn't exist. */
94 for (;;) {
95 name[0] = '\0';
96 (void)strlcpy(name, template, IFNAMSIZ);
97
98 for (i = 0; i < strlen(name); i++) {
99 if (name[i] == 'X') {
100 #ifdef HAVE_RAND_R
101 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
102 #else
103 name[i] = padchar[rand() % strlen(padchar)];
104 #endif
105 }
106 }
107
108 if (if_nametoindex(name) == 0)
109 break;
110 }
111
112 (void)strlcpy(template, name, strlen(template) + 1);
113
114 return template;
115 }
/* Name of the loopback network interface. */
static const char loop_device[] = "lo";
117
118 static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
119 {
120 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
121 struct nl_handler nlh = NL_HANDLER_INIT;
122 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
123 int addrlen, err;
124 struct rtmsg *rt;
125
126 addrlen = family == AF_INET ? sizeof(struct in_addr)
127 : sizeof(struct in6_addr);
128
129 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
130 if (err)
131 return err;
132
133 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
134 if (!nlmsg)
135 return -ENOMEM;
136
137 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
138 if (!answer)
139 return -ENOMEM;
140
141 nlmsg->nlmsghdr->nlmsg_flags =
142 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
143 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
144
145 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
146 if (!rt)
147 return -ENOMEM;
148
149 rt->rtm_family = family;
150 rt->rtm_table = RT_TABLE_MAIN;
151 rt->rtm_scope = RT_SCOPE_LINK;
152 rt->rtm_protocol = RTPROT_BOOT;
153 rt->rtm_type = RTN_UNICAST;
154 rt->rtm_dst_len = netmask;
155
156 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
157 return -EINVAL;
158
159 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
160 return -EINVAL;
161
162 return netlink_transaction(nlh_ptr, nlmsg, answer);
163 }
164
/* Add an IPv4 route for @dest/@netmask via interface @ifindex (RTM_NEWROUTE). */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_NEWROUTE;

	return lxc_ip_route_dest(request, AF_INET, ifindex, dest, netmask);
}
169
/* Add an IPv6 route for @dest/@netmask via interface @ifindex (RTM_NEWROUTE). */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_NEWROUTE;

	return lxc_ip_route_dest(request, AF_INET6, ifindex, dest, netmask);
}
174
/* Remove the IPv4 route for @dest/@netmask via interface @ifindex (RTM_DELROUTE). */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_DELROUTE;

	return lxc_ip_route_dest(request, AF_INET, ifindex, dest, netmask);
}
179
/* Remove the IPv6 route for @dest/@netmask via interface @ifindex (RTM_DELROUTE). */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request = RTM_DELROUTE;

	return lxc_ip_route_dest(request, AF_INET6, ifindex, dest, netmask);
}
184
185 static int setup_ipv4_routes(struct lxc_netdev *netdev)
186 {
187 int ifindex = netdev->priv.veth_attr.ifindex;
188 struct lxc_inetdev *inetdev;
189 int err;
190
191 list_for_each_entry(inetdev, &netdev->priv.veth_attr.ipv4_routes, head) {
192 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
193 if (err)
194 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
195 }
196
197 return 0;
198 }
199
200 static int setup_ipv6_routes(struct lxc_netdev *netdev)
201 {
202 int err;
203 struct lxc_inet6dev *inet6dev;
204 int ifindex = netdev->priv.veth_attr.ifindex;
205
206 list_for_each_entry(inet6dev, &netdev->priv.veth_attr.ipv6_routes, head) {
207 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
208 if (err)
209 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
210 }
211
212 return 0;
213 }
214
215 static int setup_ipv4_addr_routes(struct lxc_netdev *netdev)
216 {
217 int err;
218 struct lxc_inetdev *inetdev;
219 int ifindex;
220
221 if (netdev->type != LXC_NET_VETH)
222 return ret_errno(EINVAL);
223
224 ifindex = netdev->priv.veth_attr.ifindex;
225 list_for_each_entry(inetdev, &netdev->ipv4_addresses, head) {
226 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
227 if (err)
228 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
229 }
230
231 return 0;
232 }
233
234 static int setup_ipv6_addr_routes(struct lxc_netdev *netdev)
235 {
236 int err;
237 struct lxc_inet6dev *inet6dev;
238 int ifindex;
239
240 if (netdev->type != LXC_NET_VETH)
241 return ret_errno(EINVAL);
242
243 ifindex = netdev->priv.veth_attr.ifindex;
244 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
245
246 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
247 if (err)
248 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
249 }
250
251 return 0;
252 }
253
254 static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
255 {
256 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
257 struct nl_handler nlh = NL_HANDLER_INIT;
258 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
259 int addrlen, err;
260 struct ndmsg *rt;
261
262 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
263
264 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
265 if (err)
266 return err;
267
268 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
269 if (!nlmsg)
270 return -ENOMEM;
271
272 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
273 if (!answer)
274 return -ENOMEM;
275
276 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
277 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
278
279 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
280 if (!rt)
281 return -ENOMEM;
282
283 rt->ndm_ifindex = ifindex;
284 rt->ndm_flags = NTF_PROXY;
285 rt->ndm_type = NDA_DST;
286 rt->ndm_family = family;
287
288 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
289 return -EINVAL;
290
291 return netlink_transaction(nlh_ptr, nlmsg, answer);
292 }
293
294 static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
295 {
296 int ret;
297 char path[PATH_MAX];
298 char buf[1] = "";
299
300 if (family != AF_INET && family != AF_INET6)
301 return ret_set_errno(-1, EINVAL);
302
303 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
304 family == AF_INET ? "ipv4" : "ipv6", ifname,
305 "forwarding");
306 if (ret < 0)
307 return ret_set_errno(-1, E2BIG);
308
309 return lxc_read_file_expect(path, buf, 1, "1");
310 }
311
/*
 * Payload of the IFLA_BRIDGE_VLAN_INFO attribute as sent by lxc_bridge_vlan().
 * Field order and widths are part of the wire format; do not reorder.
 */
struct bridge_vlan_info {
	__u16 flags;	/* BRIDGE_VLAN_INFO_* bits */
	__u16 vid;	/* VLAN ID */
};
316
317 static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
318 {
319 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
320 struct nl_handler nlh = NL_HANDLER_INIT;
321 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
322 int err;
323 struct ifinfomsg *ifi;
324 struct rtattr *nest;
325 unsigned short bridge_flags = 0;
326 struct bridge_vlan_info vlan_info;
327
328 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
329 if (err)
330 return err;
331
332 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
333 if (!nlmsg)
334 return ret_errno(ENOMEM);
335
336 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
337 if (!answer)
338 return ret_errno(ENOMEM);
339
340 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
341 nlmsg->nlmsghdr->nlmsg_type = operation;
342
343 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
344 if (!ifi)
345 return ret_errno(ENOMEM);
346 ifi->ifi_family = AF_BRIDGE;
347 ifi->ifi_index = ifindex;
348
349 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
350 if (!nest)
351 return ret_errno(ENOMEM);
352
353 bridge_flags |= BRIDGE_FLAGS_MASTER;
354 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
355 return ret_errno(ENOMEM);
356
357 vlan_info.vid = vlan_id;
358 vlan_info.flags = 0;
359 if (!tagged)
360 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
361
362 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
363 return ret_errno(ENOMEM);
364
365 nla_end_nested(nlmsg, nest);
366
367 return netlink_transaction(nlh_ptr, nlmsg, answer);
368 }
369
/*
 * Add VLAN @vlan_id to bridge port @ifindex (RTM_SETLINK). With @tagged false
 * the VLAN becomes the port's untagged PVID.
 */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	const unsigned short request = RTM_SETLINK;

	return lxc_bridge_vlan(ifindex, request, vlan_id, tagged);
}
374
/* Remove VLAN @vlan_id from bridge port @ifindex (RTM_DELLINK). */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	const unsigned short request = RTM_DELLINK;

	return lxc_bridge_vlan(ifindex, request, vlan_id, false);
}
379
380 static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
381 {
382 struct lxc_list *iterator;
383 int err;
384
385 lxc_list_for_each(iterator, vlan_ids) {
386 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
387
388 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
389 if (err)
390 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
391 }
392
393 return 0;
394 }
395
396 static int validate_veth(struct lxc_netdev *netdev)
397 {
398 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
399 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
400 if (netdev->priv.veth_attr.vlan_id_set)
401 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
402
403 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
404 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
405 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
406 }
407
408 if (netdev->priv.veth_attr.vlan_id_set) {
409 struct lxc_list *it;
410 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
411 unsigned short i = PTR_TO_USHORT(it->elem);
412 if (i == netdev->priv.veth_attr.vlan_id)
413 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
414 }
415 }
416
417 return 0;
418 }
419
420 static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
421 {
422 int err, rc, veth1index;
423 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
424 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
425
426 /* Skip setup if no VLAN options are specified. */
427 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
428 return 0;
429
430 /* Check vlan filtering is enabled on parent bridge. */
431 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
432 if (rc < 0)
433 return -1;
434
435 rc = lxc_read_from_file(path, buf, sizeof(buf));
436 if (rc < 0)
437 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
438
439 buf[rc - 1] = '\0';
440
441 if (!strequal(buf, "1"))
442 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
443
444 /* Get veth1 ifindex for use with netlink. */
445 veth1index = if_nametoindex(veth1);
446 if (!veth1index)
447 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
448
449 /* Configure untagged VLAN settings on bridge port if specified. */
450 if (netdev->priv.veth_attr.vlan_id_set) {
451 unsigned short default_pvid;
452
453 /* Get the bridge's default VLAN PVID. */
454 rc = strnprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
455 if (rc < 0)
456 return -1;
457
458 rc = lxc_read_from_file(path, buf, sizeof(buf));
459 if (rc < 0)
460 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
461
462 buf[rc - 1] = '\0';
463 err = get_u16(&default_pvid, buf, 0);
464 if (err)
465 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
466
467 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
468 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
469 err = lxc_bridge_vlan_del(veth1index, default_pvid);
470 if (err)
471 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
472 }
473
474 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
475 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
476 if (err)
477 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
478 }
479 }
480
481 /* Configure tagged VLAN settings on bridge port if specified. */
482 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
483 if (err)
484 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
485
486 return 0;
487 }
488
/* Arguments handed to lxc_ovs_setup_bridge_vlan_exec() via run_command(). */
struct ovs_veth_vlan_args {
	/* Name of the openvswitch port to configure. */
	const char *nic;
	/* Port VLAN mode. */
	const char *vlan_mode;
	/* PVID VLAN ID. */
	short vlan_id;
	/* Comma delimited list of tagged VLAN IDs. */
	char *trunks;
};
495
496 static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args *args)
497 {
498 free_disarm(args->trunks);
499 }
500
501 static int lxc_ovs_setup_bridge_vlan_exec(void *data)
502 {
503 struct ovs_veth_vlan_args *args = data;
504 __do_free char *vlan_mode = NULL, *tag = NULL, *trunks = NULL;
505
506 if (!args->vlan_mode)
507 return ret_errno(EINVAL);
508
509 vlan_mode = must_concat(NULL, "vlan_mode=", args->vlan_mode, (char *)NULL);
510
511 if (args->vlan_id > BRIDGE_VLAN_NONE) {
512 char buf[5];
513 int rc;
514
515 rc = strnprintf(buf, sizeof(buf), "%u", args->vlan_id);
516 if (rc < 0)
517 return log_error_errno(-1, EINVAL, "Failed to parse ovs bridge vlan \"%d\"", args->vlan_id);
518
519 tag = must_concat(NULL, "tag=", buf, (char *)NULL);
520 }
521
522 if (args->trunks)
523 trunks = must_concat(NULL, "trunks=", args->trunks, (char *)NULL);
524
525 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
526 if (tag && trunks)
527 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, trunks, (char *)NULL);
528 else if (tag)
529 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, (char *)NULL);
530 else if (trunks)
531 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, trunks, (char *)NULL);
532 else
533 return -EINVAL;
534
535 return -errno;
536 }
537
538 static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
539 {
540 int taggedLength = lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids);
541 struct ovs_veth_vlan_args args;
542 args.nic = veth1;
543 args.vlan_mode = NULL;
544 args.vlan_id = BRIDGE_VLAN_NONE;
545 args.trunks = NULL;
546
547 /* Skip setup if no VLAN options are specified. */
548 if (!netdev->priv.veth_attr.vlan_id_set && taggedLength <= 0)
549 return 0;
550
551 /* Configure untagged VLAN settings on bridge port if specified. */
552 if (netdev->priv.veth_attr.vlan_id_set) {
553 if (netdev->priv.veth_attr.vlan_id == BRIDGE_VLAN_NONE && taggedLength <= 0)
554 return log_error_errno(-1, EINVAL, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
555
556 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
557 * Also set the vlan_mode=access, which will drop any tagged frames.
558 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
559 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
560 */
561 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
562 args.vlan_mode = "access";
563 args.vlan_id = netdev->priv.veth_attr.vlan_id;
564 }
565 }
566
567 if (taggedLength > 0) {
568 args.vlan_mode = "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
569
570 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
571 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
572 args.vlan_mode = "native-untagged";
573 }
574
575 struct lxc_list *iterator;
576 lxc_list_for_each(iterator, &netdev->priv.veth_attr.vlan_tagged_ids) {
577 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
578 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
579 int rc;
580
581 rc = strnprintf(buf, sizeof(buf), "%u", vlan_id);
582 if (rc < 0) {
583 free_ovs_veth_vlan_args(&args);
584 return log_error_errno(-1, EINVAL, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id, veth1);
585 }
586
587 if (args.trunks)
588 args.trunks = must_concat(NULL, args.trunks, buf, ",", (char *)NULL);
589 else
590 args.trunks = must_concat(NULL, buf, ",", (char *)NULL);
591 }
592 }
593
594 if (args.vlan_mode) {
595 int ret;
596 char cmd_output[PATH_MAX];
597
598 ret = run_command(cmd_output, sizeof(cmd_output), lxc_ovs_setup_bridge_vlan_exec, (void *)&args);
599 if (ret < 0) {
600 free_ovs_veth_vlan_args(&args);
601 return log_error_errno(-1, ret, "Failed to setup openvswitch vlan on port \"%s\": %s", args.nic, cmd_output);
602 }
603 }
604
605 free_ovs_veth_vlan_args(&args);
606 return 0;
607 }
608
609 static int netdev_configure_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
610 {
611 int err;
612 unsigned int mtu = 1500;
613 char *veth1, *veth2;
614 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
615
616 err = validate_veth(netdev);
617 if (err)
618 return err;
619
620 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
621 veth1 = netdev->priv.veth_attr.pair;
622 if (handler->conf->reboot)
623 lxc_netdev_delete_by_name(veth1);
624 } else {
625 err = strnprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
626 if (err < 0)
627 return -1;
628
629 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
630 if (!veth1)
631 return -1;
632
633 /* store away for deconf */
634 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
635 }
636
637 err = strnprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
638 if (err < 0)
639 return -1;
640
641 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
642 if (!veth2)
643 return -1;
644
645 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
646 if (netdev->mtu) {
647 if (lxc_safe_uint(netdev->mtu, &mtu))
648 return log_error_errno(-1, errno, "Failed to parse mtu");
649 } else if (!is_empty_string(netdev->link)) {
650 int ifindex_mtu;
651
652 ifindex_mtu = if_nametoindex(netdev->link);
653 if (ifindex_mtu) {
654 mtu = netdev_get_mtu(ifindex_mtu);
655 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
656 }
657 }
658
659 err = lxc_veth_create(veth1, veth2, handler->pid, mtu,
660 netdev->priv.veth_attr.n_rxqueues, netdev->priv.veth_attr.n_txqueues);
661 if (err)
662 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
663
664 /*
665 * Veth devices are directly created in the container's network
666 * namespace so the device doesn't need to be moved into the
667 * container's network namespace. Make this explicit by setting the
668 * devices ifindex to 0.
669 */
670 netdev->ifindex = 0;
671
672 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
673
674 /*
675 * Since the device won't be moved transient name generation won't
676 * happen. But the transient name is needed for the container to
677 * retrieve the ifindex for the device.
678 */
679 strlcpy(netdev->transient_name, veth2, IFNAMSIZ);
680
681 /*
682 * Changing the high byte of the mac address to 0xfe, the bridge interface
683 * will always keep the host's mac address and not take the mac address
684 * of a container.
685 */
686 err = setup_private_host_hw_addr(veth1);
687 if (err) {
688 errno = -err;
689 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
690 goto out_delete;
691 }
692
693 /* Retrieve ifindex of the host's veth device. */
694 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
695 if (!netdev->priv.veth_attr.ifindex) {
696 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
697 goto out_delete;
698 }
699
700 if (mtu) {
701 err = lxc_netdev_set_mtu(veth1, mtu);
702 if (err) {
703 errno = -err;
704 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
705 goto out_delete;
706 }
707 }
708
709 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
710 if (!lxc_nic_exists(netdev->link)) {
711 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
712 goto out_delete;
713 }
714
715 err = lxc_bridge_attach(netdev->link, veth1);
716 if (err) {
717 errno = -err;
718 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
719 goto out_delete;
720 }
721 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
722
723 if (is_ovs_bridge(netdev->link)) {
724 err = setup_veth_ovs_bridge_vlan(veth1, netdev);
725 if (err) {
726 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1);
727 lxc_ovs_delete_port(netdev->link, veth1);
728 goto out_delete;
729 }
730 } else {
731 err = setup_veth_native_bridge_vlan(veth1, netdev);
732 if (err) {
733 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
734 goto out_delete;
735 }
736 }
737 }
738
739 err = lxc_netdev_up(veth1);
740 if (err) {
741 errno = -err;
742 SYSERROR("Failed to set \"%s\" up", veth1);
743 goto out_delete;
744 }
745
746 /* setup ipv4 routes on the host interface */
747 if (setup_ipv4_routes(netdev)) {
748 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
749 goto out_delete;
750 }
751
752 /* setup ipv6 routes on the host interface */
753 if (setup_ipv6_routes(netdev)) {
754 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
755 goto out_delete;
756 }
757
758 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
759 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
760 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
761 appear to be added successfully but then do not appear in the proxy list. The length of time
762 slept doesn't appear to be important, only that the process sleeps for a short period of time.
763 */
764 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
765
766 if (netdev->ipv4_gateway) {
767 char bufinet4[INET_ADDRSTRLEN];
768 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
769 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
770 goto out_delete;
771 }
772
773 err = lxc_ip_forwarding_on(veth1, AF_INET);
774 if (err) {
775 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
776 goto out_delete;
777 }
778
779 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
780 if (err) {
781 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
782 goto out_delete;
783 }
784 }
785
786 if (netdev->ipv6_gateway) {
787 char bufinet6[INET6_ADDRSTRLEN];
788
789 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
790 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
791 goto out_delete;
792 }
793
794 /* Check for sysctl net.ipv6.conf.all.forwarding=1
795 Kernel requires this to route any packets for IPv6.
796 */
797 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
798 if (err) {
799 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
800 goto out_delete;
801 }
802
803 err = lxc_ip_forwarding_on(veth1, AF_INET6);
804 if (err) {
805 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
806 goto out_delete;
807 }
808
809 err = lxc_neigh_proxy_on(veth1, AF_INET6);
810 if (err) {
811 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
812 goto out_delete;
813 }
814
815 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
816 if (err) {
817 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
818 goto out_delete;
819 }
820 }
821
822 /* setup ipv4 address routes on the host interface */
823 err = setup_ipv4_addr_routes(netdev);
824 if (err) {
825 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
826 goto out_delete;
827 }
828
829 /* setup ipv6 address routes on the host interface */
830 err = setup_ipv6_addr_routes(netdev);
831 if (err) {
832 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
833 goto out_delete;
834 }
835 }
836
837 if (netdev->upscript) {
838 char *argv[] = {
839 "veth",
840 netdev->link,
841 veth1,
842 NULL,
843 };
844
845 err = run_script_argv(handler->name,
846 handler->conf->hooks_version, "net",
847 netdev->upscript, "up", argv);
848 if (err < 0)
849 goto out_delete;
850 }
851
852 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
853
854 return 0;
855
856 out_delete:
857 lxc_netdev_delete_by_name(veth1);
858 return -1;
859 }
860
861 static int netdev_configure_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
862 {
863 char peer[IFNAMSIZ];
864 int err;
865
866 if (is_empty_string(netdev->link)) {
867 ERROR("No link for macvlan network device specified");
868 return -1;
869 }
870
871 err = strnprintf(peer, sizeof(peer), "mcXXXXXX");
872 if (err < 0)
873 return -1;
874
875 if (!lxc_ifname_alnum_case_sensitive(peer))
876 return -1;
877
878 err = lxc_macvlan_create(netdev->link, peer,
879 netdev->priv.macvlan_attr.mode);
880 if (err) {
881 errno = -err;
882 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
883 peer, netdev->link);
884 goto on_error;
885 }
886
887 strlcpy(netdev->created_name, peer, IFNAMSIZ);
888
889 netdev->ifindex = if_nametoindex(peer);
890 if (!netdev->ifindex) {
891 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
892 goto on_error;
893 }
894
895 if (netdev->mtu) {
896 unsigned int mtu;
897
898 err = lxc_safe_uint(netdev->mtu, &mtu);
899 if (err < 0) {
900 errno = -err;
901 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
902 goto on_error;
903 }
904
905 err = lxc_netdev_set_mtu(peer, mtu);
906 if (err < 0) {
907 errno = -err;
908 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
909 goto on_error;
910 }
911 }
912
913 if (netdev->upscript) {
914 char *argv[] = {
915 "macvlan",
916 netdev->link,
917 NULL,
918 };
919
920 err = run_script_argv(handler->name,
921 handler->conf->hooks_version, "net",
922 netdev->upscript, "up", argv);
923 if (err < 0)
924 goto on_error;
925 }
926
927 DEBUG("Instantiated macvlan \"%s\" with ifindex %d and mode %d",
928 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
929
930 return 0;
931
932 on_error:
933 lxc_netdev_delete_by_name(peer);
934 return -1;
935 }
936
937 static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int isolation)
938 {
939 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
940 struct nl_handler nlh = NL_HANDLER_INIT;
941 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
942 int err, index, len;
943 struct ifinfomsg *ifi;
944 struct rtattr *nest, *nest2;
945
946 len = strlen(parent);
947 if (len == 1 || len >= IFNAMSIZ)
948 return ret_errno(EINVAL);
949
950 len = strlen(name);
951 if (len == 1 || len >= IFNAMSIZ)
952 return ret_errno(EINVAL);
953
954 index = if_nametoindex(parent);
955 if (!index)
956 return ret_errno(EINVAL);
957
958 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
959 if (err)
960 return err;
961
962 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
963 if (!nlmsg)
964 return ret_errno(ENOMEM);
965
966 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
967 if (!answer)
968 return ret_errno(ENOMEM);
969
970 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
971 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
972
973 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
974 if (!ifi)
975 return ret_errno(ENOMEM);
976 ifi->ifi_family = AF_UNSPEC;
977
978 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
979 if (!nest)
980 return ret_errno(EPROTO);
981
982 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
983 return ret_errno(EPROTO);
984
985 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
986 if (!nest2)
987 return ret_errno(EPROTO);
988
989 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
990 return ret_errno(EPROTO);
991
992 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
993 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
994 * according to ipvlan docs.
995 */
996 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
997 return ret_errno(EPROTO);
998
999 nla_end_nested(nlmsg, nest2);
1000 nla_end_nested(nlmsg, nest);
1001
1002 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1003 return ret_errno(EPROTO);
1004
1005 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1006 return ret_errno(EPROTO);
1007
1008 return netlink_transaction(nlh_ptr, nlmsg, answer);
1009 }
1010
1011 static int netdev_configure_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1012 {
1013 char peer[IFNAMSIZ];
1014 int err;
1015
1016 if (is_empty_string(netdev->link)) {
1017 ERROR("No link for ipvlan network device specified");
1018 return -1;
1019 }
1020
1021 err = strnprintf(peer, sizeof(peer), "ipXXXXXX");
1022 if (err < 0)
1023 return -1;
1024
1025 if (!lxc_ifname_alnum_case_sensitive(peer))
1026 return -1;
1027
1028 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
1029 netdev->priv.ipvlan_attr.isolation);
1030 if (err) {
1031 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
1032 peer, netdev->link);
1033 goto on_error;
1034 }
1035
1036 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1037
1038 netdev->ifindex = if_nametoindex(peer);
1039 if (!netdev->ifindex) {
1040 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
1041 goto on_error;
1042 }
1043
1044 if (netdev->mtu) {
1045 unsigned int mtu;
1046
1047 err = lxc_safe_uint(netdev->mtu, &mtu);
1048 if (err < 0) {
1049 errno = -err;
1050 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
1051 goto on_error;
1052 }
1053
1054 err = lxc_netdev_set_mtu(peer, mtu);
1055 if (err < 0) {
1056 errno = -err;
1057 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
1058 goto on_error;
1059 }
1060 }
1061
1062 if (netdev->upscript) {
1063 char *argv[] = {
1064 "ipvlan",
1065 netdev->link,
1066 NULL,
1067 };
1068
1069 err = run_script_argv(handler->name, handler->conf->hooks_version,
1070 "net", netdev->upscript, "up", argv);
1071 if (err < 0)
1072 goto on_error;
1073 }
1074
1075 DEBUG("Instantiated ipvlan \"%s\" with ifindex %d and mode %d", peer,
1076 netdev->ifindex, netdev->priv.macvlan_attr.mode);
1077
1078 return 0;
1079
1080 on_error:
1081 lxc_netdev_delete_by_name(peer);
1082 return -1;
1083 }
1084
1085 static int netdev_configure_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1086 {
1087 char peer[IFNAMSIZ];
1088 int err;
1089 static uint16_t vlan_cntr = 0;
1090
1091 if (is_empty_string(netdev->link)) {
1092 ERROR("No link for vlan network device specified");
1093 return -1;
1094 }
1095
1096 err = strnprintf(peer, sizeof(peer), "vlan%d-%d",
1097 netdev->priv.vlan_attr.vid, vlan_cntr++);
1098 if (err < 0)
1099 return -1;
1100
1101 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
1102 if (err) {
1103 errno = -err;
1104 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1105 peer, netdev->link);
1106 return -1;
1107 }
1108
1109 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1110
1111 netdev->ifindex = if_nametoindex(peer);
1112 if (!netdev->ifindex) {
1113 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
1114 goto on_error;
1115 }
1116
1117 if (netdev->mtu) {
1118 unsigned int mtu;
1119
1120 err = lxc_safe_uint(netdev->mtu, &mtu);
1121 if (err < 0) {
1122 errno = -err;
1123 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
1124 goto on_error;
1125 }
1126
1127 err = lxc_netdev_set_mtu(peer, mtu);
1128 if (err < 0) {
1129 errno = -err;
1130 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
1131 goto on_error;
1132 }
1133 }
1134
1135 if (netdev->upscript) {
1136 char *argv[] = {
1137 "vlan",
1138 netdev->link,
1139 NULL,
1140 };
1141
1142 err = run_script_argv(handler->name, handler->conf->hooks_version,
1143 "net", netdev->upscript, "up", argv);
1144 if (err < 0) {
1145 goto on_error;
1146 }
1147 }
1148
1149 DEBUG("Instantiated vlan \"%s\" with ifindex \"%d\"", peer,
1150 netdev->ifindex);
1151
1152 return 0;
1153
1154 on_error:
1155 lxc_netdev_delete_by_name(peer);
1156 return -1;
1157 }
1158
1159 static int netdev_configure_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1160 {
1161 int err, mtu_orig = 0;
1162
1163 if (is_empty_string(netdev->link))
1164 return log_error_errno(-1, errno, "No link for physical interface specified");
1165
1166 /*
1167 * Note that we're retrieving the container's ifindex in the host's
1168 * network namespace because we need it to move the device from the
1169 * host's network namespace to the container's network namespace later
1170 * on.
1171 * Note that netdev->link will contain the name of the physical network
1172 * device in the host's namespace.
1173 */
1174 netdev->ifindex = if_nametoindex(netdev->link);
1175 if (!netdev->ifindex)
1176 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
1177
1178 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
1179 if (is_empty_string(netdev->name))
1180 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
1181
1182 /*
1183 * Store the ifindex of the host's network device in the host's
1184 * namespace.
1185 */
1186 netdev->priv.phys_attr.ifindex = netdev->ifindex;
1187
1188 /*
1189 * Get original device MTU setting and store for restoration after
1190 * container shutdown.
1191 */
1192 mtu_orig = netdev_get_mtu(netdev->ifindex);
1193 if (mtu_orig < 0)
1194 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
1195
1196 netdev->priv.phys_attr.mtu = mtu_orig;
1197
1198 if (netdev->mtu) {
1199 unsigned int mtu;
1200
1201 err = lxc_safe_uint(netdev->mtu, &mtu);
1202 if (err < 0)
1203 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
1204
1205 err = lxc_netdev_set_mtu(netdev->link, mtu);
1206 if (err < 0)
1207 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
1208 }
1209
1210 if (netdev->upscript) {
1211 char *argv[] = {
1212 "phys",
1213 netdev->link,
1214 NULL,
1215 };
1216
1217 err = run_script_argv(handler->name, handler->conf->hooks_version,
1218 "net", netdev->upscript, "up", argv);
1219 if (err < 0)
1220 return -1;
1221 }
1222
1223 DEBUG("Instantiated phys \"%s\" with ifindex \"%d\"", netdev->link,
1224 netdev->ifindex);
1225
1226 return 0;
1227 }
1228
1229 static int netdev_configure_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1230 {
1231 int ret;
1232 char *argv[] = {
1233 "empty",
1234 NULL,
1235 };
1236
1237 /* The loopback device always has index 1. */
1238 netdev->ifindex = 1;
1239
1240 if (!strequal(netdev->name, "lo"))
1241 return syserror_set(-EINVAL, "Custom loopback device names not supported");
1242
1243 if (!netdev->upscript)
1244 return 0;
1245
1246 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1247 "net", netdev->upscript, "up", argv);
1248 if (ret < 0)
1249 return -1;
1250
1251 return 0;
1252 }
1253
1254 static int netdev_configure_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
1255 {
1256 netdev->ifindex = 0;
1257 return 0;
1258 }
1259
1260 static netdev_configure_server_cb netdev_configure_server[LXC_NET_MAXCONFTYPE + 1] = {
1261 [LXC_NET_VETH] = netdev_configure_server_veth,
1262 [LXC_NET_MACVLAN] = netdev_configure_server_macvlan,
1263 [LXC_NET_IPVLAN] = netdev_configure_server_ipvlan,
1264 [LXC_NET_VLAN] = netdev_configure_server_vlan,
1265 [LXC_NET_PHYS] = netdev_configure_server_phys,
1266 [LXC_NET_EMPTY] = netdev_configure_server_empty,
1267 [LXC_NET_NONE] = netdev_configure_server_none,
1268 };
1269
1270 static int __netdev_configure_container_common(struct lxc_netdev *netdev)
1271 {
1272 char current_ifname[IFNAMSIZ];
1273
1274 netdev->ifindex = if_nametoindex(netdev->transient_name);
1275 if (!netdev->ifindex)
1276 return log_error_errno(-1,
1277 errno, "Failed to retrieve ifindex for network device with name %s",
1278 netdev->transient_name);
1279
1280 if (is_empty_string(netdev->name))
1281 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1282
1283 if (!strequal(netdev->transient_name, netdev->name)) {
1284 int ret;
1285
1286 ret = lxc_netdev_rename_by_name(netdev->transient_name, netdev->name);
1287 if (ret)
1288 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
1289 netdev->transient_name, netdev->name);
1290
1291 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->transient_name, netdev->name);
1292 }
1293
1294 /*
1295 * Re-read the name of the interface because its name has changed and
1296 * would be automatically allocated by the system
1297 */
1298 if (!if_indextoname(netdev->ifindex, current_ifname))
1299 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
1300
1301 /*
1302 * Now update the recorded name of the network device to reflect the
1303 * name of the network device in the child's network namespace. We will
1304 * later on send this information back to the parent.
1305 */
1306 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
1307 netdev->transient_name[0] = '\0';
1308
1309 return 0;
1310 }
1311
/* Container-side setup for veth devices: only the common rename step. */
static int netdev_configure_container_veth(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1317
/* Container-side setup for macvlan devices: only the common rename step. */
static int netdev_configure_container_macvlan(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1322
/* Container-side setup for ipvlan devices: only the common rename step. */
static int netdev_configure_container_ipvlan(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1327
/* Container-side setup for vlan devices: only the common rename step. */
static int netdev_configure_container_vlan(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1332
/* Container-side setup for physical devices: only the common rename step. */
static int netdev_configure_container_phys(struct lxc_netdev *netdev)
{
	int ret = __netdev_configure_container_common(netdev);

	return ret;
}
1337
/* Container-side setup for "empty" networks: nothing to do, always 0. */
static int netdev_configure_container_empty(struct lxc_netdev *netdev)
{
	const int nothing_to_do = 0;

	return nothing_to_do;
}
1342
/* Container-side setup for network type "none": nothing to do, always 0. */
static int netdev_configure_container_none(struct lxc_netdev *netdev)
{
	const int nothing_to_do = 0;

	return nothing_to_do;
}
1347
1348 static netdev_configure_container_cb netdev_configure_container[LXC_NET_MAXCONFTYPE + 1] = {
1349 [LXC_NET_VETH] = netdev_configure_container_veth,
1350 [LXC_NET_MACVLAN] = netdev_configure_container_macvlan,
1351 [LXC_NET_IPVLAN] = netdev_configure_container_ipvlan,
1352 [LXC_NET_VLAN] = netdev_configure_container_vlan,
1353 [LXC_NET_PHYS] = netdev_configure_container_phys,
1354 [LXC_NET_EMPTY] = netdev_configure_container_empty,
1355 [LXC_NET_NONE] = netdev_configure_container_none,
1356 };
1357
1358 static int netdev_shutdown_server_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
1359 {
1360 int ret;
1361 char *argv[] = {
1362 "veth",
1363 netdev->link,
1364 NULL,
1365 NULL,
1366 };
1367
1368 if (!netdev->downscript)
1369 return 0;
1370
1371 if (!is_empty_string(netdev->priv.veth_attr.pair))
1372 argv[2] = netdev->priv.veth_attr.pair;
1373 else
1374 argv[2] = netdev->priv.veth_attr.veth1;
1375
1376 ret = run_script_argv(handler->name,
1377 handler->conf->hooks_version, "net",
1378 netdev->downscript, "down", argv);
1379 if (ret < 0)
1380 return -1;
1381
1382 return 0;
1383 }
1384
1385 static int netdev_shutdown_server_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1386 {
1387 int ret;
1388 char *argv[] = {
1389 "macvlan",
1390 netdev->link,
1391 NULL,
1392 };
1393
1394 if (!netdev->downscript)
1395 return 0;
1396
1397 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1398 "net", netdev->downscript, "down", argv);
1399 if (ret < 0)
1400 return -1;
1401
1402 return 0;
1403 }
1404
1405 static int netdev_shutdown_server_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1406 {
1407 int ret;
1408 char *argv[] = {
1409 "ipvlan",
1410 netdev->link,
1411 NULL,
1412 };
1413
1414 if (!netdev->downscript)
1415 return 0;
1416
1417 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1418 "net", netdev->downscript, "down", argv);
1419 if (ret < 0)
1420 return -1;
1421
1422 return 0;
1423 }
1424
1425 static int netdev_shutdown_server_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1426 {
1427 int ret;
1428 char *argv[] = {
1429 "vlan",
1430 netdev->link,
1431 NULL,
1432 };
1433
1434 if (!netdev->downscript)
1435 return 0;
1436
1437 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1438 "net", netdev->downscript, "down", argv);
1439 if (ret < 0)
1440 return -1;
1441
1442 return 0;
1443 }
1444
1445 static int netdev_shutdown_server_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1446 {
1447 int ret;
1448 char *argv[] = {
1449 "phys",
1450 netdev->link,
1451 NULL,
1452 };
1453
1454 if (!netdev->downscript)
1455 return 0;
1456
1457 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1458 "net", netdev->downscript, "down", argv);
1459 if (ret < 0)
1460 return -1;
1461
1462 return 0;
1463 }
1464
1465 static int netdev_shutdown_server_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1466 {
1467 int ret;
1468 char *argv[] = {
1469 "empty",
1470 NULL,
1471 };
1472
1473 if (!netdev->downscript)
1474 return 0;
1475
1476 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1477 "net", netdev->downscript, "down", argv);
1478 if (ret < 0)
1479 return -1;
1480
1481 return 0;
1482 }
1483
/* Shutdown handler for network type "none": nothing to do, always 0. */
static int netdev_shutdown_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	const int nothing_to_do = 0;

	return nothing_to_do;
}
1488
1489 static netdev_shutdown_server_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1490 [LXC_NET_VETH] = netdev_shutdown_server_veth,
1491 [LXC_NET_MACVLAN] = netdev_shutdown_server_macvlan,
1492 [LXC_NET_IPVLAN] = netdev_shutdown_server_ipvlan,
1493 [LXC_NET_VLAN] = netdev_shutdown_server_vlan,
1494 [LXC_NET_PHYS] = netdev_shutdown_server_phys,
1495 [LXC_NET_EMPTY] = netdev_shutdown_server_empty,
1496 [LXC_NET_NONE] = netdev_shutdown_server_none,
1497 };
1498
1499 static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1500 {
1501 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
1502 struct nl_handler nlh = NL_HANDLER_INIT;
1503 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1504 int err;
1505 struct ifinfomsg *ifi;
1506
1507 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1508 if (err)
1509 return err;
1510
1511 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1512 if (!nlmsg)
1513 return ret_errno(ENOMEM);
1514
1515 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1516 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1517
1518 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1519 if (!ifi)
1520 return ret_errno(ENOMEM);
1521
1522 ifi->ifi_family = AF_UNSPEC;
1523 ifi->ifi_index = ifindex;
1524
1525 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
1526 return ret_errno(ENOMEM);
1527
1528 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
1529 return ret_errno(ENOMEM);
1530
1531 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
1532 }
1533
1534 int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
1535 {
1536 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
1537 struct nl_handler nlh = NL_HANDLER_INIT;
1538 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1539 int err;
1540 struct ifinfomsg *ifi;
1541
1542 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1543 if (err)
1544 return err;
1545
1546 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1547 if (!nlmsg)
1548 return ret_errno(ENOMEM);
1549
1550 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1551 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1552
1553 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1554 if (!ifi)
1555 return ret_errno(ENOMEM);
1556
1557 ifi->ifi_family = AF_UNSPEC;
1558 ifi->ifi_index = ifindex;
1559
1560 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
1561 return ret_errno(ENOMEM);
1562
1563 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
1564 return ret_errno(ENOMEM);
1565
1566 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
1567 }
1568
1569 /* If we are asked to move a wireless interface, then we must actually move its
1570 * phyN device. Detect that condition and return the physname here. The physname
1571 * will be passed to lxc_netdev_move_wlan() which will free it when done.
1572 */
1573 #define PHYSNAME "/sys/class/net/%s/phy80211/name"
1574 char *is_wlan(const char *ifname)
1575 {
1576 __do_fclose FILE *f = NULL;
1577 __do_free char *path = NULL, *physname = NULL;
1578 int i, ret;
1579 long physlen;
1580 size_t len;
1581
1582 len = strlen(ifname) + strlen(PHYSNAME) - 1;
1583 path = must_realloc(NULL, len + 1);
1584 ret = strnprintf(path, len, PHYSNAME, ifname);
1585 if (ret < 0)
1586 return NULL;
1587
1588 f = fopen(path, "re");
1589 if (!f)
1590 return NULL;
1591
1592 /* Feh - sb.st_size is always 4096. */
1593 fseek(f, 0, SEEK_END);
1594 physlen = ftell(f);
1595 fseek(f, 0, SEEK_SET);
1596 if (physlen < 0)
1597 return NULL;
1598
1599 physname = malloc(physlen + 1);
1600 if (!physname)
1601 return NULL;
1602
1603 memset(physname, 0, physlen + 1);
1604 ret = fread(physname, 1, physlen, f);
1605 if (ret < 0)
1606 return NULL;
1607
1608 for (i = 0; i < physlen; i++) {
1609 if (physname[i] == '\n')
1610 physname[i] = '\0';
1611
1612 if (physname[i] == '\0')
1613 break;
1614 }
1615
1616 return move_ptr(physname);
1617 }
1618
/*
 * Rename network device @old to @new inside the network namespace of
 * process @pid.
 *
 * A child process is forked which switches into the target network
 * namespace and performs the rename there; the parent waits for it.
 *
 * Returns -1 when fork() fails, otherwise the result of wait_for_pid() on
 * the child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child process from here on. It must never return: returning would
	 * let the child continue in the caller's code path as a duplicate of
	 * the parent. Report failure through the exit status instead.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1636
1637 int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
1638 const char *newname)
1639 {
1640 __do_free char *cmd = NULL;
1641 pid_t fpid;
1642
1643 /* Move phyN into the container. TODO - do this using netlink.
1644 * However, IIUC this involves a bit more complicated work to talk to
1645 * the 80211 module, so for now just call out to iw.
1646 */
1647 cmd = on_path("iw", NULL);
1648 if (!cmd) {
1649 ERROR("Couldn't find the application iw in PATH");
1650 return -1;
1651 }
1652
1653 fpid = fork();
1654 if (fpid < 0)
1655 return -1;
1656
1657 if (fpid == 0) {
1658 char pidstr[30];
1659 sprintf(pidstr, "%d", pid);
1660 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
1661 _exit(EXIT_FAILURE);
1662 }
1663
1664 if (wait_for_pid(fpid))
1665 return -1;
1666
1667 if (newname)
1668 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
1669
1670 return 0;
1671 }
1672
1673 int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
1674 {
1675 __do_free char *physname = NULL;
1676 int index;
1677
1678 if (!ifname)
1679 return -EINVAL;
1680
1681 index = if_nametoindex(ifname);
1682 if (!index)
1683 return -EINVAL;
1684
1685 physname = is_wlan(ifname);
1686 if (physname)
1687 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1688
1689 return lxc_netdev_move_by_index(index, pid, newname);
1690 }
1691
1692 int lxc_netdev_delete_by_index(int ifindex)
1693 {
1694 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1695 struct nl_handler nlh = NL_HANDLER_INIT;
1696 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1697 int err;
1698 struct ifinfomsg *ifi;
1699
1700 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1701 if (err)
1702 return err;
1703
1704 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1705 if (!nlmsg)
1706 return ret_errno(ENOMEM);
1707
1708 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1709 if (!answer)
1710 return ret_errno(ENOMEM);
1711
1712 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1713 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1714
1715 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1716 if (!ifi)
1717 return ret_errno(ENOMEM);
1718
1719 ifi->ifi_family = AF_UNSPEC;
1720 ifi->ifi_index = ifindex;
1721
1722 return netlink_transaction(nlh_ptr, nlmsg, answer);
1723 }
1724
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL when no ifindex can be found for @name, otherwise the
 * result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1735
1736 int lxc_netdev_rename_by_index(int ifindex, const char *newname)
1737 {
1738 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1739 struct nl_handler nlh = NL_HANDLER_INIT;
1740 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1741 int err, len;
1742 struct ifinfomsg *ifi;
1743
1744 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1745 if (err)
1746 return err;
1747
1748 len = strlen(newname);
1749 if (len == 1 || len >= IFNAMSIZ)
1750 return ret_errno(EINVAL);
1751
1752 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1753 if (!nlmsg)
1754 return ret_errno(ENOMEM);
1755
1756 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1757 if (!answer)
1758 return ret_errno(ENOMEM);
1759
1760 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1761 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1762
1763 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1764 if (!ifi)
1765 return ret_errno(ENOMEM);
1766
1767 ifi->ifi_family = AF_UNSPEC;
1768 ifi->ifi_index = ifindex;
1769
1770 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1771 return ret_errno(ENOMEM);
1772
1773 return netlink_transaction(nlh_ptr, nlmsg, answer);
1774 }
1775
1776 int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1777 {
1778 int len, index;
1779
1780 len = strlen(oldname);
1781 if (len == 1 || len >= IFNAMSIZ)
1782 return -EINVAL;
1783
1784 index = if_nametoindex(oldname);
1785 if (!index)
1786 return -EINVAL;
1787
1788 return lxc_netdev_rename_by_index(index, newname);
1789 }
1790
1791 int netdev_set_flag(const char *name, int flag)
1792 {
1793 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1794 struct nl_handler nlh = NL_HANDLER_INIT;
1795 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1796 int err, index, len;
1797 struct ifinfomsg *ifi;
1798
1799 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1800 if (err)
1801 return err;
1802
1803 len = strlen(name);
1804 if (len == 1 || len >= IFNAMSIZ)
1805 return ret_errno(EINVAL);
1806
1807 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1808 if (!nlmsg)
1809 return ret_errno(ENOMEM);
1810
1811 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1812 if (!answer)
1813 return ret_errno(ENOMEM);
1814
1815 index = if_nametoindex(name);
1816 if (!index)
1817 return ret_errno(EINVAL);
1818
1819 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1820 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1821
1822 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1823 if (!ifi)
1824 return ret_errno(ENOMEM);
1825
1826 ifi->ifi_family = AF_UNSPEC;
1827 ifi->ifi_index = index;
1828 ifi->ifi_change |= IFF_UP;
1829 ifi->ifi_flags |= flag;
1830
1831 return netlink_transaction(nlh_ptr, nlmsg, answer);
1832 }
1833
1834 static int netdev_get_flag(const char *name, int *flag)
1835 {
1836 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1837 struct nl_handler nlh = NL_HANDLER_INIT;
1838 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1839 int err, index, len;
1840 struct ifinfomsg *ifi;
1841
1842 if (!name)
1843 return ret_errno(EINVAL);
1844
1845 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1846 if (err)
1847 return err;
1848
1849 len = strlen(name);
1850 if (len == 1 || len >= IFNAMSIZ)
1851 return ret_errno(EINVAL);
1852
1853 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1854 if (!nlmsg)
1855 return ret_errno(ENOMEM);
1856
1857 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1858 if (!answer)
1859 return ret_errno(ENOMEM);
1860
1861 index = if_nametoindex(name);
1862 if (!index)
1863 return ret_errno(EINVAL);
1864
1865 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1866 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1867
1868 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1869 if (!ifi)
1870 return ret_errno(ENOMEM);
1871
1872 ifi->ifi_family = AF_UNSPEC;
1873 ifi->ifi_index = index;
1874
1875 err = netlink_transaction(nlh_ptr, nlmsg, answer);
1876 if (err)
1877 return ret_set_errno(-1, errno);
1878
1879 ifi = NLMSG_DATA(answer->nlmsghdr);
1880
1881 *flag = ifi->ifi_flags;
1882 return err;
1883 }
1884
1885 /*
1886 * \brief Check a interface is up or not.
1887 *
1888 * \param name: name for the interface.
1889 *
1890 * \return int.
1891 * 0 means interface is down.
1892 * 1 means interface is up.
1893 * Others means error happened, and ret-value is the error number.
1894 */
1895 int lxc_netdev_isup(const char *name)
1896 {
1897 int err;
1898 int flag = 0;
1899
1900 err = netdev_get_flag(name, &flag);
1901 if (err)
1902 return err;
1903
1904 if (flag & IFF_UP)
1905 return 1;
1906
1907 return 0;
1908 }
1909
1910 int netdev_get_mtu(int ifindex)
1911 {
1912 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1913 struct nl_handler nlh = NL_HANDLER_INIT;
1914 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1915 int readmore = 0;
1916 __u32 recv_len = 0;
1917 int answer_len, err, res;
1918 struct ifinfomsg *ifi;
1919 struct nlmsghdr *msg;
1920
1921 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1922 if (err)
1923 return err;
1924
1925 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1926 if (!nlmsg)
1927 return ret_errno(ENOMEM);
1928
1929 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1930 if (!answer)
1931 return ret_errno(ENOMEM);
1932
1933 /* Save the answer buffer length, since it will be overwritten
1934 * on the first receive (and we might need to receive more than
1935 * once.
1936 */
1937 answer_len = answer->nlmsghdr->nlmsg_len;
1938
1939 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
1940 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1941
1942 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1943 if (!ifi)
1944 return ret_errno(ENOMEM);
1945
1946 ifi->ifi_family = AF_UNSPEC;
1947
1948 /* Send the request for addresses, which returns all addresses
1949 * on all interfaces. */
1950 err = netlink_send(nlh_ptr, nlmsg);
1951 if (err < 0)
1952 return ret_set_errno(-1, errno);
1953
1954 #pragma GCC diagnostic push
1955 #pragma GCC diagnostic ignored "-Wcast-align"
1956
1957 do {
1958 /* Restore the answer buffer length, it might have been
1959 * overwritten by a previous receive.
1960 */
1961 answer->nlmsghdr->nlmsg_len = answer_len;
1962
1963 /* Get the (next) batch of reply messages */
1964 err = netlink_rcv(nlh_ptr, answer);
1965 if (err < 0)
1966 return ret_set_errno(-1, errno);
1967
1968 recv_len = err;
1969
1970 /* Satisfy the typing for the netlink macros */
1971 msg = answer->nlmsghdr;
1972
1973 while (NLMSG_OK(msg, recv_len)) {
1974 /* Stop reading if we see an error message */
1975 if (msg->nlmsg_type == NLMSG_ERROR) {
1976 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1977 return ret_set_errno(errmsg->error, errno);
1978 }
1979
1980 /* Stop reading if we see a NLMSG_DONE message */
1981 if (msg->nlmsg_type == NLMSG_DONE) {
1982 readmore = 0;
1983 break;
1984 }
1985
1986 ifi = NLMSG_DATA(msg);
1987 if (ifi->ifi_index == ifindex) {
1988 struct rtattr *rta = IFLA_RTA(ifi);
1989 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1990
1991 res = 0;
1992 while (RTA_OK(rta, attr_len)) {
1993 /*
1994 * Found a local address for the
1995 * requested interface, return it.
1996 */
1997 if (rta->rta_type == IFLA_MTU) {
1998 memcpy(&res, RTA_DATA(rta), sizeof(int));
1999 return res;
2000 }
2001
2002 rta = RTA_NEXT(rta, attr_len);
2003 }
2004 }
2005
2006 /* Keep reading more data from the socket if the last
2007 * message had the NLF_F_MULTI flag set.
2008 */
2009 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2010
2011 /* Look at the next message received in this buffer. */
2012 msg = NLMSG_NEXT(msg, recv_len);
2013 }
2014 } while (readmore);
2015
2016 #pragma GCC diagnostic pop
2017
2018 /* If we end up here, we didn't find any result, so signal an error. */
2019 return -1;
2020 }
2021
2022 int lxc_netdev_set_mtu(const char *name, int mtu)
2023 {
2024 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2025 struct nl_handler nlh = NL_HANDLER_INIT;
2026 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2027 int err, len;
2028 struct ifinfomsg *ifi;
2029
2030 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2031 if (err)
2032 return err;
2033
2034 len = strlen(name);
2035 if (len == 1 || len >= IFNAMSIZ)
2036 return ret_errno(EINVAL);
2037
2038 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2039 if (!nlmsg)
2040 return ret_errno(ENOMEM);
2041
2042 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2043 if (!answer)
2044 return ret_errno(ENOMEM);
2045
2046 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
2047 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2048
2049 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
2050 if (!ifi)
2051 return ret_errno(ENOMEM);
2052
2053 ifi->ifi_family = AF_UNSPEC;
2054
2055 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
2056 return ret_errno(ENOMEM);
2057
2058 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
2059 return ret_errno(ENOMEM);
2060
2061 return netlink_transaction(nlh_ptr, nlmsg, answer);
2062 }
2063
2064 int lxc_netdev_up(const char *name)
2065 {
2066 return netdev_set_flag(name, IFF_UP);
2067 }
2068
/* Bring device @name down: netdev_set_flag() with no flag bits set. */
int lxc_netdev_down(const char *name)
{
	const int flag = 0;

	return netdev_set_flag(name, flag);
}
2073
2074 int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu,
2075 int n_rxqueues, int n_txqueues)
2076 {
2077 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2078 struct nl_handler nlh = NL_HANDLER_INIT;
2079 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2080 int err, len;
2081 struct ifinfomsg *ifi;
2082 struct rtattr *nest1, *nest2, *nest3;
2083
2084 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2085 if (err)
2086 return err;
2087
2088 len = strlen(name1);
2089 if (len == 1 || len >= IFNAMSIZ)
2090 return ret_errno(EINVAL);
2091
2092 len = strlen(name2);
2093 if (len == 1 || len >= IFNAMSIZ)
2094 return ret_errno(EINVAL);
2095
2096 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2097 if (!nlmsg)
2098 return ret_errno(ENOMEM);
2099
2100 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2101 if (!answer)
2102 return ret_errno(ENOMEM);
2103
2104 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
2105 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2106
2107 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
2108 if (!ifi)
2109 return ret_errno(ENOMEM);
2110
2111 ifi->ifi_family = AF_UNSPEC;
2112
2113 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
2114 if (!nest1)
2115 return ret_errno(EINVAL);
2116
2117 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
2118 return ret_errno(ENOMEM);
2119
2120 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2121 if (!nest2)
2122 return ret_errno(ENOMEM);
2123
2124 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
2125 if (!nest3)
2126 return ret_errno(ENOMEM);
2127
2128 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
2129 if (!ifi)
2130 return ret_errno(ENOMEM);
2131
2132 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
2133 return ret_errno(ENOMEM);
2134
2135 if (n_rxqueues > 0 && nla_put_u32(nlmsg, IFLA_NUM_RX_QUEUES, (unsigned int)n_rxqueues))
2136 return ret_errno(ENOMEM);
2137
2138 if (n_txqueues > 0 && nla_put_u32(nlmsg, IFLA_NUM_TX_QUEUES, (unsigned int)n_txqueues))
2139 return ret_errno(ENOMEM);
2140
2141 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
2142 return ret_errno(ENOMEM);
2143
2144 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
2145 return ret_errno(ENOMEM);
2146
2147 nla_end_nested(nlmsg, nest3);
2148 nla_end_nested(nlmsg, nest2);
2149 nla_end_nested(nlmsg, nest1);
2150
2151 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
2152 return ret_errno(ENOMEM);
2153
2154 if (n_txqueues > 0 && nla_put_u32(nlmsg, IFLA_NUM_RX_QUEUES, (unsigned int)n_txqueues))
2155 return ret_errno(ENOMEM);
2156
2157 if (n_rxqueues > 0 && nla_put_u32(nlmsg, IFLA_NUM_TX_QUEUES, (unsigned int)n_rxqueues))
2158 return ret_errno(ENOMEM);
2159
2160 return netlink_transaction(nlh_ptr, nlmsg, answer);
2161 }
2162
2163 /* TODO: merge with lxc_macvlan_create */
2164 int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid)
2165 {
2166 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2167 struct nl_handler nlh = NL_HANDLER_INIT;
2168 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2169 int err, len, lindex;
2170 struct ifinfomsg *ifi;
2171 struct rtattr *nest, *nest2;
2172
2173 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2174 if (err)
2175 return err;
2176
2177 len = strlen(parent);
2178 if (len == 1 || len >= IFNAMSIZ)
2179 return ret_errno(EINVAL);
2180
2181 len = strlen(name);
2182 if (len == 1 || len >= IFNAMSIZ)
2183 return ret_errno(EINVAL);
2184
2185 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2186 if (!nlmsg)
2187 return ret_errno(ENOMEM);
2188
2189 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2190 if (!answer)
2191 return ret_errno(ENOMEM);
2192
2193 lindex = if_nametoindex(parent);
2194 if (!lindex)
2195 return ret_errno(EINVAL);
2196
2197 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
2198 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2199
2200 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
2201 if (!ifi)
2202 return ret_errno(ENOMEM);
2203
2204 ifi->ifi_family = AF_UNSPEC;
2205
2206 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
2207 if (!nest)
2208 return ret_errno(ENOMEM);
2209
2210 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
2211 return ret_errno(ENOMEM);
2212
2213 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2214 if (!nest2)
2215 return ret_errno(ENOMEM);
2216
2217 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
2218 return ret_errno(ENOMEM);
2219
2220 nla_end_nested(nlmsg, nest2);
2221 nla_end_nested(nlmsg, nest);
2222
2223 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
2224 return ret_errno(ENOMEM);
2225
2226 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
2227 return ret_errno(ENOMEM);
2228
2229 return netlink_transaction(nlh_ptr, nlmsg, answer);
2230 }
2231
2232 int lxc_macvlan_create(const char *parent, const char *name, int mode)
2233 {
2234 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2235 struct nl_handler nlh = NL_HANDLER_INIT;
2236 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2237 int err, index, len;
2238 struct ifinfomsg *ifi;
2239 struct rtattr *nest, *nest2;
2240
2241 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2242 if (err)
2243 return err;
2244
2245 len = strlen(parent);
2246 if (len == 1 || len >= IFNAMSIZ)
2247 return ret_errno(EINVAL);
2248
2249 len = strlen(name);
2250 if (len == 1 || len >= IFNAMSIZ)
2251 return ret_errno(EINVAL);
2252
2253 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2254 if (!nlmsg)
2255 return ret_errno(ENOMEM);
2256
2257 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2258 if (!answer)
2259 return ret_errno(ENOMEM);
2260
2261 index = if_nametoindex(parent);
2262 if (!index)
2263 return ret_errno(EINVAL);
2264
2265 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
2266 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2267
2268 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
2269 if (!ifi)
2270 return ret_errno(ENOMEM);
2271
2272 ifi->ifi_family = AF_UNSPEC;
2273
2274 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
2275 if (!nest)
2276 return ret_errno(ENOMEM);
2277
2278 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
2279 return ret_errno(ENOMEM);
2280
2281 if (mode) {
2282 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2283 if (!nest2)
2284 return ret_errno(ENOMEM);
2285
2286 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
2287 return ret_errno(ENOMEM);
2288
2289 nla_end_nested(nlmsg, nest2);
2290 }
2291
2292 nla_end_nested(nlmsg, nest);
2293
2294 if (nla_put_u32(nlmsg, IFLA_LINK, index))
2295 return ret_errno(ENOMEM);
2296
2297 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
2298 return ret_errno(ENOMEM);
2299
2300 return netlink_transaction(nlh_ptr, nlmsg, answer);
2301 }
2302
2303 static int proc_sys_net_write(const char *path, const char *value)
2304 {
2305 int fd;
2306 int err = 0;
2307
2308 fd = open(path, O_WRONLY);
2309 if (fd < 0)
2310 return -errno;
2311
2312 if (lxc_write_nointr(fd, value, strlen(value)) < 0)
2313 err = -errno;
2314
2315 close(fd);
2316 return err;
2317 }
2318
2319 static int ip_forwarding_set(const char *ifname, int family, int flag)
2320 {
2321 int ret;
2322 char path[PATH_MAX];
2323
2324 if (family != AF_INET && family != AF_INET6)
2325 return -EINVAL;
2326
2327 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2328 family == AF_INET ? "ipv4" : "ipv6", ifname,
2329 "forwarding");
2330 if (ret < 0)
2331 return -E2BIG;
2332
2333 return proc_sys_net_write(path, flag ? "1" : "0");
2334 }
2335
/* Call ip_forwarding_set() for @name and @family with the flag set to 1. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
2340
/* Call ip_forwarding_set() for @name and @family with the flag set to 0. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int disable = 0;

	return ip_forwarding_set(name, family, disable);
}
2345
2346 static int neigh_proxy_set(const char *ifname, int family, int flag)
2347 {
2348 int ret;
2349 char path[PATH_MAX];
2350
2351 if (family != AF_INET && family != AF_INET6)
2352 return -EINVAL;
2353
2354 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2355 family == AF_INET ? "ipv4" : "ipv6", ifname,
2356 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2357 if (ret < 0)
2358 return -E2BIG;
2359
2360 return proc_sys_net_write(path, flag ? "1" : "0");
2361 }
2362
2363 static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2364 {
2365 int ret;
2366 char path[PATH_MAX];
2367 char buf[1] = "";
2368
2369 if (family != AF_INET && family != AF_INET6)
2370 return ret_set_errno(-1, EINVAL);
2371
2372 ret = strnprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2373 family == AF_INET ? "ipv4" : "ipv6", ifname,
2374 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2375 if (ret < 0)
2376 return ret_set_errno(-1, E2BIG);
2377
2378 return lxc_read_file_expect(path, buf, 1, "1");
2379 }
2380
/* Call neigh_proxy_set() for @name and @family with the flag set to 1. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2385
/* Call neigh_proxy_set() for @name and @family with the flag set to 0. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int disable = 0;

	return neigh_proxy_set(name, family, disable);
}
2390
/*
 * Decode one hexadecimal digit. Returns its value (0-15) or -1 when @c is not
 * a hexadecimal digit.
 */
static int mac_hex_digit(char c)
{
	/*
	 * <ctype.h> functions require a value representable as unsigned char
	 * (or EOF); passing a negative plain char is undefined behaviour.
	 */
	unsigned char uc = (unsigned char)c;

	if (isdigit(uc))
		return uc - '0';

	if (uc >= 'a' && uc <= 'f')
		return uc - 'a' + 10;

	if (uc >= 'A' && uc <= 'F')
		return uc - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are written to
 * sa_data. Each byte is given as one or two hexadecimal digits; bytes are
 * separated by ':'. Parsing stops at the end of the string or after ETH_ALEN
 * bytes, whichever comes first, so a shorter string fills fewer bytes.
 *
 * Returns 0 on success and -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	int i = 0;
	unsigned char *data;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		int hi, lo;
		unsigned val;
		char c;

		/* The first character of a byte must be a hex digit. */
		hi = mac_hex_digit(*macaddr++);
		if (hi < 0)
			return -EINVAL;

		/*
		 * The second character is either another hex digit or ends
		 * the byte (':' or end of string), in which case the byte
		 * consists of the single digit read above.
		 */
		c = *macaddr;
		lo = mac_hex_digit(c);
		if (lo >= 0)
			val = ((unsigned)hi << 4) | (unsigned)lo;
		else if (c == ':' || c == '\0')
			val = (unsigned)hi;
		else
			return -EINVAL;

		/* Consume the second character unless it is the terminator. */
		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator that follows a two digit byte. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2435
2436 static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2437 void *acast, int prefix)
2438 {
2439 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2440 struct nl_handler nlh = NL_HANDLER_INIT;
2441 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2442 int addrlen, err;
2443 struct ifaddrmsg *ifa;
2444
2445 addrlen = family == AF_INET ? sizeof(struct in_addr)
2446 : sizeof(struct in6_addr);
2447
2448 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2449 if (err)
2450 return err;
2451
2452 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2453 if (!nlmsg)
2454 return ret_errno(ENOMEM);
2455
2456 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2457 if (!answer)
2458 return ret_errno(ENOMEM);
2459
2460 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2461 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2462
2463 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
2464 if (!ifa)
2465 return ret_errno(ENOMEM);
2466
2467 ifa->ifa_prefixlen = prefix;
2468 ifa->ifa_index = ifindex;
2469 ifa->ifa_family = family;
2470 ifa->ifa_scope = 0;
2471
2472 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
2473 return ret_errno(EINVAL);
2474
2475 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
2476 return ret_errno(EINVAL);
2477
2478 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
2479 return ret_errno(EINVAL);
2480
2481 /* TODO: multicast, anycast with ipv6 */
2482 if (family == AF_INET6 &&
2483 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2484 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
2485 return ret_errno(EPROTONOSUPPORT);
2486
2487 return netlink_transaction(nlh_ptr, nlmsg, answer);
2488 }
2489
/*
 * IPv6 front end for ip_addr_add(): @mcast is passed in the broadcast slot
 * and @acast in the anycast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
2496
/* IPv4 front end for ip_addr_add(); no anycast address is passed (NULL). */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
2502
/* Find an IFA_LOCAL (or IFA_ADDRESS if no IFA_LOCAL is present) address from
 * the given RTM_NEWADDR message. Allocates memory for the address and stores
 * that pointer in *res (so res should be an in_addr** or in6_addr**).
 */
2507 #pragma GCC diagnostic push
2508 #pragma GCC diagnostic ignored "-Wcast-align"
2509
/*
 * Extract an address of the given @family from the address message @msg.
 *
 * Walks the attributes that follow the struct ifaddrmsg header. The payload
 * of an IFA_ADDRESS or IFA_LOCAL attribute is copied into *res, allocating
 * *res with malloc() if it is still NULL. The walk stops at the first
 * IFA_LOCAL; an IFA_ADDRESS seen earlier is overwritten by it.
 *
 * Returns 0 when the message was processed (including when its family does
 * not match or no address attribute was found, in which case *res is left
 * untouched) and -1 when an attribute has an unexpected payload size or the
 * allocation fails.
 */
static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *hdr = NLMSG_DATA(msg);
	struct rtattr *attr;
	int remaining;
	size_t want;

	if (hdr->ifa_family != family)
		return 0;

	want = (family == AF_INET) ? sizeof(struct in_addr)
				   : sizeof(struct in6_addr);
	remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));

	for (attr = IFA_RTA(hdr); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * payload size; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(attr) != want)
			return -1;

		/* Reuse the buffer if an earlier attribute allocated it. */
		if (*res == NULL) {
			*res = malloc(want);
			if (*res == NULL)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), want);

		/* IFA_LOCAL takes precedence, nothing more to look for. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}
2554
2555 #pragma GCC diagnostic pop
2556
2557 static int ip_addr_get(int family, int ifindex, void **res)
2558 {
2559 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2560 struct nl_handler nlh = NL_HANDLER_INIT;
2561 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2562 int answer_len, err;
2563 struct ifaddrmsg *ifa;
2564 struct nlmsghdr *msg;
2565 int readmore = 0;
2566 __u32 recv_len = 0;
2567
2568 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2569 if (err)
2570 return err;
2571
2572 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2573 if (!nlmsg)
2574 return ret_errno(ENOMEM);
2575
2576 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2577 if (!answer)
2578 return ret_errno(ENOMEM);
2579
2580 /* Save the answer buffer length, since it will be overwritten on the
2581 * first receive (and we might need to receive more than once).
2582 */
2583 answer_len = answer->nlmsghdr->nlmsg_len;
2584
2585 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
2586 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
2587
2588 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
2589 if (!ifa)
2590 return ret_errno(ENOMEM);
2591
2592 ifa->ifa_family = family;
2593
2594 /* Send the request for addresses, which returns all addresses on all
2595 * interfaces.
2596 */
2597 err = netlink_send(nlh_ptr, nlmsg);
2598 if (err < 0)
2599 return ret_set_errno(err, errno);
2600
2601 #pragma GCC diagnostic push
2602 #pragma GCC diagnostic ignored "-Wcast-align"
2603
2604 do {
2605 /* Restore the answer buffer length, it might have been
2606 * overwritten by a previous receive.
2607 */
2608 answer->nlmsghdr->nlmsg_len = answer_len;
2609
2610 /* Get the (next) batch of reply messages. */
2611 err = netlink_rcv(nlh_ptr, answer);
2612 if (err < 0)
2613 return ret_set_errno(err, errno);
2614
2615 recv_len = err;
2616 err = 0;
2617
2618 /* Satisfy the typing for the netlink macros. */
2619 msg = answer->nlmsghdr;
2620
2621 while (NLMSG_OK(msg, recv_len)) {
2622 /* Stop reading if we see an error message. */
2623 if (msg->nlmsg_type == NLMSG_ERROR) {
2624 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2625 return ret_set_errno(errmsg->error, errno);
2626 }
2627
2628 /* Stop reading if we see a NLMSG_DONE message. */
2629 if (msg->nlmsg_type == NLMSG_DONE) {
2630 readmore = 0;
2631 break;
2632 }
2633
2634 if (msg->nlmsg_type != RTM_NEWADDR)
2635 return ret_errno(EINVAL);
2636
2637 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2638 if (ifa->ifa_index == (__u32)ifindex) {
2639 if (ifa_get_local_ip(family, msg, res) < 0)
2640 return ret_errno(EINVAL);
2641
2642 /* Found a result, stop searching. */
2643 if (*res)
2644 return 0;
2645 }
2646
2647 /* Keep reading more data from the socket if the last
2648 * message had the NLF_F_MULTI flag set.
2649 */
2650 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2651
2652 /* Look at the next message received in this buffer. */
2653 msg = NLMSG_NEXT(msg, recv_len);
2654 }
2655 } while (readmore);
2656
2657 #pragma GCC diagnostic pop
2658
2659 /* If we end up here, we didn't find any result, so signal an
2660 * error.
2661 */
2662 return -1;
2663 }
2664
/* IPv6 front end for ip_addr_get(); the result pointer is stored in *res. */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2669
/* IPv4 front end for ip_addr_get(); the result pointer is stored in *res. */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2674
2675 static int ip_gateway_add(int family, int ifindex, void *gw)
2676 {
2677 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2678 struct nl_handler nlh = NL_HANDLER_INIT;
2679 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2680 int addrlen, err;
2681 struct rtmsg *rt;
2682
2683 addrlen = family == AF_INET ? sizeof(struct in_addr)
2684 : sizeof(struct in6_addr);
2685
2686 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2687 if (err)
2688 return err;
2689
2690 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2691 if (!nlmsg)
2692 return ret_errno(ENOMEM);
2693
2694 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2695 if (!answer)
2696 return ret_errno(ENOMEM);
2697
2698 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2699 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2700
2701 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
2702 if (!rt)
2703 return ret_errno(ENOMEM);
2704
2705 rt->rtm_family = family;
2706 rt->rtm_table = RT_TABLE_MAIN;
2707 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2708 rt->rtm_protocol = RTPROT_BOOT;
2709 rt->rtm_type = RTN_UNICAST;
2710 /* "default" destination */
2711 rt->rtm_dst_len = 0;
2712
2713 /* If gateway address not supplied, then a device route will be created instead */
2714 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2715 return ret_errno(ENOMEM);
2716
2717 /* Adding the interface index enables the use of link-local
2718 * addresses for the gateway.
2719 */
2720 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2721 return ret_errno(EINVAL);
2722
2723 return netlink_transaction(nlh_ptr, nlmsg, answer);
2724 }
2725
/* IPv4 front end for ip_gateway_add(). */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2730
/* IPv6 front end for ip_gateway_add(). */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
2735 bool is_ovs_bridge(const char *bridge)
2736 {
2737 int ret;
2738 struct stat sb;
2739 char brdirname[22 + IFNAMSIZ + 1] = {0};
2740
2741 ret = strnprintf(brdirname, 22 + IFNAMSIZ + 1,
2742 "/sys/class/net/%s/bridge", bridge);
2743 if (ret < 0)
2744 return false;
2745
2746 ret = stat(brdirname, &sb);
2747 if (ret < 0 && errno == ENOENT)
2748 return true;
2749
2750 return false;
2751 }
2752
/* Arguments handed to the ovs-vsctl exec callbacks through a void pointer. */
struct ovs_veth_args {
	/* Bridge name, passed to ovs-vsctl as the first add-port/del-port argument. */
	const char *bridge;
	/* Port (network device) name, passed to ovs-vsctl as the second argument. */
	const char *nic;
};
2757
2758 /* Called from a background thread - when nic goes away, remove it from the
2759 * bridge.
2760 */
2761 static int lxc_ovs_delete_port_exec(void *data)
2762 {
2763 struct ovs_veth_args *args = data;
2764
2765 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
2766 return -1;
2767 }
2768
2769 int lxc_ovs_delete_port(const char *bridge, const char *nic)
2770 {
2771 int ret;
2772 char cmd_output[PATH_MAX];
2773 struct ovs_veth_args args;
2774
2775 args.bridge = bridge;
2776 args.nic = nic;
2777 ret = run_command(cmd_output, sizeof(cmd_output),
2778 lxc_ovs_delete_port_exec, (void *)&args);
2779 if (ret < 0)
2780 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
2781
2782 return 0;
2783 }
2784
2785 static int lxc_ovs_attach_bridge_exec(void *data)
2786 {
2787 struct ovs_veth_args *args = data;
2788
2789 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
2790 return -1;
2791 }
2792
2793 static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2794 {
2795 int ret;
2796 char cmd_output[PATH_MAX];
2797 struct ovs_veth_args args;
2798
2799 args.bridge = bridge;
2800 args.nic = nic;
2801 ret = run_command(cmd_output, sizeof(cmd_output),
2802 lxc_ovs_attach_bridge_exec, (void *)&args);
2803 if (ret < 0)
2804 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
2805
2806 return 0;
2807 }
2808
2809 int lxc_bridge_attach(const char *bridge, const char *ifname)
2810 {
2811 int err, fd, index;
2812 size_t retlen;
2813 struct ifreq ifr;
2814
2815 if (strlen(ifname) >= IFNAMSIZ)
2816 return -EINVAL;
2817
2818 index = if_nametoindex(ifname);
2819 if (!index)
2820 return -EINVAL;
2821
2822 if (is_ovs_bridge(bridge))
2823 return lxc_ovs_attach_bridge(bridge, ifname);
2824
2825 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
2826 if (fd < 0)
2827 return -errno;
2828
2829 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
2830 if (retlen >= IFNAMSIZ) {
2831 close(fd);
2832 return -E2BIG;
2833 }
2834
2835 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2836 ifr.ifr_ifindex = index;
2837 err = ioctl(fd, SIOCBRADDIF, &ifr);
2838 close(fd);
2839 if (err)
2840 err = -errno;
2841
2842 return err;
2843 }
2844
2845 int setup_private_host_hw_addr(char *veth1)
2846 {
2847 __do_close int sockfd = -EBADF;
2848 int err;
2849 struct ifreq ifr;
2850
2851 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
2852 if (sockfd < 0)
2853 return -errno;
2854
2855 err = strnprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2856 if (err < 0)
2857 return err;
2858
2859 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2860 if (err < 0)
2861 return -errno;
2862
2863 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2864 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
2865 if (err < 0)
2866 return -errno;
2867
2868 return 0;
2869 }
2870
2871 int lxc_find_gateway_addresses(struct lxc_handler *handler)
2872 {
2873 struct lxc_netdev *netdev;
2874 int link_index;
2875
2876 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
2877 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2878 continue;
2879
2880 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2881 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
2882
2883 if (is_empty_string(netdev->link))
2884 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
2885
2886 link_index = if_nametoindex(netdev->link);
2887 if (!link_index)
2888 return -EINVAL;
2889
2890 if (netdev->ipv4_gateway_auto) {
2891 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2892 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
2893 }
2894
2895 if (netdev->ipv6_gateway_auto) {
2896 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2897 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
2898 }
2899 }
2900
2901 return 0;
2902 }
2903
2904 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2905 static int lxc_create_network_unpriv_exec(const char *lxcpath,
2906 const char *lxcname,
2907 struct lxc_netdev *netdev, pid_t pid,
2908 unsigned int hooks_version)
2909 {
2910 int ret;
2911 pid_t child;
2912 int bytes, pipefd[2];
2913 char *token, *saveptr = NULL;
2914 char netdev_link[IFNAMSIZ];
2915 char buffer[PATH_MAX] = {0};
2916 size_t retlen;
2917
2918 if (netdev->type != LXC_NET_VETH)
2919 return log_error_errno(-1, errno,
2920 "Network type %d not support for unprivileged use",
2921 netdev->type);
2922
2923 ret = pipe(pipefd);
2924 if (ret < 0)
2925 return log_error_errno(-1, errno, "Failed to create pipe");
2926
2927 child = fork();
2928 if (child < 0) {
2929 close(pipefd[0]);
2930 close(pipefd[1]);
2931 return log_error_errno(-1, errno, "Failed to create new process");
2932 }
2933
2934 if (child == 0) {
2935 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
2936
2937 close(pipefd[0]);
2938
2939 ret = dup2(pipefd[1], STDOUT_FILENO);
2940 if (ret >= 0)
2941 ret = dup2(pipefd[1], STDERR_FILENO);
2942 close(pipefd[1]);
2943 if (ret < 0) {
2944 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2945 _exit(EXIT_FAILURE);
2946 }
2947
2948 if (!is_empty_string(netdev->link))
2949 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
2950 else
2951 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2952 if (retlen >= IFNAMSIZ) {
2953 SYSERROR("Invalid network device name");
2954 _exit(EXIT_FAILURE);
2955 }
2956
2957 ret = strnprintf(pidstr, sizeof(pidstr), "%d", pid);
2958 if (ret < 0)
2959 _exit(EXIT_FAILURE);
2960 pidstr[sizeof(pidstr) - 1] = '\0';
2961
2962 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2963 lxcname, pidstr, netdev_link, !is_empty_string(netdev->name) ? netdev->name : "(null)");
2964 if (!is_empty_string(netdev->name))
2965 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2966 lxcpath, lxcname, pidstr, "veth", netdev_link,
2967 netdev->name, (char *)NULL);
2968 else
2969 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2970 lxcpath, lxcname, pidstr, "veth", netdev_link,
2971 (char *)NULL);
2972 SYSERROR("Failed to execute lxc-user-nic");
2973 _exit(EXIT_FAILURE);
2974 }
2975
2976 /* close the write-end of the pipe */
2977 close(pipefd[1]);
2978
2979 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
2980 if (bytes < 0) {
2981 SYSERROR("Failed to read from pipe file descriptor");
2982 close(pipefd[0]);
2983 } else {
2984 buffer[bytes - 1] = '\0';
2985 }
2986
2987 ret = wait_for_pid(child);
2988 close(pipefd[0]);
2989 if (ret != 0 || bytes < 0)
2990 return log_error(-1, "lxc-user-nic failed to configure requested network: %s",
2991 buffer[0] != '\0' ? buffer : "(null)");
2992 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2993
2994 /* netdev->name */
2995 token = strtok_r(buffer, ":", &saveptr);
2996 if (!token)
2997 return log_error(-1, "Failed to parse lxc-user-nic output");
2998
2999 /*
3000 * lxc-user-nic will take care of proper network device naming. So
3001 * netdev->name and netdev->transient_name need to be identical to not
3002 * trigger another rename later on.
3003 */
3004 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
3005 if (retlen < IFNAMSIZ) {
3006 retlen = strlcpy(netdev->transient_name, token, IFNAMSIZ);
3007 if (retlen < IFNAMSIZ)
3008 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
3009 }
3010 if (retlen >= IFNAMSIZ)
3011 return log_error_errno(-1, E2BIG,
3012 "Container side veth device name returned by lxc-user-nic is too long");
3013
3014 /* netdev->ifindex */
3015 token = strtok_r(NULL, ":", &saveptr);
3016 if (!token)
3017 return log_error(-1, "Failed to parse lxc-user-nic output");
3018
3019 ret = lxc_safe_int(token, &netdev->ifindex);
3020 if (ret < 0)
3021 return log_error_errno(-1, -ret,
3022 "Failed to convert string \"%s\" to integer", token);
3023
3024 /* netdev->priv.veth_attr.veth1 */
3025 token = strtok_r(NULL, ":", &saveptr);
3026 if (!token)
3027 return log_error(-1, "Failed to parse lxc-user-nic output");
3028
3029 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
3030 if (retlen >= IFNAMSIZ)
3031 return log_error_errno(-1, E2BIG,
3032 "Host side veth device name returned by lxc-user-nic is too long");
3033
3034 /* netdev->priv.veth_attr.ifindex */
3035 token = strtok_r(NULL, ":", &saveptr);
3036 if (!token)
3037 return log_error(-1, "Failed to parse lxc-user-nic output");
3038
3039 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
3040 if (ret < 0)
3041 return log_error_errno(-1, -ret,
3042 "Failed to convert string \"%s\" to integer", token);
3043
3044 if (netdev->upscript) {
3045 char *argv[] = {
3046 "veth",
3047 netdev->link,
3048 netdev->priv.veth_attr.veth1,
3049 NULL,
3050 };
3051
3052 ret = run_script_argv(lxcname, hooks_version, "net",
3053 netdev->upscript, "up", argv);
3054 if (ret < 0)
3055 return -1;
3056 }
3057
3058 return 0;
3059 }
3060
3061 static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
3062 struct lxc_netdev *netdev,
3063 const char *netns_path)
3064 {
3065 int bytes, ret;
3066 pid_t child;
3067 int pipefd[2];
3068 char buffer[PATH_MAX] = {};
3069
3070 if (netdev->type != LXC_NET_VETH)
3071 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
3072
3073 ret = pipe(pipefd);
3074 if (ret < 0)
3075 return log_error_errno(-1, errno, "Failed to create pipe");
3076
3077 child = fork();
3078 if (child < 0) {
3079 close(pipefd[0]);
3080 close(pipefd[1]);
3081 return log_error_errno(-1, errno, "Failed to create new process");
3082 }
3083
3084 if (child == 0) {
3085 char *hostveth;
3086
3087 close(pipefd[0]);
3088
3089 ret = dup2(pipefd[1], STDOUT_FILENO);
3090 if (ret >= 0)
3091 ret = dup2(pipefd[1], STDERR_FILENO);
3092 close(pipefd[1]);
3093 if (ret < 0) {
3094 SYSERROR("Failed to duplicate std{err,out} file descriptor");
3095 _exit(EXIT_FAILURE);
3096 }
3097
3098 if (!is_empty_string(netdev->priv.veth_attr.pair))
3099 hostveth = netdev->priv.veth_attr.pair;
3100 else
3101 hostveth = netdev->priv.veth_attr.veth1;
3102 if (is_empty_string(hostveth)) {
3103 SYSERROR("Host side veth device name is missing");
3104 _exit(EXIT_FAILURE);
3105 }
3106
3107 if (is_empty_string(netdev->link)) {
3108 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
3109 _exit(EXIT_FAILURE);
3110 }
3111
3112 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
3113 lxcname, netns_path, netdev->link, hostveth);
3114 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
3115 lxcname, netns_path, "veth", netdev->link, hostveth,
3116 (char *)NULL);
3117 SYSERROR("Failed to exec lxc-user-nic.");
3118 _exit(EXIT_FAILURE);
3119 }
3120
3121 close(pipefd[1]);
3122
3123 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
3124 if (bytes < 0) {
3125 SYSERROR("Failed to read from pipe file descriptor.");
3126 close(pipefd[0]);
3127 } else {
3128 buffer[bytes - 1] = '\0';
3129 }
3130
3131 ret = wait_for_pid(child);
3132 close_prot_errno_disarm(pipefd[0]);
3133 if (ret != 0 || bytes < 0)
3134 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
3135 !is_empty_string(buffer) ? buffer : "(null)");
3136
3137 return 0;
3138 }
3139
3140 static bool lxc_delete_network_unpriv(struct lxc_handler *handler)
3141 {
3142 int ret;
3143 struct lxc_netdev *netdev;
3144 /* strlen("/proc/") = 6
3145 * +
3146 * INTTYPE_TO_STRLEN(pid_t)
3147 * +
3148 * strlen("/fd/") = 4
3149 * +
3150 * INTTYPE_TO_STRLEN(int)
3151 * +
3152 * \0
3153 */
3154 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
3155
3156 *netns_path = '\0';
3157
3158 if (handler->nsfd[LXC_NS_NET] < 0)
3159 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
3160
3161 ret = strnprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
3162 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
3163 if (ret < 0)
3164 return false;
3165
3166 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3167 char *hostveth = NULL;
3168
3169 /* We can only delete devices whose ifindex we have. If we don't
3170 * have the index it means that we didn't create it.
3171 */
3172 if (!netdev->ifindex)
3173 continue;
3174
3175 if (netdev->type == LXC_NET_PHYS) {
3176 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3177 netdev->link);
3178 if (ret < 0)
3179 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
3180 netdev->ifindex, netdev->link);
3181 else
3182 TRACE("Renamed interface with index %d to its initial name \"%s\"",
3183 netdev->ifindex, netdev->link);
3184
3185 ret = netdev_deconf[netdev->type](handler, netdev);
3186 if (ret < 0)
3187 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3188 netdev->ifindex, netdev->link);
3189 goto clear_ifindices;
3190 }
3191
3192 ret = netdev_deconf[netdev->type](handler, netdev);
3193 if (ret < 0)
3194 WARN("Failed to deconfigure network device");
3195
3196 if (netdev->type != LXC_NET_VETH)
3197 goto clear_ifindices;
3198
3199 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
3200 goto clear_ifindices;
3201
3202 if (!is_empty_string(netdev->priv.veth_attr.pair))
3203 hostveth = netdev->priv.veth_attr.pair;
3204 else
3205 hostveth = netdev->priv.veth_attr.veth1;
3206 if (is_empty_string(hostveth))
3207 goto clear_ifindices;
3208
3209 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3210 handler->name, netdev,
3211 netns_path);
3212 if (ret < 0) {
3213 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3214 goto clear_ifindices;
3215 }
3216 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3217
3218 clear_ifindices:
3219 /*
3220 * We need to clear any ifindices we recorded so liblxc won't
3221 * have cached stale data which would cause it to fail on
3222 * reboot where we don't re-read the on-disk config file.
3223 */
3224 netdev->ifindex = 0;
3225 if (netdev->type == LXC_NET_PHYS) {
3226 netdev->priv.phys_attr.ifindex = 0;
3227 } else if (netdev->type == LXC_NET_VETH) {
3228 netdev->priv.veth_attr.veth1[0] = '\0';
3229 netdev->priv.veth_attr.ifindex = 0;
3230 }
3231 }
3232
3233 return true;
3234 }
3235
3236 static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
3237 struct lxc_inetdev *inet4dev;
3238 struct lxc_inet6dev *inet6dev;
3239 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
3240 int err = 0;
3241 unsigned int lo_ifindex = 0, link_ifindex = 0;
3242
3243 link_ifindex = if_nametoindex(netdev->link);
3244 if (link_ifindex == 0)
3245 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
3246
3247
3248 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3249 if (!list_empty(&netdev->ipv4_addresses)) {
3250 /* Check for net.ipv4.conf.[link].forwarding=1 */
3251 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3252 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
3253 }
3254
3255 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3256 if (!list_empty(&netdev->ipv6_addresses)) {
3257 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
3258 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3259 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
3260
3261 /* Check for net.ipv6.conf.[link].forwarding=1 */
3262 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3263 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
3264 }
3265
3266 /* Perform IPVLAN specific checks. */
3267 if (netdev->type == LXC_NET_IPVLAN) {
3268 /* Check mode is l3s as other modes do not work with l2proxy. */
3269 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3270 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
3271
3272 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3273 lo_ifindex = if_nametoindex(loop_device);
3274 if (lo_ifindex == 0)
3275 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
3276 }
3277
3278 list_for_each_entry(inet4dev, &netdev->ipv4_addresses, head) {
3279 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
3280 return ret_set_errno(-1, -errno);
3281
3282 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
3283 return ret_set_errno(-1, EINVAL);
3284
3285 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3286 if (netdev->type == LXC_NET_IPVLAN) {
3287 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
3288 if (err < 0)
3289 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
3290 }
3291 }
3292
3293 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
3294 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
3295 return ret_set_errno(-1, -errno);
3296
3297 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
3298 return ret_set_errno(-1, EINVAL);
3299
3300 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3301 if (netdev->type == LXC_NET_IPVLAN) {
3302 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
3303 if (err < 0)
3304 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
3305 }
3306 }
3307
3308 return 0;
3309 }
3310
3311 static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
3312 {
3313 char bufinet4[INET_ADDRSTRLEN];
3314 bool had_error = false;
3315 unsigned int link_ifindex = 0;
3316
3317 if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4)))
3318 return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
3319
3320 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3321 if (lo_ifindex > 0) {
3322 if (lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
3323 had_error = true;
3324 ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
3325 }
3326 }
3327
3328 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3329 if (!is_empty_string(link)) {
3330 link_ifindex = if_nametoindex(link);
3331 if (link_ifindex == 0)
3332 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
3333
3334 if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, link_ifindex, ip) < 0)
3335 had_error = true;
3336 }
3337
3338 if (had_error)
3339 return ret_set_errno(-1, EINVAL);
3340
3341 return 0;
3342 }
3343
3344 static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
3345 {
3346 char bufinet6[INET6_ADDRSTRLEN];
3347 bool had_error = false;
3348 unsigned int link_ifindex = 0;
3349
3350 if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6)))
3351 return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
3352
3353 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3354 if (lo_ifindex > 0) {
3355 if (lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
3356 had_error = true;
3357 ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
3358 }
3359 }
3360
3361 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3362 if (!is_empty_string(link)) {
3363 link_ifindex = if_nametoindex(link);
3364 if (link_ifindex == 0) {
3365 ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
3366 return ret_set_errno(-1, EINVAL);
3367 }
3368
3369 if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, link_ifindex, ip) < 0)
3370 had_error = true;
3371 }
3372
3373 if (had_error)
3374 return ret_set_errno(-1, EINVAL);
3375
3376 return 0;
3377 }
3378
3379 static int lxc_delete_l2proxy(struct lxc_netdev *netdev)
3380 {
3381 unsigned int lo_ifindex = 0;
3382 unsigned int err = 0;
3383 struct lxc_inetdev *inet4dev;
3384 struct lxc_inet6dev *inet6dev;
3385
3386 /* Perform IPVLAN specific checks. */
3387 if (netdev->type == LXC_NET_IPVLAN) {
3388 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3389 lo_ifindex = if_nametoindex(loop_device);
3390 if (lo_ifindex == 0) {
3391 err++;
3392 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
3393 }
3394 }
3395
3396 list_for_each_entry(inet4dev, &netdev->ipv4_addresses, head) {
3397 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3398 err++;
3399 }
3400
3401 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
3402 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3403 err++;
3404 }
3405
3406 if (err > 0)
3407 return ret_errno(EINVAL);
3408
3409 return 0;
3410 }
3411
3412 static int lxc_create_network_priv(struct lxc_handler *handler)
3413 {
3414 struct lxc_netdev *netdev;
3415
3416 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3417 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3418 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
3419
3420 /* Setup l2proxy entries if enabled and used with a link property */
3421 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
3422 if (lxc_setup_l2proxy(netdev))
3423 return log_error_errno(-1, errno, "Failed to setup l2proxy");
3424 }
3425
3426 if (netdev_configure_server[netdev->type](handler, netdev))
3427 return log_error_errno(-1, errno, "Failed to create network device");
3428 }
3429
3430 return 0;
3431 }
3432
3433 /*
3434 * LXC moves network devices into the target namespace based on their created
3435 * name. The created name can either be randomly generated for e.g. veth
3436 * devices or it can be the name of the existing device in the server's
3437 * namespaces. This is e.g. the case when moving physical devices. However this
3438 * can lead to weird clashes. Consider we have a network namespace that has the
3439 * following devices:
3440
3441 * 4: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3442 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3443 * altname enp7s0
3444 * 5: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3445 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3446 * altname enp8s0
3447 *
3448 * and the user generates the following network config for their container:
3449 *
3450 * lxc.net.0.type = phys
3451 * lxc.net.0.name = eth1
3452 * lxc.net.0.link = eth2
3453 *
3454 * lxc.net.1.type = phys
3455 * lxc.net.1.name = eth2
3456 * lxc.net.1.link = eth1
3457 *
3458 * This would cause LXC to move the devices eth1 and eth2 from the server's
3459 * network namespace into the container's network namespace:
3460 *
3461 * 24: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3462 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3463 * altname enp7s0
3464 * 25: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3465 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3466 * altname enp8s0
3467 *
3468 * According to the network config above we now need to rename the network
3469 * devices in the container's network namespace. Let's say we start with
3470 * renaming eth2 to eth1. This would immediately lead to a clash since the
3471 * container's network namespace already contains a network device with that
3472 * name. Renaming the other device would have the same problem.
3473 *
3474 * There are multiple ways to fix this but I'm concerned with keeping the logic
3475 * somewhat reasonable which is why we simply start creating transient device
3476 * names that are unique which we'll use to move and rename the network device
3477 * in the container's network namespace at the same time. And then we rename
3478 * based on those random devices names to the target name.
3479 *
3480 * Note that the transient name is based on the type of network device as
3481 * specified in the LXC config. However, that doesn't mean it's correct. LXD
3482 * passes veth devices and a range of other network devices (e.g. Infiniband
3483 * VFs etc.) via LXC_NET_PHYS even though they're not really "physical" in the
3484 * sense we like to think about it so you might see a veth device being
3485 * assigned a "physXXXXXX" transient name. That's not a problem.
3486 */
3487 static int create_transient_name(struct lxc_netdev *netdev)
3488 {
3489 const struct lxc_network_info *info;
3490
3491 if (!is_empty_string(netdev->transient_name))
3492 return syserror_set(-EINVAL, "Network device already had a transient name %s",
3493 netdev->transient_name);
3494
3495 info = &lxc_network_info[netdev->type];
3496 strlcpy(netdev->transient_name, info->template, info->template_len + 1);
3497
3498 if (!lxc_ifname_alnum_case_sensitive(netdev->transient_name))
3499 return syserror_set(-EINVAL, "Failed to create transient name for network device %s", netdev->created_name);
3500
3501 TRACE("Created transient name %s for network device", netdev->transient_name);
3502 return 0;
3503 }
3504
3505 static int netdev_requires_move(const struct lxc_netdev *netdev)
3506 {
3507 if (netdev->type == LXC_NET_EMPTY || netdev->type == LXC_NET_NONE)
3508 return false;
3509
3510 /*
3511 * Veth devices are directly created in the container's network
3512 * namespace so the device doesn't need to be moved into the
3513 * container's network namespace. The transient name will
3514 * already have been set above when we created the veth tunnel.
3515 */
3516 if (!netdev->ifindex)
3517 return false;
3518
3519 return true;
3520 }
3521
3522 int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
3523 {
3524 pid_t pid = handler->pid;
3525 struct lxc_netdev *netdev;
3526
3527 if (am_guest_unpriv())
3528 return 0;
3529
3530 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3531 __do_free char *physname = NULL;
3532 int ret;
3533
3534 if (!netdev_requires_move(netdev))
3535 continue;
3536
3537 ret = create_transient_name(netdev);
3538 if (ret < 0)
3539 return ret;
3540
3541 if (netdev->type == LXC_NET_PHYS)
3542 physname = is_wlan(netdev->link);
3543
3544 if (physname)
3545 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, netdev->transient_name);
3546 else
3547 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, netdev->transient_name);
3548 if (ret)
3549 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d and rename to %s",
3550 netdev->created_name, netdev->ifindex, pid, netdev->transient_name);
3551
3552 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d and renamed to %s",
3553 maybe_empty(netdev->created_name), netdev->ifindex, pid, netdev->transient_name);
3554 }
3555
3556 return 0;
3557 }
3558
3559 static int network_requires_advanced_setup(int type)
3560 {
3561 if (type == LXC_NET_EMPTY)
3562 return false;
3563
3564 if (type == LXC_NET_NONE)
3565 return false;
3566
3567 return true;
3568 }
3569
3570 static int lxc_create_network_unpriv(struct lxc_handler *handler)
3571 {
3572 int hooks_version = handler->conf->hooks_version;
3573 const char *lxcname = handler->name;
3574 const char *lxcpath = handler->lxcpath;
3575 pid_t pid = handler->pid;
3576 struct lxc_netdev *netdev;
3577
3578 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3579 if (!network_requires_advanced_setup(netdev->type))
3580 continue;
3581
3582 if (netdev->type != LXC_NET_VETH)
3583 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3584 lxc_net_type_to_str(netdev->type));
3585
3586 if (netdev->mtu)
3587 INFO("mtu ignored due to insufficient privilege");
3588
3589 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3590 pid, hooks_version))
3591 return -1;
3592 }
3593
3594 return 0;
3595 }
3596
3597 static bool lxc_delete_network_priv(struct lxc_handler *handler)
3598 {
3599 int ret;
3600 struct lxc_netdev *netdev;
3601
3602 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
3603 char *hostveth = NULL;
3604
3605 /* We can only delete devices whose ifindex we have. If we don't
3606 * have the index it means that we didn't create it.
3607 */
3608 if (!netdev->ifindex)
3609 continue;
3610
3611 /*
3612 * If the network device has been moved back from the
3613 * containers network namespace, update the ifindex.
3614 */
3615 netdev->ifindex = if_nametoindex(netdev->name);
3616
3617 /* Delete l2proxy entries if enabled and used with a link property */
3618 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
3619 if (lxc_delete_l2proxy(netdev))
3620 WARN("Failed to delete all l2proxy config");
3621 /* Don't return, let the network be cleaned up as normal. */
3622 }
3623
3624 if (netdev->type == LXC_NET_PHYS) {
3625 /* Physical interfaces are initially returned to the parent namespace
3626 * with their transient name to avoid collisions
3627 */
3628 netdev->ifindex = if_nametoindex(netdev->transient_name);
3629 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3630 if (ret < 0)
3631 WARN("Failed to rename interface with index %d "
3632 "from \"%s\" to its initial name \"%s\"",
3633 netdev->ifindex, netdev->name, netdev->link);
3634 else {
3635 TRACE("Renamed interface with index %d from "
3636 "\"%s\" to its initial name \"%s\"",
3637 netdev->ifindex, netdev->name,
3638 netdev->link);
3639
3640 /* Restore original MTU */
3641 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3642 if (ret < 0) {
3643 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3644 netdev->link, netdev->priv.phys_attr.mtu);
3645 } else {
3646 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3647 netdev->link, netdev->priv.phys_attr.mtu);
3648 }
3649 }
3650
3651 ret = netdev_deconf[netdev->type](handler, netdev);
3652 if (ret < 0)
3653 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3654 netdev->ifindex, netdev->link);
3655 goto clear_ifindices;
3656 }
3657
3658 ret = netdev_deconf[netdev->type](handler, netdev);
3659 if (ret < 0)
3660 WARN("Failed to deconfigure network device");
3661
3662 if (netdev->type != LXC_NET_VETH)
3663 goto clear_ifindices;
3664
3665 /* Explicitly delete host veth device to prevent lingering
3666 * devices. We had issues in LXD around this.
3667 */
3668 if (!is_empty_string(netdev->priv.veth_attr.pair))
3669 hostveth = netdev->priv.veth_attr.pair;
3670 else
3671 hostveth = netdev->priv.veth_attr.veth1;
3672 if (is_empty_string(hostveth))
3673 goto clear_ifindices;
3674
3675 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3676 ret = lxc_netdev_delete_by_name(hostveth);
3677 if (ret < 0)
3678 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
3679
3680 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3681 } else if (!is_empty_string(netdev->link)) {
3682 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3683 if (ret < 0)
3684 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3685
3686 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3687 }
3688
3689 clear_ifindices:
3690 /* We need to clear any ifindices we recorded so liblxc won't
3691 * have cached stale data which would cause it to fail on reboot
3692 * we're we don't re-read the on-disk config file.
3693 */
3694 netdev->ifindex = 0;
3695 if (netdev->type == LXC_NET_PHYS) {
3696 netdev->priv.phys_attr.ifindex = 0;
3697 } else if (netdev->type == LXC_NET_VETH) {
3698 netdev->priv.veth_attr.veth1[0] = '\0';
3699 netdev->priv.veth_attr.ifindex = 0;
3700 }
3701
3702 /* Clear transient name */
3703 if (!is_empty_string (netdev->transient_name))
3704 {
3705 netdev->transient_name[0] = '\0';
3706 }
3707 }
3708
3709 return true;
3710 }
3711
3712 int lxc_requests_empty_network(struct lxc_handler *handler)
3713 {
3714 struct list_head *netdevs = &handler->conf->netdevs;
3715 bool found_none = false, found_nic = false;
3716 struct lxc_netdev *netdev;
3717
3718 if (list_empty(netdevs))
3719 return 0;
3720
3721 list_for_each_entry(netdev, netdevs, head) {
3722
3723 if (netdev->type == LXC_NET_NONE)
3724 found_none = true;
3725 else
3726 found_nic = true;
3727 }
3728
3729 if (found_none && !found_nic)
3730 return 1;
3731
3732 return 0;
3733 }
3734
3735 /* try to move physical nics to the init netns */
3736 int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
3737 {
3738 __do_close int oldfd = -EBADF;
3739 int netnsfd = handler->nsfd[LXC_NS_NET];
3740 struct lxc_conf *conf = handler->conf;
3741 int ret;
3742 char ifname[IFNAMSIZ];
3743 struct lxc_netdev *netdev;
3744
3745 /*
3746 * If we weren't asked to clone a new network namespace, there's
3747 * nothing to restore.
3748 */
3749 if (!(handler->ns_clone_flags & CLONE_NEWNET))
3750 return 0;
3751
3752 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3753 * the parent network namespace. We won't have this capability if we are
3754 * unprivileged.
3755 */
3756 if (!handler->am_root)
3757 return 0;
3758
3759 TRACE("Moving physical network devices back to parent network namespace");
3760
3761 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
3762 if (oldfd < 0)
3763 return log_error_errno(-1, errno, "Failed to preserve network namespace");
3764
3765 ret = setns(netnsfd, CLONE_NEWNET);
3766 if (ret < 0)
3767 return log_error_errno(-1, errno, "Failed to enter network namespace");
3768
3769 list_for_each_entry(netdev, &conf->netdevs, head) {
3770 if (netdev->type != LXC_NET_PHYS)
3771 continue;
3772
3773 /* Retrieve the name of the interface in the container's network
3774 * namespace.
3775 */
3776 if (!if_indextoname(netdev->ifindex, ifname)) {
3777 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
3778 continue;
3779 }
3780
3781 /* Restore physical interfaces to host's network namespace with its transient name
3782 * to avoid collisions with the host's other interfaces.
3783 */
3784 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->transient_name);
3785 if (ret < 0)
3786 WARN("Error moving network device \"%s\" back to network namespace", ifname);
3787 else
3788 TRACE("Moved network device \"%s\" back to network namespace", ifname);
3789 }
3790
3791 ret = setns(oldfd, CLONE_NEWNET);
3792 if (ret < 0)
3793 return log_error_errno(-1, errno, "Failed to enter network namespace");
3794
3795 return 0;
3796 }
3797
3798 static int setup_hw_addr(char *hwaddr, const char *ifname)
3799 {
3800 __do_close int fd = -EBADF;
3801 struct sockaddr sockaddr;
3802 struct ifreq ifr;
3803 int ret;
3804
3805 ret = lxc_convert_mac(hwaddr, &sockaddr);
3806 if (ret)
3807 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
3808
3809 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3810 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3811 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3812
3813 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
3814 if (fd < 0)
3815 return -1;
3816
3817 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
3818 if (ret)
3819 SYSERROR("Failed to perform ioctl");
3820
3821 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
3822
3823 return ret;
3824 }
3825
3826 static int setup_ipv4_addr(struct lxc_netdev *netdev)
3827 {
3828 int ifindex = netdev->ifindex;
3829 int err;
3830 struct lxc_inetdev *inet4dev;
3831
3832 list_for_each_entry(inet4dev, &netdev->ipv4_addresses, head) {
3833 err = lxc_ipv4_addr_add(ifindex, &inet4dev->addr,
3834 &inet4dev->bcast, inet4dev->prefix);
3835 if (err)
3836 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
3837 }
3838
3839 return 0;
3840 }
3841
3842 static int setup_ipv6_addr(struct lxc_netdev *netdev)
3843 {
3844 int err;
3845 struct lxc_inet6dev *inet6dev;
3846 int ifindex = netdev->ifindex;
3847
3848 list_for_each_entry(inet6dev, &netdev->ipv6_addresses, head) {
3849 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3850 &inet6dev->mcast, &inet6dev->acast,
3851 inet6dev->prefix);
3852 if (err)
3853 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
3854 }
3855
3856 return 0;
3857 }
3858
3859 static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
3860 {
3861 int err;
3862 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
3863
3864 /* set a mac address */
3865 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3866 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
3867
3868 /* setup ipv4 addresses on the interface */
3869 if (setup_ipv4_addr(netdev))
3870 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
3871
3872 /* setup ipv6 addresses on the interface */
3873 if (setup_ipv6_addr(netdev))
3874 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
3875
3876 /* set the network device up */
3877 if (netdev->flags & IFF_UP) {
3878 err = lxc_netdev_up(netdev->name);
3879 if (err)
3880 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
3881
3882 /* the network is up, make the loopback up too */
3883 err = lxc_netdev_up("lo");
3884 if (err)
3885 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
3886 }
3887
3888 /* setup ipv4 gateway on the interface */
3889 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
3890 if (!(netdev->flags & IFF_UP))
3891 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
3892
3893 if (list_empty(&netdev->ipv4_addresses))
3894 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
3895
3896 /* Setup device route if ipv4_gateway_dev is enabled */
3897 if (netdev->ipv4_gateway_dev) {
3898 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3899 if (err < 0)
3900 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
3901 } else {
3902 /* Check the gateway address is valid */
3903 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3904 return ret_set_errno(-1, errno);
3905
3906 /* Try adding a default route to the gateway address */
3907 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3908 if (err < 0) {
3909 /* If adding the default route fails, this could be because the
3910 * gateway address is in a different subnet to the container's address.
3911 * To work around this, we try adding a static device route to the
3912 * gateway address first, and then try again.
3913 */
3914 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
3915 if (err < 0)
3916 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
3917
3918 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3919 if (err < 0)
3920 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
3921 }
3922 }
3923 }
3924
3925 /* setup ipv6 gateway on the interface */
3926 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
3927 if (!(netdev->flags & IFF_UP))
3928 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
3929
3930 if (list_empty(&netdev->ipv6_addresses) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
3931 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
3932
3933 /* Setup device route if ipv6_gateway_dev is enabled */
3934 if (netdev->ipv6_gateway_dev) {
3935 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3936 if (err < 0)
3937 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
3938 } else {
3939 /* Check the gateway address is valid */
3940 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3941 return ret_set_errno(-1, errno);
3942
3943 /* Try adding a default route to the gateway address */
3944 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3945 if (err < 0) {
3946 /* If adding the default route fails, this could be because the
3947 * gateway address is in a different subnet to the container's address.
3948 * To work around this, we try adding a static device route to the
3949 * gateway address first, and then try again.
3950 */
3951 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
3952 if (err < 0)
3953 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
3954
3955 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3956 if (err < 0)
3957 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
3958 }
3959 }
3960 }
3961
3962 DEBUG("Network device \"%s\" has been setup", netdev->name);
3963
3964 return 0;
3965 }
3966
3967 /**
3968 * Consider the following network layout:
3969 *
3970 * lxc.net.0.type = phys
3971 * lxc.net.0.link = eth2
3972 * lxc.net.0.name = eth%d
3973 *
3974 * lxc.net.1.type = phys
3975 * lxc.net.1.link = eth1
3976 * lxc.net.1.name = eth0
3977 *
3978 * If we simply follow this order and create the first network first the kernel
3979 * will allocate eth0 for the first network but the second network requests
3980 * that eth1 be renamed to eth0 in the container's network namespace which
3981 * would lead to a clash.
3982 *
3983 * Note, we don't handle cases like:
3984 *
3985 * lxc.net.0.type = phys
3986 * lxc.net.0.link = eth2
3987 * lxc.net.0.name = eth0
3988 *
3989 * lxc.net.1.type = phys
3990 * lxc.net.1.link = eth1
3991 * lxc.net.1.name = eth0
3992 *
3993 * That'll brutally fail of course but there's nothing we can do about it.
3994 */
3995 int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf)
3996 {
3997 bool needs_second_pass = false;
3998 struct lxc_netdev *netdev;
3999 const struct list_head *netdevs = &conf->netdevs;
4000
4001 if (list_empty(netdevs))
4002 return 0;
4003
4004 /* Configure all devices that have a specific target name. */
4005 list_for_each_entry(netdev, netdevs, head) {
4006 int ret;
4007
4008 if (is_empty_string(netdev->name) || strequal(netdev->name, "eth%d")) {
4009 needs_second_pass = true;
4010 continue;
4011 }
4012
4013 ret = netdev_configure_container[netdev->type](netdev);
4014 if (!ret)
4015 ret = lxc_network_setup_in_child_namespaces_common(netdev);
4016 if (ret)
4017 return log_error_errno(-1, errno, "Failed to setup netdev");
4018 }
4019 INFO("Finished setting up network devices with caller assigned names");
4020
4021 if (needs_second_pass) {
4022 /* Configure all devices that have a kernel assigned name. */
4023 list_for_each_entry(netdev, netdevs, head) {
4024 int ret;
4025
4026 if (!is_empty_string(netdev->name) && !strequal(netdev->name, "eth%d"))
4027 continue;
4028
4029 ret = netdev_configure_container[netdev->type](netdev);
4030 if (!ret)
4031 ret = lxc_network_setup_in_child_namespaces_common(netdev);
4032 if (ret)
4033 return log_error_errno(-1, errno, "Failed to setup netdev");
4034 }
4035 INFO("Finished setting up network devices with kernel assigned names");
4036 }
4037
4038 return 0;
4039 }
4040
4041 int lxc_network_send_to_child(struct lxc_handler *handler)
4042 {
4043 int data_sock = handler->data_sock[0];
4044 struct lxc_netdev *netdev;
4045
4046 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
4047 int ret;
4048
4049 if (!network_requires_advanced_setup(netdev->type))
4050 continue;
4051
4052 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
4053 if (ret < 0)
4054 return -1;
4055
4056 ret = lxc_send_nointr(data_sock, netdev->transient_name, IFNAMSIZ, MSG_NOSIGNAL);
4057 if (ret < 0)
4058 return -1;
4059
4060 TRACE("Sent network device name \"%s\" to child", netdev->transient_name);
4061 }
4062
4063 return 0;
4064 }
4065
4066 int lxc_network_recv_from_parent(struct lxc_handler *handler)
4067 {
4068 int data_sock = handler->data_sock[1];
4069 struct lxc_netdev *netdev;
4070
4071 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
4072 int ret;
4073
4074 if (!network_requires_advanced_setup(netdev->type))
4075 continue;
4076
4077 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
4078 if (ret < 0)
4079 return -1;
4080
4081 ret = lxc_recv_nointr(data_sock, netdev->transient_name, IFNAMSIZ, 0);
4082 if (ret < 0)
4083 return -1;
4084
4085 TRACE("Received network device name \"%s\" from parent", netdev->transient_name);
4086 }
4087
4088 return 0;
4089 }
4090
4091 int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
4092 {
4093 int data_sock = handler->data_sock[0];
4094 struct lxc_netdev *netdev;
4095 struct list_head *netdevs = &handler->conf->netdevs;
4096
4097 if (!handler->am_root)
4098 return 0;
4099
4100 list_for_each_entry(netdev, netdevs, head) {
4101 int ret;
4102
4103 /* Send network device name in the child's namespace to parent. */
4104 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
4105 if (ret < 0)
4106 return -1;
4107
4108 /* Send network device ifindex in the child's namespace to
4109 * parent.
4110 */
4111 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
4112 if (ret < 0)
4113 return -1;
4114
4115 TRACE("Sent network device %s with ifindex %d to parent", maybe_empty(netdev->name), netdev->ifindex);
4116 }
4117
4118 if (!list_empty(netdevs))
4119 TRACE("Sent network device names and ifindices to parent");
4120
4121 return 0;
4122 }
4123
4124 int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
4125 {
4126 int data_sock = handler->data_sock[1];
4127 struct lxc_netdev *netdev;
4128
4129 if (!handler->am_root)
4130 return 0;
4131
4132 list_for_each_entry(netdev, &handler->conf->netdevs, head) {
4133 int ret;
4134
4135 /* Receive network device name in the child's namespace to
4136 * parent.
4137 */
4138 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
4139 if (ret < 0)
4140 return -1;
4141
4142 /* Receive network device ifindex in the child's namespace to
4143 * parent.
4144 */
4145 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
4146 if (ret < 0)
4147 return -1;
4148
4149 TRACE("Received network device %s with ifindex %d from child", maybe_empty(netdev->name), netdev->ifindex);
4150 }
4151
4152 return 0;
4153 }
4154
4155 void lxc_delete_network(struct lxc_handler *handler)
4156 {
4157 bool bret;
4158
4159 /*
4160 * Always expose namespace fd paths to network down hooks via
4161 * environment variables. No need to complicate things by passing them
4162 * as additional hook arguments.
4163 */
4164 lxc_expose_namespace_environment(handler);
4165
4166 if (handler->am_root)
4167 bret = lxc_delete_network_priv(handler);
4168 else
4169 bret = lxc_delete_network_unpriv(handler);
4170 if (!bret)
4171 DEBUG("Failed to delete network devices");
4172 else
4173 DEBUG("Deleted network devices");
4174 }
4175
4176 int lxc_netns_set_nsid(int fd)
4177 {
4178 int ret;
4179 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
4180 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4181 NLMSG_ALIGN(1024)];
4182 struct nl_handler nlh = NL_HANDLER_INIT;
4183 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
4184 struct nlmsghdr *hdr;
4185 struct rtgenmsg *msg;
4186 const __s32 ns_id = -1;
4187 const __u32 netns_fd = fd;
4188
4189 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
4190 if (ret < 0)
4191 return -1;
4192
4193 memset(buf, 0, sizeof(buf));
4194
4195 #pragma GCC diagnostic push
4196 #pragma GCC diagnostic ignored "-Wcast-align"
4197 hdr = (struct nlmsghdr *)buf;
4198 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
4199 #pragma GCC diagnostic pop
4200
4201 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4202 hdr->nlmsg_type = RTM_NEWNSID;
4203 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4204 hdr->nlmsg_pid = 0;
4205 hdr->nlmsg_seq = RTM_NEWNSID;
4206 msg->rtgen_family = AF_UNSPEC;
4207
4208 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4209 if (ret < 0)
4210 return ret_errno(ENOMEM);
4211
4212 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4213 if (ret < 0)
4214 return ret_errno(ENOMEM);
4215
4216 return __netlink_transaction(nlh_ptr, hdr, hdr);
4217 }
4218
/*
 * Index the routing attributes found in the @len bytes starting at @rta.
 *
 * @tb must have room for @max + 1 entries. All entries are cleared first;
 * afterwards tb[type] points at the first attribute of that type, and stays
 * NULL for types that did not occur. Attributes whose type is larger than
 * @max are ignored, as are repeated occurrences of a type.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, (max + 1) * sizeof(*tb));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	/* RTA_NEXT() advances rta and shrinks len by the aligned length. */
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short type = rta->rta_type;

		if (type > max || tb[type])
			continue;

		tb[type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
4238
/* Read the payload of routing attribute @rta as a signed 32-bit integer. */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	const __s32 *payload = RTA_DATA(rta);

	return *payload;
}
4243
/*
 * Yield a struct rtattr pointer to the location that lies
 * NLMSG_ALIGN(sizeof(struct rtgenmsg)) bytes behind @r, i.e. right after the
 * aligned struct rtgenmsg that @r points to.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
4248
4249 int lxc_netns_get_nsid(int fd)
4250 {
4251 struct nl_handler nlh = NL_HANDLER_INIT;
4252 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
4253 int ret;
4254 ssize_t len;
4255 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
4256 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4257 NLMSG_ALIGN(1024)];
4258 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
4259 struct nlmsghdr *hdr;
4260 struct rtgenmsg *msg;
4261 __u32 netns_fd = fd;
4262
4263 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
4264 if (ret < 0)
4265 return -1;
4266
4267 memset(buf, 0, sizeof(buf));
4268
4269 #pragma GCC diagnostic push
4270 #pragma GCC diagnostic ignored "-Wcast-align"
4271 hdr = (struct nlmsghdr *)buf;
4272 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
4273 #pragma GCC diagnostic pop
4274
4275 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4276 hdr->nlmsg_type = RTM_GETNSID;
4277 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4278 hdr->nlmsg_pid = 0;
4279 hdr->nlmsg_seq = RTM_GETNSID;
4280 msg->rtgen_family = AF_UNSPEC;
4281
4282 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4283 if (ret < 0)
4284 return ret_errno(ENOMEM);
4285
4286 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
4287 if (ret < 0)
4288 return -1;
4289
4290 msg = NLMSG_DATA(hdr);
4291 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4292 if (len < 0)
4293 return ret_errno(EINVAL);
4294
4295 #pragma GCC diagnostic push
4296 #pragma GCC diagnostic ignored "-Wcast-align"
4297 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4298 if (tb[__LXC_NETNSA_NSID])
4299 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
4300 #pragma GCC diagnostic pop
4301
4302 return -1;
4303 }
4304
4305 int lxc_create_network(struct lxc_handler *handler)
4306 {
4307 int ret;
4308
4309 if (handler->am_root) {
4310 ret = lxc_create_network_priv(handler);
4311 if (ret)
4312 return -1;
4313
4314 return lxc_network_move_created_netdev_priv(handler);
4315 }
4316
4317 return lxc_create_network_unpriv(handler);
4318 }