]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/network.c
network: use empty initializer
[mirror_lxc.git] / src / lxc / network.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #ifndef _GNU_SOURCE
4 #define _GNU_SOURCE 1
5 #endif
6 #include <arpa/inet.h>
7 #include <ctype.h>
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <linux/netlink.h>
11 #include <linux/rtnetlink.h>
12 #include <linux/sockios.h>
13 #include <net/ethernet.h>
14 #include <net/if.h>
15 #include <net/if_arp.h>
16 #include <netinet/in.h>
17 #include <stdio.h>
18 #include <stdlib.h>
19 #include <string.h>
20 #include <sys/inotify.h>
21 #include <sys/ioctl.h>
22 #include <sys/param.h>
23 #include <sys/socket.h>
24 #include <sys/stat.h>
25 #include <sys/types.h>
26 #include <time.h>
27 #include <unistd.h>
28
29 #include "../include/netns_ifaddrs.h"
30 #include "af_unix.h"
31 #include "conf.h"
32 #include "config.h"
33 #include "file_utils.h"
34 #include "log.h"
35 #include "macro.h"
36 #include "memory_utils.h"
37 #include "network.h"
38 #include "nl.h"
39 #include "process_utils.h"
40 #include "syscall_wrappers.h"
41 #include "utils.h"
42
43 #ifndef HAVE_STRLCPY
44 #include "include/strlcpy.h"
45 #endif
46
/* Log definition used by the logging macros in this file. */
lxc_log_define(network, lxc);

/* Name of the loopback device. */
static const char loop_device[] = "lo";

/* Signature shared by the instantiate_*() functions (handler + netdev). */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);

/* Callback variant that only receives the network device description. */
typedef int (*instantiate_ns_cb)(struct lxc_netdev *);
52
53 static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
54 {
55 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
56 struct nl_handler nlh;
57 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
58 int addrlen, err;
59 struct rtmsg *rt;
60
61 addrlen = family == AF_INET ? sizeof(struct in_addr)
62 : sizeof(struct in6_addr);
63
64 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
65 if (err)
66 return err;
67
68 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
69 if (!nlmsg)
70 return -ENOMEM;
71
72 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
73 if (!answer)
74 return -ENOMEM;
75
76 nlmsg->nlmsghdr->nlmsg_flags =
77 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
78 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
79
80 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
81 if (!rt)
82 return -ENOMEM;
83
84 rt->rtm_family = family;
85 rt->rtm_table = RT_TABLE_MAIN;
86 rt->rtm_scope = RT_SCOPE_LINK;
87 rt->rtm_protocol = RTPROT_BOOT;
88 rt->rtm_type = RTN_UNICAST;
89 rt->rtm_dst_len = netmask;
90
91 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
92 return -EINVAL;
93
94 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
95 return -EINVAL;
96
97 return netlink_transaction(nlh_ptr, nlmsg, answer);
98 }
99
/* Add an IPv4 route for @dest/@netmask on the device @ifindex (RTM_NEWROUTE). */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
104
/* Add an IPv6 route for @dest/@netmask on the device @ifindex (RTM_NEWROUTE). */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
109
/* Remove the IPv4 route for @dest/@netmask on the device @ifindex (RTM_DELROUTE). */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
114
/* Remove the IPv6 route for @dest/@netmask on the device @ifindex (RTM_DELROUTE). */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
119
120 static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
121 {
122 struct lxc_list *iterator;
123 int err;
124
125 lxc_list_for_each(iterator, ip) {
126 struct lxc_inetdev *inetdev = iterator->elem;
127
128 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
129 if (err)
130 return log_error_errno(-1, -err, "Failed to setup ipv4 route for network device with ifindex %d", ifindex);
131 }
132
133 return 0;
134 }
135
136 static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
137 {
138 struct lxc_list *iterator;
139 int err;
140
141 lxc_list_for_each(iterator, ip) {
142 struct lxc_inet6dev *inet6dev = iterator->elem;
143
144 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
145 if (err)
146 return log_error_errno(-1, -err, "Failed to setup ipv6 route for network device with ifindex %d", ifindex);
147 }
148
149 return 0;
150 }
151
152 static int setup_ipv4_addr_routes(struct lxc_list *ip, int ifindex)
153 {
154 struct lxc_list *iterator;
155 int err;
156
157 lxc_list_for_each(iterator, ip) {
158 struct lxc_inetdev *inetdev = iterator->elem;
159
160 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, 32);
161
162 if (err)
163 return log_error_errno(-1, err, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex);
164 }
165
166 return 0;
167 }
168
169 static int setup_ipv6_addr_routes(struct lxc_list *ip, int ifindex)
170 {
171 struct lxc_list *iterator;
172 int err;
173
174 lxc_list_for_each(iterator, ip) {
175 struct lxc_inet6dev *inet6dev = iterator->elem;
176
177 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, 128);
178 if (err)
179 return log_error_errno(-1, err, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex);
180 }
181
182 return 0;
183 }
184
185 static int lxc_ip_neigh_proxy(__u16 nlmsg_type, int family, int ifindex, void *dest)
186 {
187 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
188 struct nl_handler nlh;
189 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
190 int addrlen, err;
191 struct ndmsg *rt;
192
193 addrlen = family == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr);
194
195 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
196 if (err)
197 return err;
198
199 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
200 if (!nlmsg)
201 return -ENOMEM;
202
203 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
204 if (!answer)
205 return -ENOMEM;
206
207 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
208 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
209
210 rt = nlmsg_reserve(nlmsg, sizeof(struct ndmsg));
211 if (!rt)
212 return -ENOMEM;
213
214 rt->ndm_ifindex = ifindex;
215 rt->ndm_flags = NTF_PROXY;
216 rt->ndm_type = NDA_DST;
217 rt->ndm_family = family;
218
219 if (nla_put_buffer(nlmsg, NDA_DST, dest, addrlen))
220 return -EINVAL;
221
222 return netlink_transaction(nlh_ptr, nlmsg, answer);
223 }
224
225 static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
226 {
227 int ret;
228 char path[PATH_MAX];
229 char buf[1] = "";
230
231 if (family != AF_INET && family != AF_INET6)
232 return ret_set_errno(-1, EINVAL);
233
234 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
235 family == AF_INET ? "ipv4" : "ipv6", ifname,
236 "forwarding");
237 if (ret < 0 || (size_t)ret >= sizeof(path))
238 return ret_set_errno(-1, E2BIG);
239
240 return lxc_read_file_expect(path, buf, 1, "1");
241 }
242
/* Payload of the IFLA_BRIDGE_VLAN_INFO attribute built by lxc_bridge_vlan(). */
struct bridge_vlan_info {
	__u16 flags;	/* BRIDGE_VLAN_INFO_* bits, 0 for a tagged VLAN. */
	__u16 vid;	/* VLAN ID. */
};
247
248 static int lxc_bridge_vlan(unsigned int ifindex, unsigned short operation, unsigned short vlan_id, bool tagged)
249 {
250 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
251 struct nl_handler nlh;
252 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
253 int err;
254 struct ifinfomsg *ifi;
255 struct rtattr *nest;
256 unsigned short bridge_flags = 0;
257 struct bridge_vlan_info vlan_info;
258
259 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
260 if (err)
261 return err;
262
263 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
264 if (!nlmsg)
265 return ret_errno(ENOMEM);
266
267 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
268 if (!answer)
269 return ret_errno(ENOMEM);
270
271 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
272 nlmsg->nlmsghdr->nlmsg_type = operation;
273
274 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
275 if (!ifi)
276 return ret_errno(ENOMEM);
277 ifi->ifi_family = AF_BRIDGE;
278 ifi->ifi_index = ifindex;
279
280 nest = nla_begin_nested(nlmsg, IFLA_AF_SPEC);
281 if (!nest)
282 return ret_errno(ENOMEM);
283
284 bridge_flags |= BRIDGE_FLAGS_MASTER;
285 if (nla_put_u16(nlmsg, IFLA_BRIDGE_FLAGS, bridge_flags))
286 return ret_errno(ENOMEM);
287
288 vlan_info.vid = vlan_id;
289 vlan_info.flags = 0;
290 if (!tagged)
291 vlan_info.flags = BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED;
292
293 if (nla_put_buffer(nlmsg, IFLA_BRIDGE_VLAN_INFO, &vlan_info, sizeof(struct bridge_vlan_info)))
294 return ret_errno(ENOMEM);
295
296 nla_end_nested(nlmsg, nest);
297
298 return netlink_transaction(nlh_ptr, nlmsg, answer);
299 }
300
/* Add VLAN @vlan_id to the bridge port @ifindex via RTM_SETLINK. */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	const unsigned short operation = RTM_SETLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, tagged);
}
305
/* Remove VLAN @vlan_id from the bridge port @ifindex via RTM_DELLINK. */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	const unsigned short operation = RTM_DELLINK;

	return lxc_bridge_vlan(ifindex, operation, vlan_id, false);
}
310
311 static int lxc_bridge_vlan_add_tagged(unsigned int ifindex, struct lxc_list *vlan_ids)
312 {
313 struct lxc_list *iterator;
314 int err;
315
316 lxc_list_for_each(iterator, vlan_ids) {
317 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
318
319 err = lxc_bridge_vlan_add(ifindex, vlan_id, true);
320 if (err)
321 return log_error_errno(-1, -err, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id, ifindex);
322 }
323
324 return 0;
325 }
326
327 static int validate_veth(struct lxc_netdev *netdev)
328 {
329 if (netdev->priv.veth_attr.mode != VETH_MODE_BRIDGE || is_empty_string(netdev->link)) {
330 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
331 if (netdev->priv.veth_attr.vlan_id_set)
332 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
333
334 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
335 if (lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) > 0)
336 return log_error_errno(-1, EINVAL, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
337 }
338
339 if (netdev->priv.veth_attr.vlan_id_set) {
340 struct lxc_list *it;
341 lxc_list_for_each(it, &netdev->priv.veth_attr.vlan_tagged_ids) {
342 unsigned short i = PTR_TO_USHORT(it->elem);
343 if (i == netdev->priv.veth_attr.vlan_id)
344 return log_error_errno(-1, EINVAL, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev->priv.veth_attr.vlan_id);
345 }
346 }
347
348 return 0;
349 }
350
351 static int setup_veth_native_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
352 {
353 int err, rc, veth1index;
354 char path[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ + 1];
355 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
356
357 /* Skip setup if no VLAN options are specified. */
358 if (!netdev->priv.veth_attr.vlan_id_set && lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids) <= 0)
359 return 0;
360
361 /* Check vlan filtering is enabled on parent bridge. */
362 rc = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge/vlan_filtering", netdev->link);
363 if (rc < 0 || (size_t)rc >= sizeof(path))
364 return -1;
365
366 rc = lxc_read_from_file(path, buf, sizeof(buf));
367 if (rc < 0)
368 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
369
370 buf[rc - 1] = '\0';
371
372 if (strcmp(buf, "1") != 0)
373 return log_error_errno(-1, EPERM, "vlan_filtering is not enabled on \"%s\"", netdev->link);
374
375 /* Get veth1 ifindex for use with netlink. */
376 veth1index = if_nametoindex(veth1);
377 if (!veth1index)
378 return log_error_errno(-1, errno, "Failed getting ifindex of \"%s\"", netdev->link);
379
380 /* Configure untagged VLAN settings on bridge port if specified. */
381 if (netdev->priv.veth_attr.vlan_id_set) {
382 unsigned short default_pvid;
383
384 /* Get the bridge's default VLAN PVID. */
385 rc = snprintf(path, sizeof(path), "/sys/class/net/%s/bridge/default_pvid", netdev->link);
386 if (rc < 0 || (size_t)rc >= sizeof(path))
387 return -1;
388
389 rc = lxc_read_from_file(path, buf, sizeof(buf));
390 if (rc < 0)
391 return log_error_errno(rc, errno, "Failed reading from \"%s\"", path);
392
393 buf[rc - 1] = '\0';
394 err = get_u16(&default_pvid, buf, 0);
395 if (err)
396 return log_error_errno(-1, EINVAL, "Failed parsing default_pvid of \"%s\"", netdev->link);
397
398 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
399 if (default_pvid != netdev->priv.veth_attr.vlan_id) {
400 err = lxc_bridge_vlan_del(veth1index, default_pvid);
401 if (err)
402 return log_error_errno(err, errno, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid, veth1);
403 }
404
405 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
406 err = lxc_bridge_vlan_add(veth1index, netdev->priv.veth_attr.vlan_id, false);
407 if (err)
408 return log_error_errno(err, errno, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev->priv.veth_attr.vlan_id, veth1);
409 }
410 }
411
412 /* Configure tagged VLAN settings on bridge port if specified. */
413 err = lxc_bridge_vlan_add_tagged(veth1index, &netdev->priv.veth_attr.vlan_tagged_ids);
414 if (err)
415 return log_error_errno(err, errno, "Failed to add tagged vlans on \"%s\"", veth1);
416
417 return 0;
418 }
419
/* Arguments handed to lxc_ovs_setup_bridge_vlan_exec() through run_command(). */
struct ovs_veth_vlan_args {
	const char *nic;	/* Name of the port to configure. */
	const char *vlan_mode;	/* Port VLAN mode. */
	short vlan_id;		/* PVID VLAN ID. */
	char *trunks;		/* Comma delimited list of tagged VLAN IDs. */
};
426
427 static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args *args)
428 {
429 free_disarm(args->trunks);
430 }
431
432 static int lxc_ovs_setup_bridge_vlan_exec(void *data)
433 {
434 struct ovs_veth_vlan_args *args = data;
435 __do_free char *vlan_mode = NULL, *tag = NULL, *trunks = NULL;
436
437 if (!args->vlan_mode)
438 return ret_errno(EINVAL);
439
440 vlan_mode = must_concat(NULL, "vlan_mode=", args->vlan_mode, (char *)NULL);
441
442 if (args->vlan_id > BRIDGE_VLAN_NONE) {
443 char buf[5];
444 int rc;
445
446 rc = snprintf(buf, sizeof(buf), "%u", args->vlan_id);
447 if (rc < 0 || (size_t)rc >= sizeof(buf))
448 return log_error_errno(-1, EINVAL, "Failed to parse ovs bridge vlan \"%d\"", args->vlan_id);
449
450 tag = must_concat(NULL, "tag=", buf, (char *)NULL);
451 }
452
453 if (args->trunks)
454 trunks = must_concat(NULL, "trunks=", args->trunks, (char *)NULL);
455
456 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
457 if (tag && trunks)
458 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, trunks, (char *)NULL);
459 else if (tag)
460 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, tag, (char *)NULL);
461 else if (trunks)
462 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args->nic, vlan_mode, trunks, (char *)NULL);
463 else
464 return -EINVAL;
465
466 return -errno;
467 }
468
469 static int setup_veth_ovs_bridge_vlan(char *veth1, struct lxc_netdev *netdev)
470 {
471 int taggedLength = lxc_list_len(&netdev->priv.veth_attr.vlan_tagged_ids);
472 struct ovs_veth_vlan_args args;
473 args.nic = veth1;
474 args.vlan_mode = NULL;
475 args.vlan_id = BRIDGE_VLAN_NONE;
476 args.trunks = NULL;
477
478 /* Skip setup if no VLAN options are specified. */
479 if (!netdev->priv.veth_attr.vlan_id_set && taggedLength <= 0)
480 return 0;
481
482 /* Configure untagged VLAN settings on bridge port if specified. */
483 if (netdev->priv.veth_attr.vlan_id_set) {
484 if (netdev->priv.veth_attr.vlan_id == BRIDGE_VLAN_NONE && taggedLength <= 0)
485 return log_error_errno(-1, EINVAL, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
486
487 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
488 * Also set the vlan_mode=access, which will drop any tagged frames.
489 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
490 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
491 */
492 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
493 args.vlan_mode = "access";
494 args.vlan_id = netdev->priv.veth_attr.vlan_id;
495 }
496 }
497
498 if (taggedLength > 0) {
499 args.vlan_mode = "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
500
501 if (netdev->priv.veth_attr.vlan_id > BRIDGE_VLAN_NONE) {
502 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
503 args.vlan_mode = "native-untagged";
504 }
505
506 struct lxc_list *iterator;
507 lxc_list_for_each(iterator, &netdev->priv.veth_attr.vlan_tagged_ids) {
508 unsigned short vlan_id = PTR_TO_USHORT(iterator->elem);
509 char buf[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
510 int rc;
511
512 rc = snprintf(buf, sizeof(buf), "%u", vlan_id);
513 if (rc < 0 || (size_t)rc >= sizeof(buf)) {
514 free_ovs_veth_vlan_args(&args);
515 return log_error_errno(-1, EINVAL, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id, veth1);
516 }
517
518 if (args.trunks)
519 args.trunks = must_concat(NULL, args.trunks, buf, ",", (char *)NULL);
520 else
521 args.trunks = must_concat(NULL, buf, ",", (char *)NULL);
522 }
523 }
524
525 if (args.vlan_mode) {
526 int ret;
527 char cmd_output[PATH_MAX];
528
529 ret = run_command(cmd_output, sizeof(cmd_output), lxc_ovs_setup_bridge_vlan_exec, (void *)&args);
530 if (ret < 0) {
531 free_ovs_veth_vlan_args(&args);
532 return log_error_errno(-1, ret, "Failed to setup openvswitch vlan on port \"%s\": %s", args.nic, cmd_output);
533 }
534 }
535
536 free_ovs_veth_vlan_args(&args);
537 return 0;
538 }
539
540 static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
541 {
542 int err;
543 unsigned int mtu = 1500;
544 char *veth1, *veth2;
545 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
546
547 err = validate_veth(netdev);
548 if (err)
549 return err;
550
551 if (!is_empty_string(netdev->priv.veth_attr.pair)) {
552 veth1 = netdev->priv.veth_attr.pair;
553 if (handler->conf->reboot)
554 lxc_netdev_delete_by_name(veth1);
555 } else {
556 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
557 if (err < 0 || (size_t)err >= sizeof(veth1buf))
558 return -1;
559
560 veth1 = lxc_ifname_alnum_case_sensitive(veth1buf);
561 if (!veth1)
562 return -1;
563
564 /* store away for deconf */
565 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
566 }
567
568 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
569 if (err < 0 || (size_t)err >= sizeof(veth2buf))
570 return -1;
571
572 veth2 = lxc_ifname_alnum_case_sensitive(veth2buf);
573 if (!veth2)
574 return -1;
575
576 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
577 if (netdev->mtu) {
578 if (lxc_safe_uint(netdev->mtu, &mtu))
579 return log_error_errno(-1, errno, "Failed to parse mtu");
580 } else if (!is_empty_string(netdev->link)) {
581 int ifindex_mtu;
582
583 ifindex_mtu = if_nametoindex(netdev->link);
584 if (ifindex_mtu) {
585 mtu = netdev_get_mtu(ifindex_mtu);
586 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
587 }
588 }
589
590 err = lxc_veth_create(veth1, veth2, handler->pid, mtu);
591 if (err)
592 return log_error_errno(-1, -err, "Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
593
594 strlcpy(netdev->created_name, veth2, IFNAMSIZ);
595
596 /* changing the high byte of the mac address to 0xfe, the bridge interface
597 * will always keep the host's mac address and not take the mac address
598 * of a container */
599 err = setup_private_host_hw_addr(veth1);
600 if (err) {
601 errno = -err;
602 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
603 goto out_delete;
604 }
605
606 /* Retrieve ifindex of the host's veth device. */
607 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
608 if (!netdev->priv.veth_attr.ifindex) {
609 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
610 goto out_delete;
611 }
612
613 if (mtu) {
614 err = lxc_netdev_set_mtu(veth1, mtu);
615 if (err) {
616 errno = -err;
617 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu, veth1);
618 goto out_delete;
619 }
620 }
621
622 if (!is_empty_string(netdev->link) && netdev->priv.veth_attr.mode == VETH_MODE_BRIDGE) {
623 if (!lxc_nic_exists(netdev->link)) {
624 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1, netdev->link);
625 goto out_delete;
626 }
627
628 err = lxc_bridge_attach(netdev->link, veth1);
629 if (err) {
630 errno = -err;
631 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1, netdev->link);
632 goto out_delete;
633 }
634 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
635
636 if (is_ovs_bridge(netdev->link)) {
637 err = setup_veth_ovs_bridge_vlan(veth1, netdev);
638 if (err) {
639 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1);
640 lxc_ovs_delete_port(netdev->link, veth1);
641 goto out_delete;
642 }
643 } else {
644 err = setup_veth_native_bridge_vlan(veth1, netdev);
645 if (err) {
646 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1);
647 goto out_delete;
648 }
649 }
650 }
651
652 err = lxc_netdev_up(veth1);
653 if (err) {
654 errno = -err;
655 SYSERROR("Failed to set \"%s\" up", veth1);
656 goto out_delete;
657 }
658
659 /* setup ipv4 routes on the host interface */
660 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
661 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
662 goto out_delete;
663 }
664
665 /* setup ipv6 routes on the host interface */
666 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
667 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
668 goto out_delete;
669 }
670
671 if (netdev->priv.veth_attr.mode == VETH_MODE_ROUTER) {
672 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
673 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
674 appear to be added successfully but then do not appear in the proxy list. The length of time
675 slept doesn't appear to be important, only that the process sleeps for a short period of time.
676 */
677 nanosleep((const struct timespec[]){{0, 1000}}, NULL);
678
679 if (netdev->ipv4_gateway) {
680 char bufinet4[INET_ADDRSTRLEN];
681 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4))) {
682 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1);
683 goto out_delete;
684 }
685
686 err = lxc_ip_forwarding_on(veth1, AF_INET);
687 if (err) {
688 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1);
689 goto out_delete;
690 }
691
692 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, netdev->priv.veth_attr.ifindex, netdev->ipv4_gateway);
693 if (err) {
694 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1);
695 goto out_delete;
696 }
697 }
698
699 if (netdev->ipv6_gateway) {
700 char bufinet6[INET6_ADDRSTRLEN];
701
702 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6))) {
703 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1);
704 goto out_delete;
705 }
706
707 /* Check for sysctl net.ipv6.conf.all.forwarding=1
708 Kernel requires this to route any packets for IPv6.
709 */
710 err = lxc_is_ip_forwarding_enabled("all", AF_INET6);
711 if (err) {
712 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
713 goto out_delete;
714 }
715
716 err = lxc_ip_forwarding_on(veth1, AF_INET6);
717 if (err) {
718 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1);
719 goto out_delete;
720 }
721
722 err = lxc_neigh_proxy_on(veth1, AF_INET6);
723 if (err) {
724 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1);
725 goto out_delete;
726 }
727
728 err = lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, netdev->priv.veth_attr.ifindex, netdev->ipv6_gateway);
729 if (err) {
730 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1);
731 goto out_delete;
732 }
733 }
734
735 /* setup ipv4 address routes on the host interface */
736 err = setup_ipv4_addr_routes(&netdev->ipv4, netdev->priv.veth_attr.ifindex);
737 if (err) {
738 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
739 goto out_delete;
740 }
741
742 /* setup ipv6 address routes on the host interface */
743 err = setup_ipv6_addr_routes(&netdev->ipv6, netdev->priv.veth_attr.ifindex);
744 if (err) {
745 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1);
746 goto out_delete;
747 }
748 }
749
750 if (netdev->upscript) {
751 char *argv[] = {
752 "veth",
753 netdev->link,
754 veth1,
755 NULL,
756 };
757
758 err = run_script_argv(handler->name,
759 handler->conf->hooks_version, "net",
760 netdev->upscript, "up", argv);
761 if (err < 0)
762 goto out_delete;
763 }
764
765 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1, veth2);
766
767 return 0;
768
769 out_delete:
770 lxc_netdev_delete_by_name(veth1);
771 return -1;
772 }
773
774 static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
775 {
776 char peer[IFNAMSIZ];
777 int err;
778
779 if (is_empty_string(netdev->link)) {
780 ERROR("No link for macvlan network device specified");
781 return -1;
782 }
783
784 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
785 if (err < 0 || (size_t)err >= sizeof(peer))
786 return -1;
787
788 if (!lxc_ifname_alnum_case_sensitive(peer))
789 return -1;
790
791 err = lxc_macvlan_create(netdev->link, peer,
792 netdev->priv.macvlan_attr.mode);
793 if (err) {
794 errno = -err;
795 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
796 peer, netdev->link);
797 goto on_error;
798 }
799
800 strlcpy(netdev->created_name, peer, IFNAMSIZ);
801
802 netdev->ifindex = if_nametoindex(peer);
803 if (!netdev->ifindex) {
804 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
805 goto on_error;
806 }
807
808 if (netdev->mtu) {
809 unsigned int mtu;
810
811 err = lxc_safe_uint(netdev->mtu, &mtu);
812 if (err < 0) {
813 errno = -err;
814 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
815 goto on_error;
816 }
817
818 err = lxc_netdev_set_mtu(peer, mtu);
819 if (err < 0) {
820 errno = -err;
821 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
822 goto on_error;
823 }
824 }
825
826 if (netdev->upscript) {
827 char *argv[] = {
828 "macvlan",
829 netdev->link,
830 NULL,
831 };
832
833 err = run_script_argv(handler->name,
834 handler->conf->hooks_version, "net",
835 netdev->upscript, "up", argv);
836 if (err < 0)
837 goto on_error;
838 }
839
840 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
841 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
842
843 return 0;
844
845 on_error:
846 lxc_netdev_delete_by_name(peer);
847 return -1;
848 }
849
850 static int lxc_ipvlan_create(const char *parent, const char *name, int mode, int isolation)
851 {
852 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
853 struct nl_handler nlh;
854 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
855 int err, index, len;
856 struct ifinfomsg *ifi;
857 struct rtattr *nest, *nest2;
858
859 len = strlen(parent);
860 if (len == 1 || len >= IFNAMSIZ)
861 return ret_errno(EINVAL);
862
863 len = strlen(name);
864 if (len == 1 || len >= IFNAMSIZ)
865 return ret_errno(EINVAL);
866
867 index = if_nametoindex(parent);
868 if (!index)
869 return ret_errno(EINVAL);
870
871 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
872 if (err)
873 return err;
874
875 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
876 if (!nlmsg)
877 return ret_errno(ENOMEM);
878
879 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
880 if (!answer)
881 return ret_errno(ENOMEM);
882
883 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
884 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
885
886 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
887 if (!ifi)
888 return ret_errno(ENOMEM);
889 ifi->ifi_family = AF_UNSPEC;
890
891 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
892 if (!nest)
893 return ret_errno(EPROTO);
894
895 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
896 return ret_errno(EPROTO);
897
898 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
899 if (!nest2)
900 return ret_errno(EPROTO);
901
902 if (nla_put_u16(nlmsg, IFLA_IPVLAN_MODE, mode))
903 return ret_errno(EPROTO);
904
905 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
906 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
907 * according to ipvlan docs.
908 */
909 if (isolation > 0 && nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
910 return ret_errno(EPROTO);
911
912 nla_end_nested(nlmsg, nest2);
913 nla_end_nested(nlmsg, nest);
914
915 if (nla_put_u32(nlmsg, IFLA_LINK, index))
916 return ret_errno(EPROTO);
917
918 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
919 return ret_errno(EPROTO);
920
921 return netlink_transaction(nlh_ptr, nlmsg, answer);
922 }
923
924 static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
925 {
926 char peer[IFNAMSIZ];
927 int err;
928
929 if (is_empty_string(netdev->link)) {
930 ERROR("No link for ipvlan network device specified");
931 return -1;
932 }
933
934 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
935 if (err < 0 || (size_t)err >= sizeof(peer))
936 return -1;
937
938 if (!lxc_ifname_alnum_case_sensitive(peer))
939 return -1;
940
941 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
942 netdev->priv.ipvlan_attr.isolation);
943 if (err) {
944 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
945 peer, netdev->link);
946 goto on_error;
947 }
948
949 strlcpy(netdev->created_name, peer, IFNAMSIZ);
950
951 netdev->ifindex = if_nametoindex(peer);
952 if (!netdev->ifindex) {
953 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
954 goto on_error;
955 }
956
957 if (netdev->mtu) {
958 unsigned int mtu;
959
960 err = lxc_safe_uint(netdev->mtu, &mtu);
961 if (err < 0) {
962 errno = -err;
963 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
964 goto on_error;
965 }
966
967 err = lxc_netdev_set_mtu(peer, mtu);
968 if (err < 0) {
969 errno = -err;
970 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
971 goto on_error;
972 }
973 }
974
975 if (netdev->upscript) {
976 char *argv[] = {
977 "ipvlan",
978 netdev->link,
979 NULL,
980 };
981
982 err = run_script_argv(handler->name, handler->conf->hooks_version,
983 "net", netdev->upscript, "up", argv);
984 if (err < 0)
985 goto on_error;
986 }
987
988 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
989 netdev->ifindex, netdev->priv.macvlan_attr.mode);
990
991 return 0;
992
993 on_error:
994 lxc_netdev_delete_by_name(peer);
995 return -1;
996 }
997
998 static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
999 {
1000 char peer[IFNAMSIZ];
1001 int err;
1002 static uint16_t vlan_cntr = 0;
1003
1004 if (is_empty_string(netdev->link)) {
1005 ERROR("No link for vlan network device specified");
1006 return -1;
1007 }
1008
1009 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
1010 netdev->priv.vlan_attr.vid, vlan_cntr++);
1011 if (err < 0 || (size_t)err >= sizeof(peer))
1012 return -1;
1013
1014 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
1015 if (err) {
1016 errno = -err;
1017 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1018 peer, netdev->link);
1019 return -1;
1020 }
1021
1022 strlcpy(netdev->created_name, peer, IFNAMSIZ);
1023
1024 netdev->ifindex = if_nametoindex(peer);
1025 if (!netdev->ifindex) {
1026 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
1027 goto on_error;
1028 }
1029
1030 if (netdev->mtu) {
1031 unsigned int mtu;
1032
1033 err = lxc_safe_uint(netdev->mtu, &mtu);
1034 if (err < 0) {
1035 errno = -err;
1036 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
1037 goto on_error;
1038 }
1039
1040 err = lxc_netdev_set_mtu(peer, mtu);
1041 if (err < 0) {
1042 errno = -err;
1043 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
1044 goto on_error;
1045 }
1046 }
1047
1048 if (netdev->upscript) {
1049 char *argv[] = {
1050 "vlan",
1051 netdev->link,
1052 NULL,
1053 };
1054
1055 err = run_script_argv(handler->name, handler->conf->hooks_version,
1056 "net", netdev->upscript, "up", argv);
1057 if (err < 0) {
1058 goto on_error;
1059 }
1060 }
1061
1062 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
1063 netdev->ifindex);
1064
1065 return 0;
1066
1067 on_error:
1068 lxc_netdev_delete_by_name(peer);
1069 return -1;
1070 }
1071
1072 static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1073 {
1074 int err, mtu_orig = 0;
1075
1076 if (is_empty_string(netdev->link))
1077 return log_error_errno(-1, errno, "No link for physical interface specified");
1078
1079 /*
1080 * Note that we're retrieving the container's ifindex in the host's
1081 * network namespace because we need it to move the device from the
1082 * host's network namespace to the container's network namespace later
1083 * on.
1084 * Note that netdev->link will contain the name of the physical network
1085 * device in the host's namespace.
1086 */
1087 netdev->ifindex = if_nametoindex(netdev->link);
1088 if (!netdev->ifindex)
1089 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\"", netdev->link);
1090
1091 strlcpy(netdev->created_name, netdev->link, IFNAMSIZ);
1092 if (is_empty_string(netdev->name))
1093 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
1094
1095 /*
1096 * Store the ifindex of the host's network device in the host's
1097 * namespace.
1098 */
1099 netdev->priv.phys_attr.ifindex = netdev->ifindex;
1100
1101 /*
1102 * Get original device MTU setting and store for restoration after
1103 * container shutdown.
1104 */
1105 mtu_orig = netdev_get_mtu(netdev->ifindex);
1106 if (mtu_orig < 0)
1107 return log_error_errno(-1, -mtu_orig, "Failed to get original mtu for interface \"%s\"", netdev->link);
1108
1109 netdev->priv.phys_attr.mtu = mtu_orig;
1110
1111 if (netdev->mtu) {
1112 unsigned int mtu;
1113
1114 err = lxc_safe_uint(netdev->mtu, &mtu);
1115 if (err < 0)
1116 return log_error_errno(-1, -err, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
1117
1118 err = lxc_netdev_set_mtu(netdev->link, mtu);
1119 if (err < 0)
1120 return log_error_errno(-1, -err, "Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
1121 }
1122
1123 if (netdev->upscript) {
1124 char *argv[] = {
1125 "phys",
1126 netdev->link,
1127 NULL,
1128 };
1129
1130 err = run_script_argv(handler->name, handler->conf->hooks_version,
1131 "net", netdev->upscript, "up", argv);
1132 if (err < 0)
1133 return -1;
1134 }
1135
1136 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link,
1137 netdev->ifindex);
1138
1139 return 0;
1140 }
1141
1142 static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1143 {
1144 int ret;
1145 char *argv[] = {
1146 "empty",
1147 NULL,
1148 };
1149
1150 netdev->ifindex = 0;
1151 if (!netdev->upscript)
1152 return 0;
1153
1154 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1155 "net", netdev->upscript, "up", argv);
1156 if (ret < 0)
1157 return -1;
1158
1159 return 0;
1160 }
1161
1162 static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
1163 {
1164 netdev->ifindex = 0;
1165 return 0;
1166 }
1167
1168 static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
1169 [LXC_NET_VETH] = instantiate_veth,
1170 [LXC_NET_MACVLAN] = instantiate_macvlan,
1171 [LXC_NET_IPVLAN] = instantiate_ipvlan,
1172 [LXC_NET_VLAN] = instantiate_vlan,
1173 [LXC_NET_PHYS] = instantiate_phys,
1174 [LXC_NET_EMPTY] = instantiate_empty,
1175 [LXC_NET_NONE] = instantiate_none,
1176 };
1177
1178 static int __instantiate_ns_common(struct lxc_netdev *netdev)
1179 {
1180 char current_ifname[IFNAMSIZ];
1181
1182 netdev->ifindex = if_nametoindex(netdev->created_name);
1183 if (!netdev->ifindex)
1184 return log_error_errno(-1,
1185 errno, "Failed to retrieve ifindex for network device with name %s",
1186 netdev->created_name);
1187
1188 if (is_empty_string(netdev->name))
1189 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
1190
1191 if (strcmp(netdev->created_name, netdev->name) != 0) {
1192 int ret;
1193
1194 ret = lxc_netdev_rename_by_name(netdev->created_name, netdev->name);
1195 if (ret)
1196 return log_error_errno(-1, -ret, "Failed to rename network device \"%s\" to \"%s\"",
1197 netdev->created_name,
1198 netdev->name);
1199
1200 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev->created_name, netdev->name);
1201 }
1202
1203 /*
1204 * Re-read the name of the interface because its name has changed and
1205 * would be automatically allocated by the system
1206 */
1207 if (!if_indextoname(netdev->ifindex, current_ifname))
1208 return log_error_errno(-1, errno, "Failed get name for network device with ifindex %d", netdev->ifindex);
1209
1210 /*
1211 * Now update the recorded name of the network device to reflect the
1212 * name of the network device in the child's network namespace. We will
1213 * later on send this information back to the parent.
1214 */
1215 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
1216
1217 return 0;
1218 }
1219
/* veth devices only need the shared second-stage setup. */
static int instantiate_ns_veth(struct lxc_netdev *netdev)
{
	int ret = __instantiate_ns_common(netdev);

	return ret;
}
1225
/* macvlan devices only need the shared second-stage setup. */
static int instantiate_ns_macvlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_ns_common(netdev);

	return ret;
}
1230
/* ipvlan devices only need the shared second-stage setup. */
static int instantiate_ns_ipvlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_ns_common(netdev);

	return ret;
}
1235
/* vlan devices only need the shared second-stage setup. */
static int instantiate_ns_vlan(struct lxc_netdev *netdev)
{
	int ret = __instantiate_ns_common(netdev);

	return ret;
}
1240
/* Physical devices only need the shared second-stage setup. */
static int instantiate_ns_phys(struct lxc_netdev *netdev)
{
	int ret = __instantiate_ns_common(netdev);

	return ret;
}
1245
/* "empty" networks have no device, so there is nothing to set up. */
static int instantiate_ns_empty(struct lxc_netdev *netdev)
{
	(void)netdev;

	return 0;
}
1250
/* "none" networks have no device, so there is nothing to set up. */
static int instantiate_ns_none(struct lxc_netdev *netdev)
{
	(void)netdev;

	return 0;
}
1255
1256 static instantiate_ns_cb netdev_ns_conf[LXC_NET_MAXCONFTYPE + 1] = {
1257 [LXC_NET_VETH] = instantiate_ns_veth,
1258 [LXC_NET_MACVLAN] = instantiate_ns_macvlan,
1259 [LXC_NET_IPVLAN] = instantiate_ns_ipvlan,
1260 [LXC_NET_VLAN] = instantiate_ns_vlan,
1261 [LXC_NET_PHYS] = instantiate_ns_phys,
1262 [LXC_NET_EMPTY] = instantiate_ns_empty,
1263 [LXC_NET_NONE] = instantiate_ns_none,
1264 };
1265
1266 static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
1267 {
1268 int ret;
1269 char *argv[] = {
1270 "veth",
1271 netdev->link,
1272 NULL,
1273 NULL,
1274 };
1275
1276 if (!netdev->downscript)
1277 return 0;
1278
1279 if (!is_empty_string(netdev->priv.veth_attr.pair))
1280 argv[2] = netdev->priv.veth_attr.pair;
1281 else
1282 argv[2] = netdev->priv.veth_attr.veth1;
1283
1284 ret = run_script_argv(handler->name,
1285 handler->conf->hooks_version, "net",
1286 netdev->downscript, "down", argv);
1287 if (ret < 0)
1288 return -1;
1289
1290 return 0;
1291 }
1292
1293 static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1294 {
1295 int ret;
1296 char *argv[] = {
1297 "macvlan",
1298 netdev->link,
1299 NULL,
1300 };
1301
1302 if (!netdev->downscript)
1303 return 0;
1304
1305 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1306 "net", netdev->downscript, "down", argv);
1307 if (ret < 0)
1308 return -1;
1309
1310 return 0;
1311 }
1312
1313 static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1314 {
1315 int ret;
1316 char *argv[] = {
1317 "ipvlan",
1318 netdev->link,
1319 NULL,
1320 };
1321
1322 if (!netdev->downscript)
1323 return 0;
1324
1325 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1326 "net", netdev->downscript, "down", argv);
1327 if (ret < 0)
1328 return -1;
1329
1330 return 0;
1331 }
1332
1333 static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
1334 {
1335 int ret;
1336 char *argv[] = {
1337 "vlan",
1338 netdev->link,
1339 NULL,
1340 };
1341
1342 if (!netdev->downscript)
1343 return 0;
1344
1345 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1346 "net", netdev->downscript, "down", argv);
1347 if (ret < 0)
1348 return -1;
1349
1350 return 0;
1351 }
1352
1353 static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
1354 {
1355 int ret;
1356 char *argv[] = {
1357 "phys",
1358 netdev->link,
1359 NULL,
1360 };
1361
1362 if (!netdev->downscript)
1363 return 0;
1364
1365 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1366 "net", netdev->downscript, "down", argv);
1367 if (ret < 0)
1368 return -1;
1369
1370 return 0;
1371 }
1372
1373 static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
1374 {
1375 int ret;
1376 char *argv[] = {
1377 "empty",
1378 NULL,
1379 };
1380
1381 if (!netdev->downscript)
1382 return 0;
1383
1384 ret = run_script_argv(handler->name, handler->conf->hooks_version,
1385 "net", netdev->downscript, "down", argv);
1386 if (ret < 0)
1387 return -1;
1388
1389 return 0;
1390 }
1391
/* "none" networks need no teardown. Always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
1396
1397 static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
1398 [LXC_NET_VETH] = shutdown_veth,
1399 [LXC_NET_MACVLAN] = shutdown_macvlan,
1400 [LXC_NET_IPVLAN] = shutdown_ipvlan,
1401 [LXC_NET_VLAN] = shutdown_vlan,
1402 [LXC_NET_PHYS] = shutdown_phys,
1403 [LXC_NET_EMPTY] = shutdown_empty,
1404 [LXC_NET_NONE] = shutdown_none,
1405 };
1406
1407 static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
1408 {
1409 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
1410 struct nl_handler nlh;
1411 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1412 int err;
1413 struct ifinfomsg *ifi;
1414
1415 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1416 if (err)
1417 return err;
1418
1419 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1420 if (!nlmsg)
1421 return ret_errno(ENOMEM);
1422
1423 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1424 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1425
1426 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1427 if (!ifi)
1428 return ret_errno(ENOMEM);
1429
1430 ifi->ifi_family = AF_UNSPEC;
1431 ifi->ifi_index = ifindex;
1432
1433 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
1434 return ret_errno(ENOMEM);
1435
1436 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
1437 return ret_errno(ENOMEM);
1438
1439 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
1440 }
1441
1442 int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
1443 {
1444 call_cleaner(nlmsg_free) struct nlmsg *nlmsg = NULL;
1445 struct nl_handler nlh;
1446 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1447 int err;
1448 struct ifinfomsg *ifi;
1449
1450 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1451 if (err)
1452 return err;
1453
1454 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1455 if (!nlmsg)
1456 return ret_errno(ENOMEM);
1457
1458 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1459 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1460
1461 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1462 if (!ifi)
1463 return ret_errno(ENOMEM);
1464
1465 ifi->ifi_family = AF_UNSPEC;
1466 ifi->ifi_index = ifindex;
1467
1468 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
1469 return ret_errno(ENOMEM);
1470
1471 if (!is_empty_string(ifname) && nla_put_string(nlmsg, IFLA_IFNAME, ifname))
1472 return ret_errno(ENOMEM);
1473
1474 return netlink_transaction(nlh_ptr, nlmsg, nlmsg);
1475 }
1476
1477 /* If we are asked to move a wireless interface, then we must actually move its
1478 * phyN device. Detect that condition and return the physname here. The physname
1479 * will be passed to lxc_netdev_move_wlan() which will free it when done.
1480 */
1481 #define PHYSNAME "/sys/class/net/%s/phy80211/name"
1482 char *is_wlan(const char *ifname)
1483 {
1484 __do_fclose FILE *f = NULL;
1485 __do_free char *path = NULL, *physname = NULL;
1486 int i, ret;
1487 long physlen;
1488 size_t len;
1489
1490 len = strlen(ifname) + strlen(PHYSNAME) - 1;
1491 path = must_realloc(NULL, len + 1);
1492 ret = snprintf(path, len, PHYSNAME, ifname);
1493 if (ret < 0 || (size_t)ret >= len)
1494 return NULL;
1495
1496 f = fopen(path, "re");
1497 if (!f)
1498 return NULL;
1499
1500 /* Feh - sb.st_size is always 4096. */
1501 fseek(f, 0, SEEK_END);
1502 physlen = ftell(f);
1503 fseek(f, 0, SEEK_SET);
1504 if (physlen < 0)
1505 return NULL;
1506
1507 physname = malloc(physlen + 1);
1508 if (!physname)
1509 return NULL;
1510
1511 memset(physname, 0, physlen + 1);
1512 ret = fread(physname, 1, physlen, f);
1513 if (ret < 0)
1514 return NULL;
1515
1516 for (i = 0; i < physlen; i++) {
1517 if (physname[i] == '\n')
1518 physname[i] = '\0';
1519
1520 if (physname[i] == '\0')
1521 break;
1522 }
1523
1524 return move_ptr(physname);
1525 }
1526
/*
 * Rename network device @old to @new inside the network namespace of process
 * @pid. The work is done in a forked child that switches namespaces and then
 * performs the rename.
 *
 * Returns -1 if fork() fails, otherwise the result of waiting for the child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child: it must never return into the caller's code, otherwise two
	 * processes would continue running the parent's logic. Always leave
	 * via _exit().
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1544
1545 int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
1546 const char *newname)
1547 {
1548 __do_free char *cmd = NULL;
1549 pid_t fpid;
1550
1551 /* Move phyN into the container. TODO - do this using netlink.
1552 * However, IIUC this involves a bit more complicated work to talk to
1553 * the 80211 module, so for now just call out to iw.
1554 */
1555 cmd = on_path("iw", NULL);
1556 if (!cmd)
1557 return -1;
1558
1559 fpid = fork();
1560 if (fpid < 0)
1561 return -1;
1562
1563 if (fpid == 0) {
1564 char pidstr[30];
1565 sprintf(pidstr, "%d", pid);
1566 execlp("iw", "iw", "phy", physname, "set", "netns", pidstr, (char *)NULL);
1567 _exit(EXIT_FAILURE);
1568 }
1569
1570 if (wait_for_pid(fpid))
1571 return -1;
1572
1573 if (newname)
1574 return lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
1575
1576 return 0;
1577 }
1578
1579 int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
1580 {
1581 __do_free char *physname = NULL;
1582 int index;
1583
1584 if (!ifname)
1585 return -EINVAL;
1586
1587 index = if_nametoindex(ifname);
1588 if (!index)
1589 return -EINVAL;
1590
1591 physname = is_wlan(ifname);
1592 if (physname)
1593 return lxc_netdev_move_wlan(physname, ifname, pid, newname);
1594
1595 return lxc_netdev_move_by_index(index, pid, newname);
1596 }
1597
1598 int lxc_netdev_delete_by_index(int ifindex)
1599 {
1600 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1601 struct nl_handler nlh;
1602 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1603 int err;
1604 struct ifinfomsg *ifi;
1605
1606 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1607 if (err)
1608 return err;
1609
1610 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1611 if (!nlmsg)
1612 return ret_errno(ENOMEM);
1613
1614 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1615 if (!answer)
1616 return ret_errno(ENOMEM);
1617
1618 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1619 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1620
1621 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1622 if (!ifi)
1623 return ret_errno(ENOMEM);
1624
1625 ifi->ifi_family = AF_UNSPEC;
1626 ifi->ifi_index = ifindex;
1627
1628 return netlink_transaction(nlh_ptr, nlmsg, answer);
1629 }
1630
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL if no such device is found, otherwise the result of
 * lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	return ifindex ? lxc_netdev_delete_by_index(ifindex) : -EINVAL;
}
1641
1642 int lxc_netdev_rename_by_index(int ifindex, const char *newname)
1643 {
1644 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1645 struct nl_handler nlh;
1646 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1647 int err, len;
1648 struct ifinfomsg *ifi;
1649
1650 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1651 if (err)
1652 return err;
1653
1654 len = strlen(newname);
1655 if (len == 1 || len >= IFNAMSIZ)
1656 return ret_errno(EINVAL);
1657
1658 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1659 if (!nlmsg)
1660 return ret_errno(ENOMEM);
1661
1662 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1663 if (!answer)
1664 return ret_errno(ENOMEM);
1665
1666 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1667 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1668
1669 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1670 if (!ifi)
1671 return ret_errno(ENOMEM);
1672
1673 ifi->ifi_family = AF_UNSPEC;
1674 ifi->ifi_index = ifindex;
1675
1676 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1677 return ret_errno(ENOMEM);
1678
1679 return netlink_transaction(nlh_ptr, nlmsg, answer);
1680 }
1681
1682 int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1683 {
1684 int len, index;
1685
1686 len = strlen(oldname);
1687 if (len == 1 || len >= IFNAMSIZ)
1688 return -EINVAL;
1689
1690 index = if_nametoindex(oldname);
1691 if (!index)
1692 return -EINVAL;
1693
1694 return lxc_netdev_rename_by_index(index, newname);
1695 }
1696
1697 int netdev_set_flag(const char *name, int flag)
1698 {
1699 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1700 struct nl_handler nlh;
1701 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1702 int err, index, len;
1703 struct ifinfomsg *ifi;
1704
1705 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1706 if (err)
1707 return err;
1708
1709 len = strlen(name);
1710 if (len == 1 || len >= IFNAMSIZ)
1711 return ret_errno(EINVAL);
1712
1713 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1714 if (!nlmsg)
1715 return ret_errno(ENOMEM);
1716
1717 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1718 if (!answer)
1719 return ret_errno(ENOMEM);
1720
1721 index = if_nametoindex(name);
1722 if (!index)
1723 return ret_errno(EINVAL);
1724
1725 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1726 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1727
1728 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1729 if (!ifi)
1730 return ret_errno(ENOMEM);
1731
1732 ifi->ifi_family = AF_UNSPEC;
1733 ifi->ifi_index = index;
1734 ifi->ifi_change |= IFF_UP;
1735 ifi->ifi_flags |= flag;
1736
1737 return netlink_transaction(nlh_ptr, nlmsg, answer);
1738 }
1739
1740 static int netdev_get_flag(const char *name, int *flag)
1741 {
1742 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1743 struct nl_handler nlh;
1744 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1745 int err, index, len;
1746 struct ifinfomsg *ifi;
1747
1748 if (!name)
1749 return ret_errno(EINVAL);
1750
1751 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1752 if (err)
1753 return err;
1754
1755 len = strlen(name);
1756 if (len == 1 || len >= IFNAMSIZ)
1757 return ret_errno(EINVAL);
1758
1759 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1760 if (!nlmsg)
1761 return ret_errno(ENOMEM);
1762
1763 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1764 if (!answer)
1765 return ret_errno(ENOMEM);
1766
1767 index = if_nametoindex(name);
1768 if (!index)
1769 return ret_errno(EINVAL);
1770
1771 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1772 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1773
1774 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1775 if (!ifi)
1776 return ret_errno(ENOMEM);
1777
1778 ifi->ifi_family = AF_UNSPEC;
1779 ifi->ifi_index = index;
1780
1781 err = netlink_transaction(nlh_ptr, nlmsg, answer);
1782 if (err)
1783 return ret_set_errno(-1, errno);
1784
1785 ifi = NLMSG_DATA(answer->nlmsghdr);
1786
1787 *flag = ifi->ifi_flags;
1788 return err;
1789 }
1790
1791 /*
1792 * \brief Check a interface is up or not.
1793 *
1794 * \param name: name for the interface.
1795 *
1796 * \return int.
1797 * 0 means interface is down.
1798 * 1 means interface is up.
1799 * Others means error happened, and ret-value is the error number.
1800 */
1801 int lxc_netdev_isup(const char *name)
1802 {
1803 int err, flag;
1804
1805 err = netdev_get_flag(name, &flag);
1806 if (err)
1807 return err;
1808
1809 if (flag & IFF_UP)
1810 return 1;
1811
1812 return 0;
1813 }
1814
1815 int netdev_get_mtu(int ifindex)
1816 {
1817 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1818 struct nl_handler nlh;
1819 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1820 int readmore = 0, recv_len = 0;
1821 int answer_len, err, res;
1822 struct ifinfomsg *ifi;
1823 struct nlmsghdr *msg;
1824
1825 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1826 if (err)
1827 return err;
1828
1829 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1830 if (!nlmsg)
1831 return ret_errno(ENOMEM);
1832
1833 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1834 if (!answer)
1835 return ret_errno(ENOMEM);
1836
1837 /* Save the answer buffer length, since it will be overwritten
1838 * on the first receive (and we might need to receive more than
1839 * once.
1840 */
1841 answer_len = answer->nlmsghdr->nlmsg_len;
1842
1843 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
1844 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1845
1846 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1847 if (!ifi)
1848 return ret_errno(ENOMEM);
1849
1850 ifi->ifi_family = AF_UNSPEC;
1851
1852 /* Send the request for addresses, which returns all addresses
1853 * on all interfaces. */
1854 err = netlink_send(nlh_ptr, nlmsg);
1855 if (err < 0)
1856 return ret_set_errno(-1, errno);
1857
1858 #pragma GCC diagnostic push
1859 #pragma GCC diagnostic ignored "-Wcast-align"
1860
1861 do {
1862 /* Restore the answer buffer length, it might have been
1863 * overwritten by a previous receive.
1864 */
1865 answer->nlmsghdr->nlmsg_len = answer_len;
1866
1867 /* Get the (next) batch of reply messages */
1868 err = netlink_rcv(nlh_ptr, answer);
1869 if (err < 0)
1870 return ret_set_errno(-1, errno);
1871
1872 recv_len = err;
1873
1874 /* Satisfy the typing for the netlink macros */
1875 msg = answer->nlmsghdr;
1876
1877 while (NLMSG_OK(msg, recv_len)) {
1878 /* Stop reading if we see an error message */
1879 if (msg->nlmsg_type == NLMSG_ERROR) {
1880 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
1881 return ret_set_errno(errmsg->error, errno);
1882 }
1883
1884 /* Stop reading if we see a NLMSG_DONE message */
1885 if (msg->nlmsg_type == NLMSG_DONE) {
1886 readmore = 0;
1887 break;
1888 }
1889
1890 ifi = NLMSG_DATA(msg);
1891 if (ifi->ifi_index == ifindex) {
1892 struct rtattr *rta = IFLA_RTA(ifi);
1893 int attr_len = msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1894
1895 res = 0;
1896 while (RTA_OK(rta, attr_len)) {
1897 /*
1898 * Found a local address for the
1899 * requested interface, return it.
1900 */
1901 if (rta->rta_type == IFLA_MTU) {
1902 memcpy(&res, RTA_DATA(rta), sizeof(int));
1903 return res;
1904 }
1905
1906 rta = RTA_NEXT(rta, attr_len);
1907 }
1908 }
1909
1910 /* Keep reading more data from the socket if the last
1911 * message had the NLF_F_MULTI flag set.
1912 */
1913 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1914
1915 /* Look at the next message received in this buffer. */
1916 msg = NLMSG_NEXT(msg, recv_len);
1917 }
1918 } while (readmore);
1919
1920 #pragma GCC diagnostic pop
1921
1922 /* If we end up here, we didn't find any result, so signal an error. */
1923 return -1;
1924 }
1925
1926 int lxc_netdev_set_mtu(const char *name, int mtu)
1927 {
1928 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1929 struct nl_handler nlh;
1930 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1931 int err, len;
1932 struct ifinfomsg *ifi;
1933
1934 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1935 if (err)
1936 return err;
1937
1938 len = strlen(name);
1939 if (len == 1 || len >= IFNAMSIZ)
1940 return ret_errno(EINVAL);
1941
1942 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1943 if (!nlmsg)
1944 return ret_errno(ENOMEM);
1945
1946 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1947 if (!answer)
1948 return ret_errno(ENOMEM);
1949
1950 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1951 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1952
1953 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1954 if (!ifi)
1955 return ret_errno(ENOMEM);
1956
1957 ifi->ifi_family = AF_UNSPEC;
1958
1959 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1960 return ret_errno(ENOMEM);
1961
1962 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1963 return ret_errno(ENOMEM);
1964
1965 return netlink_transaction(nlh_ptr, nlmsg, answer);
1966 }
1967
1968 int lxc_netdev_up(const char *name)
1969 {
1970 return netdev_set_flag(name, IFF_UP);
1971 }
1972
/*
 * Take the device @name down: netdev_set_flag() is called with no flag bits
 * set.
 */
int lxc_netdev_down(const char *name)
{
	int ret = netdev_set_flag(name, 0);

	return ret;
}
1977
1978 int lxc_veth_create(const char *name1, const char *name2, pid_t pid, unsigned int mtu)
1979 {
1980 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
1981 struct nl_handler nlh;
1982 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
1983 int err, len;
1984 struct ifinfomsg *ifi;
1985 struct rtattr *nest1, *nest2, *nest3;
1986
1987 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
1988 if (err)
1989 return err;
1990
1991 len = strlen(name1);
1992 if (len == 1 || len >= IFNAMSIZ)
1993 return ret_errno(EINVAL);
1994
1995 len = strlen(name2);
1996 if (len == 1 || len >= IFNAMSIZ)
1997 return ret_errno(EINVAL);
1998
1999 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2000 if (!nlmsg)
2001 return ret_errno(ENOMEM);
2002
2003 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2004 if (!answer)
2005 return ret_errno(ENOMEM);
2006
2007 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
2008 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2009
2010 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
2011 if (!ifi)
2012 return ret_errno(ENOMEM);
2013
2014 ifi->ifi_family = AF_UNSPEC;
2015
2016 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
2017 if (!nest1)
2018 return ret_errno(EINVAL);
2019
2020 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
2021 return ret_errno(ENOMEM);
2022
2023 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2024 if (!nest2)
2025 return ret_errno(ENOMEM);
2026
2027 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
2028 if (!nest3)
2029 return ret_errno(ENOMEM);
2030
2031 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
2032 if (!ifi)
2033 return ret_errno(ENOMEM);
2034
2035 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
2036 return ret_errno(ENOMEM);
2037
2038 if (mtu > 0 && nla_put_u32(nlmsg, IFLA_MTU, mtu))
2039 return ret_errno(ENOMEM);
2040
2041 if (pid > 0 && nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
2042 return ret_errno(ENOMEM);
2043
2044 nla_end_nested(nlmsg, nest3);
2045 nla_end_nested(nlmsg, nest2);
2046 nla_end_nested(nlmsg, nest1);
2047
2048 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
2049 return ret_errno(ENOMEM);
2050
2051 return netlink_transaction(nlh_ptr, nlmsg, answer);
2052 }
2053
2054 /* TODO: merge with lxc_macvlan_create */
2055 int lxc_vlan_create(const char *parent, const char *name, unsigned short vlanid)
2056 {
2057 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2058 struct nl_handler nlh;
2059 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2060 int err, len, lindex;
2061 struct ifinfomsg *ifi;
2062 struct rtattr *nest, *nest2;
2063
2064 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2065 if (err)
2066 return err;
2067
2068 len = strlen(parent);
2069 if (len == 1 || len >= IFNAMSIZ)
2070 return ret_errno(EINVAL);
2071
2072 len = strlen(name);
2073 if (len == 1 || len >= IFNAMSIZ)
2074 return ret_errno(EINVAL);
2075
2076 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2077 if (!nlmsg)
2078 return ret_errno(ENOMEM);
2079
2080 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2081 if (!answer)
2082 return ret_errno(ENOMEM);
2083
2084 lindex = if_nametoindex(parent);
2085 if (!lindex)
2086 return ret_errno(EINVAL);
2087
2088 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
2089 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2090
2091 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
2092 if (!ifi)
2093 return ret_errno(ENOMEM);
2094
2095 ifi->ifi_family = AF_UNSPEC;
2096
2097 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
2098 if (!nest)
2099 return ret_errno(ENOMEM);
2100
2101 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
2102 return ret_errno(ENOMEM);
2103
2104 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2105 if (!nest2)
2106 return ret_errno(ENOMEM);
2107
2108 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
2109 return ret_errno(ENOMEM);
2110
2111 nla_end_nested(nlmsg, nest2);
2112 nla_end_nested(nlmsg, nest);
2113
2114 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
2115 return ret_errno(ENOMEM);
2116
2117 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
2118 return ret_errno(ENOMEM);
2119
2120 return netlink_transaction(nlh_ptr, nlmsg, answer);
2121 }
2122
2123 int lxc_macvlan_create(const char *parent, const char *name, int mode)
2124 {
2125 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2126 struct nl_handler nlh;
2127 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2128 int err, index, len;
2129 struct ifinfomsg *ifi;
2130 struct rtattr *nest, *nest2;
2131
2132 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2133 if (err)
2134 return err;
2135
2136 len = strlen(parent);
2137 if (len == 1 || len >= IFNAMSIZ)
2138 return ret_errno(EINVAL);
2139
2140 len = strlen(name);
2141 if (len == 1 || len >= IFNAMSIZ)
2142 return ret_errno(EINVAL);
2143
2144 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2145 if (!nlmsg)
2146 return ret_errno(ENOMEM);
2147
2148 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2149 if (!answer)
2150 return ret_errno(ENOMEM);
2151
2152 index = if_nametoindex(parent);
2153 if (!index)
2154 return ret_errno(EINVAL);
2155
2156 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
2157 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
2158
2159 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
2160 if (!ifi)
2161 return ret_errno(ENOMEM);
2162
2163 ifi->ifi_family = AF_UNSPEC;
2164
2165 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
2166 if (!nest)
2167 return ret_errno(ENOMEM);
2168
2169 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
2170 return ret_errno(ENOMEM);
2171
2172 if (mode) {
2173 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
2174 if (!nest2)
2175 return ret_errno(ENOMEM);
2176
2177 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
2178 return ret_errno(ENOMEM);
2179
2180 nla_end_nested(nlmsg, nest2);
2181 }
2182
2183 nla_end_nested(nlmsg, nest);
2184
2185 if (nla_put_u32(nlmsg, IFLA_LINK, index))
2186 return ret_errno(ENOMEM);
2187
2188 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
2189 return ret_errno(ENOMEM);
2190
2191 return netlink_transaction(nlh_ptr, nlmsg, answer);
2192 }
2193
/*
 * Write the string @value to the file at @path.
 *
 * Returns 0 on success and -errno if the file cannot be opened or written.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	/* Capture errno before close() gets a chance to overwrite it. */
	if (lxc_write_nointr(fd, value, strlen(value)) < 0)
		ret = -errno;

	close(fd);

	return ret;
}
2209
2210 static int ip_forwarding_set(const char *ifname, int family, int flag)
2211 {
2212 int ret;
2213 char path[PATH_MAX];
2214
2215 if (family != AF_INET && family != AF_INET6)
2216 return -EINVAL;
2217
2218 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2219 family == AF_INET ? "ipv4" : "ipv6", ifname, "forwarding");
2220 if (ret < 0 || (size_t)ret >= sizeof(path))
2221 return -E2BIG;
2222
2223 return proc_sys_net_write(path, flag ? "1" : "0");
2224 }
2225
/* Enable forwarding for interface @name in the given address @family. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	const int enable = 1;

	return ip_forwarding_set(name, family, enable);
}
2230
/* Disable forwarding for interface @name in the given address @family. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	const int disable = 0;

	return ip_forwarding_set(name, family, disable);
}
2235
2236 static int neigh_proxy_set(const char *ifname, int family, int flag)
2237 {
2238 int ret;
2239 char path[PATH_MAX];
2240
2241 if (family != AF_INET && family != AF_INET6)
2242 return -EINVAL;
2243
2244 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2245 family == AF_INET ? "ipv4" : "ipv6", ifname,
2246 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2247 if (ret < 0 || (size_t)ret >= sizeof(path))
2248 return -E2BIG;
2249
2250 return proc_sys_net_write(path, flag ? "1" : "0");
2251 }
2252
2253 static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
2254 {
2255 int ret;
2256 char path[PATH_MAX];
2257 char buf[1] = "";
2258
2259 if (family != AF_INET && family != AF_INET6)
2260 return ret_set_errno(-1, EINVAL);
2261
2262 ret = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
2263 family == AF_INET ? "ipv4" : "ipv6", ifname,
2264 family == AF_INET ? "proxy_arp" : "proxy_ndp");
2265 if (ret < 0 || (size_t)ret >= sizeof(path))
2266 return ret_set_errno(-1, E2BIG);
2267
2268 return lxc_read_file_expect(path, buf, 1, "1");
2269 }
2270
/* Enable the neighbour proxy sysctl for interface @name and @family. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
2275
/* Disable the neighbour proxy sysctl for interface @name and @family. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int disable = 0;

	return neigh_proxy_set(name, family, disable);
}
2280
/*
 * Parse the textual MAC address @macaddr into @sockaddr.
 *
 * @sockaddr->sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are
 * stored in @sockaddr->sa_data. Each byte is written as one or two hex digits
 * (upper or lower case), and bytes are separated by ':'.
 *
 * Parsing stops at the end of the string or after ETH_ALEN bytes, whichever
 * comes first; the number of bytes parsed is not checked.
 *
 * Returns 0 on success and -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	int i = 0;
	unsigned val;
	char c;
	unsigned char *data;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		/* High nibble: must be a hex digit. */
		c = *macaddr++;
		/* <ctype.h> functions require an unsigned char value. */
		if (isdigit((unsigned char)c))
			val = c - '0';
		else if (c >= 'a' && c <= 'f')
			val = c - 'a' + 10;
		else if (c >= 'A' && c <= 'F')
			val = c - 'A' + 10;
		else
			return -EINVAL;

		/*
		 * Low nibble: a hex digit, or a separator / end of string in
		 * which case the byte was written with a single digit.
		 */
		val <<= 4;
		c = *macaddr;
		if (isdigit((unsigned char)c))
			val |= c - '0';
		else if (c >= 'a' && c <= 'f')
			val |= c - 'a' + 10;
		else if (c >= 'A' && c <= 'F')
			val |= c - 'A' + 10;
		else if (c == ':' || c == 0)
			val >>= 4;
		else
			return -EINVAL;
		if (c != 0)
			macaddr++;
		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator following a two-digit byte. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
2325
2326 static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
2327 void *acast, int prefix)
2328 {
2329 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2330 struct nl_handler nlh;
2331 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2332 int addrlen, err;
2333 struct ifaddrmsg *ifa;
2334
2335 addrlen = family == AF_INET ? sizeof(struct in_addr)
2336 : sizeof(struct in6_addr);
2337
2338 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2339 if (err)
2340 return err;
2341
2342 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2343 if (!nlmsg)
2344 return ret_errno(ENOMEM);
2345
2346 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2347 if (!answer)
2348 return ret_errno(ENOMEM);
2349
2350 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2351 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
2352
2353 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
2354 if (!ifa)
2355 return ret_errno(ENOMEM);
2356
2357 ifa->ifa_prefixlen = prefix;
2358 ifa->ifa_index = ifindex;
2359 ifa->ifa_family = family;
2360 ifa->ifa_scope = 0;
2361
2362 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
2363 return ret_errno(EINVAL);
2364
2365 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
2366 return ret_errno(EINVAL);
2367
2368 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
2369 return ret_errno(EINVAL);
2370
2371 /* TODO: multicast, anycast with ipv6 */
2372 if (family == AF_INET6 &&
2373 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
2374 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
2375 return ret_errno(EPROTONOSUPPORT);
2376
2377 return netlink_transaction(nlh_ptr, nlmsg, answer);
2378 }
2379
/*
 * Add the IPv6 address @addr/@prefix to the interface with index @ifindex.
 * @mcast is handed to ip_addr_add() in its broadcast slot, @acast in its
 * anycast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	int ret;

	ret = ip_addr_add(AF_INET6, ifindex, addr, mcast, acast, prefix);
	return ret;
}
2386
/*
 * Add the IPv4 address @addr/@prefix with broadcast address @bcast to the
 * interface with index @ifindex. No anycast address is passed.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	int ret;

	ret = ip_addr_add(AF_INET, ifindex, addr, bcast, NULL, prefix);
	return ret;
}
2392
/* Extract the IFA_LOCAL address (or the IFA_ADDRESS one if no IFA_LOCAL is
 * present) from the given RTM_NEWADDR message. The address is copied into
 * *res; if *res is NULL a buffer is allocated for it first (so res should be
 * an in_addr** or in6_addr**).
 *
 * Returns 0 when the message was processed (also when its family does not
 * match and *res is left untouched) and -1 on a malformed attribute or when
 * the allocation fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *hdr = NLMSG_DATA(msg);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *attr;
	int addrlen;

	if (hdr->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk all rtattr's carried by this message. */
	for (attr = IFA_RTA(hdr); RTA_OK(attr, remaining); attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the right
		 * payload size, but verify it before copying anyway.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* An earlier IFA_ADDRESS may already have filled the buffer;
		 * in that case it is simply overwritten.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is what we really want, nothing left to do. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2446
2447 static int ip_addr_get(int family, int ifindex, void **res)
2448 {
2449 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2450 struct nl_handler nlh;
2451 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2452 int answer_len, err;
2453 struct ifaddrmsg *ifa;
2454 struct nlmsghdr *msg;
2455 int readmore = 0, recv_len = 0;
2456
2457 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2458 if (err)
2459 return err;
2460
2461 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2462 if (!nlmsg)
2463 return ret_errno(ENOMEM);
2464
2465 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2466 if (!answer)
2467 return ret_errno(ENOMEM);
2468
2469 /* Save the answer buffer length, since it will be overwritten on the
2470 * first receive (and we might need to receive more than once).
2471 */
2472 answer_len = answer->nlmsghdr->nlmsg_len;
2473
2474 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
2475 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
2476
2477 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
2478 if (!ifa)
2479 return ret_errno(ENOMEM);
2480
2481 ifa->ifa_family = family;
2482
2483 /* Send the request for addresses, which returns all addresses on all
2484 * interfaces.
2485 */
2486 err = netlink_send(nlh_ptr, nlmsg);
2487 if (err < 0)
2488 return ret_set_errno(err, errno);
2489
2490 #pragma GCC diagnostic push
2491 #pragma GCC diagnostic ignored "-Wcast-align"
2492
2493 do {
2494 /* Restore the answer buffer length, it might have been
2495 * overwritten by a previous receive.
2496 */
2497 answer->nlmsghdr->nlmsg_len = answer_len;
2498
2499 /* Get the (next) batch of reply messages. */
2500 err = netlink_rcv(nlh_ptr, answer);
2501 if (err < 0)
2502 return ret_set_errno(err, errno);
2503
2504 recv_len = err;
2505 err = 0;
2506
2507 /* Satisfy the typing for the netlink macros. */
2508 msg = answer->nlmsghdr;
2509
2510 while (NLMSG_OK(msg, recv_len)) {
2511 /* Stop reading if we see an error message. */
2512 if (msg->nlmsg_type == NLMSG_ERROR) {
2513 struct nlmsgerr *errmsg = (struct nlmsgerr *)NLMSG_DATA(msg);
2514 return ret_set_errno(errmsg->error, errno);
2515 }
2516
2517 /* Stop reading if we see a NLMSG_DONE message. */
2518 if (msg->nlmsg_type == NLMSG_DONE) {
2519 readmore = 0;
2520 break;
2521 }
2522
2523 if (msg->nlmsg_type != RTM_NEWADDR)
2524 return ret_errno(EINVAL);
2525
2526 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2527 if (ifa->ifa_index == ifindex) {
2528 if (ifa_get_local_ip(family, msg, res) < 0)
2529 return ret_errno(EINVAL);
2530
2531 /* Found a result, stop searching. */
2532 if (*res)
2533 return 0;
2534 }
2535
2536 /* Keep reading more data from the socket if the last
2537 * message had the NLF_F_MULTI flag set.
2538 */
2539 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2540
2541 /* Look at the next message received in this buffer. */
2542 msg = NLMSG_NEXT(msg, recv_len);
2543 }
2544 } while (readmore);
2545
2546 #pragma GCC diagnostic pop
2547
2548 /* If we end up here, we didn't find any result, so signal an
2549 * error.
2550 */
2551 return -1;
2552 }
2553
/* Look up an IPv6 address of the interface with index @ifindex; see ip_addr_get(). */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2558
/* Look up an IPv4 address of the interface with index @ifindex; see ip_addr_get(). */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2563
2564 static int ip_gateway_add(int family, int ifindex, void *gw)
2565 {
2566 call_cleaner(nlmsg_free) struct nlmsg *answer = NULL, *nlmsg = NULL;
2567 struct nl_handler nlh;
2568 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
2569 int addrlen, err;
2570 struct rtmsg *rt;
2571
2572 addrlen = family == AF_INET ? sizeof(struct in_addr)
2573 : sizeof(struct in6_addr);
2574
2575 err = netlink_open(nlh_ptr, NETLINK_ROUTE);
2576 if (err)
2577 return err;
2578
2579 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2580 if (!nlmsg)
2581 return ret_errno(ENOMEM);
2582
2583 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2584 if (!answer)
2585 return ret_errno(ENOMEM);
2586
2587 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2588 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2589
2590 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
2591 if (!rt)
2592 return ret_errno(ENOMEM);
2593
2594 rt->rtm_family = family;
2595 rt->rtm_table = RT_TABLE_MAIN;
2596 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2597 rt->rtm_protocol = RTPROT_BOOT;
2598 rt->rtm_type = RTN_UNICAST;
2599 /* "default" destination */
2600 rt->rtm_dst_len = 0;
2601
2602 /* If gateway address not supplied, then a device route will be created instead */
2603 if (gw && nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2604 return ret_errno(ENOMEM);
2605
2606 /* Adding the interface index enables the use of link-local
2607 * addresses for the gateway.
2608 */
2609 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2610 return ret_errno(EINVAL);
2611
2612 return netlink_transaction(nlh_ptr, nlmsg, answer);
2613 }
2614
/* Add an IPv4 default route via @gw on the interface with index @ifindex. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	int ret;

	ret = ip_gateway_add(AF_INET, ifindex, gw);
	return ret;
}
2619
/* Add an IPv6 default route via @gw on the interface with index @ifindex. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	int ret;

	ret = ip_gateway_add(AF_INET6, ifindex, gw);
	return ret;
}
/*
 * Report whether @bridge should be treated as an openvswitch bridge.
 *
 * The test is that /sys/class/net/<bridge>/bridge does not exist: true is
 * returned only when stat() on that path fails with ENOENT. A name too long
 * for the path buffer, a successful stat() or any other stat() error yield
 * false.
 */
bool is_ovs_bridge(const char *bridge)
{
	char sysfs_path[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(sysfs_path, sizeof(sysfs_path), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(sysfs_path))
		return false;

	if (stat(sysfs_path, &st) < 0)
		return errno == ENOENT;

	return false;
}
2641
/* Arguments handed to the ovs-vsctl exec callbacks through run_command(). */
struct ovs_veth_args {
	/* Name of the openvswitch bridge. */
	const char *bridge;
	/* Name of the port (network interface) to add to or delete from it. */
	const char *nic;
};
2646
2647 /* Called from a background thread - when nic goes away, remove it from the
2648 * bridge.
2649 */
2650 static int lxc_ovs_delete_port_exec(void *data)
2651 {
2652 struct ovs_veth_args *args = data;
2653
2654 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic, (char *)NULL);
2655 return -1;
2656 }
2657
2658 int lxc_ovs_delete_port(const char *bridge, const char *nic)
2659 {
2660 int ret;
2661 char cmd_output[PATH_MAX];
2662 struct ovs_veth_args args;
2663
2664 args.bridge = bridge;
2665 args.nic = nic;
2666 ret = run_command(cmd_output, sizeof(cmd_output),
2667 lxc_ovs_delete_port_exec, (void *)&args);
2668 if (ret < 0)
2669 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
2670
2671 return 0;
2672 }
2673
2674 static int lxc_ovs_attach_bridge_exec(void *data)
2675 {
2676 struct ovs_veth_args *args = data;
2677
2678 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic, (char *)NULL);
2679 return -1;
2680 }
2681
2682 static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2683 {
2684 int ret;
2685 char cmd_output[PATH_MAX];
2686 struct ovs_veth_args args;
2687
2688 args.bridge = bridge;
2689 args.nic = nic;
2690 ret = run_command(cmd_output, sizeof(cmd_output),
2691 lxc_ovs_attach_bridge_exec, (void *)&args);
2692 if (ret < 0)
2693 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic, bridge, cmd_output);
2694
2695 return 0;
2696 }
2697
2698 int lxc_bridge_attach(const char *bridge, const char *ifname)
2699 {
2700 int err, fd, index;
2701 size_t retlen;
2702 struct ifreq ifr;
2703
2704 if (strlen(ifname) >= IFNAMSIZ)
2705 return -EINVAL;
2706
2707 index = if_nametoindex(ifname);
2708 if (!index)
2709 return -EINVAL;
2710
2711 if (is_ovs_bridge(bridge))
2712 return lxc_ovs_attach_bridge(bridge, ifname);
2713
2714 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
2715 if (fd < 0)
2716 return -errno;
2717
2718 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
2719 if (retlen >= IFNAMSIZ) {
2720 close(fd);
2721 return -E2BIG;
2722 }
2723
2724 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2725 ifr.ifr_ifindex = index;
2726 err = ioctl(fd, SIOCBRADDIF, &ifr);
2727 close(fd);
2728 if (err)
2729 err = -errno;
2730
2731 return err;
2732 }
2733
2734 static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
2735 [LXC_NET_EMPTY] = "empty",
2736 [LXC_NET_VETH] = "veth",
2737 [LXC_NET_MACVLAN] = "macvlan",
2738 [LXC_NET_IPVLAN] = "ipvlan",
2739 [LXC_NET_PHYS] = "phys",
2740 [LXC_NET_VLAN] = "vlan",
2741 [LXC_NET_NONE] = "none",
2742 };
2743
2744 const char *lxc_net_type_to_str(int type)
2745 {
2746 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2747 return NULL;
2748
2749 return lxc_network_types[type];
2750 }
2751
2752 static const char padchar[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
2753
2754 char *lxc_ifname_alnum_case_sensitive(char *template)
2755 {
2756 char name[IFNAMSIZ];
2757 size_t i = 0;
2758 #ifdef HAVE_RAND_R
2759 unsigned int seed;
2760
2761 seed = randseed(false);
2762 #else
2763
2764 (void)randseed(true);
2765 #endif
2766
2767 if (strlen(template) >= IFNAMSIZ)
2768 return NULL;
2769
2770 /* Generate random names until we find one that doesn't exist. */
2771 for (;;) {
2772 name[0] = '\0';
2773 (void)strlcpy(name, template, IFNAMSIZ);
2774
2775 for (i = 0; i < strlen(name); i++) {
2776 if (name[i] == 'X') {
2777 #ifdef HAVE_RAND_R
2778 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
2779 #else
2780 name[i] = padchar[rand() % strlen(padchar)];
2781 #endif
2782 }
2783 }
2784
2785 if (if_nametoindex(name) == 0)
2786 break;
2787 }
2788
2789 (void)strlcpy(template, name, strlen(template) + 1);
2790
2791 return template;
2792 }
2793
2794 int setup_private_host_hw_addr(char *veth1)
2795 {
2796 int err, sockfd;
2797 struct ifreq ifr;
2798
2799 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
2800 if (sockfd < 0)
2801 return -errno;
2802
2803 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2804 if (err < 0 || (size_t)err >= IFNAMSIZ) {
2805 close(sockfd);
2806 return -E2BIG;
2807 }
2808
2809 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2810 if (err < 0) {
2811 close(sockfd);
2812 return -errno;
2813 }
2814
2815 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2816 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
2817 close(sockfd);
2818 if (err < 0)
2819 return -errno;
2820
2821 return 0;
2822 }
2823
2824 int lxc_find_gateway_addresses(struct lxc_handler *handler)
2825 {
2826 struct lxc_list *network = &handler->conf->network;
2827 struct lxc_list *iterator;
2828 struct lxc_netdev *netdev;
2829 int link_index;
2830
2831 lxc_list_for_each(iterator, network) {
2832 netdev = iterator->elem;
2833
2834 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2835 continue;
2836
2837 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN)
2838 return log_error_errno(-1, EINVAL, "Automatic gateway detection is only supported for veth and macvlan");
2839
2840 if (is_empty_string(netdev->link)) {
2841 return log_error_errno(-1, errno, "Automatic gateway detection needs a link interface");
2842 }
2843
2844 link_index = if_nametoindex(netdev->link);
2845 if (!link_index)
2846 return -EINVAL;
2847
2848 if (netdev->ipv4_gateway_auto) {
2849 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway))
2850 return log_error_errno(-1, errno, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev->link);
2851 }
2852
2853 if (netdev->ipv6_gateway_auto) {
2854 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway))
2855 return log_error_errno(-1, errno, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev->link);
2856 }
2857 }
2858
2859 return 0;
2860 }
2861
2862 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2863 static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
2864 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
2865 {
2866 int ret;
2867 pid_t child;
2868 int bytes, pipefd[2];
2869 char *token, *saveptr = NULL;
2870 char netdev_link[IFNAMSIZ];
2871 char buffer[PATH_MAX] = {0};
2872 size_t retlen;
2873
2874 if (netdev->type != LXC_NET_VETH)
2875 return log_error_errno(-1, errno, "Network type %d not support for unprivileged use", netdev->type);
2876
2877 ret = pipe(pipefd);
2878 if (ret < 0)
2879 return log_error_errno(-1, errno, "Failed to create pipe");
2880
2881 child = fork();
2882 if (child < 0) {
2883 close(pipefd[0]);
2884 close(pipefd[1]);
2885 return log_error_errno(-1, errno, "Failed to create new process");
2886 }
2887
2888 if (child == 0) {
2889 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
2890
2891 close(pipefd[0]);
2892
2893 ret = dup2(pipefd[1], STDOUT_FILENO);
2894 if (ret >= 0)
2895 ret = dup2(pipefd[1], STDERR_FILENO);
2896 close(pipefd[1]);
2897 if (ret < 0) {
2898 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2899 _exit(EXIT_FAILURE);
2900 }
2901
2902 if (!is_empty_string(netdev->link))
2903 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
2904 else
2905 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2906 if (retlen >= IFNAMSIZ) {
2907 SYSERROR("Invalid network device name");
2908 _exit(EXIT_FAILURE);
2909 }
2910
2911 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2912 if (ret < 0 || ret >= sizeof(pidstr))
2913 _exit(EXIT_FAILURE);
2914 pidstr[sizeof(pidstr) - 1] = '\0';
2915
2916 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2917 lxcname, pidstr, netdev_link,
2918 !is_empty_string(netdev->name) ? netdev->name : "(null)");
2919 if (!is_empty_string(netdev->name))
2920 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2921 lxcpath, lxcname, pidstr, "veth", netdev_link,
2922 netdev->name, (char *)NULL);
2923 else
2924 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2925 lxcpath, lxcname, pidstr, "veth", netdev_link,
2926 (char *)NULL);
2927 SYSERROR("Failed to execute lxc-user-nic");
2928 _exit(EXIT_FAILURE);
2929 }
2930
2931 /* close the write-end of the pipe */
2932 close(pipefd[1]);
2933
2934 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
2935 if (bytes < 0) {
2936 SYSERROR("Failed to read from pipe file descriptor");
2937 close(pipefd[0]);
2938 } else {
2939 buffer[bytes - 1] = '\0';
2940 }
2941
2942 ret = wait_for_pid(child);
2943 close(pipefd[0]);
2944 if (ret != 0 || bytes < 0)
2945 return log_error(-1, "lxc-user-nic failed to configure requested network: %s", buffer[0] != '\0' ? buffer : "(null)");
2946 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2947
2948 /* netdev->name */
2949 token = strtok_r(buffer, ":", &saveptr);
2950 if (!token)
2951 return log_error(-1, "Failed to parse lxc-user-nic output");
2952
2953 /*
2954 * lxc-user-nic will take care of proper network device naming. So
2955 * netdev->name and netdev->created_name need to be identical to not
2956 * trigger another rename later on.
2957 */
2958 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2959 if (retlen < IFNAMSIZ)
2960 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2961 if (retlen >= IFNAMSIZ)
2962 return log_error_errno(-1, E2BIG, "Container side veth device name returned by lxc-user-nic is too long");
2963
2964 /* netdev->ifindex */
2965 token = strtok_r(NULL, ":", &saveptr);
2966 if (!token)
2967 return log_error(-1, "Failed to parse lxc-user-nic output");
2968
2969 ret = lxc_safe_int(token, &netdev->ifindex);
2970 if (ret < 0)
2971 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
2972
2973 /* netdev->priv.veth_attr.veth1 */
2974 token = strtok_r(NULL, ":", &saveptr);
2975 if (!token)
2976 return log_error(-1, "Failed to parse lxc-user-nic output");
2977
2978 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2979 if (retlen >= IFNAMSIZ)
2980 return log_error_errno(-1, E2BIG, "Host side veth device name returned by lxc-user-nic is too long");
2981
2982 /* netdev->priv.veth_attr.ifindex */
2983 token = strtok_r(NULL, ":", &saveptr);
2984 if (!token)
2985 return log_error(-1, "Failed to parse lxc-user-nic output");
2986
2987 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
2988 if (ret < 0)
2989 return log_error_errno(-1, -ret, "Failed to convert string \"%s\" to integer", token);
2990
2991 if (netdev->upscript) {
2992 char *argv[] = {
2993 "veth",
2994 netdev->link,
2995 netdev->priv.veth_attr.veth1,
2996 NULL,
2997 };
2998
2999 ret = run_script_argv(lxcname, hooks_version, "net",
3000 netdev->upscript, "up", argv);
3001 if (ret < 0)
3002 return -1;
3003 }
3004
3005 return 0;
3006 }
3007
3008 static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
3009 struct lxc_netdev *netdev,
3010 const char *netns_path)
3011 {
3012 int bytes, ret;
3013 pid_t child;
3014 int pipefd[2];
3015 char buffer[PATH_MAX] = {};
3016
3017 if (netdev->type != LXC_NET_VETH)
3018 return log_error_errno(-1, EINVAL, "Network type %d not support for unprivileged use", netdev->type);
3019
3020 ret = pipe(pipefd);
3021 if (ret < 0)
3022 return log_error_errno(-1, errno, "Failed to create pipe");
3023
3024 child = fork();
3025 if (child < 0) {
3026 close(pipefd[0]);
3027 close(pipefd[1]);
3028 return log_error_errno(-1, errno, "Failed to create new process");
3029 }
3030
3031 if (child == 0) {
3032 char *hostveth;
3033
3034 close(pipefd[0]);
3035
3036 ret = dup2(pipefd[1], STDOUT_FILENO);
3037 if (ret >= 0)
3038 ret = dup2(pipefd[1], STDERR_FILENO);
3039 close(pipefd[1]);
3040 if (ret < 0) {
3041 SYSERROR("Failed to duplicate std{err,out} file descriptor");
3042 _exit(EXIT_FAILURE);
3043 }
3044
3045 if (!is_empty_string(netdev->priv.veth_attr.pair))
3046 hostveth = netdev->priv.veth_attr.pair;
3047 else
3048 hostveth = netdev->priv.veth_attr.veth1;
3049 if (is_empty_string(hostveth)) {
3050 SYSERROR("Host side veth device name is missing");
3051 _exit(EXIT_FAILURE);
3052 }
3053
3054 if (is_empty_string(netdev->link)) {
3055 SYSERROR("Network link for network device \"%s\" is missing", netdev->priv.veth_attr.veth1);
3056 _exit(EXIT_FAILURE);
3057 }
3058
3059 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
3060 lxcname, netns_path, netdev->link, hostveth);
3061 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
3062 lxcname, netns_path, "veth", netdev->link, hostveth,
3063 (char *)NULL);
3064 SYSERROR("Failed to exec lxc-user-nic.");
3065 _exit(EXIT_FAILURE);
3066 }
3067
3068 close(pipefd[1]);
3069
3070 bytes = lxc_read_nointr(pipefd[0], &buffer, sizeof(buffer));
3071 if (bytes < 0) {
3072 SYSERROR("Failed to read from pipe file descriptor.");
3073 close(pipefd[0]);
3074 } else {
3075 buffer[bytes - 1] = '\0';
3076 }
3077
3078 ret = wait_for_pid(child);
3079 close_prot_errno_disarm(pipefd[0]);
3080 if (ret != 0 || bytes < 0)
3081 return log_error_errno(-1, errno, "lxc-user-nic failed to delete requested network: %s",
3082 !is_empty_string(buffer) ? buffer : "(null)");
3083
3084 return 0;
3085 }
3086
3087 static bool lxc_delete_network_unpriv(struct lxc_handler *handler)
3088 {
3089 int ret;
3090 struct lxc_list *iterator;
3091 struct lxc_list *network = &handler->conf->network;
3092 /* strlen("/proc/") = 6
3093 * +
3094 * INTTYPE_TO_STRLEN(pid_t)
3095 * +
3096 * strlen("/fd/") = 4
3097 * +
3098 * INTTYPE_TO_STRLEN(int)
3099 * +
3100 * \0
3101 */
3102 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
3103
3104 *netns_path = '\0';
3105
3106 if (handler->nsfd[LXC_NS_NET] < 0)
3107 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
3108
3109 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
3110 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
3111 if (ret < 0 || ret >= sizeof(netns_path))
3112 return false;
3113
3114 lxc_list_for_each(iterator, network) {
3115 char *hostveth = NULL;
3116 struct lxc_netdev *netdev = iterator->elem;
3117
3118 /* We can only delete devices whose ifindex we have. If we don't
3119 * have the index it means that we didn't create it.
3120 */
3121 if (!netdev->ifindex)
3122 continue;
3123
3124 if (netdev->type == LXC_NET_PHYS) {
3125 ret = lxc_netdev_rename_by_index(netdev->ifindex,
3126 netdev->link);
3127 if (ret < 0)
3128 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
3129 netdev->ifindex, netdev->link);
3130 else
3131 TRACE("Renamed interface with index %d to its initial name \"%s\"",
3132 netdev->ifindex, netdev->link);
3133
3134 ret = netdev_deconf[netdev->type](handler, netdev);
3135 if (ret < 0)
3136 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3137 netdev->ifindex, netdev->link);
3138 goto clear_ifindices;
3139 }
3140
3141 ret = netdev_deconf[netdev->type](handler, netdev);
3142 if (ret < 0)
3143 WARN("Failed to deconfigure network device");
3144
3145 if (netdev->type != LXC_NET_VETH)
3146 goto clear_ifindices;
3147
3148 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link))
3149 goto clear_ifindices;
3150
3151 if (!is_empty_string(netdev->priv.veth_attr.pair))
3152 hostveth = netdev->priv.veth_attr.pair;
3153 else
3154 hostveth = netdev->priv.veth_attr.veth1;
3155 if (is_empty_string(hostveth))
3156 goto clear_ifindices;
3157
3158 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
3159 handler->name, netdev,
3160 netns_path);
3161 if (ret < 0) {
3162 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3163 goto clear_ifindices;
3164 }
3165 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3166
3167 clear_ifindices:
3168 /*
3169 * We need to clear any ifindices we recorded so liblxc won't
3170 * have cached stale data which would cause it to fail on
3171 * reboot where we don't re-read the on-disk config file.
3172 */
3173 netdev->ifindex = 0;
3174 if (netdev->type == LXC_NET_PHYS) {
3175 netdev->priv.phys_attr.ifindex = 0;
3176 } else if (netdev->type == LXC_NET_VETH) {
3177 netdev->priv.veth_attr.veth1[0] = '\0';
3178 netdev->priv.veth_attr.ifindex = 0;
3179 }
3180 }
3181
3182 return true;
3183 }
3184
3185 static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
3186 struct lxc_list *cur, *next;
3187 struct lxc_inetdev *inet4dev;
3188 struct lxc_inet6dev *inet6dev;
3189 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
3190 int err = 0;
3191 unsigned int lo_ifindex = 0, link_ifindex = 0;
3192
3193 link_ifindex = if_nametoindex(netdev->link);
3194 if (link_ifindex == 0)
3195 return log_error_errno(-1, errno, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev->link);
3196
3197
3198 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3199 if (!lxc_list_empty(&netdev->ipv4)) {
3200 /* Check for net.ipv4.conf.[link].forwarding=1 */
3201 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0)
3202 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
3203 }
3204
3205 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3206 if (!lxc_list_empty(&netdev->ipv6)) {
3207 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
3208 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0)
3209 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
3210
3211 /* Check for net.ipv6.conf.[link].forwarding=1 */
3212 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0)
3213 return log_error_errno(-1, EINVAL, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
3214 }
3215
3216 /* Perform IPVLAN specific checks. */
3217 if (netdev->type == LXC_NET_IPVLAN) {
3218 /* Check mode is l3s as other modes do not work with l2proxy. */
3219 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S)
3220 return log_error_errno(-1, EINVAL, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
3221
3222 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3223 lo_ifindex = if_nametoindex(loop_device);
3224 if (lo_ifindex == 0)
3225 return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
3226 }
3227
3228 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3229 inet4dev = cur->elem;
3230 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
3231 return ret_set_errno(-1, -errno);
3232
3233 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET, link_ifindex, &inet4dev->addr) < 0)
3234 return ret_set_errno(-1, EINVAL);
3235
3236 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3237 if (netdev->type == LXC_NET_IPVLAN) {
3238 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
3239 if (err < 0)
3240 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
3241 }
3242 }
3243
3244 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3245 inet6dev = cur->elem;
3246 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
3247 return ret_set_errno(-1, -errno);
3248
3249 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH, AF_INET6, link_ifindex, &inet6dev->addr) < 0)
3250 return ret_set_errno(-1, EINVAL);
3251
3252 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3253 if (netdev->type == LXC_NET_IPVLAN) {
3254 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
3255 if (err < 0)
3256 return log_error_errno(-1, -err, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
3257 }
3258 }
3259
3260 return 0;
3261 }
3262
/*
 * Undo the l2proxy setup for the IPv4 address @ip.
 *
 * When @lo_ifindex is non-zero the /32 route for @ip on that interface is
 * deleted. When @link is a non-empty name the neighbour proxy entry for @ip
 * on that interface is deleted. Both steps are attempted even if the first
 * one fails.
 *
 * Returns 0 on success and -1 with errno set to EINVAL if anything failed.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addr_str[INET_ADDRSTRLEN];
	unsigned int link_ifindex = 0;
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addr_str, sizeof(addr_str)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* A loopback ifindex means a static route to lo has to go away. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addr_str, lo_ifindex);
	}

	/* A link name means the proxy entry for this IP has to go away. */
	if (!is_empty_string(link)) {
		link_ifindex = if_nametoindex(link);
		if (link_ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, link_ifindex, ip) < 0)
			failed = true;
	}

	if (!failed)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3295
3296 static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
3297 {
3298 char bufinet6[INET6_ADDRSTRLEN];
3299 bool had_error = false;
3300 unsigned int link_ifindex = 0;
3301
3302 if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6)))
3303 return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
3304
3305 /* If a local-loopback ifindex supplied remove the static route to the lo device. */
3306 if (lo_ifindex > 0) {
3307 if (lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
3308 had_error = true;
3309 ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
3310 }
3311 }
3312
3313 /* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
3314 if (!is_empty_string(link)) {
3315 link_ifindex = if_nametoindex(link);
3316 if (link_ifindex == 0) {
3317 ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
3318 return ret_set_errno(-1, EINVAL);
3319 }
3320
3321 if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, link_ifindex, ip) < 0)
3322 had_error = true;
3323 }
3324
3325 if (had_error)
3326 return ret_set_errno(-1, EINVAL);
3327
3328 return 0;
3329 }
3330
3331 static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
3332 unsigned int lo_ifindex = 0;
3333 unsigned int errCount = 0;
3334 struct lxc_list *cur, *next;
3335 struct lxc_inetdev *inet4dev;
3336 struct lxc_inet6dev *inet6dev;
3337
3338 /* Perform IPVLAN specific checks. */
3339 if (netdev->type == LXC_NET_IPVLAN) {
3340 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3341 lo_ifindex = if_nametoindex(loop_device);
3342 if (lo_ifindex == 0) {
3343 errCount++;
3344 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
3345 }
3346 }
3347
3348 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3349 inet4dev = cur->elem;
3350 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3351 errCount++;
3352 }
3353
3354 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3355 inet6dev = cur->elem;
3356 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3357 errCount++;
3358 }
3359
3360 if (errCount > 0)
3361 return ret_set_errno(-1, EINVAL);
3362
3363 return 0;
3364 }
3365
3366 static int lxc_create_network_priv(struct lxc_handler *handler)
3367 {
3368 struct lxc_list *iterator;
3369 struct lxc_list *network = &handler->conf->network;
3370
3371 lxc_list_for_each(iterator, network) {
3372 struct lxc_netdev *netdev = iterator->elem;
3373
3374 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE)
3375 return log_error_errno(-1, EINVAL, "Invalid network configuration type %d", netdev->type);
3376
3377 /* Setup l2proxy entries if enabled and used with a link property */
3378 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
3379 if (lxc_setup_l2proxy(netdev))
3380 return log_error_errno(-1, errno, "Failed to setup l2proxy");
3381 }
3382
3383 if (netdev_conf[netdev->type](handler, netdev))
3384 return log_error_errno(-1, errno, "Failed to create network device");
3385 }
3386
3387 return 0;
3388 }
3389
3390 int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
3391 {
3392 pid_t pid = handler->pid;
3393 struct lxc_list *network = &handler->conf->network;
3394 struct lxc_list *iterator;
3395
3396 if (am_guest_unpriv())
3397 return 0;
3398
3399 lxc_list_for_each(iterator, network) {
3400 __do_free char *physname = NULL;
3401 int ret;
3402 struct lxc_netdev *netdev = iterator->elem;
3403
3404 if (!netdev->ifindex)
3405 continue;
3406
3407 if (netdev->type == LXC_NET_PHYS)
3408 physname = is_wlan(netdev->link);
3409
3410 if (physname)
3411 ret = lxc_netdev_move_wlan(physname, netdev->link, pid, NULL);
3412 else
3413 ret = lxc_netdev_move_by_index(netdev->ifindex, pid, NULL);
3414 if (ret)
3415 return log_error_errno(-1, -ret, "Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3416 netdev->created_name,
3417 netdev->ifindex, pid);
3418
3419 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3420 netdev->created_name, netdev->ifindex, pid);
3421 }
3422
3423 return 0;
3424 }
3425
3426 static int network_requires_advanced_setup(int type)
3427 {
3428 if (type == LXC_NET_EMPTY)
3429 return false;
3430
3431 if (type == LXC_NET_NONE)
3432 return false;
3433
3434 return true;
3435 }
3436
3437 static int lxc_create_network_unpriv(struct lxc_handler *handler)
3438 {
3439 int hooks_version = handler->conf->hooks_version;
3440 const char *lxcname = handler->name;
3441 const char *lxcpath = handler->lxcpath;
3442 struct lxc_list *network = &handler->conf->network;
3443 pid_t pid = handler->pid;
3444 struct lxc_list *iterator;
3445
3446 lxc_list_for_each(iterator, network) {
3447 struct lxc_netdev *netdev = iterator->elem;
3448
3449 if (!network_requires_advanced_setup(netdev->type))
3450 continue;
3451
3452 if (netdev->type != LXC_NET_VETH)
3453 return log_error_errno(-1, EINVAL, "Networks of type %s are not supported by unprivileged containers",
3454 lxc_net_type_to_str(netdev->type));
3455
3456 if (netdev->mtu)
3457 INFO("mtu ignored due to insufficient privilege");
3458
3459 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3460 pid, hooks_version))
3461 return -1;
3462 }
3463
3464 return 0;
3465 }
3466
3467 static bool lxc_delete_network_priv(struct lxc_handler *handler)
3468 {
3469 int ret;
3470 struct lxc_list *iterator;
3471 struct lxc_list *network = &handler->conf->network;
3472
3473 lxc_list_for_each(iterator, network) {
3474 char *hostveth = NULL;
3475 struct lxc_netdev *netdev = iterator->elem;
3476
3477 /* We can only delete devices whose ifindex we have. If we don't
3478 * have the index it means that we didn't create it.
3479 */
3480 if (!netdev->ifindex)
3481 continue;
3482
3483 /*
3484 * If the network device has been moved back from the
3485 * containers network namespace, update the ifindex.
3486 */
3487 netdev->ifindex = if_nametoindex(netdev->name);
3488
3489 /* Delete l2proxy entries if enabled and used with a link property */
3490 if (netdev->l2proxy && !is_empty_string(netdev->link)) {
3491 if (lxc_delete_l2proxy(netdev))
3492 WARN("Failed to delete all l2proxy config");
3493 /* Don't return, let the network be cleaned up as normal. */
3494 }
3495
3496 if (netdev->type == LXC_NET_PHYS) {
3497 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3498 if (ret < 0)
3499 WARN("Failed to rename interface with index %d "
3500 "from \"%s\" to its initial name \"%s\"",
3501 netdev->ifindex, netdev->name, netdev->link);
3502 else {
3503 TRACE("Renamed interface with index %d from "
3504 "\"%s\" to its initial name \"%s\"",
3505 netdev->ifindex, netdev->name,
3506 netdev->link);
3507
3508 /* Restore original MTU */
3509 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3510 if (ret < 0) {
3511 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3512 netdev->link, netdev->priv.phys_attr.mtu);
3513 } else {
3514 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3515 netdev->link, netdev->priv.phys_attr.mtu);
3516 }
3517 }
3518
3519 ret = netdev_deconf[netdev->type](handler, netdev);
3520 if (ret < 0)
3521 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3522 netdev->ifindex, netdev->link);
3523 goto clear_ifindices;
3524 }
3525
3526 ret = netdev_deconf[netdev->type](handler, netdev);
3527 if (ret < 0)
3528 WARN("Failed to deconfigure network device");
3529
3530 if (netdev->type != LXC_NET_VETH)
3531 goto clear_ifindices;
3532
3533 /* Explicitly delete host veth device to prevent lingering
3534 * devices. We had issues in LXD around this.
3535 */
3536 if (!is_empty_string(netdev->priv.veth_attr.pair))
3537 hostveth = netdev->priv.veth_attr.pair;
3538 else
3539 hostveth = netdev->priv.veth_attr.veth1;
3540 if (is_empty_string(hostveth))
3541 goto clear_ifindices;
3542
3543 if (is_empty_string(netdev->link) || !is_ovs_bridge(netdev->link)) {
3544 ret = lxc_netdev_delete_by_name(hostveth);
3545 if (ret < 0)
3546 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth, netdev->link);
3547
3548 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3549 } else if (!is_empty_string(netdev->link)) {
3550 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3551 if (ret < 0)
3552 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3553
3554 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth, netdev->link);
3555 }
3556
3557 clear_ifindices:
3558 /* We need to clear any ifindices we recorded so liblxc won't
3559 * have cached stale data which would cause it to fail on reboot
3560 * we're we don't re-read the on-disk config file.
3561 */
3562 netdev->ifindex = 0;
3563 if (netdev->type == LXC_NET_PHYS) {
3564 netdev->priv.phys_attr.ifindex = 0;
3565 } else if (netdev->type == LXC_NET_VETH) {
3566 netdev->priv.veth_attr.veth1[0] = '\0';
3567 netdev->priv.veth_attr.ifindex = 0;
3568 }
3569 }
3570
3571 return true;
3572 }
3573
3574 int lxc_requests_empty_network(struct lxc_handler *handler)
3575 {
3576 struct lxc_list *network = &handler->conf->network;
3577 struct lxc_list *iterator;
3578 bool found_none = false, found_nic = false;
3579
3580 if (lxc_list_empty(network))
3581 return 0;
3582
3583 lxc_list_for_each (iterator, network) {
3584 struct lxc_netdev *netdev = iterator->elem;
3585
3586 if (netdev->type == LXC_NET_NONE)
3587 found_none = true;
3588 else
3589 found_nic = true;
3590 }
3591
3592 if (found_none && !found_nic)
3593 return 1;
3594
3595 return 0;
3596 }
3597
3598 /* try to move physical nics to the init netns */
3599 int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
3600 {
3601 __do_close int oldfd = -EBADF;
3602 int netnsfd = handler->nsfd[LXC_NS_NET];
3603 struct lxc_conf *conf = handler->conf;
3604 int ret;
3605 char ifname[IFNAMSIZ];
3606 struct lxc_list *iterator;
3607
3608 /*
3609 * If we weren't asked to clone a new network namespace, there's
3610 * nothing to restore.
3611 */
3612 if (!(handler->ns_clone_flags & CLONE_NEWNET))
3613 return 0;
3614
3615 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3616 * the parent network namespace. We won't have this capability if we are
3617 * unprivileged.
3618 */
3619 if (!handler->am_root)
3620 return 0;
3621
3622 TRACE("Moving physical network devices back to parent network namespace");
3623
3624 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
3625 if (oldfd < 0)
3626 return log_error_errno(-1, errno, "Failed to preserve network namespace");
3627
3628 ret = setns(netnsfd, CLONE_NEWNET);
3629 if (ret < 0)
3630 return log_error_errno(-1, errno, "Failed to enter network namespace");
3631
3632 lxc_list_for_each(iterator, &conf->network) {
3633 struct lxc_netdev *netdev = iterator->elem;
3634
3635 if (netdev->type != LXC_NET_PHYS)
3636 continue;
3637
3638 /* Retrieve the name of the interface in the container's network
3639 * namespace.
3640 */
3641 if (!if_indextoname(netdev->ifindex, ifname)) {
3642 WARN("No interface corresponding to ifindex %d", netdev->ifindex);
3643 continue;
3644 }
3645
3646 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
3647 if (ret < 0)
3648 WARN("Error moving network device \"%s\" back to network namespace", ifname);
3649 else
3650 TRACE("Moved network device \"%s\" back to network namespace", ifname);
3651 }
3652
3653 ret = setns(oldfd, CLONE_NEWNET);
3654 if (ret < 0)
3655 return log_error_errno(-1, errno, "Failed to enter network namespace");
3656
3657 return 0;
3658 }
3659
3660 static int setup_hw_addr(char *hwaddr, const char *ifname)
3661 {
3662 __do_close int fd = -EBADF;
3663 struct sockaddr sockaddr;
3664 struct ifreq ifr;
3665 int ret;
3666
3667 ret = lxc_convert_mac(hwaddr, &sockaddr);
3668 if (ret)
3669 return log_error_errno(-1, -ret, "Mac address \"%s\" conversion failed", hwaddr);
3670
3671 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3672 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3673 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3674
3675 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
3676 if (fd < 0)
3677 return -1;
3678
3679 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
3680 if (ret)
3681 SYSERROR("Failed to perform ioctl");
3682
3683 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr, ifr.ifr_name);
3684
3685 return ret;
3686 }
3687
3688 static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3689 {
3690 struct lxc_list *iterator;
3691 int err;
3692
3693 lxc_list_for_each(iterator, ip) {
3694 struct lxc_inetdev *inetdev = iterator->elem;
3695
3696 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3697 &inetdev->bcast, inetdev->prefix);
3698 if (err)
3699 return log_error_errno(-1, -err, "Failed to setup ipv4 address for network device with ifindex %d", ifindex);
3700 }
3701
3702 return 0;
3703 }
3704
3705 static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3706 {
3707 struct lxc_list *iterator;
3708 int err;
3709
3710 lxc_list_for_each(iterator, ip) {
3711 struct lxc_inet6dev *inet6dev = iterator->elem;
3712
3713 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3714 &inet6dev->mcast, &inet6dev->acast,
3715 inet6dev->prefix);
3716 if (err)
3717 return log_error_errno(-1, -err, "Failed to setup ipv6 address for network device with ifindex %d", ifindex);
3718 }
3719
3720 return 0;
3721 }
3722
3723 static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev *netdev)
3724 {
3725 int err;
3726 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
3727
3728 /* empty network namespace */
3729 if (!netdev->ifindex && netdev->flags & IFF_UP) {
3730 err = lxc_netdev_up("lo");
3731 if (err)
3732 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
3733 }
3734
3735 /* set a mac address */
3736 if (netdev->hwaddr && setup_hw_addr(netdev->hwaddr, netdev->name))
3737 return log_error_errno(-1, errno, "Failed to setup hw address for network device \"%s\"", netdev->name);
3738
3739 /* setup ipv4 addresses on the interface */
3740 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex))
3741 return log_error_errno(-1, errno, "Failed to setup ip addresses for network device \"%s\"", netdev->name);
3742
3743 /* setup ipv6 addresses on the interface */
3744 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex))
3745 return log_error_errno(-1, errno, "Failed to setup ipv6 addresses for network device \"%s\"", netdev->name);
3746
3747 /* set the network device up */
3748 if (netdev->flags & IFF_UP) {
3749 err = lxc_netdev_up(netdev->name);
3750 if (err)
3751 return log_error_errno(-1, -err, "Failed to set network device \"%s\" up", netdev->name);
3752
3753 /* the network is up, make the loopback up too */
3754 err = lxc_netdev_up("lo");
3755 if (err)
3756 return log_error_errno(-1, -err, "Failed to set the loopback network device up");
3757 }
3758
3759 /* setup ipv4 gateway on the interface */
3760 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
3761 if (!(netdev->flags & IFF_UP))
3762 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
3763
3764 if (lxc_list_empty(&netdev->ipv4))
3765 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev->name);
3766
3767 /* Setup device route if ipv4_gateway_dev is enabled */
3768 if (netdev->ipv4_gateway_dev) {
3769 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3770 if (err < 0)
3771 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway to network device \"%s\"", netdev->name);
3772 } else {
3773 /* Check the gateway address is valid */
3774 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3775 return ret_set_errno(-1, errno);
3776
3777 /* Try adding a default route to the gateway address */
3778 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3779 if (err < 0) {
3780 /* If adding the default route fails, this could be because the
3781 * gateway address is in a different subnet to the container's address.
3782 * To work around this, we try adding a static device route to the
3783 * gateway address first, and then try again.
3784 */
3785 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
3786 if (err < 0)
3787 return log_error_errno(-1, -err, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, netdev->name);
3788
3789 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3790 if (err < 0)
3791 return log_error_errno(-1, -err, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4, netdev->name);
3792 }
3793 }
3794 }
3795
3796 /* setup ipv6 gateway on the interface */
3797 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
3798 if (!(netdev->flags & IFF_UP))
3799 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev->name);
3800
3801 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway))
3802 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev->name);
3803
3804 /* Setup device route if ipv6_gateway_dev is enabled */
3805 if (netdev->ipv6_gateway_dev) {
3806 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3807 if (err < 0)
3808 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway to network device \"%s\"", netdev->name);
3809 } else {
3810 /* Check the gateway address is valid */
3811 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3812 return ret_set_errno(-1, errno);
3813
3814 /* Try adding a default route to the gateway address */
3815 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3816 if (err < 0) {
3817 /* If adding the default route fails, this could be because the
3818 * gateway address is in a different subnet to the container's address.
3819 * To work around this, we try adding a static device route to the
3820 * gateway address first, and then try again.
3821 */
3822 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
3823 if (err < 0)
3824 return log_error_errno(-1, errno, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, netdev->name);
3825
3826 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3827 if (err < 0)
3828 return log_error_errno(-1, -err, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6, netdev->name);
3829 }
3830 }
3831 }
3832
3833 DEBUG("Network device \"%s\" has been setup", netdev->name);
3834
3835 return 0;
3836 }
3837
3838 int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3839 struct lxc_list *network)
3840 {
3841 struct lxc_list *iterator;
3842
3843 lxc_list_for_each (iterator, network) {
3844 struct lxc_netdev *netdev = iterator->elem;
3845 int ret;
3846
3847 ret = netdev_ns_conf[netdev->type](netdev);
3848 if (!ret)
3849 ret = lxc_network_setup_in_child_namespaces_common(netdev);
3850 if (ret)
3851 return log_error_errno(-1, errno, "Failed to setup netdev");
3852 }
3853
3854 if (!lxc_list_empty(network))
3855 INFO("Network has been setup");
3856
3857 return 0;
3858 }
3859
3860 int lxc_network_send_to_child(struct lxc_handler *handler)
3861 {
3862 struct lxc_list *iterator;
3863 struct lxc_list *network = &handler->conf->network;
3864 int data_sock = handler->data_sock[0];
3865
3866 lxc_list_for_each(iterator, network) {
3867 int ret;
3868 struct lxc_netdev *netdev = iterator->elem;
3869
3870 if (!network_requires_advanced_setup(netdev->type))
3871 continue;
3872
3873 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
3874 if (ret < 0)
3875 return -1;
3876
3877 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3878 if (ret < 0)
3879 return -1;
3880
3881 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
3882 }
3883
3884 return 0;
3885 }
3886
3887 int lxc_network_recv_from_parent(struct lxc_handler *handler)
3888 {
3889 struct lxc_list *iterator;
3890 struct lxc_list *network = &handler->conf->network;
3891 int data_sock = handler->data_sock[1];
3892
3893 lxc_list_for_each(iterator, network) {
3894 int ret;
3895 struct lxc_netdev *netdev = iterator->elem;
3896
3897 if (!network_requires_advanced_setup(netdev->type))
3898 continue;
3899
3900 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
3901 if (ret < 0)
3902 return -1;
3903
3904 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3905 if (ret < 0)
3906 return -1;
3907
3908 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
3909 }
3910
3911 return 0;
3912 }
3913
3914 int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3915 {
3916 struct lxc_list *iterator, *network;
3917 int data_sock = handler->data_sock[0];
3918
3919 if (!handler->am_root)
3920 return 0;
3921
3922 network = &handler->conf->network;
3923 lxc_list_for_each(iterator, network) {
3924 int ret;
3925 struct lxc_netdev *netdev = iterator->elem;
3926
3927 /* Send network device name in the child's namespace to parent. */
3928 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
3929 if (ret < 0)
3930 return -1;
3931
3932 /* Send network device ifindex in the child's namespace to
3933 * parent.
3934 */
3935 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
3936 if (ret < 0)
3937 return -1;
3938 }
3939
3940 if (!lxc_list_empty(network))
3941 TRACE("Sent network device names and ifindices to parent");
3942
3943 return 0;
3944 }
3945
3946 int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3947 {
3948 struct lxc_list *iterator, *network;
3949 int data_sock = handler->data_sock[1];
3950
3951 if (!handler->am_root)
3952 return 0;
3953
3954 network = &handler->conf->network;
3955 lxc_list_for_each(iterator, network) {
3956 int ret;
3957 struct lxc_netdev *netdev = iterator->elem;
3958
3959 /* Receive network device name in the child's namespace to
3960 * parent.
3961 */
3962 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
3963 if (ret < 0)
3964 return -1;
3965
3966 /* Receive network device ifindex in the child's namespace to
3967 * parent.
3968 */
3969 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
3970 if (ret < 0)
3971 return -1;
3972 }
3973
3974 return 0;
3975 }
3976
3977 void lxc_delete_network(struct lxc_handler *handler)
3978 {
3979 bool bret;
3980
3981 if (handler->am_root)
3982 bret = lxc_delete_network_priv(handler);
3983 else
3984 bret = lxc_delete_network_unpriv(handler);
3985 if (!bret)
3986 DEBUG("Failed to delete network devices");
3987 else
3988 DEBUG("Deleted network devices");
3989 }
3990
3991 int lxc_netns_set_nsid(int fd)
3992 {
3993 int ret;
3994 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3995 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3996 NLMSG_ALIGN(1024)];
3997 struct nl_handler nlh;
3998 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
3999 struct nlmsghdr *hdr;
4000 struct rtgenmsg *msg;
4001 const __s32 ns_id = -1;
4002 const __u32 netns_fd = fd;
4003
4004 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
4005 if (ret < 0)
4006 return -1;
4007
4008 memset(buf, 0, sizeof(buf));
4009
4010 #pragma GCC diagnostic push
4011 #pragma GCC diagnostic ignored "-Wcast-align"
4012 hdr = (struct nlmsghdr *)buf;
4013 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
4014 #pragma GCC diagnostic pop
4015
4016 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4017 hdr->nlmsg_type = RTM_NEWNSID;
4018 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4019 hdr->nlmsg_pid = 0;
4020 hdr->nlmsg_seq = RTM_NEWNSID;
4021 msg->rtgen_family = AF_UNSPEC;
4022
4023 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4024 if (ret < 0)
4025 return ret_errno(ENOMEM);
4026
4027 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
4028 if (ret < 0)
4029 return ret_errno(ENOMEM);
4030
4031 return __netlink_transaction(nlh_ptr, hdr, hdr);
4032 }
4033
4034 static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
4035 {
4036
4037 memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
4038
4039 while (RTA_OK(rta, len)) {
4040 unsigned short type = rta->rta_type;
4041
4042 if ((type <= max) && (!tb[type]))
4043 tb[type] = rta;
4044
4045 #pragma GCC diagnostic push
4046 #pragma GCC diagnostic ignored "-Wcast-align"
4047 rta = RTA_NEXT(rta, len);
4048 #pragma GCC diagnostic pop
4049 }
4050
4051 return 0;
4052 }
4053
4054 static inline __s32 rta_getattr_s32(const struct rtattr *rta)
4055 {
4056 return *(__s32 *)RTA_DATA(rta);
4057 }
4058
/*
 * Address of the first route attribute following the (aligned) struct
 * rtgenmsg that @r points to.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
4063
4064 int lxc_netns_get_nsid(int fd)
4065 {
4066 struct nl_handler nlh;
4067 call_cleaner(netlink_close) struct nl_handler *nlh_ptr = &nlh;
4068 int ret;
4069 ssize_t len;
4070 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
4071 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
4072 NLMSG_ALIGN(1024)];
4073 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
4074 struct nlmsghdr *hdr;
4075 struct rtgenmsg *msg;
4076 __u32 netns_fd = fd;
4077
4078 ret = netlink_open(nlh_ptr, NETLINK_ROUTE);
4079 if (ret < 0)
4080 return -1;
4081
4082 memset(buf, 0, sizeof(buf));
4083
4084 #pragma GCC diagnostic push
4085 #pragma GCC diagnostic ignored "-Wcast-align"
4086 hdr = (struct nlmsghdr *)buf;
4087 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
4088 #pragma GCC diagnostic pop
4089
4090 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
4091 hdr->nlmsg_type = RTM_GETNSID;
4092 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
4093 hdr->nlmsg_pid = 0;
4094 hdr->nlmsg_seq = RTM_GETNSID;
4095 msg->rtgen_family = AF_UNSPEC;
4096
4097 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4098 if (ret < 0)
4099 return ret_errno(ENOMEM);
4100
4101 ret = __netlink_transaction(nlh_ptr, hdr, hdr);
4102 if (ret < 0)
4103 return -1;
4104
4105 msg = NLMSG_DATA(hdr);
4106 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4107 if (len < 0)
4108 return ret_errno(EINVAL);
4109
4110 #pragma GCC diagnostic push
4111 #pragma GCC diagnostic ignored "-Wcast-align"
4112 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4113 if (tb[__LXC_NETNSA_NSID])
4114 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
4115 #pragma GCC diagnostic pop
4116
4117 return -1;
4118 }
4119
4120 int lxc_create_network(struct lxc_handler *handler)
4121 {
4122 int ret;
4123
4124 if (handler->am_root) {
4125 ret = lxc_create_network_priv(handler);
4126 if (ret)
4127 return -1;
4128
4129 return lxc_network_move_created_netdev_priv(handler);
4130 }
4131
4132 return lxc_create_network_unpriv(handler);
4133 }