]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/network.c
network: record created_name for instantiate_vlan()
[mirror_lxc.git] / src / lxc / network.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #ifndef _GNU_SOURCE
25 #define _GNU_SOURCE 1
26 #endif
27 #include <arpa/inet.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <linux/netlink.h>
32 #include <linux/rtnetlink.h>
33 #include <linux/sockios.h>
34 #include <net/ethernet.h>
35 #include <net/if.h>
36 #include <net/if_arp.h>
37 #include <netinet/in.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <sys/inotify.h>
42 #include <sys/ioctl.h>
43 #include <sys/param.h>
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/types.h>
47 #include <time.h>
48 #include <unistd.h>
49
50 #include "../include/netns_ifaddrs.h"
51 #include "af_unix.h"
52 #include "conf.h"
53 #include "config.h"
54 #include "file_utils.h"
55 #include "log.h"
56 #include "macro.h"
57 #include "memory_utils.h"
58 #include "network.h"
59 #include "nl.h"
60 #include "raw_syscalls.h"
61 #include "syscall_wrappers.h"
62 #include "utils.h"
63
64 #ifndef HAVE_STRLCPY
65 #include "include/strlcpy.h"
66 #endif
67
68 lxc_log_define(network, lxc);
69
/* Signature shared by the per-type network device callbacks stored in the
 * netdev_conf and netdev_deconf tables of this file.
 */
typedef int (*instantiate_cb)(struct lxc_handler *handler,
			      struct lxc_netdev *netdev);

/* Interface name "lo". */
static const char loop_device[] = "lo";
72
73 static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
74 {
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116 out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121 }
122
/* Add an IPv4 route for dest/netmask on the device with index ifindex.
 * Thin wrapper: returns whatever lxc_ip_route_dest() returns.
 */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
127
/* Add an IPv6 route for dest/netmask on the device with index ifindex.
 * Thin wrapper: returns whatever lxc_ip_route_dest() returns.
 */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_NEWROUTE, family, ifindex, dest, netmask);
}
132
/* Delete the IPv4 route for dest/netmask on the device with index ifindex.
 * Thin wrapper: returns whatever lxc_ip_route_dest() returns.
 */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int family = AF_INET;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
137
/* Delete the IPv6 route for dest/netmask on the device with index ifindex.
 * Thin wrapper: returns whatever lxc_ip_route_dest() returns.
 */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int family = AF_INET6;

	return lxc_ip_route_dest(RTM_DELROUTE, family, ifindex, dest, netmask);
}
142
143 static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144 {
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160 }
161
162 static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163 {
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179 }
180
181 static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182 {
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
188 if (netdev->priv.veth_attr.pair[0] != '\0') {
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
217 goto out_delete;
218 }
219
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
222 * of a container */
223 err = setup_private_host_hw_addr(veth1);
224 if (err) {
225 errno = -err;
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
227 goto out_delete;
228 }
229
230 /* Retrieve ifindex of the host's veth device. */
231 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
232 if (!netdev->priv.veth_attr.ifindex) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
234 goto out_delete;
235 }
236
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
240 * on.
241 */
242 netdev->ifindex = if_nametoindex(veth2);
243 if (!netdev->ifindex) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
245 goto out_delete;
246 }
247
248 if (netdev->mtu) {
249 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
250 WARN("Failed to parse mtu");
251 else
252 INFO("Retrieved mtu %d", mtu);
253 } else if (netdev->link[0] != '\0') {
254 bridge_index = if_nametoindex(netdev->link);
255 if (bridge_index) {
256 mtu = netdev_get_mtu(bridge_index);
257 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
258 } else {
259 mtu = netdev_get_mtu(netdev->ifindex);
260 INFO("Retrieved mtu %d from %s", mtu, veth2);
261 }
262 }
263
264 if (mtu) {
265 err = lxc_netdev_set_mtu(veth1, mtu);
266 if (!err)
267 err = lxc_netdev_set_mtu(veth2, mtu);
268
269 if (err) {
270 errno = -err;
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu, veth1, veth2);
273 goto out_delete;
274 }
275 }
276
277 if (netdev->link[0] != '\0') {
278 err = lxc_bridge_attach(netdev->link, veth1);
279 if (err) {
280 errno = -err;
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1, netdev->link);
283 goto out_delete;
284 }
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
286 }
287
288 err = lxc_netdev_up(veth1);
289 if (err) {
290 errno = -err;
291 SYSERROR("Failed to set \"%s\" up", veth1);
292 goto out_delete;
293 }
294
295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
298 goto out_delete;
299 }
300
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
304 goto out_delete;
305 }
306
307 if (netdev->upscript) {
308 char *argv[] = {
309 "veth",
310 netdev->link,
311 veth1,
312 NULL,
313 };
314
315 err = run_script_argv(handler->name,
316 handler->conf->hooks_version, "net",
317 netdev->upscript, "up", argv);
318 if (err < 0)
319 goto out_delete;
320 }
321
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
323 netdev->ifindex);
324
325 return 0;
326
327 out_delete:
328 if (netdev->ifindex != 0)
329 lxc_netdev_delete_by_name(veth1);
330 return -1;
331 }
332
333 static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
334 {
335 char peer[IFNAMSIZ];
336 int err;
337 unsigned int mtu = 0;
338
339 if (netdev->link[0] == '\0') {
340 ERROR("No link for macvlan network device specified");
341 return -1;
342 }
343
344 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
345 if (err < 0 || (size_t)err >= sizeof(peer))
346 return -1;
347
348 if (!lxc_mkifname(peer))
349 return -1;
350
351 err = lxc_macvlan_create(netdev->link, peer,
352 netdev->priv.macvlan_attr.mode);
353 if (err) {
354 errno = -err;
355 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
356 peer, netdev->link);
357 goto on_error;
358 }
359
360 strlcpy(netdev->created_name, peer, IFNAMSIZ);
361
362 netdev->ifindex = if_nametoindex(peer);
363 if (!netdev->ifindex) {
364 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
365 goto on_error;
366 }
367
368 if (netdev->mtu) {
369 err = lxc_safe_uint(netdev->mtu, &mtu);
370 if (err < 0) {
371 errno = -err;
372 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
373 goto on_error;
374 }
375
376 err = lxc_netdev_set_mtu(peer, mtu);
377 if (err < 0) {
378 errno = -err;
379 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
380 goto on_error;
381 }
382 }
383
384 if (netdev->upscript) {
385 char *argv[] = {
386 "macvlan",
387 netdev->link,
388 NULL,
389 };
390
391 err = run_script_argv(handler->name,
392 handler->conf->hooks_version, "net",
393 netdev->upscript, "up", argv);
394 if (err < 0)
395 goto on_error;
396 }
397
398 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
399 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
400
401 return 0;
402
403 on_error:
404 lxc_netdev_delete_by_name(peer);
405 return -1;
406 }
407
408 static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
409 {
410 int err, index, len;
411 struct ifinfomsg *ifi;
412 struct nl_handler nlh;
413 struct rtattr *nest, *nest2;
414 struct nlmsg *answer = NULL, *nlmsg = NULL;
415
416 len = strlen(master);
417 if (len == 1 || len >= IFNAMSIZ)
418 return minus_one_set_errno(EINVAL);
419
420 len = strlen(name);
421 if (len == 1 || len >= IFNAMSIZ)
422 return minus_one_set_errno(EINVAL);
423
424 index = if_nametoindex(master);
425 if (!index)
426 return minus_one_set_errno(EINVAL);
427
428 err = netlink_open(&nlh, NETLINK_ROUTE);
429 if (err)
430 return minus_one_set_errno(-err);
431
432 err = -ENOMEM;
433 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
434 if (!nlmsg)
435 goto out;
436
437 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
438 if (!answer)
439 goto out;
440
441 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
442 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
443
444 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
445 if (!ifi) {
446 goto out;
447 }
448 ifi->ifi_family = AF_UNSPEC;
449
450 err = -EPROTO;
451 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
452 if (!nest)
453 goto out;
454
455 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
456 goto out;
457
458 if (mode) {
459 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
460 if (!nest2)
461 goto out;
462
463 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
464 goto out;
465
466 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
467 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
468 */
469 if (isolation > 0) {
470 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
471 goto out;
472 }
473
474 nla_end_nested(nlmsg, nest2);
475 }
476
477 nla_end_nested(nlmsg, nest);
478
479 if (nla_put_u32(nlmsg, IFLA_LINK, index))
480 goto out;
481
482 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
483 goto out;
484
485 err = netlink_transaction(&nlh, nlmsg, answer);
486 out:
487 netlink_close(&nlh);
488 nlmsg_free(answer);
489 nlmsg_free(nlmsg);
490 if (err < 0)
491 return minus_one_set_errno(-err);
492 return 0;
493 }
494
495 static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
496 {
497 char peer[IFNAMSIZ];
498 int err;
499 unsigned int mtu = 0;
500
501 if (netdev->link[0] == '\0') {
502 ERROR("No link for ipvlan network device specified");
503 return -1;
504 }
505
506 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
507 if (err < 0 || (size_t)err >= sizeof(peer))
508 return -1;
509
510 if (!lxc_mkifname(peer))
511 return -1;
512
513 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
514 netdev->priv.ipvlan_attr.isolation);
515 if (err) {
516 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
517 peer, netdev->link);
518 goto on_error;
519 }
520
521 strlcpy(netdev->created_name, peer, IFNAMSIZ);
522
523 netdev->ifindex = if_nametoindex(peer);
524 if (!netdev->ifindex) {
525 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
526 goto on_error;
527 }
528
529 if (netdev->mtu) {
530 err = lxc_safe_uint(netdev->mtu, &mtu);
531 if (err < 0) {
532 errno = -err;
533 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
534 netdev->mtu, peer);
535 goto on_error;
536 }
537
538 err = lxc_netdev_set_mtu(peer, mtu);
539 if (err < 0) {
540 errno = -err;
541 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
542 netdev->mtu, peer);
543 goto on_error;
544 }
545 }
546
547 if (netdev->upscript) {
548 char *argv[] = {
549 "ipvlan",
550 netdev->link,
551 NULL,
552 };
553
554 err = run_script_argv(handler->name, handler->conf->hooks_version,
555 "net", netdev->upscript, "up", argv);
556 if (err < 0)
557 goto on_error;
558 }
559
560 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
561 netdev->ifindex, netdev->priv.macvlan_attr.mode);
562
563 return 0;
564
565 on_error:
566 lxc_netdev_delete_by_name(peer);
567 return -1;
568 }
569
570 static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
571 {
572 char peer[IFNAMSIZ];
573 int err;
574 static uint16_t vlan_cntr = 0;
575 unsigned int mtu = 0;
576
577 if (netdev->link[0] == '\0') {
578 ERROR("No link for vlan network device specified");
579 return -1;
580 }
581
582 err = snprintf(peer, sizeof(peer), "vlan%d-%d",
583 netdev->priv.vlan_attr.vid, vlan_cntr++);
584 if (err < 0 || (size_t)err >= sizeof(peer))
585 return -1;
586
587 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
588 if (err) {
589 errno = -err;
590 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
591 peer, netdev->link);
592 return -1;
593 }
594
595 strlcpy(netdev->created_name, peer, IFNAMSIZ);
596
597 netdev->ifindex = if_nametoindex(peer);
598 if (!netdev->ifindex) {
599 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
600 goto on_error;
601 }
602
603 if (netdev->mtu) {
604 err = lxc_safe_uint(netdev->mtu, &mtu);
605 if (err < 0) {
606 errno = -err;
607 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
608 netdev->mtu, peer);
609 goto on_error;
610 }
611
612 err = lxc_netdev_set_mtu(peer, mtu);
613 if (err) {
614 errno = -err;
615 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
616 netdev->mtu, peer);
617 goto on_error;
618 }
619 }
620
621 if (netdev->upscript) {
622 char *argv[] = {
623 "vlan",
624 netdev->link,
625 NULL,
626 };
627
628 err = run_script_argv(handler->name, handler->conf->hooks_version,
629 "net", netdev->upscript, "up", argv);
630 if (err < 0) {
631 goto on_error;
632 }
633 }
634
635 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer,
636 netdev->ifindex);
637
638 return 0;
639
640 on_error:
641 lxc_netdev_delete_by_name(peer);
642 return -1;
643 }
644
645 static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
646 {
647 int err, mtu_orig = 0;
648 unsigned int mtu = 0;
649
650 if (netdev->link[0] == '\0') {
651 ERROR("No link for physical interface specified");
652 return -1;
653 }
654
655 /* Note that we're retrieving the container's ifindex in the host's
656 * network namespace because we need it to move the device from the
657 * host's network namespace to the container's network namespace later
658 * on.
659 * Note that netdev->link will contain the name of the physical network
660 * device in the host's namespace.
661 */
662 netdev->ifindex = if_nametoindex(netdev->link);
663 if (!netdev->ifindex) {
664 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
665 return -1;
666 }
667
668 /* Store the ifindex of the host's network device in the host's
669 * namespace.
670 */
671 netdev->priv.phys_attr.ifindex = netdev->ifindex;
672
673 /* Get original device MTU setting and store for restoration after container shutdown. */
674 mtu_orig = netdev_get_mtu(netdev->ifindex);
675 if (mtu_orig < 0) {
676 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
677 return minus_one_set_errno(-mtu_orig);
678 }
679
680 netdev->priv.phys_attr.mtu = mtu_orig;
681
682 if (netdev->mtu) {
683 err = lxc_safe_uint(netdev->mtu, &mtu);
684 if (err < 0) {
685 errno = -err;
686 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
687 return -1;
688 }
689
690 err = lxc_netdev_set_mtu(netdev->link, mtu);
691 if (err < 0) {
692 errno = -err;
693 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
694 return -1;
695 }
696 }
697
698 if (netdev->upscript) {
699 char *argv[] = {
700 "phys",
701 netdev->link,
702 NULL,
703 };
704
705 err = run_script_argv(handler->name,
706 handler->conf->hooks_version, "net",
707 netdev->upscript, "up", argv);
708 if (err < 0) {
709 return -1;
710 }
711 }
712
713 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link, netdev->ifindex);
714
715 return 0;
716 }
717
718 static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
719 {
720 int ret;
721 char *argv[] = {
722 "empty",
723 NULL,
724 };
725
726 netdev->ifindex = 0;
727 if (!netdev->upscript)
728 return 0;
729
730 ret = run_script_argv(handler->name, handler->conf->hooks_version,
731 "net", netdev->upscript, "up", argv);
732 if (ret < 0)
733 return -1;
734
735 return 0;
736 }
737
738 static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
739 {
740 netdev->ifindex = 0;
741 return 0;
742 }
743
744 static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
745 [LXC_NET_VETH] = instantiate_veth,
746 [LXC_NET_MACVLAN] = instantiate_macvlan,
747 [LXC_NET_IPVLAN] = instantiate_ipvlan,
748 [LXC_NET_VLAN] = instantiate_vlan,
749 [LXC_NET_PHYS] = instantiate_phys,
750 [LXC_NET_EMPTY] = instantiate_empty,
751 [LXC_NET_NONE] = instantiate_none,
752 };
753
754 static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
755 {
756 int ret;
757 char *argv[] = {
758 "veth",
759 netdev->link,
760 NULL,
761 NULL,
762 };
763
764 if (!netdev->downscript)
765 return 0;
766
767 if (netdev->priv.veth_attr.pair[0] != '\0')
768 argv[2] = netdev->priv.veth_attr.pair;
769 else
770 argv[2] = netdev->priv.veth_attr.veth1;
771
772 ret = run_script_argv(handler->name,
773 handler->conf->hooks_version, "net",
774 netdev->downscript, "down", argv);
775 if (ret < 0)
776 return -1;
777
778 return 0;
779 }
780
781 static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
782 {
783 int ret;
784 char *argv[] = {
785 "macvlan",
786 netdev->link,
787 NULL,
788 };
789
790 if (!netdev->downscript)
791 return 0;
792
793 ret = run_script_argv(handler->name, handler->conf->hooks_version,
794 "net", netdev->downscript, "down", argv);
795 if (ret < 0)
796 return -1;
797
798 return 0;
799 }
800
801 static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
802 {
803 int ret;
804 char *argv[] = {
805 "ipvlan",
806 netdev->link,
807 NULL,
808 };
809
810 if (!netdev->downscript)
811 return 0;
812
813 ret = run_script_argv(handler->name, handler->conf->hooks_version,
814 "net", netdev->downscript, "down", argv);
815 if (ret < 0)
816 return -1;
817
818 return 0;
819 }
820
821 static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
822 {
823 int ret;
824 char *argv[] = {
825 "vlan",
826 netdev->link,
827 NULL,
828 };
829
830 if (!netdev->downscript)
831 return 0;
832
833 ret = run_script_argv(handler->name, handler->conf->hooks_version,
834 "net", netdev->downscript, "down", argv);
835 if (ret < 0)
836 return -1;
837
838 return 0;
839 }
840
841 static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
842 {
843 int ret;
844 char *argv[] = {
845 "phys",
846 netdev->link,
847 NULL,
848 };
849
850 if (!netdev->downscript)
851 return 0;
852
853 ret = run_script_argv(handler->name, handler->conf->hooks_version,
854 "net", netdev->downscript, "down", argv);
855 if (ret < 0)
856 return -1;
857
858 return 0;
859 }
860
861 static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
862 {
863 int ret;
864 char *argv[] = {
865 "empty",
866 NULL,
867 };
868
869 if (!netdev->downscript)
870 return 0;
871
872 ret = run_script_argv(handler->name, handler->conf->hooks_version,
873 "net", netdev->downscript, "down", argv);
874 if (ret < 0)
875 return -1;
876
877 return 0;
878 }
879
/* Teardown hook for the "none" network type: nothing to do. Neither argument
 * is used. Always returns 0.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	(void)handler;
	(void)netdev;

	return 0;
}
884
885 static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
886 [LXC_NET_VETH] = shutdown_veth,
887 [LXC_NET_MACVLAN] = shutdown_macvlan,
888 [LXC_NET_IPVLAN] = shutdown_ipvlan,
889 [LXC_NET_VLAN] = shutdown_vlan,
890 [LXC_NET_PHYS] = shutdown_phys,
891 [LXC_NET_EMPTY] = shutdown_empty,
892 [LXC_NET_NONE] = shutdown_none,
893 };
894
895 static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
896 {
897 int err;
898 struct nl_handler nlh;
899 struct ifinfomsg *ifi;
900 struct nlmsg *nlmsg = NULL;
901
902 err = netlink_open(&nlh, NETLINK_ROUTE);
903 if (err)
904 return err;
905
906 err = -ENOMEM;
907 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
908 if (!nlmsg)
909 goto out;
910
911 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
912 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
913
914 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
915 if (!ifi)
916 goto out;
917 ifi->ifi_family = AF_UNSPEC;
918 ifi->ifi_index = ifindex;
919
920 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
921 goto out;
922
923 if (ifname != NULL) {
924 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
925 goto out;
926 }
927
928 err = netlink_transaction(&nlh, nlmsg, nlmsg);
929 out:
930 netlink_close(&nlh);
931 nlmsg_free(nlmsg);
932 return err;
933 }
934
935 int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
936 {
937 int err;
938 struct nl_handler nlh;
939 struct ifinfomsg *ifi;
940 struct nlmsg *nlmsg = NULL;
941
942 err = netlink_open(&nlh, NETLINK_ROUTE);
943 if (err)
944 return err;
945
946 err = -ENOMEM;
947 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
948 if (!nlmsg)
949 goto out;
950
951 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
952 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
953
954 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
955 if (!ifi)
956 goto out;
957 ifi->ifi_family = AF_UNSPEC;
958 ifi->ifi_index = ifindex;
959
960 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
961 goto out;
962
963 if (ifname != NULL) {
964 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
965 goto out;
966 }
967
968 err = netlink_transaction(&nlh, nlmsg, nlmsg);
969 out:
970 netlink_close(&nlh);
971 nlmsg_free(nlmsg);
972 return err;
973 }
974
975 /* If we are asked to move a wireless interface, then we must actually move its
976 * phyN device. Detect that condition and return the physname here. The physname
977 * will be passed to lxc_netdev_move_wlan() which will free it when done.
978 */
979 #define PHYSNAME "/sys/class/net/%s/phy80211/name"
980 static char *is_wlan(const char *ifname)
981 {
982 __do_free char *path = NULL;
983 int i, ret;
984 long physlen;
985 size_t len;
986 FILE *f;
987 char *physname = NULL;
988
989 len = strlen(ifname) + strlen(PHYSNAME) - 1;
990 path = must_realloc(NULL, len + 1);
991 ret = snprintf(path, len, PHYSNAME, ifname);
992 if (ret < 0 || (size_t)ret >= len)
993 goto bad;
994
995 f = fopen(path, "r");
996 if (!f)
997 goto bad;
998
999 /* Feh - sb.st_size is always 4096. */
1000 fseek(f, 0, SEEK_END);
1001 physlen = ftell(f);
1002 fseek(f, 0, SEEK_SET);
1003 if (physlen < 0) {
1004 fclose(f);
1005 goto bad;
1006 }
1007
1008 physname = malloc(physlen + 1);
1009 if (!physname) {
1010 fclose(f);
1011 goto bad;
1012 }
1013
1014 memset(physname, 0, physlen + 1);
1015 ret = fread(physname, 1, physlen, f);
1016 fclose(f);
1017 if (ret < 0)
1018 goto bad;
1019
1020 for (i = 0; i < physlen; i++) {
1021 if (physname[i] == '\n')
1022 physname[i] = '\0';
1023
1024 if (physname[i] == '\0')
1025 break;
1026 }
1027
1028 return physname;
1029
1030 bad:
1031 free(physname);
1032 return NULL;
1033 }
1034
/* Rename the network device old to new inside the network namespace of the
 * process pid.
 *
 * The work is done in a forked child which switches to the target namespace
 * via switch_to_ns() and then calls lxc_netdev_rename_by_name(). The parent
 * returns the result of wait_for_pid() for that child, or -1 when fork()
 * fails.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child from here on: it must never return into the caller's code,
	 * otherwise a second copy of the calling process keeps running.
	 * Report every outcome through the exit status instead.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1052
/* Move the wireless phy physname into the network namespace of pid by
 * running "iw phy <physname> set netns <pid>" in a child process and, when
 * newname is not NULL, rename ifname to newname inside that namespace
 * afterwards.
 *
 * physname is always freed before returning.
 *
 * Returns 0 on success, -1 when iw cannot be found, fork() fails or the child
 * does not succeed; otherwise the result of the rename step.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	char *iw_path;
	pid_t child;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto done;
	free(iw_path);

	child = fork();
	if (child < 0)
		goto done;

	if (child == 0) {
		char pidstr[30];

		snprintf(pidstr, sizeof(pidstr), "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child))
		goto done;

	if (newname)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

done:
	free(physname);
	return ret;
}
1092
/* Move the network device ifname into the network namespace of pid,
 * optionally renaming it to newname.
 *
 * Devices for which is_wlan() returns a phy name are handled by
 * lxc_netdev_move_wlan(), all others by lxc_netdev_move_by_index().
 *
 * Returns -EINVAL when ifname is NULL or does not resolve to an interface
 * index, otherwise the result of the selected helper.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	char *phy;
	int ifindex;

	if (!ifname)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (ifindex == 0)
		return -EINVAL;

	phy = is_wlan(ifname);
	if (!phy)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	return lxc_netdev_move_wlan(phy, ifname, pid, newname);
}
1111
1112 int lxc_netdev_delete_by_index(int ifindex)
1113 {
1114 int err;
1115 struct ifinfomsg *ifi;
1116 struct nl_handler nlh;
1117 struct nlmsg *answer = NULL, *nlmsg = NULL;
1118
1119 err = netlink_open(&nlh, NETLINK_ROUTE);
1120 if (err)
1121 return err;
1122
1123 err = -ENOMEM;
1124 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1125 if (!nlmsg)
1126 goto out;
1127
1128 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1129 if (!answer)
1130 goto out;
1131
1132 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1133 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1134
1135 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1136 if (!ifi)
1137 goto out;
1138 ifi->ifi_family = AF_UNSPEC;
1139 ifi->ifi_index = ifindex;
1140
1141 err = netlink_transaction(&nlh, nlmsg, answer);
1142 out:
1143 netlink_close(&nlh);
1144 nlmsg_free(answer);
1145 nlmsg_free(nlmsg);
1146 return err;
1147 }
1148
/* Delete the network device called name.
 *
 * Returns -EINVAL when name does not resolve to an interface index, otherwise
 * the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1159
1160 int lxc_netdev_rename_by_index(int ifindex, const char *newname)
1161 {
1162 int err, len;
1163 struct ifinfomsg *ifi;
1164 struct nl_handler nlh;
1165 struct nlmsg *answer = NULL, *nlmsg = NULL;
1166
1167 err = netlink_open(&nlh, NETLINK_ROUTE);
1168 if (err)
1169 return err;
1170
1171 len = strlen(newname);
1172 if (len == 1 || len >= IFNAMSIZ) {
1173 err = -EINVAL;
1174 goto out;
1175 }
1176
1177 err = -ENOMEM;
1178 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1179 if (!nlmsg)
1180 goto out;
1181
1182 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1183 if (!answer)
1184 goto out;
1185
1186 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1187 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1188
1189 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1190 if (!ifi)
1191 goto out;
1192 ifi->ifi_family = AF_UNSPEC;
1193 ifi->ifi_index = ifindex;
1194
1195 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1196 goto out;
1197
1198 err = netlink_transaction(&nlh, nlmsg, answer);
1199 out:
1200 netlink_close(&nlh);
1201 nlmsg_free(answer);
1202 nlmsg_free(nlmsg);
1203 return err;
1204 }
1205
1206 int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1207 {
1208 int len, index;
1209
1210 len = strlen(oldname);
1211 if (len == 1 || len >= IFNAMSIZ)
1212 return -EINVAL;
1213
1214 index = if_nametoindex(oldname);
1215 if (!index)
1216 return -EINVAL;
1217
1218 return lxc_netdev_rename_by_index(index, newname);
1219 }
1220
1221 int netdev_set_flag(const char *name, int flag)
1222 {
1223 int err, index, len;
1224 struct ifinfomsg *ifi;
1225 struct nl_handler nlh;
1226 struct nlmsg *answer = NULL, *nlmsg = NULL;
1227
1228 err = netlink_open(&nlh, NETLINK_ROUTE);
1229 if (err)
1230 return err;
1231
1232 err = -EINVAL;
1233 len = strlen(name);
1234 if (len == 1 || len >= IFNAMSIZ)
1235 goto out;
1236
1237 err = -ENOMEM;
1238 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1239 if (!nlmsg)
1240 goto out;
1241
1242 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1243 if (!answer)
1244 goto out;
1245
1246 err = -EINVAL;
1247 index = if_nametoindex(name);
1248 if (!index)
1249 goto out;
1250
1251 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1252 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1253
1254 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1255 if (!ifi) {
1256 err = -ENOMEM;
1257 goto out;
1258 }
1259 ifi->ifi_family = AF_UNSPEC;
1260 ifi->ifi_index = index;
1261 ifi->ifi_change |= IFF_UP;
1262 ifi->ifi_flags |= flag;
1263
1264 err = netlink_transaction(&nlh, nlmsg, answer);
1265 out:
1266 netlink_close(&nlh);
1267 nlmsg_free(nlmsg);
1268 nlmsg_free(answer);
1269 return err;
1270 }
1271
1272 int netdev_get_flag(const char *name, int *flag)
1273 {
1274 int err, index, len;
1275 struct ifinfomsg *ifi;
1276 struct nl_handler nlh;
1277 struct nlmsg *answer = NULL, *nlmsg = NULL;
1278
1279 if (!name)
1280 return -EINVAL;
1281
1282 err = netlink_open(&nlh, NETLINK_ROUTE);
1283 if (err)
1284 return err;
1285
1286 err = -EINVAL;
1287 len = strlen(name);
1288 if (len == 1 || len >= IFNAMSIZ)
1289 goto out;
1290
1291 err = -ENOMEM;
1292 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1293 if (!nlmsg)
1294 goto out;
1295
1296 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1297 if (!answer)
1298 goto out;
1299
1300 err = -EINVAL;
1301 index = if_nametoindex(name);
1302 if (!index)
1303 goto out;
1304
1305 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1306 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1307
1308 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1309 if (!ifi) {
1310 err = -ENOMEM;
1311 goto out;
1312 }
1313 ifi->ifi_family = AF_UNSPEC;
1314 ifi->ifi_index = index;
1315
1316 err = netlink_transaction(&nlh, nlmsg, answer);
1317 if (err)
1318 goto out;
1319
1320 ifi = NLMSG_DATA(answer->nlmsghdr);
1321
1322 *flag = ifi->ifi_flags;
1323 out:
1324 netlink_close(&nlh);
1325 nlmsg_free(nlmsg);
1326 nlmsg_free(answer);
1327 return err;
1328 }
1329
1330 /*
1331 * \brief Check a interface is up or not.
1332 *
1333 * \param name: name for the interface.
1334 *
1335 * \return int.
1336 * 0 means interface is down.
1337 * 1 means interface is up.
1338 * Others means error happened, and ret-value is the error number.
1339 */
1340 int lxc_netdev_isup(const char *name)
1341 {
1342 int err, flag;
1343
1344 err = netdev_get_flag(name, &flag);
1345 if (err)
1346 return err;
1347
1348 if (flag & IFF_UP)
1349 return 1;
1350
1351 return 0;
1352 }
1353
1354 int netdev_get_mtu(int ifindex)
1355 {
1356 int answer_len, err, res;
1357 struct nl_handler nlh;
1358 struct ifinfomsg *ifi;
1359 struct nlmsghdr *msg;
1360 int readmore = 0, recv_len = 0;
1361 struct nlmsg *answer = NULL, *nlmsg = NULL;
1362
1363 err = netlink_open(&nlh, NETLINK_ROUTE);
1364 if (err)
1365 return err;
1366
1367 err = -ENOMEM;
1368 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1369 if (!nlmsg)
1370 goto out;
1371
1372 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1373 if (!answer)
1374 goto out;
1375
1376 /* Save the answer buffer length, since it will be overwritten
1377 * on the first receive (and we might need to receive more than
1378 * once.
1379 */
1380 answer_len = answer->nlmsghdr->nlmsg_len;
1381
1382 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
1383 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1384
1385 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1386 if (!ifi)
1387 goto out;
1388 ifi->ifi_family = AF_UNSPEC;
1389
1390 /* Send the request for addresses, which returns all addresses
1391 * on all interfaces. */
1392 err = netlink_send(&nlh, nlmsg);
1393 if (err < 0)
1394 goto out;
1395
1396 #pragma GCC diagnostic push
1397 #pragma GCC diagnostic ignored "-Wcast-align"
1398
1399 do {
1400 /* Restore the answer buffer length, it might have been
1401 * overwritten by a previous receive.
1402 */
1403 answer->nlmsghdr->nlmsg_len = answer_len;
1404
1405 /* Get the (next) batch of reply messages */
1406 err = netlink_rcv(&nlh, answer);
1407 if (err < 0)
1408 goto out;
1409
1410 recv_len = err;
1411
1412 /* Satisfy the typing for the netlink macros */
1413 msg = answer->nlmsghdr;
1414
1415 while (NLMSG_OK(msg, recv_len)) {
1416
1417 /* Stop reading if we see an error message */
1418 if (msg->nlmsg_type == NLMSG_ERROR) {
1419 struct nlmsgerr *errmsg =
1420 (struct nlmsgerr *)NLMSG_DATA(msg);
1421 err = errmsg->error;
1422 goto out;
1423 }
1424
1425 /* Stop reading if we see a NLMSG_DONE message */
1426 if (msg->nlmsg_type == NLMSG_DONE) {
1427 readmore = 0;
1428 break;
1429 }
1430
1431 ifi = NLMSG_DATA(msg);
1432 if (ifi->ifi_index == ifindex) {
1433 struct rtattr *rta = IFLA_RTA(ifi);
1434 int attr_len =
1435 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1436 res = 0;
1437 while (RTA_OK(rta, attr_len)) {
1438 /* Found a local address for the
1439 * requested interface, return it.
1440 */
1441 if (rta->rta_type == IFLA_MTU) {
1442 memcpy(&res, RTA_DATA(rta),
1443 sizeof(int));
1444 err = res;
1445 goto out;
1446 }
1447 rta = RTA_NEXT(rta, attr_len);
1448 }
1449 }
1450
1451 /* Keep reading more data from the socket if the last
1452 * message had the NLF_F_MULTI flag set.
1453 */
1454 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1455
1456 /* Look at the next message received in this buffer. */
1457 msg = NLMSG_NEXT(msg, recv_len);
1458 }
1459 } while (readmore);
1460
1461 #pragma GCC diagnostic pop
1462
1463 /* If we end up here, we didn't find any result, so signal an error. */
1464 err = -1;
1465
1466 out:
1467 netlink_close(&nlh);
1468 nlmsg_free(answer);
1469 nlmsg_free(nlmsg);
1470 return err;
1471 }
1472
1473 int lxc_netdev_set_mtu(const char *name, int mtu)
1474 {
1475 int err, index, len;
1476 struct ifinfomsg *ifi;
1477 struct nl_handler nlh;
1478 struct nlmsg *answer = NULL, *nlmsg = NULL;
1479
1480 err = netlink_open(&nlh, NETLINK_ROUTE);
1481 if (err)
1482 return err;
1483
1484 err = -EINVAL;
1485 len = strlen(name);
1486 if (len == 1 || len >= IFNAMSIZ)
1487 goto out;
1488
1489 err = -ENOMEM;
1490 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1491 if (!nlmsg)
1492 goto out;
1493
1494 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1495 if (!answer)
1496 goto out;
1497
1498 err = -EINVAL;
1499 index = if_nametoindex(name);
1500 if (!index)
1501 goto out;
1502
1503 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1504 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1505
1506 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1507 if (!ifi) {
1508 err = -ENOMEM;
1509 goto out;
1510 }
1511 ifi->ifi_family = AF_UNSPEC;
1512 ifi->ifi_index = index;
1513
1514 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1515 goto out;
1516
1517 err = netlink_transaction(&nlh, nlmsg, answer);
1518 out:
1519 netlink_close(&nlh);
1520 nlmsg_free(nlmsg);
1521 nlmsg_free(answer);
1522 return err;
1523 }
1524
1525 int lxc_netdev_up(const char *name)
1526 {
1527 return netdev_set_flag(name, IFF_UP);
1528 }
1529
/* Take the interface called name down by passing an empty flag set to
 * netdev_set_flag(); returns that function's result unchanged.
 */
int lxc_netdev_down(const char *name)
{
	int ret = netdev_set_flag(name, 0);

	return ret;
}
1534
1535 int lxc_veth_create(const char *name1, const char *name2)
1536 {
1537 int err, len;
1538 struct ifinfomsg *ifi;
1539 struct nl_handler nlh;
1540 struct rtattr *nest1, *nest2, *nest3;
1541 struct nlmsg *answer = NULL, *nlmsg = NULL;
1542
1543 err = netlink_open(&nlh, NETLINK_ROUTE);
1544 if (err)
1545 return err;
1546
1547 err = -EINVAL;
1548 len = strlen(name1);
1549 if (len == 1 || len >= IFNAMSIZ)
1550 goto out;
1551
1552 len = strlen(name2);
1553 if (len == 1 || len >= IFNAMSIZ)
1554 goto out;
1555
1556 err = -ENOMEM;
1557 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1558 if (!nlmsg)
1559 goto out;
1560
1561 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1562 if (!answer)
1563 goto out;
1564
1565 nlmsg->nlmsghdr->nlmsg_flags =
1566 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1567 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1568
1569 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1570 if (!ifi)
1571 goto out;
1572 ifi->ifi_family = AF_UNSPEC;
1573
1574 err = -EINVAL;
1575 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1576 if (!nest1)
1577 goto out;
1578
1579 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1580 goto out;
1581
1582 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1583 if (!nest2)
1584 goto out;
1585
1586 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1587 if (!nest3)
1588 goto out;
1589
1590 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1591 if (!ifi) {
1592 err = -ENOMEM;
1593 goto out;
1594 }
1595
1596 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1597 goto out;
1598
1599 nla_end_nested(nlmsg, nest3);
1600 nla_end_nested(nlmsg, nest2);
1601 nla_end_nested(nlmsg, nest1);
1602
1603 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1604 goto out;
1605
1606 err = netlink_transaction(&nlh, nlmsg, answer);
1607 out:
1608 netlink_close(&nlh);
1609 nlmsg_free(answer);
1610 nlmsg_free(nlmsg);
1611 return err;
1612 }
1613
1614 /* TODO: merge with lxc_macvlan_create */
1615 int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
1616 {
1617 int err, len, lindex;
1618 struct ifinfomsg *ifi;
1619 struct nl_handler nlh;
1620 struct rtattr *nest, *nest2;
1621 struct nlmsg *answer = NULL, *nlmsg = NULL;
1622
1623 err = netlink_open(&nlh, NETLINK_ROUTE);
1624 if (err)
1625 return err;
1626
1627 err = -EINVAL;
1628 len = strlen(master);
1629 if (len == 1 || len >= IFNAMSIZ)
1630 goto err3;
1631
1632 len = strlen(name);
1633 if (len == 1 || len >= IFNAMSIZ)
1634 goto err3;
1635
1636 err = -ENOMEM;
1637 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1638 if (!nlmsg)
1639 goto err3;
1640
1641 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1642 if (!answer)
1643 goto err2;
1644
1645 err = -EINVAL;
1646 lindex = if_nametoindex(master);
1647 if (!lindex)
1648 goto err1;
1649
1650 nlmsg->nlmsghdr->nlmsg_flags =
1651 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1652 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1653
1654 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1655 if (!ifi) {
1656 err = -ENOMEM;
1657 goto err1;
1658 }
1659 ifi->ifi_family = AF_UNSPEC;
1660
1661 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1662 if (!nest)
1663 goto err1;
1664
1665 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1666 goto err1;
1667
1668 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1669 if (!nest2)
1670 goto err1;
1671
1672 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1673 goto err1;
1674
1675 nla_end_nested(nlmsg, nest2);
1676 nla_end_nested(nlmsg, nest);
1677
1678 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1679 goto err1;
1680
1681 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1682 goto err1;
1683
1684 err = netlink_transaction(&nlh, nlmsg, answer);
1685 err1:
1686 nlmsg_free(answer);
1687 err2:
1688 nlmsg_free(nlmsg);
1689 err3:
1690 netlink_close(&nlh);
1691 return err;
1692 }
1693
1694 int lxc_macvlan_create(const char *master, const char *name, int mode)
1695 {
1696 int err, index, len;
1697 struct ifinfomsg *ifi;
1698 struct nl_handler nlh;
1699 struct rtattr *nest, *nest2;
1700 struct nlmsg *answer = NULL, *nlmsg = NULL;
1701
1702 err = netlink_open(&nlh, NETLINK_ROUTE);
1703 if (err)
1704 return err;
1705
1706 err = -EINVAL;
1707 len = strlen(master);
1708 if (len == 1 || len >= IFNAMSIZ)
1709 goto out;
1710
1711 len = strlen(name);
1712 if (len == 1 || len >= IFNAMSIZ)
1713 goto out;
1714
1715 err = -ENOMEM;
1716 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1717 if (!nlmsg)
1718 goto out;
1719
1720 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1721 if (!answer)
1722 goto out;
1723
1724 err = -EINVAL;
1725 index = if_nametoindex(master);
1726 if (!index)
1727 goto out;
1728
1729 nlmsg->nlmsghdr->nlmsg_flags =
1730 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1731 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1732
1733 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1734 if (!ifi) {
1735 err = -ENOMEM;
1736 goto out;
1737 }
1738 ifi->ifi_family = AF_UNSPEC;
1739
1740 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1741 if (!nest)
1742 goto out;
1743
1744 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1745 goto out;
1746
1747 if (mode) {
1748 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1749 if (!nest2)
1750 goto out;
1751
1752 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1753 goto out;
1754
1755 nla_end_nested(nlmsg, nest2);
1756 }
1757
1758 nla_end_nested(nlmsg, nest);
1759
1760 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1761 goto out;
1762
1763 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1764 goto out;
1765
1766 err = netlink_transaction(&nlh, nlmsg, answer);
1767 out:
1768 netlink_close(&nlh);
1769 nlmsg_free(answer);
1770 nlmsg_free(nlmsg);
1771 return err;
1772 }
1773
/* Write the string value to the file at path.
 *
 * Returns 0 on success, or the negated errno of the failing open() or write.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	/* Capture errno before close() gets a chance to touch it. */
	if (lxc_write_nointr(fd, value, strlen(value)) < 0)
		ret = -errno;

	close(fd);
	return ret;
}
1789
/* Check the "forwarding" sysctl of ifname for the given address family.
 *
 * Builds /proc/sys/net/<ipv4|ipv6>/conf/<ifname>/forwarding and returns the
 * result of lxc_read_file_expect() comparing one byte of it against "1".
 * An unsupported family yields minus_one_set_errno(EINVAL), a path that does
 * not fit into PATH_MAX yields minus_one_set_errno(E2BIG).
 */
static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
{
	char buf[1] = "";
	char path[PATH_MAX];
	const char *proto;
	int len;

	switch (family) {
	case AF_INET:
		proto = "ipv4";
		break;
	case AF_INET6:
		proto = "ipv6";
		break;
	default:
		return minus_one_set_errno(EINVAL);
	}

	len = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
		       proto, ifname, "forwarding");
	if (len < 0 || (size_t)len >= sizeof(path))
		return minus_one_set_errno(E2BIG);

	return lxc_read_file_expect(path, buf, 1, "1");
}
1807
/* Switch neighbour proxying on ifname on (flag != 0) or off (flag == 0).
 *
 * Writes "1" or "0" to proxy_arp (AF_INET) or proxy_ndp (AF_INET6) below
 * /proc/sys/net/<ipv4|ipv6>/conf/<ifname>/.
 *
 * Returns -EINVAL for any other family, -E2BIG when the path does not fit
 * into PATH_MAX, otherwise the result of proc_sys_net_write().
 */
static int neigh_proxy_set(const char *ifname, int family, int flag)
{
	char path[PATH_MAX];
	const bool is_v4 = (family == AF_INET);
	int len;

	if (!is_v4 && family != AF_INET6)
		return -EINVAL;

	len = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
		       is_v4 ? "ipv4" : "ipv6", ifname,
		       is_v4 ? "proxy_arp" : "proxy_ndp");
	if (len < 0 || (size_t)len >= sizeof(path))
		return -E2BIG;

	if (flag)
		return proc_sys_net_write(path, "1");

	return proc_sys_net_write(path, "0");
}
1824
/* Check the neighbour proxy sysctl of ifname for the given address family.
 *
 * Builds the path to proxy_arp (AF_INET) or proxy_ndp (AF_INET6) below
 * /proc/sys/net/<ipv4|ipv6>/conf/<ifname>/ and returns the result of
 * lxc_read_file_expect() comparing one byte of it against "1". An
 * unsupported family yields minus_one_set_errno(EINVAL), a path that does
 * not fit into PATH_MAX yields minus_one_set_errno(E2BIG).
 */
static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
{
	char buf[1] = "";
	char path[PATH_MAX];
	const char *proto, *knob;
	int len;

	switch (family) {
	case AF_INET:
		proto = "ipv4";
		knob = "proxy_arp";
		break;
	case AF_INET6:
		proto = "ipv6";
		knob = "proxy_ndp";
		break;
	default:
		return minus_one_set_errno(EINVAL);
	}

	len = snprintf(path, sizeof(path), "/proc/sys/net/%s/conf/%s/%s",
		       proto, ifname, knob);
	if (len < 0 || (size_t)len >= sizeof(path))
		return minus_one_set_errno(E2BIG);

	return lxc_read_file_expect(path, buf, 1, "1");
}
1842
/* Enable neighbour proxying on the interface called name for family;
 * returns the result of neigh_proxy_set().
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1847
/* Disable neighbour proxying on the interface called name for family;
 * returns the result of neigh_proxy_set().
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int disable = 0;

	return neigh_proxy_set(name, family, disable);
}
1852
/* Map one hexadecimal digit to its numeric value, or return -1 for any other
 * byte. Uses plain range comparisons so that bytes with the high bit set
 * (negative when char is signed) are handled without going through
 * <ctype.h>, whose functions are undefined for negative arguments.
 */
static int mac_hex_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address macaddr into sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are written to
 * sa_data. Each byte is given as one or two hexadecimal digits; bytes are
 * separated by ':'. Parsing stops at the end of the string or after ETH_ALEN
 * bytes, whichever comes first, so shorter addresses are accepted and only
 * fill the leading bytes.
 *
 * Returns 0 on success and -EINVAL as soon as an unexpected character is
 * met; bytes converted up to that point have already been stored.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned val;
		int nibble;
		char c;

		/* The first character of a byte must be a hex digit. */
		nibble = mac_hex_value(*macaddr++);
		if (nibble < 0)
			return -EINVAL;
		val = (unsigned)nibble;

		/* The second one is either another hex digit or ends the
		 * byte (':' or end of string).
		 */
		c = *macaddr;
		nibble = mac_hex_value(c);
		if (nibble >= 0)
			val = (val << 4) | (unsigned)nibble;
		else if (c != ':' && c != '\0')
			return -EINVAL;

		/* Step over the digit or separator just examined, but never
		 * past the terminating NUL.
		 */
		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1897
1898 static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1899 void *acast, int prefix)
1900 {
1901 int addrlen, err;
1902 struct ifaddrmsg *ifa;
1903 struct nl_handler nlh;
1904 struct nlmsg *answer = NULL, *nlmsg = NULL;
1905
1906 addrlen = family == AF_INET ? sizeof(struct in_addr)
1907 : sizeof(struct in6_addr);
1908
1909 err = netlink_open(&nlh, NETLINK_ROUTE);
1910 if (err)
1911 return err;
1912
1913 err = -ENOMEM;
1914 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1915 if (!nlmsg)
1916 goto out;
1917
1918 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1919 if (!answer)
1920 goto out;
1921
1922 nlmsg->nlmsghdr->nlmsg_flags =
1923 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1924 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1925
1926 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
1927 if (!ifa)
1928 goto out;
1929 ifa->ifa_prefixlen = prefix;
1930 ifa->ifa_index = ifindex;
1931 ifa->ifa_family = family;
1932 ifa->ifa_scope = 0;
1933
1934 err = -EINVAL;
1935 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
1936 goto out;
1937
1938 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
1939 goto out;
1940
1941 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1942 goto out;
1943
1944 /* TODO: multicast, anycast with ipv6 */
1945 err = -EPROTONOSUPPORT;
1946 if (family == AF_INET6 &&
1947 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1948 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1949 goto out;
1950
1951 err = netlink_transaction(&nlh, nlmsg, answer);
1952 out:
1953 netlink_close(&nlh);
1954 nlmsg_free(answer);
1955 nlmsg_free(nlmsg);
1956 return err;
1957 }
1958
/* Add the IPv6 address addr/prefix to the interface with index ifindex.
 * Thin AF_INET6 wrapper around ip_addr_add(); mcast is passed in the
 * position of that function's bcast argument.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	int ret = ip_addr_add(AF_INET6, ifindex, addr, mcast, acast, prefix);

	return ret;
}
1965
/* Add the IPv4 address addr/prefix with broadcast address bcast to the
 * interface with index ifindex. Thin AF_INET wrapper around ip_addr_add();
 * no anycast address is supplied.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	int ret = ip_addr_add(AF_INET, ifindex, addr, bcast, NULL, prefix);

	return ret;
}
1971
/* Extract an address from the RTM_NEWADDR message msg.
 *
 * An IFA_LOCAL attribute is preferred; an IFA_ADDRESS attribute is used when
 * it is met first and no IFA_LOCAL follows. The address is copied into *res,
 * which is allocated here when it is still NULL (so res should point to an
 * in_addr* or in6_addr*). Messages of a different address family are
 * ignored.
 *
 * Returns 0 when the message was processed (whether or not an address was
 * found) and -1 on an attribute of unexpected size or on allocation failure.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifaddr = NLMSG_DATA(msg);
	struct rtattr *attr = IFA_RTA(ifaddr);
	int attr_len = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	if (ifaddr->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk all attributes attached to this message. */
	for (; RTA_OK(attr, attr_len); attr = RTA_NEXT(attr, attr_len)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the right
		 * payload size; verify it anyway.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* A buffer may already exist from an earlier IFA_ADDRESS; it
		 * is reused and overwritten.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is final, IFA_ADDRESS may still be superseded. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2025
2026 static int ip_addr_get(int family, int ifindex, void **res)
2027 {
2028 int answer_len, err;
2029 struct ifaddrmsg *ifa;
2030 struct nl_handler nlh;
2031 struct nlmsghdr *msg;
2032 int readmore = 0, recv_len = 0;
2033 struct nlmsg *answer = NULL, *nlmsg = NULL;
2034
2035 err = netlink_open(&nlh, NETLINK_ROUTE);
2036 if (err)
2037 return err;
2038
2039 err = -ENOMEM;
2040 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2041 if (!nlmsg)
2042 goto out;
2043
2044 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2045 if (!answer)
2046 goto out;
2047
2048 /* Save the answer buffer length, since it will be overwritten on the
2049 * first receive (and we might need to receive more than once).
2050 */
2051 answer_len = answer->nlmsghdr->nlmsg_len;
2052
2053 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
2054 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
2055
2056 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
2057 if (!ifa)
2058 goto out;
2059 ifa->ifa_family = family;
2060
2061 /* Send the request for addresses, which returns all addresses on all
2062 * interfaces.
2063 */
2064 err = netlink_send(&nlh, nlmsg);
2065 if (err < 0)
2066 goto out;
2067
2068 #pragma GCC diagnostic push
2069 #pragma GCC diagnostic ignored "-Wcast-align"
2070
2071 do {
2072 /* Restore the answer buffer length, it might have been
2073 * overwritten by a previous receive.
2074 */
2075 answer->nlmsghdr->nlmsg_len = answer_len;
2076
2077 /* Get the (next) batch of reply messages. */
2078 err = netlink_rcv(&nlh, answer);
2079 if (err < 0)
2080 goto out;
2081
2082 recv_len = err;
2083 err = 0;
2084
2085 /* Satisfy the typing for the netlink macros. */
2086 msg = answer->nlmsghdr;
2087
2088 while (NLMSG_OK(msg, recv_len)) {
2089 /* Stop reading if we see an error message. */
2090 if (msg->nlmsg_type == NLMSG_ERROR) {
2091 struct nlmsgerr *errmsg =
2092 (struct nlmsgerr *)NLMSG_DATA(msg);
2093 err = errmsg->error;
2094 goto out;
2095 }
2096
2097 /* Stop reading if we see a NLMSG_DONE message. */
2098 if (msg->nlmsg_type == NLMSG_DONE) {
2099 readmore = 0;
2100 break;
2101 }
2102
2103 if (msg->nlmsg_type != RTM_NEWADDR) {
2104 err = -1;
2105 goto out;
2106 }
2107
2108 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2109 if (ifa->ifa_index == ifindex) {
2110 if (ifa_get_local_ip(family, msg, res) < 0) {
2111 err = -1;
2112 goto out;
2113 }
2114
2115 /* Found a result, stop searching. */
2116 if (*res)
2117 goto out;
2118 }
2119
2120 /* Keep reading more data from the socket if the last
2121 * message had the NLF_F_MULTI flag set.
2122 */
2123 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2124
2125 /* Look at the next message received in this buffer. */
2126 msg = NLMSG_NEXT(msg, recv_len);
2127 }
2128 } while (readmore);
2129
2130 #pragma GCC diagnostic pop
2131
2132 /* If we end up here, we didn't find any result, so signal an
2133 * error.
2134 */
2135 err = -1;
2136
2137 out:
2138 netlink_close(&nlh);
2139 nlmsg_free(answer);
2140 nlmsg_free(nlmsg);
2141 return err;
2142 }
2143
/* Fetch an IPv6 address of the interface with index ifindex into *res.
 * Thin AF_INET6 wrapper around ip_addr_get(); returns its result.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2148
/* Fetch an IPv4 address of the interface with index ifindex into *res.
 * Thin AF_INET wrapper around ip_addr_get(); returns its result.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2153
2154 static int ip_gateway_add(int family, int ifindex, void *gw)
2155 {
2156 int addrlen, err;
2157 struct nl_handler nlh;
2158 struct rtmsg *rt;
2159 struct nlmsg *answer = NULL, *nlmsg = NULL;
2160
2161 addrlen = family == AF_INET ? sizeof(struct in_addr)
2162 : sizeof(struct in6_addr);
2163
2164 err = netlink_open(&nlh, NETLINK_ROUTE);
2165 if (err)
2166 return err;
2167
2168 err = -ENOMEM;
2169 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2170 if (!nlmsg)
2171 goto out;
2172
2173 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2174 if (!answer)
2175 goto out;
2176
2177 nlmsg->nlmsghdr->nlmsg_flags =
2178 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2179 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2180
2181 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
2182 if (!rt)
2183 goto out;
2184 rt->rtm_family = family;
2185 rt->rtm_table = RT_TABLE_MAIN;
2186 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2187 rt->rtm_protocol = RTPROT_BOOT;
2188 rt->rtm_type = RTN_UNICAST;
2189 /* "default" destination */
2190 rt->rtm_dst_len = 0;
2191
2192 err = -EINVAL;
2193
2194 /* If gateway address not supplied, then a device route will be created instead */
2195 if (gw != NULL) {
2196 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2197 goto out;
2198 }
2199
2200 /* Adding the interface index enables the use of link-local
2201 * addresses for the gateway.
2202 */
2203 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2204 goto out;
2205
2206 err = netlink_transaction(&nlh, nlmsg, answer);
2207 out:
2208 netlink_close(&nlh);
2209 nlmsg_free(answer);
2210 nlmsg_free(nlmsg);
2211 return err;
2212 }
2213
/* Add an IPv4 default route via gw on the interface with index ifindex.
 * Thin AF_INET wrapper around ip_gateway_add(); returns its result.
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	int ret = ip_gateway_add(AF_INET, ifindex, gw);

	return ret;
}
2218
/* Add an IPv6 default route via gw on the interface with index ifindex.
 * Thin AF_INET6 wrapper around ip_gateway_add(); returns its result.
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	int ret = ip_gateway_add(AF_INET6, ifindex, gw);

	return ret;
}

/* Decide whether bridge should be treated as an openvswitch bridge.
 *
 * The answer is true exactly when stat() on /sys/class/net/<bridge>/bridge
 * fails with ENOENT. It is false when that path exists, when stat() fails
 * for another reason, or when the name is too long for the path buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	/* 22 covers the fixed parts "/sys/class/net/" and "/bridge". */
	char sysfs_path[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(sysfs_path, sizeof(sysfs_path),
		       "/sys/class/net/%s/bridge", bridge);
	if (len < 0 || (size_t)len >= sizeof(sysfs_path))
		return false;

	if (stat(sysfs_path, &st) < 0 && errno == ENOENT)
		return true;

	return false;
}
2240
/* Argument bundle for the ovs-vsctl exec callbacks in this file. */
struct ovs_veth_args {
	/* Name of the openvswitch bridge to operate on. */
	const char *bridge;
	/* Name of the port (network interface) to add or delete. */
	const char *nic;
};
2245
2246 /* Called from a background thread - when nic goes away, remove it from the
2247 * bridge.
2248 */
2249 static int lxc_ovs_delete_port_exec(void *data)
2250 {
2251 struct ovs_veth_args *args = data;
2252
2253 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2254 (char *)NULL);
2255 return -1;
2256 }
2257
2258 int lxc_ovs_delete_port(const char *bridge, const char *nic)
2259 {
2260 int ret;
2261 char cmd_output[PATH_MAX];
2262 struct ovs_veth_args args;
2263
2264 args.bridge = bridge;
2265 args.nic = nic;
2266 ret = run_command(cmd_output, sizeof(cmd_output),
2267 lxc_ovs_delete_port_exec, (void *)&args);
2268 if (ret < 0) {
2269 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2270 "%s", bridge, nic, cmd_output);
2271 return -1;
2272 }
2273
2274 return 0;
2275 }
2276
2277 static int lxc_ovs_attach_bridge_exec(void *data)
2278 {
2279 struct ovs_veth_args *args = data;
2280
2281 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2282 (char *)NULL);
2283 return -1;
2284 }
2285
2286 static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2287 {
2288 int ret;
2289 char cmd_output[PATH_MAX];
2290 struct ovs_veth_args args;
2291
2292 args.bridge = bridge;
2293 args.nic = nic;
2294 ret = run_command(cmd_output, sizeof(cmd_output),
2295 lxc_ovs_attach_bridge_exec, (void *)&args);
2296 if (ret < 0) {
2297 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2298 bridge, nic, cmd_output);
2299 return -1;
2300 }
2301
2302 return 0;
2303 }
2304
2305 int lxc_bridge_attach(const char *bridge, const char *ifname)
2306 {
2307 int err, fd, index;
2308 size_t retlen;
2309 struct ifreq ifr;
2310
2311 if (strlen(ifname) >= IFNAMSIZ)
2312 return -EINVAL;
2313
2314 index = if_nametoindex(ifname);
2315 if (!index)
2316 return -EINVAL;
2317
2318 if (is_ovs_bridge(bridge))
2319 return lxc_ovs_attach_bridge(bridge, ifname);
2320
2321 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
2322 if (fd < 0)
2323 return -errno;
2324
2325 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
2326 if (retlen >= IFNAMSIZ) {
2327 close(fd);
2328 return -E2BIG;
2329 }
2330
2331 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2332 ifr.ifr_ifindex = index;
2333 err = ioctl(fd, SIOCBRADDIF, &ifr);
2334 close(fd);
2335 if (err)
2336 err = -errno;
2337
2338 return err;
2339 }
2340
2341 static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
2342 [LXC_NET_EMPTY] = "empty",
2343 [LXC_NET_VETH] = "veth",
2344 [LXC_NET_MACVLAN] = "macvlan",
2345 [LXC_NET_IPVLAN] = "ipvlan",
2346 [LXC_NET_PHYS] = "phys",
2347 [LXC_NET_VLAN] = "vlan",
2348 [LXC_NET_NONE] = "none",
2349 };
2350
2351 const char *lxc_net_type_to_str(int type)
2352 {
2353 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2354 return NULL;
2355
2356 return lxc_network_types[type];
2357 }
2358
2359 static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2360
2361 char *lxc_mkifname(char *template)
2362 {
2363 int ret;
2364 struct netns_ifaddrs *ifa, *ifaddr;
2365 char name[IFNAMSIZ];
2366 bool exists = false;
2367 size_t i = 0;
2368 #ifdef HAVE_RAND_R
2369 unsigned int seed;
2370
2371 seed = randseed(false);
2372 #else
2373
2374 (void)randseed(true);
2375 #endif
2376
2377 if (strlen(template) >= IFNAMSIZ)
2378 return NULL;
2379
2380 /* Get all the network interfaces. */
2381 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2382 if (ret < 0) {
2383 SYSERROR("Failed to get network interfaces");
2384 return NULL;
2385 }
2386
2387 /* Generate random names until we find one that doesn't exist. */
2388 for (;;) {
2389 name[0] = '\0';
2390 (void)strlcpy(name, template, IFNAMSIZ);
2391
2392 exists = false;
2393
2394 for (i = 0; i < strlen(name); i++) {
2395 if (name[i] == 'X') {
2396 #ifdef HAVE_RAND_R
2397 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
2398 #else
2399 name[i] = padchar[rand() % strlen(padchar)];
2400 #endif
2401 }
2402 }
2403
2404 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
2405 if (!strcmp(ifa->ifa_name, name)) {
2406 exists = true;
2407 break;
2408 }
2409 }
2410
2411 if (!exists)
2412 break;
2413 }
2414
2415 netns_freeifaddrs(ifaddr);
2416 (void)strlcpy(template, name, strlen(template) + 1);
2417
2418 return template;
2419 }
2420
2421 int setup_private_host_hw_addr(char *veth1)
2422 {
2423 int err, sockfd;
2424 struct ifreq ifr;
2425
2426 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
2427 if (sockfd < 0)
2428 return -errno;
2429
2430 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2431 if (err < 0 || (size_t)err >= IFNAMSIZ) {
2432 close(sockfd);
2433 return -E2BIG;
2434 }
2435
2436 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2437 if (err < 0) {
2438 close(sockfd);
2439 return -errno;
2440 }
2441
2442 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2443 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
2444 close(sockfd);
2445 if (err < 0)
2446 return -errno;
2447
2448 return 0;
2449 }
2450
2451 int lxc_find_gateway_addresses(struct lxc_handler *handler)
2452 {
2453 struct lxc_list *network = &handler->conf->network;
2454 struct lxc_list *iterator;
2455 struct lxc_netdev *netdev;
2456 int link_index;
2457
2458 lxc_list_for_each(iterator, network) {
2459 netdev = iterator->elem;
2460
2461 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2462 continue;
2463
2464 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2465 ERROR("Automatic gateway detection is only supported "
2466 "for veth and macvlan");
2467 return -1;
2468 }
2469
2470 if (netdev->link[0] == '\0') {
2471 ERROR("Automatic gateway detection needs a link interface");
2472 return -1;
2473 }
2474
2475 link_index = if_nametoindex(netdev->link);
2476 if (!link_index)
2477 return -EINVAL;
2478
2479 if (netdev->ipv4_gateway_auto) {
2480 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2481 ERROR("Failed to automatically find ipv4 gateway "
2482 "address from link interface \"%s\"", netdev->link);
2483 return -1;
2484 }
2485 }
2486
2487 if (netdev->ipv6_gateway_auto) {
2488 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2489 ERROR("Failed to automatically find ipv6 gateway "
2490 "address from link interface \"%s\"", netdev->link);
2491 return -1;
2492 }
2493 }
2494 }
2495
2496 return 0;
2497 }
2498
2499 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2500 static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
2501 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
2502 {
2503 int ret;
2504 pid_t child;
2505 int bytes, pipefd[2];
2506 char *token, *saveptr = NULL;
2507 char netdev_link[IFNAMSIZ];
2508 char buffer[PATH_MAX] = {0};
2509 size_t retlen;
2510
2511 if (netdev->type != LXC_NET_VETH) {
2512 ERROR("Network type %d not support for unprivileged use", netdev->type);
2513 return -1;
2514 }
2515
2516 ret = pipe(pipefd);
2517 if (ret < 0) {
2518 SYSERROR("Failed to create pipe");
2519 return -1;
2520 }
2521
2522 child = fork();
2523 if (child < 0) {
2524 SYSERROR("Failed to create new process");
2525 close(pipefd[0]);
2526 close(pipefd[1]);
2527 return -1;
2528 }
2529
2530 if (child == 0) {
2531 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
2532
2533 close(pipefd[0]);
2534
2535 ret = dup2(pipefd[1], STDOUT_FILENO);
2536 if (ret >= 0)
2537 ret = dup2(pipefd[1], STDERR_FILENO);
2538 close(pipefd[1]);
2539 if (ret < 0) {
2540 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2541 _exit(EXIT_FAILURE);
2542 }
2543
2544 if (netdev->link[0] != '\0')
2545 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
2546 else
2547 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2548 if (retlen >= IFNAMSIZ) {
2549 SYSERROR("Invalid network device name");
2550 _exit(EXIT_FAILURE);
2551 }
2552
2553 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2554 if (ret < 0 || ret >= sizeof(pidstr))
2555 _exit(EXIT_FAILURE);
2556 pidstr[sizeof(pidstr) - 1] = '\0';
2557
2558 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2559 lxcname, pidstr, netdev_link,
2560 netdev->name[0] != '\0' ? netdev->name : "(null)");
2561 if (netdev->name[0] != '\0')
2562 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2563 lxcpath, lxcname, pidstr, "veth", netdev_link,
2564 netdev->name, (char *)NULL);
2565 else
2566 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2567 lxcpath, lxcname, pidstr, "veth", netdev_link,
2568 (char *)NULL);
2569 SYSERROR("Failed to execute lxc-user-nic");
2570 _exit(EXIT_FAILURE);
2571 }
2572
2573 /* close the write-end of the pipe */
2574 close(pipefd[1]);
2575
2576 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
2577 if (bytes < 0) {
2578 SYSERROR("Failed to read from pipe file descriptor");
2579 close(pipefd[0]);
2580 } else {
2581 buffer[bytes - 1] = '\0';
2582 }
2583
2584 ret = wait_for_pid(child);
2585 close(pipefd[0]);
2586 if (ret != 0 || bytes < 0) {
2587 ERROR("lxc-user-nic failed to configure requested network: %s",
2588 buffer[0] != '\0' ? buffer : "(null)");
2589 return -1;
2590 }
2591 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2592
2593 /* netdev->name */
2594 token = strtok_r(buffer, ":", &saveptr);
2595 if (!token) {
2596 ERROR("Failed to parse lxc-user-nic output");
2597 return -1;
2598 }
2599
2600 /*
2601 * lxc-user-nic will take care of proper network device naming. So
2602 * netdev->name and netdev->created_name need to be identical to not
2603 * trigger another rename later on.
2604 */
2605 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2606 if (retlen < IFNAMSIZ)
2607 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2608 if (retlen >= IFNAMSIZ) {
2609 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2610 return -E2BIG;
2611 }
2612
2613 /* netdev->ifindex */
2614 token = strtok_r(NULL, ":", &saveptr);
2615 if (!token) {
2616 ERROR("Failed to parse lxc-user-nic output");
2617 return -1;
2618 }
2619
2620 ret = lxc_safe_int(token, &netdev->ifindex);
2621 if (ret < 0) {
2622 errno = -ret;
2623 SYSERROR("Failed to convert string \"%s\" to integer", token);
2624 return -1;
2625 }
2626
2627 /* netdev->priv.veth_attr.veth1 */
2628 token = strtok_r(NULL, ":", &saveptr);
2629 if (!token) {
2630 ERROR("Failed to parse lxc-user-nic output");
2631 return -1;
2632 }
2633
2634 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2635 if (retlen >= IFNAMSIZ) {
2636 ERROR("Host side veth device name returned by lxc-user-nic is "
2637 "too long");
2638 return -E2BIG;
2639 }
2640
2641 /* netdev->priv.veth_attr.ifindex */
2642 token = strtok_r(NULL, ":", &saveptr);
2643 if (!token) {
2644 ERROR("Failed to parse lxc-user-nic output");
2645 return -1;
2646 }
2647
2648 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
2649 if (ret < 0) {
2650 errno = -ret;
2651 SYSERROR("Failed to convert string \"%s\" to integer", token);
2652 return -1;
2653 }
2654
2655 if (netdev->upscript) {
2656 char *argv[] = {
2657 "veth",
2658 netdev->link,
2659 netdev->priv.veth_attr.veth1,
2660 NULL,
2661 };
2662
2663 ret = run_script_argv(lxcname, hooks_version, "net",
2664 netdev->upscript, "up", argv);
2665 if (ret < 0)
2666 return -1;
2667 }
2668
2669 return 0;
2670 }
2671
2672 static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
2673 struct lxc_netdev *netdev,
2674 const char *netns_path)
2675 {
2676 int bytes, ret;
2677 pid_t child;
2678 int pipefd[2];
2679 char buffer[PATH_MAX] = {0};
2680
2681 if (netdev->type != LXC_NET_VETH) {
2682 ERROR("Network type %d not support for unprivileged use", netdev->type);
2683 return -1;
2684 }
2685
2686 ret = pipe(pipefd);
2687 if (ret < 0) {
2688 SYSERROR("Failed to create pipe");
2689 return -1;
2690 }
2691
2692 child = fork();
2693 if (child < 0) {
2694 SYSERROR("Failed to create new process");
2695 close(pipefd[0]);
2696 close(pipefd[1]);
2697 return -1;
2698 }
2699
2700 if (child == 0) {
2701 char *hostveth;
2702
2703 close(pipefd[0]);
2704
2705 ret = dup2(pipefd[1], STDOUT_FILENO);
2706 if (ret >= 0)
2707 ret = dup2(pipefd[1], STDERR_FILENO);
2708 close(pipefd[1]);
2709 if (ret < 0) {
2710 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2711 _exit(EXIT_FAILURE);
2712 }
2713
2714 if (netdev->priv.veth_attr.pair[0] != '\0')
2715 hostveth = netdev->priv.veth_attr.pair;
2716 else
2717 hostveth = netdev->priv.veth_attr.veth1;
2718 if (hostveth[0] == '\0') {
2719 SYSERROR("Host side veth device name is missing");
2720 _exit(EXIT_FAILURE);
2721 }
2722
2723 if (netdev->link[0] == '\0') {
2724 SYSERROR("Network link for network device \"%s\" is "
2725 "missing", netdev->priv.veth_attr.veth1);
2726 _exit(EXIT_FAILURE);
2727 }
2728
2729 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
2730 lxcname, netns_path, netdev->link, hostveth);
2731 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
2732 lxcname, netns_path, "veth", netdev->link, hostveth,
2733 (char *)NULL);
2734 SYSERROR("Failed to exec lxc-user-nic.");
2735 _exit(EXIT_FAILURE);
2736 }
2737
2738 close(pipefd[1]);
2739
2740 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
2741 if (bytes < 0) {
2742 SYSERROR("Failed to read from pipe file descriptor.");
2743 close(pipefd[0]);
2744 } else {
2745 buffer[bytes - 1] = '\0';
2746 }
2747
2748 ret = wait_for_pid(child);
2749 close(pipefd[0]);
2750 if (ret != 0 || bytes < 0) {
2751 ERROR("lxc-user-nic failed to delete requested network: %s",
2752 buffer[0] != '\0' ? buffer : "(null)");
2753 return -1;
2754 }
2755
2756 return 0;
2757 }
2758
2759 bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2760 {
2761 int ret;
2762 struct lxc_list *iterator;
2763 struct lxc_list *network = &handler->conf->network;
2764 /* strlen("/proc/") = 6
2765 * +
2766 * INTTYPE_TO_STRLEN(pid_t)
2767 * +
2768 * strlen("/fd/") = 4
2769 * +
2770 * INTTYPE_TO_STRLEN(int)
2771 * +
2772 * \0
2773 */
2774 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
2775
2776 *netns_path = '\0';
2777
2778 if (handler->nsfd[LXC_NS_NET] < 0) {
2779 DEBUG("Cannot not guarantee safe deletion of network devices. "
2780 "Manual cleanup maybe needed");
2781 return false;
2782 }
2783
2784 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
2785 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
2786 if (ret < 0 || ret >= sizeof(netns_path))
2787 return false;
2788
2789 lxc_list_for_each(iterator, network) {
2790 char *hostveth = NULL;
2791 struct lxc_netdev *netdev = iterator->elem;
2792
2793 /* We can only delete devices whose ifindex we have. If we don't
2794 * have the index it means that we didn't create it.
2795 */
2796 if (!netdev->ifindex)
2797 continue;
2798
2799 if (netdev->type == LXC_NET_PHYS) {
2800 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2801 netdev->link);
2802 if (ret < 0)
2803 WARN("Failed to rename interface with index %d "
2804 "to its initial name \"%s\"",
2805 netdev->ifindex, netdev->link);
2806 else
2807 TRACE("Renamed interface with index %d to its "
2808 "initial name \"%s\"",
2809 netdev->ifindex, netdev->link);
2810
2811 ret = netdev_deconf[netdev->type](handler, netdev);
2812 goto clear_ifindices;
2813 }
2814
2815 ret = netdev_deconf[netdev->type](handler, netdev);
2816 if (ret < 0)
2817 WARN("Failed to deconfigure network device");
2818
2819 if (netdev->type != LXC_NET_VETH)
2820 goto clear_ifindices;
2821
2822 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
2823 goto clear_ifindices;
2824
2825 if (netdev->priv.veth_attr.pair[0] != '\0')
2826 hostveth = netdev->priv.veth_attr.pair;
2827 else
2828 hostveth = netdev->priv.veth_attr.veth1;
2829 if (hostveth[0] == '\0')
2830 goto clear_ifindices;
2831
2832 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2833 handler->name, netdev,
2834 netns_path);
2835 if (ret < 0) {
2836 WARN("Failed to remove port \"%s\" from openvswitch "
2837 "bridge \"%s\"", hostveth, netdev->link);
2838 goto clear_ifindices;
2839 }
2840 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2841 netdev->link);
2842
2843 clear_ifindices:
2844 /* We need to clear any ifindices we recorded so liblxc won't
2845 * have cached stale data which would cause it to fail on reboot
2846 * we're we don't re-read the on-disk config file.
2847 */
2848 netdev->ifindex = 0;
2849 if (netdev->type == LXC_NET_PHYS) {
2850 netdev->priv.phys_attr.ifindex = 0;
2851 } else if (netdev->type == LXC_NET_VETH) {
2852 netdev->priv.veth_attr.veth1[0] = '\0';
2853 netdev->priv.veth_attr.ifindex = 0;
2854 }
2855 }
2856
2857 return true;
2858 }
2859
/* Arguments handed to the "ip neigh ... proxy" exec wrappers. */
struct ip_proxy_args {
	/* Address string passed as the proxy argument to "ip neigh". */
	const char *ip;
	/* Name passed after the "dev" keyword. */
	const char *dev;
};
2864
2865 static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2866 {
2867 struct ip_proxy_args *args = data;
2868
2869 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2870 return -1;
2871 }
2872
2873 static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2874 {
2875 struct ip_proxy_args *args = data;
2876
2877 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2878 return -1;
2879 }
2880
2881 static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2882 {
2883 int ret;
2884 char cmd_output[PATH_MAX];
2885 struct ip_proxy_args args = {
2886 .ip = ip,
2887 .dev = dev,
2888 };
2889
2890 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2891 if (ret < 0) {
2892 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2893 return -1;
2894 }
2895
2896 return 0;
2897 }
2898
2899 static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2900 {
2901 int ret;
2902 char cmd_output[PATH_MAX];
2903 struct ip_proxy_args args = {
2904 .ip = ip,
2905 .dev = dev,
2906 };
2907
2908 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2909 if (ret < 0) {
2910 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2911 return -1;
2912 }
2913
2914 return 0;
2915 }
2916
2917 static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2918 struct lxc_list *cur, *next;
2919 struct lxc_inetdev *inet4dev;
2920 struct lxc_inet6dev *inet6dev;
2921 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
2922 int err = 0;
2923 unsigned int lo_ifindex = 0;
2924
2925 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2926 if (!lxc_list_empty(&netdev->ipv4)) {
2927 /* Check for net.ipv4.conf.[link].forwarding=1 */
2928 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2929 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2930 return minus_one_set_errno(EINVAL);
2931 }
2932 }
2933
2934 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2935 if (!lxc_list_empty(&netdev->ipv6)) {
2936 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2937 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2938 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2939 return minus_one_set_errno(EINVAL);
2940 }
2941
2942 /* Check for net.ipv6.conf.[link].forwarding=1 */
2943 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2944 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2945 return minus_one_set_errno(EINVAL);
2946 }
2947 }
2948
2949 /* Perform IPVLAN specific checks. */
2950 if (netdev->type == LXC_NET_IPVLAN) {
2951 /* Check mode is l3s as other modes do not work with l2proxy. */
2952 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2953 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2954 return minus_one_set_errno(EINVAL);
2955 }
2956
2957 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
2958 lo_ifindex = if_nametoindex(loop_device);
2959 if (lo_ifindex == 0) {
2960 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
2961 return minus_one_set_errno(EINVAL);
2962 }
2963 }
2964
2965 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2966 inet4dev = cur->elem;
2967 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2968 return minus_one_set_errno(-errno);
2969
2970 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2971 return minus_one_set_errno(EINVAL);
2972
2973 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2974 if (netdev->type == LXC_NET_IPVLAN) {
2975 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2976 if (err < 0) {
2977 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
2978 return minus_one_set_errno(-err);
2979 }
2980 }
2981 }
2982
2983 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2984 inet6dev = cur->elem;
2985 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2986 return minus_one_set_errno(-errno);
2987
2988 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2989 return minus_one_set_errno(EINVAL);
2990
2991 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2992 if (netdev->type == LXC_NET_IPVLAN) {
2993 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
2994 if (err < 0) {
2995 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
2996 return minus_one_set_errno(-err);
2997 }
2998 }
2999 }
3000
3001 return 0;
3002 }
3003
/*
 * Undo the l2proxy setup for a single IPv4 address.
 *
 * When @lo_ifindex is non-zero the /32 route to that interface is deleted;
 * when @link is non-empty the neighbour proxy entry on @link is deleted.
 * Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success, -1 with errno set to EINVAL if the address cannot be
 * formatted or any step failed.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* If a local-loopback ifindex supplied remove the static route to the lo device. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3032
/*
 * Undo the l2proxy setup for a single IPv6 address.
 *
 * When @lo_ifindex is non-zero the /128 route to that interface is deleted;
 * when @link is non-empty the neighbour proxy entry on @link is deleted.
 * Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success, -1 with errno set to EINVAL if the address cannot be
 * formatted or any step failed.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET6_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* If a local-loopback ifindex supplied remove the static route to the lo device. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3061
3062 static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
3063 unsigned int lo_ifindex = 0;
3064 unsigned int errCount = 0;
3065 struct lxc_list *cur, *next;
3066 struct lxc_inetdev *inet4dev;
3067 struct lxc_inet6dev *inet6dev;
3068
3069 /* Perform IPVLAN specific checks. */
3070 if (netdev->type == LXC_NET_IPVLAN) {
3071 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3072 lo_ifindex = if_nametoindex(loop_device);
3073 if (lo_ifindex == 0) {
3074 errCount++;
3075 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
3076 }
3077 }
3078
3079 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3080 inet4dev = cur->elem;
3081 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3082 errCount++;
3083 }
3084
3085 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3086 inet6dev = cur->elem;
3087 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3088 errCount++;
3089 }
3090
3091 if (errCount > 0)
3092 return minus_one_set_errno(EINVAL);
3093
3094 return 0;
3095 }
3096
3097 static int lxc_create_network_priv(struct lxc_handler *handler)
3098 {
3099 struct lxc_list *iterator;
3100 struct lxc_list *network = &handler->conf->network;
3101
3102 lxc_list_for_each(iterator, network) {
3103 struct lxc_netdev *netdev = iterator->elem;
3104
3105 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3106 ERROR("Invalid network configuration type %d", netdev->type);
3107 return -1;
3108 }
3109
3110 /* Setup l2proxy entries if enabled and used with a link property */
3111 if (netdev->l2proxy && netdev->link[0] != '\0') {
3112 if (lxc_setup_l2proxy(netdev)) {
3113 ERROR("Failed to setup l2proxy");
3114 return -1;
3115 }
3116 }
3117
3118 if (netdev_conf[netdev->type](handler, netdev)) {
3119 ERROR("Failed to create network device");
3120 return -1;
3121 }
3122 }
3123
3124 return 0;
3125 }
3126
3127 int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
3128 {
3129 pid_t pid = handler->pid;
3130 struct lxc_list *network = &handler->conf->network;
3131 struct lxc_list *iterator;
3132
3133 if (am_guest_unpriv())
3134 return 0;
3135
3136 lxc_list_for_each(iterator, network) {
3137 int ret;
3138 char ifname[IFNAMSIZ];
3139 struct lxc_netdev *netdev = iterator->elem;
3140
3141 if (!netdev->ifindex)
3142 continue;
3143
3144 /* retrieve the name of the interface */
3145 if (!if_indextoname(netdev->ifindex, ifname)) {
3146 ERROR("No interface corresponding to ifindex \"%d\"",
3147 netdev->ifindex);
3148 return -1;
3149 }
3150
3151 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
3152 if (ret) {
3153 errno = -ret;
3154 SYSERROR("Failed to move network device \"%s\" to network namespace %d",
3155 ifname, pid);
3156 return -1;
3157 }
3158
3159 strlcpy(netdev->created_name, ifname, IFNAMSIZ);
3160
3161 DEBUG("Moved network device \"%s\" to network namespace of %d",
3162 netdev->created_name, pid);
3163 }
3164
3165 return 0;
3166 }
3167
3168 static int lxc_create_network_unpriv(struct lxc_handler *handler)
3169 {
3170 int hooks_version = handler->conf->hooks_version;
3171 const char *lxcname = handler->name;
3172 const char *lxcpath = handler->lxcpath;
3173 struct lxc_list *network = &handler->conf->network;
3174 pid_t pid = handler->pid;
3175 struct lxc_list *iterator;
3176
3177 lxc_list_for_each(iterator, network) {
3178 struct lxc_netdev *netdev = iterator->elem;
3179
3180 if (netdev->type == LXC_NET_EMPTY)
3181 continue;
3182
3183 if (netdev->type == LXC_NET_NONE)
3184 continue;
3185
3186 if (netdev->type != LXC_NET_VETH) {
3187 ERROR("Networks of type %s are not supported by unprivileged containers",
3188 lxc_net_type_to_str(netdev->type));
3189 return -1;
3190 }
3191
3192 if (netdev->mtu)
3193 INFO("mtu ignored due to insufficient privilege");
3194
3195 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3196 pid, hooks_version))
3197 return -1;
3198 }
3199
3200 return 0;
3201 }
3202
3203 bool lxc_delete_network_priv(struct lxc_handler *handler)
3204 {
3205 int ret;
3206 struct lxc_list *iterator;
3207 struct lxc_list *network = &handler->conf->network;
3208
3209 lxc_list_for_each(iterator, network) {
3210 char *hostveth = NULL;
3211 struct lxc_netdev *netdev = iterator->elem;
3212
3213 /* We can only delete devices whose ifindex we have. If we don't
3214 * have the index it means that we didn't create it.
3215 */
3216 if (!netdev->ifindex)
3217 continue;
3218
3219 /* Delete l2proxy entries if enabled and used with a link property */
3220 if (netdev->l2proxy && netdev->link[0] != '\0') {
3221 if (lxc_delete_l2proxy(netdev))
3222 WARN("Failed to delete all l2proxy config");
3223 /* Don't return, let the network be cleaned up as normal. */
3224 }
3225
3226 if (netdev->type == LXC_NET_PHYS) {
3227 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3228 if (ret < 0)
3229 WARN("Failed to rename interface with index %d "
3230 "from \"%s\" to its initial name \"%s\"",
3231 netdev->ifindex, netdev->name, netdev->link);
3232 else {
3233 TRACE("Renamed interface with index %d from "
3234 "\"%s\" to its initial name \"%s\"",
3235 netdev->ifindex, netdev->name,
3236 netdev->link);
3237
3238 /* Restore original MTU */
3239 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3240 if (ret < 0) {
3241 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3242 netdev->link, netdev->priv.phys_attr.mtu);
3243 } else {
3244 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3245 netdev->link, netdev->priv.phys_attr.mtu);
3246 }
3247 }
3248
3249 ret = netdev_deconf[netdev->type](handler, netdev);
3250 goto clear_ifindices;
3251 }
3252
3253 ret = netdev_deconf[netdev->type](handler, netdev);
3254 if (ret < 0)
3255 WARN("Failed to deconfigure network device");
3256
3257 /* Recent kernels remove the virtual interfaces when the network
3258 * namespace is destroyed but in case we did not move the
3259 * interface to the network namespace, we have to destroy it.
3260 */
3261 ret = lxc_netdev_delete_by_index(netdev->ifindex);
3262 if (ret < 0) {
3263 if (errno != ENODEV) {
3264 WARN("Failed to remove interface \"%s\" with index %d",
3265 netdev->name[0] != '\0' ? netdev->name : "(null)",
3266 netdev->ifindex);
3267 goto clear_ifindices;
3268 }
3269 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
3270 netdev->name[0] != '\0' ? netdev->name : "(null)",
3271 netdev->ifindex);
3272 }
3273 INFO("Removed interface \"%s\" with index %d",
3274 netdev->name[0] != '\0' ? netdev->name : "(null)",
3275 netdev->ifindex);
3276
3277 if (netdev->type != LXC_NET_VETH)
3278 goto clear_ifindices;
3279
3280 /* Explicitly delete host veth device to prevent lingering
3281 * devices. We had issues in LXD around this.
3282 */
3283 if (netdev->priv.veth_attr.pair[0] != '\0')
3284 hostveth = netdev->priv.veth_attr.pair;
3285 else
3286 hostveth = netdev->priv.veth_attr.veth1;
3287 if (hostveth[0] == '\0')
3288 goto clear_ifindices;
3289
3290 ret = lxc_netdev_delete_by_name(hostveth);
3291 if (ret < 0) {
3292 WARN("Failed to remove interface \"%s\" from \"%s\"",
3293 hostveth, netdev->link);
3294 goto clear_ifindices;
3295 }
3296 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3297
3298 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
3299 netdev->priv.veth_attr.veth1[0] = '\0';
3300 netdev->ifindex = 0;
3301 netdev->priv.veth_attr.ifindex = 0;
3302 goto clear_ifindices;
3303 }
3304
3305 /* Delete the openvswitch port. */
3306 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3307 if (ret < 0)
3308 WARN("Failed to remove port \"%s\" from openvswitch "
3309 "bridge \"%s\"", hostveth, netdev->link);
3310 else
3311 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3312 hostveth, netdev->link);
3313
3314 clear_ifindices:
3315 /* We need to clear any ifindices we recorded so liblxc won't
3316 * have cached stale data which would cause it to fail on reboot
3317 * we're we don't re-read the on-disk config file.
3318 */
3319 netdev->ifindex = 0;
3320 if (netdev->type == LXC_NET_PHYS) {
3321 netdev->priv.phys_attr.ifindex = 0;
3322 } else if (netdev->type == LXC_NET_VETH) {
3323 netdev->priv.veth_attr.veth1[0] = '\0';
3324 netdev->priv.veth_attr.ifindex = 0;
3325 }
3326 }
3327
3328 return true;
3329 }
3330
3331 int lxc_requests_empty_network(struct lxc_handler *handler)
3332 {
3333 struct lxc_list *network = &handler->conf->network;
3334 struct lxc_list *iterator;
3335 bool found_none = false, found_nic = false;
3336
3337 if (lxc_list_empty(network))
3338 return 0;
3339
3340 lxc_list_for_each(iterator, network) {
3341 struct lxc_netdev *netdev = iterator->elem;
3342
3343 if (netdev->type == LXC_NET_NONE)
3344 found_none = true;
3345 else
3346 found_nic = true;
3347 }
3348 if (found_none && !found_nic)
3349 return 1;
3350 return 0;
3351 }
3352
3353 /* try to move physical nics to the init netns */
3354 int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
3355 {
3356 int ret;
3357 int oldfd;
3358 char ifname[IFNAMSIZ];
3359 struct lxc_list *iterator;
3360 int netnsfd = handler->nsfd[LXC_NS_NET];
3361 struct lxc_conf *conf = handler->conf;
3362
3363 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3364 * the parent network namespace. We won't have this capability if we are
3365 * unprivileged.
3366 */
3367 if (!handler->am_root)
3368 return 0;
3369
3370 TRACE("Moving physical network devices back to parent network namespace");
3371
3372 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
3373 if (oldfd < 0) {
3374 SYSERROR("Failed to preserve network namespace");
3375 return -1;
3376 }
3377
3378 ret = setns(netnsfd, CLONE_NEWNET);
3379 if (ret < 0) {
3380 SYSERROR("Failed to enter network namespace");
3381 close(oldfd);
3382 return -1;
3383 }
3384
3385 lxc_list_for_each(iterator, &conf->network) {
3386 struct lxc_netdev *netdev = iterator->elem;
3387
3388 if (netdev->type != LXC_NET_PHYS)
3389 continue;
3390
3391 /* Retrieve the name of the interface in the container's network
3392 * namespace.
3393 */
3394 if (!if_indextoname(netdev->ifindex, ifname)) {
3395 WARN("No interface corresponding to ifindex %d",
3396 netdev->ifindex);
3397 continue;
3398 }
3399
3400 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
3401 if (ret < 0)
3402 WARN("Error moving network device \"%s\" back to "
3403 "network namespace", ifname);
3404 else
3405 TRACE("Moved network device \"%s\" back to network "
3406 "namespace", ifname);
3407 }
3408
3409 ret = setns(oldfd, CLONE_NEWNET);
3410 close(oldfd);
3411 if (ret < 0) {
3412 SYSERROR("Failed to enter network namespace");
3413 return -1;
3414 }
3415
3416 return 0;
3417 }
3418
3419 static int setup_hw_addr(char *hwaddr, const char *ifname)
3420 {
3421 struct sockaddr sockaddr;
3422 struct ifreq ifr;
3423 int ret, fd;
3424
3425 ret = lxc_convert_mac(hwaddr, &sockaddr);
3426 if (ret) {
3427 errno = -ret;
3428 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
3429 return -1;
3430 }
3431
3432 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3433 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3434 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3435
3436 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
3437 if (fd < 0)
3438 return -1;
3439
3440 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
3441 if (ret)
3442 SYSERROR("Failed to perform ioctl");
3443
3444 close(fd);
3445
3446 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3447 ifr.ifr_name);
3448
3449 return ret;
3450 }
3451
3452 static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3453 {
3454 struct lxc_list *iterator;
3455 int err;
3456
3457 lxc_list_for_each(iterator, ip) {
3458 struct lxc_inetdev *inetdev = iterator->elem;
3459
3460 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3461 &inetdev->bcast, inetdev->prefix);
3462 if (err) {
3463 errno = -err;
3464 SYSERROR("Failed to setup ipv4 address for network device "
3465 "with ifindex %d", ifindex);
3466 return -1;
3467 }
3468 }
3469
3470 return 0;
3471 }
3472
3473 static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3474 {
3475 struct lxc_list *iterator;
3476 int err;
3477
3478 lxc_list_for_each(iterator, ip) {
3479 struct lxc_inet6dev *inet6dev = iterator->elem;
3480
3481 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3482 &inet6dev->mcast, &inet6dev->acast,
3483 inet6dev->prefix);
3484 if (err) {
3485 errno = -err;
3486 SYSERROR("Failed to setup ipv6 address for network device "
3487 "with ifindex %d", ifindex);
3488 return -1;
3489 }
3490 }
3491
3492 return 0;
3493 }
3494
3495 static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3496 {
3497 char ifname[IFNAMSIZ];
3498 int err;
3499 char *current_ifname = ifname;
3500 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
3501
3502 /* empty network namespace */
3503 if (!netdev->ifindex) {
3504 if (netdev->flags & IFF_UP) {
3505 err = lxc_netdev_up("lo");
3506 if (err) {
3507 errno = -err;
3508 SYSERROR("Failed to set the loopback network device up");
3509 return -1;
3510 }
3511 }
3512
3513 if (netdev->type == LXC_NET_EMPTY)
3514 return 0;
3515
3516 if (netdev->type == LXC_NET_NONE)
3517 return 0;
3518
3519 netdev->ifindex = if_nametoindex(netdev->created_name);
3520 if (!netdev->ifindex)
3521 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3522 netdev->name ?: "(null)");
3523 }
3524
3525 /* get the new ifindex in case of physical netdev */
3526 if (netdev->type == LXC_NET_PHYS) {
3527 netdev->ifindex = if_nametoindex(netdev->link);
3528 if (!netdev->ifindex) {
3529 ERROR("Failed to get ifindex for network device \"%s\"",
3530 netdev->link);
3531 return -1;
3532 }
3533 }
3534
3535 /* retrieve the name of the interface */
3536 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3537 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3538 netdev->ifindex);
3539 return -1;
3540 }
3541
3542 /* Default: let the system choose an interface name.
3543 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3544 * netlink will replace the format specifier with an appropriate index.
3545 */
3546 if (netdev->name[0] == '\0') {
3547 if (netdev->type == LXC_NET_PHYS)
3548 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
3549 else
3550 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
3551 }
3552
3553 /* rename the interface name */
3554 if (strcmp(current_ifname, netdev->name) != 0) {
3555 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
3556 if (err) {
3557 errno = -err;
3558 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
3559 current_ifname, netdev->name);
3560 return -1;
3561 }
3562
3563 TRACE("Renamed network device from \"%s\" to \"%s\"",
3564 current_ifname, netdev->name);
3565 }
3566
3567 /* Re-read the name of the interface because its name has changed
3568 * and would be automatically allocated by the system
3569 */
3570 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3571 ERROR("Failed get name for network device with ifindex %d",
3572 netdev->ifindex);
3573 return -1;
3574 }
3575
3576 /* Now update the recorded name of the network device to reflect the
3577 * name of the network device in the child's network namespace. We will
3578 * later on send this information back to the parent.
3579 */
3580 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
3581
3582 /* set a mac address */
3583 if (netdev->hwaddr) {
3584 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3585 ERROR("Failed to setup hw address for network device \"%s\"",
3586 current_ifname);
3587 return -1;
3588 }
3589 }
3590
3591 /* setup ipv4 addresses on the interface */
3592 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3593 ERROR("Failed to setup ip addresses for network device \"%s\"",
3594 current_ifname);
3595 return -1;
3596 }
3597
3598 /* setup ipv6 addresses on the interface */
3599 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3600 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
3601 current_ifname);
3602 return -1;
3603 }
3604
3605 /* set the network device up */
3606 if (netdev->flags & IFF_UP) {
3607 err = lxc_netdev_up(current_ifname);
3608 if (err) {
3609 errno = -err;
3610 SYSERROR("Failed to set network device \"%s\" up",
3611 current_ifname);
3612 return -1;
3613 }
3614
3615 /* the network is up, make the loopback up too */
3616 err = lxc_netdev_up("lo");
3617 if (err) {
3618 errno = -err;
3619 SYSERROR("Failed to set the loopback network device up");
3620 return -1;
3621 }
3622 }
3623
3624 /* setup ipv4 gateway on the interface */
3625 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
3626 if (!(netdev->flags & IFF_UP)) {
3627 ERROR("Cannot add ipv4 gateway for network device "
3628 "\"%s\" when not bringing up the interface", current_ifname);
3629 return -1;
3630 }
3631
3632 if (lxc_list_empty(&netdev->ipv4)) {
3633 ERROR("Cannot add ipv4 gateway for network device "
3634 "\"%s\" when not assigning an address", current_ifname);
3635 return -1;
3636 }
3637
3638 /* Setup device route if ipv4_gateway_dev is enabled */
3639 if (netdev->ipv4_gateway_dev) {
3640 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3641 if (err < 0) {
3642 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
3643 current_ifname);
3644 return minus_one_set_errno(-err);
3645 }
3646 } else {
3647 /* Check the gateway address is valid */
3648 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3649 return minus_one_set_errno(errno);
3650
3651 /* Try adding a default route to the gateway address */
3652 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3653 if (err < 0) {
3654 /* If adding the default route fails, this could be because the
3655 * gateway address is in a different subnet to the container's address.
3656 * To work around this, we try adding a static device route to the
3657 * gateway address first, and then try again.
3658 */
3659 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
3660 if (err < 0) {
3661 errno = -err;
3662 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
3663 bufinet4, current_ifname);
3664 return -1;
3665 }
3666
3667 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3668 if (err < 0) {
3669 errno = -err;
3670 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
3671 bufinet4, current_ifname);
3672 return -1;
3673 }
3674 }
3675 }
3676 }
3677
3678 /* setup ipv6 gateway on the interface */
3679 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
3680 if (!(netdev->flags & IFF_UP)) {
3681 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3682 current_ifname);
3683 return -1;
3684 }
3685
3686 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
3687 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3688 current_ifname);
3689 return -1;
3690 }
3691
3692 /* Setup device route if ipv6_gateway_dev is enabled */
3693 if (netdev->ipv6_gateway_dev) {
3694 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3695 if (err < 0) {
3696 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
3697 current_ifname);
3698 return minus_one_set_errno(-err);
3699 }
3700 } else {
3701 /* Check the gateway address is valid */
3702 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3703 return minus_one_set_errno(errno);
3704
3705 /* Try adding a default route to the gateway address */
3706 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3707 if (err < 0) {
3708 /* If adding the default route fails, this could be because the
3709 * gateway address is in a different subnet to the container's address.
3710 * To work around this, we try adding a static device route to the
3711 * gateway address first, and then try again.
3712 */
3713 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
3714 if (err < 0) {
3715 errno = -err;
3716 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
3717 bufinet6, current_ifname);
3718 return -1;
3719 }
3720
3721 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3722 if (err < 0) {
3723 errno = -err;
3724 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
3725 bufinet6, current_ifname);
3726 return -1;
3727 }
3728 }
3729 }
3730 }
3731
3732 DEBUG("Network device \"%s\" has been setup", current_ifname);
3733
3734 return 0;
3735 }
3736
3737 int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3738 struct lxc_list *network)
3739 {
3740 struct lxc_list *iterator;
3741
3742 lxc_list_for_each(iterator, network) {
3743 struct lxc_netdev *netdev = iterator->elem;
3744
3745 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
3746 ERROR("Failed to setup netdev");
3747 return -1;
3748 }
3749 }
3750
3751 if (!lxc_list_empty(network))
3752 INFO("Network has been setup");
3753
3754 return 0;
3755 }
3756
3757 int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
3758 {
3759 struct lxc_list *iterator;
3760 struct lxc_list *network = &handler->conf->network;
3761 int data_sock = handler->data_sock[0];
3762
3763 lxc_list_for_each(iterator, network) {
3764 int ret;
3765 struct lxc_netdev *netdev = iterator->elem;
3766
3767 if (netdev->type != LXC_NET_VETH)
3768 continue;
3769
3770 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
3771 if (ret < 0)
3772 return -1;
3773
3774 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3775 if (ret < 0)
3776 return -1;
3777
3778 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
3779 }
3780
3781 return 0;
3782 }
3783
3784 int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3785 {
3786 struct lxc_list *iterator;
3787 struct lxc_list *network = &handler->conf->network;
3788 int data_sock = handler->data_sock[1];
3789
3790 lxc_list_for_each(iterator, network) {
3791 int ret;
3792 struct lxc_netdev *netdev = iterator->elem;
3793
3794 if (netdev->type != LXC_NET_VETH)
3795 continue;
3796
3797 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
3798 if (ret < 0)
3799 return -1;
3800
3801 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3802 if (ret < 0)
3803 return -1;
3804 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
3805 }
3806
3807 return 0;
3808 }
3809
3810 int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3811 {
3812 struct lxc_list *iterator, *network;
3813 int data_sock = handler->data_sock[0];
3814
3815 if (!handler->am_root)
3816 return 0;
3817
3818 network = &handler->conf->network;
3819 lxc_list_for_each(iterator, network) {
3820 int ret;
3821 struct lxc_netdev *netdev = iterator->elem;
3822
3823 /* Send network device name in the child's namespace to parent. */
3824 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
3825 if (ret < 0)
3826 return -1;
3827
3828 /* Send network device ifindex in the child's namespace to
3829 * parent.
3830 */
3831 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
3832 if (ret < 0)
3833 return -1;
3834 }
3835
3836 if (!lxc_list_empty(network))
3837 TRACE("Sent network device names and ifindices to parent");
3838
3839 return 0;
3840 }
3841
3842 int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3843 {
3844 struct lxc_list *iterator, *network;
3845 int data_sock = handler->data_sock[1];
3846
3847 if (!handler->am_root)
3848 return 0;
3849
3850 network = &handler->conf->network;
3851 lxc_list_for_each(iterator, network) {
3852 int ret;
3853 struct lxc_netdev *netdev = iterator->elem;
3854
3855 /* Receive network device name in the child's namespace to
3856 * parent.
3857 */
3858 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
3859 if (ret < 0)
3860 return -1;
3861
3862 /* Receive network device ifindex in the child's namespace to
3863 * parent.
3864 */
3865 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
3866 if (ret < 0)
3867 return -1;
3868 }
3869
3870 return 0;
3871 }
3872
3873 void lxc_delete_network(struct lxc_handler *handler)
3874 {
3875 bool bret;
3876
3877 if (handler->am_root)
3878 bret = lxc_delete_network_priv(handler);
3879 else
3880 bret = lxc_delete_network_unpriv(handler);
3881 if (!bret)
3882 DEBUG("Failed to delete network devices");
3883 else
3884 DEBUG("Deleted network devices");
3885 }
3886
3887 int lxc_netns_set_nsid(int fd)
3888 {
3889 int ret;
3890 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3891 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3892 NLMSG_ALIGN(1024)];
3893 struct nl_handler nlh;
3894 struct nlmsghdr *hdr;
3895 struct rtgenmsg *msg;
3896 int saved_errno;
3897 const __s32 ns_id = -1;
3898 const __u32 netns_fd = fd;
3899
3900 ret = netlink_open(&nlh, NETLINK_ROUTE);
3901 if (ret < 0)
3902 return -1;
3903
3904 memset(buf, 0, sizeof(buf));
3905
3906 #pragma GCC diagnostic push
3907 #pragma GCC diagnostic ignored "-Wcast-align"
3908 hdr = (struct nlmsghdr *)buf;
3909 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
3910 #pragma GCC diagnostic pop
3911
3912 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3913 hdr->nlmsg_type = RTM_NEWNSID;
3914 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3915 hdr->nlmsg_pid = 0;
3916 hdr->nlmsg_seq = RTM_NEWNSID;
3917 msg->rtgen_family = AF_UNSPEC;
3918
3919 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3920 if (ret < 0)
3921 goto on_error;
3922
3923 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3924 if (ret < 0)
3925 goto on_error;
3926
3927 ret = __netlink_transaction(&nlh, hdr, hdr);
3928
3929 on_error:
3930 saved_errno = errno;
3931 netlink_close(&nlh);
3932 errno = saved_errno;
3933
3934 return ret;
3935 }
3936
/*
 * Index a sequence of rtnetlink attributes by attribute type.
 *
 * @tb   table with room for @max + 1 entries; it is zeroed first and then
 *       tb[type] is pointed at the first attribute of that type
 * @max  largest attribute type that is recorded; attributes with a larger
 *       type are ignored
 * @rta  first attribute of the sequence
 * @len  number of bytes available starting at @rta
 *
 * Later attributes of an already recorded type do not replace the first
 * one. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short type = rta->rta_type;

		/* Skip out-of-range types and keep the first hit per type. */
		if (type > max || tb[type])
			continue;

		tb[type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
3956
/*
 * Return the payload of @rta interpreted as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() instead of being read through a
 * casted __s32 pointer: the attribute usually lives inside a char receive
 * buffer, so a direct dereference would depend on that buffer's alignment
 * and violate the effective-type (strict aliasing) rules.
 *
 * The caller must make sure that @rta carries at least sizeof(__s32) bytes
 * of payload; no length check is performed here.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 val;

	memcpy(&val, RTA_DATA(rta), sizeof(val));

	return val;
}
3961
/*
 * Given a pointer to the struct rtgenmsg payload of a netlink message,
 * yield a pointer to the position right after it (rounded up with
 * NLMSG_ALIGN), typed as the first struct rtattr. Only defined here when
 * no definition is already in scope.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3966
3967 int lxc_netns_get_nsid(int fd)
3968 {
3969 int ret;
3970 ssize_t len;
3971 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3972 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3973 NLMSG_ALIGN(1024)];
3974 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3975 struct nl_handler nlh;
3976 struct nlmsghdr *hdr;
3977 struct rtgenmsg *msg;
3978 int saved_errno;
3979 __u32 netns_fd = fd;
3980
3981 ret = netlink_open(&nlh, NETLINK_ROUTE);
3982 if (ret < 0)
3983 return -1;
3984
3985 memset(buf, 0, sizeof(buf));
3986
3987 #pragma GCC diagnostic push
3988 #pragma GCC diagnostic ignored "-Wcast-align"
3989 hdr = (struct nlmsghdr *)buf;
3990 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
3991 #pragma GCC diagnostic pop
3992
3993 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3994 hdr->nlmsg_type = RTM_GETNSID;
3995 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3996 hdr->nlmsg_pid = 0;
3997 hdr->nlmsg_seq = RTM_GETNSID;
3998 msg->rtgen_family = AF_UNSPEC;
3999
4000 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
4001 if (ret == 0)
4002 ret = __netlink_transaction(&nlh, hdr, hdr);
4003
4004 saved_errno = errno;
4005 netlink_close(&nlh);
4006 errno = saved_errno;
4007 if (ret < 0)
4008 return -1;
4009
4010 errno = EINVAL;
4011 msg = NLMSG_DATA(hdr);
4012 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4013 if (len < 0)
4014 return -1;
4015
4016 #pragma GCC diagnostic push
4017 #pragma GCC diagnostic ignored "-Wcast-align"
4018 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4019 if (tb[__LXC_NETNSA_NSID])
4020 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
4021 #pragma GCC diagnostic pop
4022
4023 return -1;
4024 }
4025
4026 int lxc_create_network(struct lxc_handler *handler)
4027 {
4028 int ret;
4029
4030 /*
4031 * Find gateway addresses from the link device, which is no longer
4032 * accessible inside the container. Do this before creating network
4033 * interfaces, since goto out_delete_net does not work before
4034 * lxc_clone.
4035 */
4036 ret = lxc_find_gateway_addresses(handler);
4037 if (ret) {
4038 ERROR("Failed to find gateway addresses");
4039 return -1;
4040 }
4041
4042 if (handler->am_root) {
4043 ret = lxc_create_network_priv(handler);
4044 if (ret)
4045 return -1;
4046
4047 return lxc_network_move_created_netdev_priv(handler);
4048 }
4049
4050 return lxc_create_network_unpriv(handler);
4051 }