]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/network.c
network: simplify instantiate_ipvlan()
[mirror_lxc.git] / src / lxc / network.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #ifndef _GNU_SOURCE
25 #define _GNU_SOURCE 1
26 #endif
27 #include <arpa/inet.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <linux/netlink.h>
32 #include <linux/rtnetlink.h>
33 #include <linux/sockios.h>
34 #include <net/ethernet.h>
35 #include <net/if.h>
36 #include <net/if_arp.h>
37 #include <netinet/in.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <sys/inotify.h>
42 #include <sys/ioctl.h>
43 #include <sys/param.h>
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/types.h>
47 #include <time.h>
48 #include <unistd.h>
49
50 #include "../include/netns_ifaddrs.h"
51 #include "af_unix.h"
52 #include "conf.h"
53 #include "config.h"
54 #include "file_utils.h"
55 #include "log.h"
56 #include "macro.h"
57 #include "memory_utils.h"
58 #include "network.h"
59 #include "nl.h"
60 #include "raw_syscalls.h"
61 #include "syscall_wrappers.h"
62 #include "utils.h"
63
64 #ifndef HAVE_STRLCPY
65 #include "include/strlcpy.h"
66 #endif
67
/* Log definition for this file, set up through the lxc_log_define() macro. */
lxc_log_define(network, lxc);

/*
 * Signature shared by the per-network-type callbacks stored in the
 * netdev_conf[] and netdev_deconf[] tables.
 */
typedef int (*instantiate_cb)(struct lxc_handler *handler, struct lxc_netdev *netdev);

/* Name of the loopback device. */
static const char loop_device[] = "lo";
72
73 static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
74 {
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116 out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121 }
122
/* Add a route to the IPv4 destination @dest/@netmask via interface @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_NEWROUTE;

	return lxc_ip_route_dest(request_type, AF_INET, ifindex, dest, netmask);
}
127
/* Add a route to the IPv6 destination @dest/@netmask via interface @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_NEWROUTE;

	return lxc_ip_route_dest(request_type, AF_INET6, ifindex, dest, netmask);
}
132
/* Remove the route to the IPv4 destination @dest/@netmask via interface @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_DELROUTE;

	return lxc_ip_route_dest(request_type, AF_INET, ifindex, dest, netmask);
}
137
/* Remove the route to the IPv6 destination @dest/@netmask via interface @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_DELROUTE;

	return lxc_ip_route_dest(request_type, AF_INET6, ifindex, dest, netmask);
}
142
143 static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144 {
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160 }
161
162 static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163 {
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179 }
180
181 static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182 {
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
188 if (netdev->priv.veth_attr.pair[0] != '\0') {
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
217 goto out_delete;
218 }
219
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
222 * of a container */
223 err = setup_private_host_hw_addr(veth1);
224 if (err) {
225 errno = -err;
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
227 goto out_delete;
228 }
229
230 /* Retrieve ifindex of the host's veth device. */
231 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
232 if (!netdev->priv.veth_attr.ifindex) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
234 goto out_delete;
235 }
236
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
240 * on.
241 */
242 netdev->ifindex = if_nametoindex(veth2);
243 if (!netdev->ifindex) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
245 goto out_delete;
246 }
247
248 if (netdev->mtu) {
249 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
250 WARN("Failed to parse mtu");
251 else
252 INFO("Retrieved mtu %d", mtu);
253 } else if (netdev->link[0] != '\0') {
254 bridge_index = if_nametoindex(netdev->link);
255 if (bridge_index) {
256 mtu = netdev_get_mtu(bridge_index);
257 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
258 } else {
259 mtu = netdev_get_mtu(netdev->ifindex);
260 INFO("Retrieved mtu %d from %s", mtu, veth2);
261 }
262 }
263
264 if (mtu) {
265 err = lxc_netdev_set_mtu(veth1, mtu);
266 if (!err)
267 err = lxc_netdev_set_mtu(veth2, mtu);
268
269 if (err) {
270 errno = -err;
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu, veth1, veth2);
273 goto out_delete;
274 }
275 }
276
277 if (netdev->link[0] != '\0') {
278 err = lxc_bridge_attach(netdev->link, veth1);
279 if (err) {
280 errno = -err;
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1, netdev->link);
283 goto out_delete;
284 }
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
286 }
287
288 err = lxc_netdev_up(veth1);
289 if (err) {
290 errno = -err;
291 SYSERROR("Failed to set \"%s\" up", veth1);
292 goto out_delete;
293 }
294
295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
298 goto out_delete;
299 }
300
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
304 goto out_delete;
305 }
306
307 if (netdev->upscript) {
308 char *argv[] = {
309 "veth",
310 netdev->link,
311 veth1,
312 NULL,
313 };
314
315 err = run_script_argv(handler->name,
316 handler->conf->hooks_version, "net",
317 netdev->upscript, "up", argv);
318 if (err < 0)
319 goto out_delete;
320 }
321
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
323 netdev->ifindex);
324
325 return 0;
326
327 out_delete:
328 if (netdev->ifindex != 0)
329 lxc_netdev_delete_by_name(veth1);
330 return -1;
331 }
332
333 static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
334 {
335 char peer[IFNAMSIZ];
336 int err;
337 unsigned int mtu = 0;
338
339 if (netdev->link[0] == '\0') {
340 ERROR("No link for macvlan network device specified");
341 return -1;
342 }
343
344 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
345 if (err < 0 || (size_t)err >= sizeof(peer))
346 return -1;
347
348 if (!lxc_mkifname(peer))
349 return -1;
350
351 err = lxc_macvlan_create(netdev->link, peer,
352 netdev->priv.macvlan_attr.mode);
353 if (err) {
354 errno = -err;
355 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
356 peer, netdev->link);
357 goto on_error;
358 }
359
360 strlcpy(netdev->created_name, peer, IFNAMSIZ);
361
362 netdev->ifindex = if_nametoindex(peer);
363 if (!netdev->ifindex) {
364 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
365 goto on_error;
366 }
367
368 if (netdev->mtu) {
369 err = lxc_safe_uint(netdev->mtu, &mtu);
370 if (err < 0) {
371 errno = -err;
372 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
373 goto on_error;
374 }
375
376 err = lxc_netdev_set_mtu(peer, mtu);
377 if (err < 0) {
378 errno = -err;
379 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
380 goto on_error;
381 }
382 }
383
384 if (netdev->upscript) {
385 char *argv[] = {
386 "macvlan",
387 netdev->link,
388 NULL,
389 };
390
391 err = run_script_argv(handler->name,
392 handler->conf->hooks_version, "net",
393 netdev->upscript, "up", argv);
394 if (err < 0)
395 goto on_error;
396 }
397
398 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
399 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
400
401 return 0;
402
403 on_error:
404 lxc_netdev_delete_by_name(peer);
405 return -1;
406 }
407
408 static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
409 {
410 int err, index, len;
411 struct ifinfomsg *ifi;
412 struct nl_handler nlh;
413 struct rtattr *nest, *nest2;
414 struct nlmsg *answer = NULL, *nlmsg = NULL;
415
416 len = strlen(master);
417 if (len == 1 || len >= IFNAMSIZ)
418 return minus_one_set_errno(EINVAL);
419
420 len = strlen(name);
421 if (len == 1 || len >= IFNAMSIZ)
422 return minus_one_set_errno(EINVAL);
423
424 index = if_nametoindex(master);
425 if (!index)
426 return minus_one_set_errno(EINVAL);
427
428 err = netlink_open(&nlh, NETLINK_ROUTE);
429 if (err)
430 return minus_one_set_errno(-err);
431
432 err = -ENOMEM;
433 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
434 if (!nlmsg)
435 goto out;
436
437 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
438 if (!answer)
439 goto out;
440
441 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
442 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
443
444 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
445 if (!ifi) {
446 goto out;
447 }
448 ifi->ifi_family = AF_UNSPEC;
449
450 err = -EPROTO;
451 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
452 if (!nest)
453 goto out;
454
455 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
456 goto out;
457
458 if (mode) {
459 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
460 if (!nest2)
461 goto out;
462
463 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
464 goto out;
465
466 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
467 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
468 */
469 if (isolation > 0) {
470 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
471 goto out;
472 }
473
474 nla_end_nested(nlmsg, nest2);
475 }
476
477 nla_end_nested(nlmsg, nest);
478
479 if (nla_put_u32(nlmsg, IFLA_LINK, index))
480 goto out;
481
482 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
483 goto out;
484
485 err = netlink_transaction(&nlh, nlmsg, answer);
486 out:
487 netlink_close(&nlh);
488 nlmsg_free(answer);
489 nlmsg_free(nlmsg);
490 if (err < 0)
491 return minus_one_set_errno(-err);
492 return 0;
493 }
494
495 static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
496 {
497 char peer[IFNAMSIZ];
498 int err;
499 unsigned int mtu = 0;
500
501 if (netdev->link[0] == '\0') {
502 ERROR("No link for ipvlan network device specified");
503 return -1;
504 }
505
506 err = snprintf(peer, sizeof(peer), "ipXXXXXX");
507 if (err < 0 || (size_t)err >= sizeof(peer))
508 return -1;
509
510 if (!lxc_mkifname(peer))
511 return -1;
512
513 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode,
514 netdev->priv.ipvlan_attr.isolation);
515 if (err) {
516 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
517 peer, netdev->link);
518 goto on_error;
519 }
520
521 netdev->ifindex = if_nametoindex(peer);
522 if (!netdev->ifindex) {
523 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
524 goto on_error;
525 }
526
527 if (netdev->mtu) {
528 err = lxc_safe_uint(netdev->mtu, &mtu);
529 if (err < 0) {
530 errno = -err;
531 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"",
532 netdev->mtu, peer);
533 goto on_error;
534 }
535
536 err = lxc_netdev_set_mtu(peer, mtu);
537 if (err < 0) {
538 errno = -err;
539 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"",
540 netdev->mtu, peer);
541 goto on_error;
542 }
543 }
544
545 if (netdev->upscript) {
546 char *argv[] = {
547 "ipvlan",
548 netdev->link,
549 NULL,
550 };
551
552 err = run_script_argv(handler->name, handler->conf->hooks_version,
553 "net", netdev->upscript, "up", argv);
554 if (err < 0)
555 goto on_error;
556 }
557
558 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer,
559 netdev->ifindex, netdev->priv.macvlan_attr.mode);
560
561 return 0;
562
563 on_error:
564 lxc_netdev_delete_by_name(peer);
565 return -1;
566 }
567
568 static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
569 {
570 char peer[IFNAMSIZ];
571 int err;
572 static uint16_t vlan_cntr = 0;
573 unsigned int mtu = 0;
574
575 if (netdev->link[0] == '\0') {
576 ERROR("No link for vlan network device specified");
577 return -1;
578 }
579
580 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
581 if (err < 0 || (size_t)err >= sizeof(peer))
582 return -1;
583
584 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
585 if (err) {
586 errno = -err;
587 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
588 peer, netdev->link);
589 return -1;
590 }
591
592 netdev->ifindex = if_nametoindex(peer);
593 if (!netdev->ifindex) {
594 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
595 goto on_error;
596 }
597
598 if (netdev->mtu) {
599 err = lxc_safe_uint(netdev->mtu, &mtu);
600 if (err < 0) {
601 errno = -err;
602 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
603 goto on_error;
604 }
605
606 err = lxc_netdev_set_mtu(peer, mtu);
607 if (err) {
608 errno = -err;
609 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
610 goto on_error;
611 }
612 }
613
614 if (netdev->upscript) {
615 char *argv[] = {
616 "vlan",
617 netdev->link,
618 NULL,
619 };
620
621 err = run_script_argv(handler->name,
622 handler->conf->hooks_version, "net",
623 netdev->upscript, "up", argv);
624 if (err < 0) {
625 goto on_error;
626 }
627 }
628
629 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"",
630 peer, netdev->ifindex);
631
632 return 0;
633
634 on_error:
635 lxc_netdev_delete_by_name(peer);
636 return -1;
637 }
638
639 static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
640 {
641 int err, mtu_orig = 0;
642 unsigned int mtu = 0;
643
644 if (netdev->link[0] == '\0') {
645 ERROR("No link for physical interface specified");
646 return -1;
647 }
648
649 /* Note that we're retrieving the container's ifindex in the host's
650 * network namespace because we need it to move the device from the
651 * host's network namespace to the container's network namespace later
652 * on.
653 * Note that netdev->link will contain the name of the physical network
654 * device in the host's namespace.
655 */
656 netdev->ifindex = if_nametoindex(netdev->link);
657 if (!netdev->ifindex) {
658 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
659 return -1;
660 }
661
662 /* Store the ifindex of the host's network device in the host's
663 * namespace.
664 */
665 netdev->priv.phys_attr.ifindex = netdev->ifindex;
666
667 /* Get original device MTU setting and store for restoration after container shutdown. */
668 mtu_orig = netdev_get_mtu(netdev->ifindex);
669 if (mtu_orig < 0) {
670 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
671 return minus_one_set_errno(-mtu_orig);
672 }
673
674 netdev->priv.phys_attr.mtu = mtu_orig;
675
676 if (netdev->mtu) {
677 err = lxc_safe_uint(netdev->mtu, &mtu);
678 if (err < 0) {
679 errno = -err;
680 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
681 return -1;
682 }
683
684 err = lxc_netdev_set_mtu(netdev->link, mtu);
685 if (err < 0) {
686 errno = -err;
687 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
688 return -1;
689 }
690 }
691
692 if (netdev->upscript) {
693 char *argv[] = {
694 "phys",
695 netdev->link,
696 NULL,
697 };
698
699 err = run_script_argv(handler->name,
700 handler->conf->hooks_version, "net",
701 netdev->upscript, "up", argv);
702 if (err < 0) {
703 return -1;
704 }
705 }
706
707 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link, netdev->ifindex);
708
709 return 0;
710 }
711
712 static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
713 {
714 int ret;
715 char *argv[] = {
716 "empty",
717 NULL,
718 };
719
720 netdev->ifindex = 0;
721 if (!netdev->upscript)
722 return 0;
723
724 ret = run_script_argv(handler->name, handler->conf->hooks_version,
725 "net", netdev->upscript, "up", argv);
726 if (ret < 0)
727 return -1;
728
729 return 0;
730 }
731
732 static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
733 {
734 netdev->ifindex = 0;
735 return 0;
736 }
737
738 static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
739 [LXC_NET_VETH] = instantiate_veth,
740 [LXC_NET_MACVLAN] = instantiate_macvlan,
741 [LXC_NET_IPVLAN] = instantiate_ipvlan,
742 [LXC_NET_VLAN] = instantiate_vlan,
743 [LXC_NET_PHYS] = instantiate_phys,
744 [LXC_NET_EMPTY] = instantiate_empty,
745 [LXC_NET_NONE] = instantiate_none,
746 };
747
748 static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
749 {
750 int ret;
751 char *argv[] = {
752 "veth",
753 netdev->link,
754 NULL,
755 NULL,
756 };
757
758 if (!netdev->downscript)
759 return 0;
760
761 if (netdev->priv.veth_attr.pair[0] != '\0')
762 argv[2] = netdev->priv.veth_attr.pair;
763 else
764 argv[2] = netdev->priv.veth_attr.veth1;
765
766 ret = run_script_argv(handler->name,
767 handler->conf->hooks_version, "net",
768 netdev->downscript, "down", argv);
769 if (ret < 0)
770 return -1;
771
772 return 0;
773 }
774
775 static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
776 {
777 int ret;
778 char *argv[] = {
779 "macvlan",
780 netdev->link,
781 NULL,
782 };
783
784 if (!netdev->downscript)
785 return 0;
786
787 ret = run_script_argv(handler->name, handler->conf->hooks_version,
788 "net", netdev->downscript, "down", argv);
789 if (ret < 0)
790 return -1;
791
792 return 0;
793 }
794
795 static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
796 {
797 int ret;
798 char *argv[] = {
799 "ipvlan",
800 netdev->link,
801 NULL,
802 };
803
804 if (!netdev->downscript)
805 return 0;
806
807 ret = run_script_argv(handler->name, handler->conf->hooks_version,
808 "net", netdev->downscript, "down", argv);
809 if (ret < 0)
810 return -1;
811
812 return 0;
813 }
814
815 static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
816 {
817 int ret;
818 char *argv[] = {
819 "vlan",
820 netdev->link,
821 NULL,
822 };
823
824 if (!netdev->downscript)
825 return 0;
826
827 ret = run_script_argv(handler->name, handler->conf->hooks_version,
828 "net", netdev->downscript, "down", argv);
829 if (ret < 0)
830 return -1;
831
832 return 0;
833 }
834
835 static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
836 {
837 int ret;
838 char *argv[] = {
839 "phys",
840 netdev->link,
841 NULL,
842 };
843
844 if (!netdev->downscript)
845 return 0;
846
847 ret = run_script_argv(handler->name, handler->conf->hooks_version,
848 "net", netdev->downscript, "down", argv);
849 if (ret < 0)
850 return -1;
851
852 return 0;
853 }
854
855 static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
856 {
857 int ret;
858 char *argv[] = {
859 "empty",
860 NULL,
861 };
862
863 if (!netdev->downscript)
864 return 0;
865
866 ret = run_script_argv(handler->name, handler->conf->hooks_version,
867 "net", netdev->downscript, "down", argv);
868 if (ret < 0)
869 return -1;
870
871 return 0;
872 }
873
/*
 * Teardown callback for the "none" network type. Neither argument is used
 * and there is nothing to undo, so this always returns 0.
 */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
878
879 static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
880 [LXC_NET_VETH] = shutdown_veth,
881 [LXC_NET_MACVLAN] = shutdown_macvlan,
882 [LXC_NET_IPVLAN] = shutdown_ipvlan,
883 [LXC_NET_VLAN] = shutdown_vlan,
884 [LXC_NET_PHYS] = shutdown_phys,
885 [LXC_NET_EMPTY] = shutdown_empty,
886 [LXC_NET_NONE] = shutdown_none,
887 };
888
889 static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
890 {
891 int err;
892 struct nl_handler nlh;
893 struct ifinfomsg *ifi;
894 struct nlmsg *nlmsg = NULL;
895
896 err = netlink_open(&nlh, NETLINK_ROUTE);
897 if (err)
898 return err;
899
900 err = -ENOMEM;
901 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
902 if (!nlmsg)
903 goto out;
904
905 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
906 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
907
908 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
909 if (!ifi)
910 goto out;
911 ifi->ifi_family = AF_UNSPEC;
912 ifi->ifi_index = ifindex;
913
914 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
915 goto out;
916
917 if (ifname != NULL) {
918 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
919 goto out;
920 }
921
922 err = netlink_transaction(&nlh, nlmsg, nlmsg);
923 out:
924 netlink_close(&nlh);
925 nlmsg_free(nlmsg);
926 return err;
927 }
928
929 int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
930 {
931 int err;
932 struct nl_handler nlh;
933 struct ifinfomsg *ifi;
934 struct nlmsg *nlmsg = NULL;
935
936 err = netlink_open(&nlh, NETLINK_ROUTE);
937 if (err)
938 return err;
939
940 err = -ENOMEM;
941 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
942 if (!nlmsg)
943 goto out;
944
945 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
946 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
947
948 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
949 if (!ifi)
950 goto out;
951 ifi->ifi_family = AF_UNSPEC;
952 ifi->ifi_index = ifindex;
953
954 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
955 goto out;
956
957 if (ifname != NULL) {
958 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
959 goto out;
960 }
961
962 err = netlink_transaction(&nlh, nlmsg, nlmsg);
963 out:
964 netlink_close(&nlh);
965 nlmsg_free(nlmsg);
966 return err;
967 }
968
969 /* If we are asked to move a wireless interface, then we must actually move its
970 * phyN device. Detect that condition and return the physname here. The physname
971 * will be passed to lxc_netdev_move_wlan() which will free it when done.
972 */
973 #define PHYSNAME "/sys/class/net/%s/phy80211/name"
974 static char *is_wlan(const char *ifname)
975 {
976 __do_free char *path = NULL;
977 int i, ret;
978 long physlen;
979 size_t len;
980 FILE *f;
981 char *physname = NULL;
982
983 len = strlen(ifname) + strlen(PHYSNAME) - 1;
984 path = must_realloc(NULL, len + 1);
985 ret = snprintf(path, len, PHYSNAME, ifname);
986 if (ret < 0 || (size_t)ret >= len)
987 goto bad;
988
989 f = fopen(path, "r");
990 if (!f)
991 goto bad;
992
993 /* Feh - sb.st_size is always 4096. */
994 fseek(f, 0, SEEK_END);
995 physlen = ftell(f);
996 fseek(f, 0, SEEK_SET);
997 if (physlen < 0) {
998 fclose(f);
999 goto bad;
1000 }
1001
1002 physname = malloc(physlen + 1);
1003 if (!physname) {
1004 fclose(f);
1005 goto bad;
1006 }
1007
1008 memset(physname, 0, physlen + 1);
1009 ret = fread(physname, 1, physlen, f);
1010 fclose(f);
1011 if (ret < 0)
1012 goto bad;
1013
1014 for (i = 0; i < physlen; i++) {
1015 if (physname[i] == '\n')
1016 physname[i] = '\0';
1017
1018 if (physname[i] == '\0')
1019 break;
1020 }
1021
1022 return physname;
1023
1024 bad:
1025 free(physname);
1026 return NULL;
1027 }
1028
/*
 * Rename the network device @old to @new inside the network namespace of
 * process @pid.
 *
 * The work is done in a forked child that switches into the target namespace
 * first; the parent just waits for it.
 *
 * Returns -1 when fork() fails, otherwise the result of wait_for_pid() for
 * the child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on. It must terminate with _exit() on every path:
	 * returning would let it carry on executing the caller's code as a
	 * second copy of the parent.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1046
/*
 * Move the wireless phy @physname into the network namespace of @pid by
 * running "iw phy <physname> set netns <pid>", then rename @ifname to
 * @newname inside that namespace when @newname is not NULL.
 *
 * Takes ownership of @physname and frees it on every path.
 *
 * Returns 0 on success (or the result of the rename when one is requested)
 * and -1 when iw is not found, fork() fails or iw exits unsuccessfully.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	char *iw_path;
	pid_t child;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto out;
	/* Only the existence of iw matters; execlp() does its own lookup. */
	free(iw_path);

	child = fork();
	if (child < 0)
		goto out;

	if (child == 0) {
		char pidstr[30];

		sprintf(pidstr, "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		/* Only reached when execlp() failed. */
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child))
		goto out;

	if (newname)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

out:
	free(physname);
	return ret;
}
1086
/*
 * Move the network device @ifname to the process @pid, optionally renaming
 * it to @newname.
 *
 * Devices for which is_wlan() finds a phy name are handed to
 * lxc_netdev_move_wlan(); everything else goes through
 * lxc_netdev_move_by_index().
 *
 * Returns -EINVAL when @ifname is NULL or does not resolve to an ifindex,
 * otherwise the result of the helper that was used.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	int ifindex;
	char *phy;

	ifindex = ifname ? if_nametoindex(ifname) : 0;
	if (!ifindex)
		return -EINVAL;

	phy = is_wlan(ifname);
	if (!phy)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	/* lxc_netdev_move_wlan() frees phy. */
	return lxc_netdev_move_wlan(phy, ifname, pid, newname);
}
1105
1106 int lxc_netdev_delete_by_index(int ifindex)
1107 {
1108 int err;
1109 struct ifinfomsg *ifi;
1110 struct nl_handler nlh;
1111 struct nlmsg *answer = NULL, *nlmsg = NULL;
1112
1113 err = netlink_open(&nlh, NETLINK_ROUTE);
1114 if (err)
1115 return err;
1116
1117 err = -ENOMEM;
1118 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1119 if (!nlmsg)
1120 goto out;
1121
1122 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1123 if (!answer)
1124 goto out;
1125
1126 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1127 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1128
1129 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1130 if (!ifi)
1131 goto out;
1132 ifi->ifi_family = AF_UNSPEC;
1133 ifi->ifi_index = ifindex;
1134
1135 err = netlink_transaction(&nlh, nlmsg, answer);
1136 out:
1137 netlink_close(&nlh);
1138 nlmsg_free(answer);
1139 nlmsg_free(nlmsg);
1140 return err;
1141 }
1142
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL when @name does not resolve to an ifindex, otherwise the
 * result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	return ifindex ? lxc_netdev_delete_by_index(ifindex) : -EINVAL;
}
1153
1154 int lxc_netdev_rename_by_index(int ifindex, const char *newname)
1155 {
1156 int err, len;
1157 struct ifinfomsg *ifi;
1158 struct nl_handler nlh;
1159 struct nlmsg *answer = NULL, *nlmsg = NULL;
1160
1161 err = netlink_open(&nlh, NETLINK_ROUTE);
1162 if (err)
1163 return err;
1164
1165 len = strlen(newname);
1166 if (len == 1 || len >= IFNAMSIZ) {
1167 err = -EINVAL;
1168 goto out;
1169 }
1170
1171 err = -ENOMEM;
1172 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1173 if (!nlmsg)
1174 goto out;
1175
1176 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1177 if (!answer)
1178 goto out;
1179
1180 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1181 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1182
1183 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1184 if (!ifi)
1185 goto out;
1186 ifi->ifi_family = AF_UNSPEC;
1187 ifi->ifi_index = ifindex;
1188
1189 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1190 goto out;
1191
1192 err = netlink_transaction(&nlh, nlmsg, answer);
1193 out:
1194 netlink_close(&nlh);
1195 nlmsg_free(answer);
1196 nlmsg_free(nlmsg);
1197 return err;
1198 }
1199
1200 int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1201 {
1202 int len, index;
1203
1204 len = strlen(oldname);
1205 if (len == 1 || len >= IFNAMSIZ)
1206 return -EINVAL;
1207
1208 index = if_nametoindex(oldname);
1209 if (!index)
1210 return -EINVAL;
1211
1212 return lxc_netdev_rename_by_index(index, newname);
1213 }
1214
1215 int netdev_set_flag(const char *name, int flag)
1216 {
1217 int err, index, len;
1218 struct ifinfomsg *ifi;
1219 struct nl_handler nlh;
1220 struct nlmsg *answer = NULL, *nlmsg = NULL;
1221
1222 err = netlink_open(&nlh, NETLINK_ROUTE);
1223 if (err)
1224 return err;
1225
1226 err = -EINVAL;
1227 len = strlen(name);
1228 if (len == 1 || len >= IFNAMSIZ)
1229 goto out;
1230
1231 err = -ENOMEM;
1232 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1233 if (!nlmsg)
1234 goto out;
1235
1236 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1237 if (!answer)
1238 goto out;
1239
1240 err = -EINVAL;
1241 index = if_nametoindex(name);
1242 if (!index)
1243 goto out;
1244
1245 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1246 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1247
1248 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1249 if (!ifi) {
1250 err = -ENOMEM;
1251 goto out;
1252 }
1253 ifi->ifi_family = AF_UNSPEC;
1254 ifi->ifi_index = index;
1255 ifi->ifi_change |= IFF_UP;
1256 ifi->ifi_flags |= flag;
1257
1258 err = netlink_transaction(&nlh, nlmsg, answer);
1259 out:
1260 netlink_close(&nlh);
1261 nlmsg_free(nlmsg);
1262 nlmsg_free(answer);
1263 return err;
1264 }
1265
1266 int netdev_get_flag(const char *name, int *flag)
1267 {
1268 int err, index, len;
1269 struct ifinfomsg *ifi;
1270 struct nl_handler nlh;
1271 struct nlmsg *answer = NULL, *nlmsg = NULL;
1272
1273 if (!name)
1274 return -EINVAL;
1275
1276 err = netlink_open(&nlh, NETLINK_ROUTE);
1277 if (err)
1278 return err;
1279
1280 err = -EINVAL;
1281 len = strlen(name);
1282 if (len == 1 || len >= IFNAMSIZ)
1283 goto out;
1284
1285 err = -ENOMEM;
1286 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1287 if (!nlmsg)
1288 goto out;
1289
1290 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1291 if (!answer)
1292 goto out;
1293
1294 err = -EINVAL;
1295 index = if_nametoindex(name);
1296 if (!index)
1297 goto out;
1298
1299 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1300 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1301
1302 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1303 if (!ifi) {
1304 err = -ENOMEM;
1305 goto out;
1306 }
1307 ifi->ifi_family = AF_UNSPEC;
1308 ifi->ifi_index = index;
1309
1310 err = netlink_transaction(&nlh, nlmsg, answer);
1311 if (err)
1312 goto out;
1313
1314 ifi = NLMSG_DATA(answer->nlmsghdr);
1315
1316 *flag = ifi->ifi_flags;
1317 out:
1318 netlink_close(&nlh);
1319 nlmsg_free(nlmsg);
1320 nlmsg_free(answer);
1321 return err;
1322 }
1323
1324 /*
1325 * \brief Check a interface is up or not.
1326 *
1327 * \param name: name for the interface.
1328 *
1329 * \return int.
1330 * 0 means interface is down.
1331 * 1 means interface is up.
1332 * Others means error happened, and ret-value is the error number.
1333 */
1334 int lxc_netdev_isup(const char *name)
1335 {
1336 int err, flag;
1337
1338 err = netdev_get_flag(name, &flag);
1339 if (err)
1340 return err;
1341
1342 if (flag & IFF_UP)
1343 return 1;
1344
1345 return 0;
1346 }
1347
1348 int netdev_get_mtu(int ifindex)
1349 {
1350 int answer_len, err, res;
1351 struct nl_handler nlh;
1352 struct ifinfomsg *ifi;
1353 struct nlmsghdr *msg;
1354 int readmore = 0, recv_len = 0;
1355 struct nlmsg *answer = NULL, *nlmsg = NULL;
1356
1357 err = netlink_open(&nlh, NETLINK_ROUTE);
1358 if (err)
1359 return err;
1360
1361 err = -ENOMEM;
1362 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1363 if (!nlmsg)
1364 goto out;
1365
1366 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1367 if (!answer)
1368 goto out;
1369
1370 /* Save the answer buffer length, since it will be overwritten
1371 * on the first receive (and we might need to receive more than
1372 * once.
1373 */
1374 answer_len = answer->nlmsghdr->nlmsg_len;
1375
1376 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
1377 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1378
1379 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1380 if (!ifi)
1381 goto out;
1382 ifi->ifi_family = AF_UNSPEC;
1383
1384 /* Send the request for addresses, which returns all addresses
1385 * on all interfaces. */
1386 err = netlink_send(&nlh, nlmsg);
1387 if (err < 0)
1388 goto out;
1389
1390 #pragma GCC diagnostic push
1391 #pragma GCC diagnostic ignored "-Wcast-align"
1392
1393 do {
1394 /* Restore the answer buffer length, it might have been
1395 * overwritten by a previous receive.
1396 */
1397 answer->nlmsghdr->nlmsg_len = answer_len;
1398
1399 /* Get the (next) batch of reply messages */
1400 err = netlink_rcv(&nlh, answer);
1401 if (err < 0)
1402 goto out;
1403
1404 recv_len = err;
1405
1406 /* Satisfy the typing for the netlink macros */
1407 msg = answer->nlmsghdr;
1408
1409 while (NLMSG_OK(msg, recv_len)) {
1410
1411 /* Stop reading if we see an error message */
1412 if (msg->nlmsg_type == NLMSG_ERROR) {
1413 struct nlmsgerr *errmsg =
1414 (struct nlmsgerr *)NLMSG_DATA(msg);
1415 err = errmsg->error;
1416 goto out;
1417 }
1418
1419 /* Stop reading if we see a NLMSG_DONE message */
1420 if (msg->nlmsg_type == NLMSG_DONE) {
1421 readmore = 0;
1422 break;
1423 }
1424
1425 ifi = NLMSG_DATA(msg);
1426 if (ifi->ifi_index == ifindex) {
1427 struct rtattr *rta = IFLA_RTA(ifi);
1428 int attr_len =
1429 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1430 res = 0;
1431 while (RTA_OK(rta, attr_len)) {
1432 /* Found a local address for the
1433 * requested interface, return it.
1434 */
1435 if (rta->rta_type == IFLA_MTU) {
1436 memcpy(&res, RTA_DATA(rta),
1437 sizeof(int));
1438 err = res;
1439 goto out;
1440 }
1441 rta = RTA_NEXT(rta, attr_len);
1442 }
1443 }
1444
1445 /* Keep reading more data from the socket if the last
1446 * message had the NLF_F_MULTI flag set.
1447 */
1448 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1449
1450 /* Look at the next message received in this buffer. */
1451 msg = NLMSG_NEXT(msg, recv_len);
1452 }
1453 } while (readmore);
1454
1455 #pragma GCC diagnostic pop
1456
1457 /* If we end up here, we didn't find any result, so signal an error. */
1458 err = -1;
1459
1460 out:
1461 netlink_close(&nlh);
1462 nlmsg_free(answer);
1463 nlmsg_free(nlmsg);
1464 return err;
1465 }
1466
1467 int lxc_netdev_set_mtu(const char *name, int mtu)
1468 {
1469 int err, index, len;
1470 struct ifinfomsg *ifi;
1471 struct nl_handler nlh;
1472 struct nlmsg *answer = NULL, *nlmsg = NULL;
1473
1474 err = netlink_open(&nlh, NETLINK_ROUTE);
1475 if (err)
1476 return err;
1477
1478 err = -EINVAL;
1479 len = strlen(name);
1480 if (len == 1 || len >= IFNAMSIZ)
1481 goto out;
1482
1483 err = -ENOMEM;
1484 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1485 if (!nlmsg)
1486 goto out;
1487
1488 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1489 if (!answer)
1490 goto out;
1491
1492 err = -EINVAL;
1493 index = if_nametoindex(name);
1494 if (!index)
1495 goto out;
1496
1497 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1498 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1499
1500 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1501 if (!ifi) {
1502 err = -ENOMEM;
1503 goto out;
1504 }
1505 ifi->ifi_family = AF_UNSPEC;
1506 ifi->ifi_index = index;
1507
1508 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1509 goto out;
1510
1511 err = netlink_transaction(&nlh, nlmsg, answer);
1512 out:
1513 netlink_close(&nlh);
1514 nlmsg_free(nlmsg);
1515 nlmsg_free(answer);
1516 return err;
1517 }
1518
1519 int lxc_netdev_up(const char *name)
1520 {
1521 return netdev_set_flag(name, IFF_UP);
1522 }
1523
/* Take the device called @name down by passing an empty flag set to
 * netdev_set_flag().
 */
int lxc_netdev_down(const char *name)
{
	const int flags = 0;

	return netdev_set_flag(name, flags);
}
1528
1529 int lxc_veth_create(const char *name1, const char *name2)
1530 {
1531 int err, len;
1532 struct ifinfomsg *ifi;
1533 struct nl_handler nlh;
1534 struct rtattr *nest1, *nest2, *nest3;
1535 struct nlmsg *answer = NULL, *nlmsg = NULL;
1536
1537 err = netlink_open(&nlh, NETLINK_ROUTE);
1538 if (err)
1539 return err;
1540
1541 err = -EINVAL;
1542 len = strlen(name1);
1543 if (len == 1 || len >= IFNAMSIZ)
1544 goto out;
1545
1546 len = strlen(name2);
1547 if (len == 1 || len >= IFNAMSIZ)
1548 goto out;
1549
1550 err = -ENOMEM;
1551 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1552 if (!nlmsg)
1553 goto out;
1554
1555 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1556 if (!answer)
1557 goto out;
1558
1559 nlmsg->nlmsghdr->nlmsg_flags =
1560 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1561 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1562
1563 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1564 if (!ifi)
1565 goto out;
1566 ifi->ifi_family = AF_UNSPEC;
1567
1568 err = -EINVAL;
1569 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1570 if (!nest1)
1571 goto out;
1572
1573 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1574 goto out;
1575
1576 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1577 if (!nest2)
1578 goto out;
1579
1580 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1581 if (!nest3)
1582 goto out;
1583
1584 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1585 if (!ifi) {
1586 err = -ENOMEM;
1587 goto out;
1588 }
1589
1590 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1591 goto out;
1592
1593 nla_end_nested(nlmsg, nest3);
1594 nla_end_nested(nlmsg, nest2);
1595 nla_end_nested(nlmsg, nest1);
1596
1597 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1598 goto out;
1599
1600 err = netlink_transaction(&nlh, nlmsg, answer);
1601 out:
1602 netlink_close(&nlh);
1603 nlmsg_free(answer);
1604 nlmsg_free(nlmsg);
1605 return err;
1606 }
1607
1608 /* TODO: merge with lxc_macvlan_create */
1609 int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
1610 {
1611 int err, len, lindex;
1612 struct ifinfomsg *ifi;
1613 struct nl_handler nlh;
1614 struct rtattr *nest, *nest2;
1615 struct nlmsg *answer = NULL, *nlmsg = NULL;
1616
1617 err = netlink_open(&nlh, NETLINK_ROUTE);
1618 if (err)
1619 return err;
1620
1621 err = -EINVAL;
1622 len = strlen(master);
1623 if (len == 1 || len >= IFNAMSIZ)
1624 goto err3;
1625
1626 len = strlen(name);
1627 if (len == 1 || len >= IFNAMSIZ)
1628 goto err3;
1629
1630 err = -ENOMEM;
1631 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1632 if (!nlmsg)
1633 goto err3;
1634
1635 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1636 if (!answer)
1637 goto err2;
1638
1639 err = -EINVAL;
1640 lindex = if_nametoindex(master);
1641 if (!lindex)
1642 goto err1;
1643
1644 nlmsg->nlmsghdr->nlmsg_flags =
1645 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1646 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1647
1648 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1649 if (!ifi) {
1650 err = -ENOMEM;
1651 goto err1;
1652 }
1653 ifi->ifi_family = AF_UNSPEC;
1654
1655 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1656 if (!nest)
1657 goto err1;
1658
1659 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1660 goto err1;
1661
1662 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1663 if (!nest2)
1664 goto err1;
1665
1666 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1667 goto err1;
1668
1669 nla_end_nested(nlmsg, nest2);
1670 nla_end_nested(nlmsg, nest);
1671
1672 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1673 goto err1;
1674
1675 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1676 goto err1;
1677
1678 err = netlink_transaction(&nlh, nlmsg, answer);
1679 err1:
1680 nlmsg_free(answer);
1681 err2:
1682 nlmsg_free(nlmsg);
1683 err3:
1684 netlink_close(&nlh);
1685 return err;
1686 }
1687
1688 int lxc_macvlan_create(const char *master, const char *name, int mode)
1689 {
1690 int err, index, len;
1691 struct ifinfomsg *ifi;
1692 struct nl_handler nlh;
1693 struct rtattr *nest, *nest2;
1694 struct nlmsg *answer = NULL, *nlmsg = NULL;
1695
1696 err = netlink_open(&nlh, NETLINK_ROUTE);
1697 if (err)
1698 return err;
1699
1700 err = -EINVAL;
1701 len = strlen(master);
1702 if (len == 1 || len >= IFNAMSIZ)
1703 goto out;
1704
1705 len = strlen(name);
1706 if (len == 1 || len >= IFNAMSIZ)
1707 goto out;
1708
1709 err = -ENOMEM;
1710 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1711 if (!nlmsg)
1712 goto out;
1713
1714 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1715 if (!answer)
1716 goto out;
1717
1718 err = -EINVAL;
1719 index = if_nametoindex(master);
1720 if (!index)
1721 goto out;
1722
1723 nlmsg->nlmsghdr->nlmsg_flags =
1724 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1725 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1726
1727 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1728 if (!ifi) {
1729 err = -ENOMEM;
1730 goto out;
1731 }
1732 ifi->ifi_family = AF_UNSPEC;
1733
1734 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1735 if (!nest)
1736 goto out;
1737
1738 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1739 goto out;
1740
1741 if (mode) {
1742 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1743 if (!nest2)
1744 goto out;
1745
1746 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1747 goto out;
1748
1749 nla_end_nested(nlmsg, nest2);
1750 }
1751
1752 nla_end_nested(nlmsg, nest);
1753
1754 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1755 goto out;
1756
1757 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1758 goto out;
1759
1760 err = netlink_transaction(&nlh, nlmsg, answer);
1761 out:
1762 netlink_close(&nlh);
1763 nlmsg_free(answer);
1764 nlmsg_free(nlmsg);
1765 return err;
1766 }
1767
1768 static int proc_sys_net_write(const char *path, const char *value)
1769 {
1770 int fd;
1771 int err = 0;
1772
1773 fd = open(path, O_WRONLY);
1774 if (fd < 0)
1775 return -errno;
1776
1777 if (lxc_write_nointr(fd, value, strlen(value)) < 0)
1778 err = -errno;
1779
1780 close(fd);
1781 return err;
1782 }
1783
1784 static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
1785 {
1786 int ret;
1787 char path[PATH_MAX];
1788 char buf[1] = "";
1789
1790 if (family != AF_INET && family != AF_INET6)
1791 return minus_one_set_errno(EINVAL);
1792
1793 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1794 family == AF_INET ? "ipv4" : "ipv6", ifname,
1795 "forwarding");
1796 if (ret < 0 || (size_t)ret >= PATH_MAX)
1797 return minus_one_set_errno(E2BIG);
1798
1799 return lxc_read_file_expect(path, buf, 1, "1");
1800 }
1801
1802 static int neigh_proxy_set(const char *ifname, int family, int flag)
1803 {
1804 int ret;
1805 char path[PATH_MAX];
1806
1807 if (family != AF_INET && family != AF_INET6)
1808 return -EINVAL;
1809
1810 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1811 family == AF_INET ? "ipv4" : "ipv6", ifname,
1812 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1813 if (ret < 0 || (size_t)ret >= PATH_MAX)
1814 return -E2BIG;
1815
1816 return proc_sys_net_write(path, flag ? "1" : "0");
1817 }
1818
1819 static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1820 {
1821 int ret;
1822 char path[PATH_MAX];
1823 char buf[1] = "";
1824
1825 if (family != AF_INET && family != AF_INET6)
1826 return minus_one_set_errno(EINVAL);
1827
1828 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1829 family == AF_INET ? "ipv4" : "ipv6", ifname,
1830 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1831 if (ret < 0 || (size_t)ret >= PATH_MAX)
1832 return minus_one_set_errno(E2BIG);
1833
1834 return lxc_read_file_expect(path, buf, 1, "1");
1835 }
1836
/* Enable neighbor proxying on @name for @family via neigh_proxy_set(). */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1841
/* Disable neighbor proxying on @name for @family via neigh_proxy_set(). */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
1846
/* Return the value of the hexadecimal digit @c, or -1 when @c is not one.
 *
 * Plain range comparisons are used instead of isdigit(): passing a negative
 * char value to the <ctype.h> functions is undefined behavior.
 */
static int lxc_mac_hex_digit(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and up to ETH_ALEN bytes are written to
 * sa_data. Each byte is given by one or two hexadecimal digits; bytes may be
 * separated by ':'. Parsing stops at the end of the string or after ETH_ALEN
 * bytes, whichever comes first.
 *
 * Returns 0 on success and -EINVAL when an unexpected character is found.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	int i = 0;
	unsigned char *data;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		int hi, lo;
		unsigned val;
		char c;

		/* First digit of the byte is mandatory. */
		hi = lxc_mac_hex_digit(*macaddr++);
		if (hi < 0)
			return -EINVAL;
		val = (unsigned)hi;

		/* The second digit is optional: a separator or the end of the
		 * string means the byte had a single digit.
		 */
		c = *macaddr;
		lo = lxc_mac_hex_digit(c);
		if (lo >= 0)
			val = (val << 4) | (unsigned)lo;
		else if (c != ':' && c != '\0')
			return -EINVAL;

		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1891
1892 static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1893 void *acast, int prefix)
1894 {
1895 int addrlen, err;
1896 struct ifaddrmsg *ifa;
1897 struct nl_handler nlh;
1898 struct nlmsg *answer = NULL, *nlmsg = NULL;
1899
1900 addrlen = family == AF_INET ? sizeof(struct in_addr)
1901 : sizeof(struct in6_addr);
1902
1903 err = netlink_open(&nlh, NETLINK_ROUTE);
1904 if (err)
1905 return err;
1906
1907 err = -ENOMEM;
1908 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1909 if (!nlmsg)
1910 goto out;
1911
1912 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1913 if (!answer)
1914 goto out;
1915
1916 nlmsg->nlmsghdr->nlmsg_flags =
1917 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1918 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1919
1920 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
1921 if (!ifa)
1922 goto out;
1923 ifa->ifa_prefixlen = prefix;
1924 ifa->ifa_index = ifindex;
1925 ifa->ifa_family = family;
1926 ifa->ifa_scope = 0;
1927
1928 err = -EINVAL;
1929 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
1930 goto out;
1931
1932 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
1933 goto out;
1934
1935 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1936 goto out;
1937
1938 /* TODO: multicast, anycast with ipv6 */
1939 err = -EPROTONOSUPPORT;
1940 if (family == AF_INET6 &&
1941 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1942 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1943 goto out;
1944
1945 err = netlink_transaction(&nlh, nlmsg, answer);
1946 out:
1947 netlink_close(&nlh);
1948 nlmsg_free(answer);
1949 nlmsg_free(nlmsg);
1950 return err;
1951 }
1952
/* AF_INET6 front end of ip_addr_add(); @mcast is passed in the position of
 * the broadcast address.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
1959
/* AF_INET front end of ip_addr_add(); no anycast address is passed. */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
1965
/* Extract the IFA_LOCAL address (or the IFA_ADDRESS one when no IFA_LOCAL is
 * present) from the RTM_NEWADDR message @msg. The address is copied into
 * *res, which is allocated here when it is still NULL (so res should be an
 * in_addr** or in6_addr**).
 *
 * Returns 0 when the message was processed (including the case where its
 * family differs from @family and it is ignored) and -1 when an attribute has
 * an unexpected length or the allocation fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addrmsg = NLMSG_DATA(msg);
	struct rtattr *attr = IFA_RTA(addrmsg);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	if (addrmsg->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk all attributes carried by this message. */
	for (; RTA_OK(attr, remaining); attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the right
		 * address length, verify it anyway.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* An IFA_ADDRESS seen earlier may already have allocated the
		 * buffer; it is then overwritten by a later IFA_LOCAL.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is the preferred answer, stop looking. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2019
2020 static int ip_addr_get(int family, int ifindex, void **res)
2021 {
2022 int answer_len, err;
2023 struct ifaddrmsg *ifa;
2024 struct nl_handler nlh;
2025 struct nlmsghdr *msg;
2026 int readmore = 0, recv_len = 0;
2027 struct nlmsg *answer = NULL, *nlmsg = NULL;
2028
2029 err = netlink_open(&nlh, NETLINK_ROUTE);
2030 if (err)
2031 return err;
2032
2033 err = -ENOMEM;
2034 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2035 if (!nlmsg)
2036 goto out;
2037
2038 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2039 if (!answer)
2040 goto out;
2041
2042 /* Save the answer buffer length, since it will be overwritten on the
2043 * first receive (and we might need to receive more than once).
2044 */
2045 answer_len = answer->nlmsghdr->nlmsg_len;
2046
2047 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
2048 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
2049
2050 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
2051 if (!ifa)
2052 goto out;
2053 ifa->ifa_family = family;
2054
2055 /* Send the request for addresses, which returns all addresses on all
2056 * interfaces.
2057 */
2058 err = netlink_send(&nlh, nlmsg);
2059 if (err < 0)
2060 goto out;
2061
2062 #pragma GCC diagnostic push
2063 #pragma GCC diagnostic ignored "-Wcast-align"
2064
2065 do {
2066 /* Restore the answer buffer length, it might have been
2067 * overwritten by a previous receive.
2068 */
2069 answer->nlmsghdr->nlmsg_len = answer_len;
2070
2071 /* Get the (next) batch of reply messages. */
2072 err = netlink_rcv(&nlh, answer);
2073 if (err < 0)
2074 goto out;
2075
2076 recv_len = err;
2077 err = 0;
2078
2079 /* Satisfy the typing for the netlink macros. */
2080 msg = answer->nlmsghdr;
2081
2082 while (NLMSG_OK(msg, recv_len)) {
2083 /* Stop reading if we see an error message. */
2084 if (msg->nlmsg_type == NLMSG_ERROR) {
2085 struct nlmsgerr *errmsg =
2086 (struct nlmsgerr *)NLMSG_DATA(msg);
2087 err = errmsg->error;
2088 goto out;
2089 }
2090
2091 /* Stop reading if we see a NLMSG_DONE message. */
2092 if (msg->nlmsg_type == NLMSG_DONE) {
2093 readmore = 0;
2094 break;
2095 }
2096
2097 if (msg->nlmsg_type != RTM_NEWADDR) {
2098 err = -1;
2099 goto out;
2100 }
2101
2102 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2103 if (ifa->ifa_index == ifindex) {
2104 if (ifa_get_local_ip(family, msg, res) < 0) {
2105 err = -1;
2106 goto out;
2107 }
2108
2109 /* Found a result, stop searching. */
2110 if (*res)
2111 goto out;
2112 }
2113
2114 /* Keep reading more data from the socket if the last
2115 * message had the NLF_F_MULTI flag set.
2116 */
2117 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2118
2119 /* Look at the next message received in this buffer. */
2120 msg = NLMSG_NEXT(msg, recv_len);
2121 }
2122 } while (readmore);
2123
2124 #pragma GCC diagnostic pop
2125
2126 /* If we end up here, we didn't find any result, so signal an
2127 * error.
2128 */
2129 err = -1;
2130
2131 out:
2132 netlink_close(&nlh);
2133 nlmsg_free(answer);
2134 nlmsg_free(nlmsg);
2135 return err;
2136 }
2137
/* AF_INET6 front end of ip_addr_get(). */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **slot = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, slot);
}
2142
/* AF_INET front end of ip_addr_get(). */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **slot = (void **)res;

	return ip_addr_get(AF_INET, ifindex, slot);
}
2147
2148 static int ip_gateway_add(int family, int ifindex, void *gw)
2149 {
2150 int addrlen, err;
2151 struct nl_handler nlh;
2152 struct rtmsg *rt;
2153 struct nlmsg *answer = NULL, *nlmsg = NULL;
2154
2155 addrlen = family == AF_INET ? sizeof(struct in_addr)
2156 : sizeof(struct in6_addr);
2157
2158 err = netlink_open(&nlh, NETLINK_ROUTE);
2159 if (err)
2160 return err;
2161
2162 err = -ENOMEM;
2163 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2164 if (!nlmsg)
2165 goto out;
2166
2167 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2168 if (!answer)
2169 goto out;
2170
2171 nlmsg->nlmsghdr->nlmsg_flags =
2172 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2173 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2174
2175 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
2176 if (!rt)
2177 goto out;
2178 rt->rtm_family = family;
2179 rt->rtm_table = RT_TABLE_MAIN;
2180 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2181 rt->rtm_protocol = RTPROT_BOOT;
2182 rt->rtm_type = RTN_UNICAST;
2183 /* "default" destination */
2184 rt->rtm_dst_len = 0;
2185
2186 err = -EINVAL;
2187
2188 /* If gateway address not supplied, then a device route will be created instead */
2189 if (gw != NULL) {
2190 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2191 goto out;
2192 }
2193
2194 /* Adding the interface index enables the use of link-local
2195 * addresses for the gateway.
2196 */
2197 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2198 goto out;
2199
2200 err = netlink_transaction(&nlh, nlmsg, answer);
2201 out:
2202 netlink_close(&nlh);
2203 nlmsg_free(answer);
2204 nlmsg_free(nlmsg);
2205 return err;
2206 }
2207
/* AF_INET front end of ip_gateway_add(). */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2212
/* AF_INET6 front end of ip_gateway_add(). */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}
/* Decide whether @bridge is to be treated as an openvswitch bridge.
 *
 * Returns true when stat() on /sys/class/net/<bridge>/bridge fails with
 * ENOENT. Returns false when that path exists, when stat() fails for any
 * other reason, or when the path does not fit into the local buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	char syspath[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(syspath, sizeof(syspath), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(syspath))
		return false;

	return stat(syspath, &st) < 0 && errno == ENOENT;
}
2234
/* Arguments handed to the ovs-vsctl exec callbacks. */
struct ovs_veth_args {
	/* Bridge name passed to ovs-vsctl. */
	const char *bridge;
	/* Port (network device) name passed to ovs-vsctl. */
	const char *nic;
};
2239
2240 /* Called from a background thread - when nic goes away, remove it from the
2241 * bridge.
2242 */
2243 static int lxc_ovs_delete_port_exec(void *data)
2244 {
2245 struct ovs_veth_args *args = data;
2246
2247 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2248 (char *)NULL);
2249 return -1;
2250 }
2251
2252 int lxc_ovs_delete_port(const char *bridge, const char *nic)
2253 {
2254 int ret;
2255 char cmd_output[PATH_MAX];
2256 struct ovs_veth_args args;
2257
2258 args.bridge = bridge;
2259 args.nic = nic;
2260 ret = run_command(cmd_output, sizeof(cmd_output),
2261 lxc_ovs_delete_port_exec, (void *)&args);
2262 if (ret < 0) {
2263 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2264 "%s", bridge, nic, cmd_output);
2265 return -1;
2266 }
2267
2268 return 0;
2269 }
2270
2271 static int lxc_ovs_attach_bridge_exec(void *data)
2272 {
2273 struct ovs_veth_args *args = data;
2274
2275 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2276 (char *)NULL);
2277 return -1;
2278 }
2279
2280 static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2281 {
2282 int ret;
2283 char cmd_output[PATH_MAX];
2284 struct ovs_veth_args args;
2285
2286 args.bridge = bridge;
2287 args.nic = nic;
2288 ret = run_command(cmd_output, sizeof(cmd_output),
2289 lxc_ovs_attach_bridge_exec, (void *)&args);
2290 if (ret < 0) {
2291 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2292 bridge, nic, cmd_output);
2293 return -1;
2294 }
2295
2296 return 0;
2297 }
2298
2299 int lxc_bridge_attach(const char *bridge, const char *ifname)
2300 {
2301 int err, fd, index;
2302 size_t retlen;
2303 struct ifreq ifr;
2304
2305 if (strlen(ifname) >= IFNAMSIZ)
2306 return -EINVAL;
2307
2308 index = if_nametoindex(ifname);
2309 if (!index)
2310 return -EINVAL;
2311
2312 if (is_ovs_bridge(bridge))
2313 return lxc_ovs_attach_bridge(bridge, ifname);
2314
2315 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
2316 if (fd < 0)
2317 return -errno;
2318
2319 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
2320 if (retlen >= IFNAMSIZ) {
2321 close(fd);
2322 return -E2BIG;
2323 }
2324
2325 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2326 ifr.ifr_ifindex = index;
2327 err = ioctl(fd, SIOCBRADDIF, &ifr);
2328 close(fd);
2329 if (err)
2330 err = -errno;
2331
2332 return err;
2333 }
2334
2335 static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
2336 [LXC_NET_EMPTY] = "empty",
2337 [LXC_NET_VETH] = "veth",
2338 [LXC_NET_MACVLAN] = "macvlan",
2339 [LXC_NET_IPVLAN] = "ipvlan",
2340 [LXC_NET_PHYS] = "phys",
2341 [LXC_NET_VLAN] = "vlan",
2342 [LXC_NET_NONE] = "none",
2343 };
2344
2345 const char *lxc_net_type_to_str(int type)
2346 {
2347 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2348 return NULL;
2349
2350 return lxc_network_types[type];
2351 }
2352
2353 static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2354
2355 char *lxc_mkifname(char *template)
2356 {
2357 int ret;
2358 struct netns_ifaddrs *ifa, *ifaddr;
2359 char name[IFNAMSIZ];
2360 bool exists = false;
2361 size_t i = 0;
2362 #ifdef HAVE_RAND_R
2363 unsigned int seed;
2364
2365 seed = randseed(false);
2366 #else
2367
2368 (void)randseed(true);
2369 #endif
2370
2371 if (strlen(template) >= IFNAMSIZ)
2372 return NULL;
2373
2374 /* Get all the network interfaces. */
2375 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2376 if (ret < 0) {
2377 SYSERROR("Failed to get network interfaces");
2378 return NULL;
2379 }
2380
2381 /* Generate random names until we find one that doesn't exist. */
2382 for (;;) {
2383 name[0] = '\0';
2384 (void)strlcpy(name, template, IFNAMSIZ);
2385
2386 exists = false;
2387
2388 for (i = 0; i < strlen(name); i++) {
2389 if (name[i] == 'X') {
2390 #ifdef HAVE_RAND_R
2391 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
2392 #else
2393 name[i] = padchar[rand() % strlen(padchar)];
2394 #endif
2395 }
2396 }
2397
2398 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
2399 if (!strcmp(ifa->ifa_name, name)) {
2400 exists = true;
2401 break;
2402 }
2403 }
2404
2405 if (!exists)
2406 break;
2407 }
2408
2409 netns_freeifaddrs(ifaddr);
2410 (void)strlcpy(template, name, strlen(template) + 1);
2411
2412 return template;
2413 }
2414
2415 int setup_private_host_hw_addr(char *veth1)
2416 {
2417 int err, sockfd;
2418 struct ifreq ifr;
2419
2420 sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
2421 if (sockfd < 0)
2422 return -errno;
2423
2424 err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
2425 if (err < 0 || (size_t)err >= IFNAMSIZ) {
2426 close(sockfd);
2427 return -E2BIG;
2428 }
2429
2430 err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
2431 if (err < 0) {
2432 close(sockfd);
2433 return -errno;
2434 }
2435
2436 ifr.ifr_hwaddr.sa_data[0] = 0xfe;
2437 err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
2438 close(sockfd);
2439 if (err < 0)
2440 return -errno;
2441
2442 return 0;
2443 }
2444
2445 int lxc_find_gateway_addresses(struct lxc_handler *handler)
2446 {
2447 struct lxc_list *network = &handler->conf->network;
2448 struct lxc_list *iterator;
2449 struct lxc_netdev *netdev;
2450 int link_index;
2451
2452 lxc_list_for_each(iterator, network) {
2453 netdev = iterator->elem;
2454
2455 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2456 continue;
2457
2458 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2459 ERROR("Automatic gateway detection is only supported "
2460 "for veth and macvlan");
2461 return -1;
2462 }
2463
2464 if (netdev->link[0] == '\0') {
2465 ERROR("Automatic gateway detection needs a link interface");
2466 return -1;
2467 }
2468
2469 link_index = if_nametoindex(netdev->link);
2470 if (!link_index)
2471 return -EINVAL;
2472
2473 if (netdev->ipv4_gateway_auto) {
2474 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2475 ERROR("Failed to automatically find ipv4 gateway "
2476 "address from link interface \"%s\"", netdev->link);
2477 return -1;
2478 }
2479 }
2480
2481 if (netdev->ipv6_gateway_auto) {
2482 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2483 ERROR("Failed to automatically find ipv6 gateway "
2484 "address from link interface \"%s\"", netdev->link);
2485 return -1;
2486 }
2487 }
2488 }
2489
2490 return 0;
2491 }
2492
2493 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2494 static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
2495 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
2496 {
2497 int ret;
2498 pid_t child;
2499 int bytes, pipefd[2];
2500 char *token, *saveptr = NULL;
2501 char netdev_link[IFNAMSIZ];
2502 char buffer[PATH_MAX] = {0};
2503 size_t retlen;
2504
2505 if (netdev->type != LXC_NET_VETH) {
2506 ERROR("Network type %d not support for unprivileged use", netdev->type);
2507 return -1;
2508 }
2509
2510 ret = pipe(pipefd);
2511 if (ret < 0) {
2512 SYSERROR("Failed to create pipe");
2513 return -1;
2514 }
2515
2516 child = fork();
2517 if (child < 0) {
2518 SYSERROR("Failed to create new process");
2519 close(pipefd[0]);
2520 close(pipefd[1]);
2521 return -1;
2522 }
2523
2524 if (child == 0) {
2525 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
2526
2527 close(pipefd[0]);
2528
2529 ret = dup2(pipefd[1], STDOUT_FILENO);
2530 if (ret >= 0)
2531 ret = dup2(pipefd[1], STDERR_FILENO);
2532 close(pipefd[1]);
2533 if (ret < 0) {
2534 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2535 _exit(EXIT_FAILURE);
2536 }
2537
2538 if (netdev->link[0] != '\0')
2539 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
2540 else
2541 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2542 if (retlen >= IFNAMSIZ) {
2543 SYSERROR("Invalid network device name");
2544 _exit(EXIT_FAILURE);
2545 }
2546
2547 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2548 if (ret < 0 || ret >= sizeof(pidstr))
2549 _exit(EXIT_FAILURE);
2550 pidstr[sizeof(pidstr) - 1] = '\0';
2551
2552 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2553 lxcname, pidstr, netdev_link,
2554 netdev->name[0] != '\0' ? netdev->name : "(null)");
2555 if (netdev->name[0] != '\0')
2556 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2557 lxcpath, lxcname, pidstr, "veth", netdev_link,
2558 netdev->name, (char *)NULL);
2559 else
2560 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2561 lxcpath, lxcname, pidstr, "veth", netdev_link,
2562 (char *)NULL);
2563 SYSERROR("Failed to execute lxc-user-nic");
2564 _exit(EXIT_FAILURE);
2565 }
2566
2567 /* close the write-end of the pipe */
2568 close(pipefd[1]);
2569
2570 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
2571 if (bytes < 0) {
2572 SYSERROR("Failed to read from pipe file descriptor");
2573 close(pipefd[0]);
2574 } else {
2575 buffer[bytes - 1] = '\0';
2576 }
2577
2578 ret = wait_for_pid(child);
2579 close(pipefd[0]);
2580 if (ret != 0 || bytes < 0) {
2581 ERROR("lxc-user-nic failed to configure requested network: %s",
2582 buffer[0] != '\0' ? buffer : "(null)");
2583 return -1;
2584 }
2585 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2586
2587 /* netdev->name */
2588 token = strtok_r(buffer, ":", &saveptr);
2589 if (!token) {
2590 ERROR("Failed to parse lxc-user-nic output");
2591 return -1;
2592 }
2593
2594 /*
2595 * lxc-user-nic will take care of proper network device naming. So
2596 * netdev->name and netdev->created_name need to be identical to not
2597 * trigger another rename later on.
2598 */
2599 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2600 if (retlen < IFNAMSIZ)
2601 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2602 if (retlen >= IFNAMSIZ) {
2603 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2604 return -E2BIG;
2605 }
2606
2607 /* netdev->ifindex */
2608 token = strtok_r(NULL, ":", &saveptr);
2609 if (!token) {
2610 ERROR("Failed to parse lxc-user-nic output");
2611 return -1;
2612 }
2613
2614 ret = lxc_safe_int(token, &netdev->ifindex);
2615 if (ret < 0) {
2616 errno = -ret;
2617 SYSERROR("Failed to convert string \"%s\" to integer", token);
2618 return -1;
2619 }
2620
2621 /* netdev->priv.veth_attr.veth1 */
2622 token = strtok_r(NULL, ":", &saveptr);
2623 if (!token) {
2624 ERROR("Failed to parse lxc-user-nic output");
2625 return -1;
2626 }
2627
2628 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2629 if (retlen >= IFNAMSIZ) {
2630 ERROR("Host side veth device name returned by lxc-user-nic is "
2631 "too long");
2632 return -E2BIG;
2633 }
2634
2635 /* netdev->priv.veth_attr.ifindex */
2636 token = strtok_r(NULL, ":", &saveptr);
2637 if (!token) {
2638 ERROR("Failed to parse lxc-user-nic output");
2639 return -1;
2640 }
2641
2642 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
2643 if (ret < 0) {
2644 errno = -ret;
2645 SYSERROR("Failed to convert string \"%s\" to integer", token);
2646 return -1;
2647 }
2648
2649 if (netdev->upscript) {
2650 char *argv[] = {
2651 "veth",
2652 netdev->link,
2653 netdev->priv.veth_attr.veth1,
2654 NULL,
2655 };
2656
2657 ret = run_script_argv(lxcname, hooks_version, "net",
2658 netdev->upscript, "up", argv);
2659 if (ret < 0)
2660 return -1;
2661 }
2662
2663 return 0;
2664 }
2665
2666 static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
2667 struct lxc_netdev *netdev,
2668 const char *netns_path)
2669 {
2670 int bytes, ret;
2671 pid_t child;
2672 int pipefd[2];
2673 char buffer[PATH_MAX] = {0};
2674
2675 if (netdev->type != LXC_NET_VETH) {
2676 ERROR("Network type %d not support for unprivileged use", netdev->type);
2677 return -1;
2678 }
2679
2680 ret = pipe(pipefd);
2681 if (ret < 0) {
2682 SYSERROR("Failed to create pipe");
2683 return -1;
2684 }
2685
2686 child = fork();
2687 if (child < 0) {
2688 SYSERROR("Failed to create new process");
2689 close(pipefd[0]);
2690 close(pipefd[1]);
2691 return -1;
2692 }
2693
2694 if (child == 0) {
2695 char *hostveth;
2696
2697 close(pipefd[0]);
2698
2699 ret = dup2(pipefd[1], STDOUT_FILENO);
2700 if (ret >= 0)
2701 ret = dup2(pipefd[1], STDERR_FILENO);
2702 close(pipefd[1]);
2703 if (ret < 0) {
2704 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2705 _exit(EXIT_FAILURE);
2706 }
2707
2708 if (netdev->priv.veth_attr.pair[0] != '\0')
2709 hostveth = netdev->priv.veth_attr.pair;
2710 else
2711 hostveth = netdev->priv.veth_attr.veth1;
2712 if (hostveth[0] == '\0') {
2713 SYSERROR("Host side veth device name is missing");
2714 _exit(EXIT_FAILURE);
2715 }
2716
2717 if (netdev->link[0] == '\0') {
2718 SYSERROR("Network link for network device \"%s\" is "
2719 "missing", netdev->priv.veth_attr.veth1);
2720 _exit(EXIT_FAILURE);
2721 }
2722
2723 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
2724 lxcname, netns_path, netdev->link, hostveth);
2725 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
2726 lxcname, netns_path, "veth", netdev->link, hostveth,
2727 (char *)NULL);
2728 SYSERROR("Failed to exec lxc-user-nic.");
2729 _exit(EXIT_FAILURE);
2730 }
2731
2732 close(pipefd[1]);
2733
2734 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
2735 if (bytes < 0) {
2736 SYSERROR("Failed to read from pipe file descriptor.");
2737 close(pipefd[0]);
2738 } else {
2739 buffer[bytes - 1] = '\0';
2740 }
2741
2742 ret = wait_for_pid(child);
2743 close(pipefd[0]);
2744 if (ret != 0 || bytes < 0) {
2745 ERROR("lxc-user-nic failed to delete requested network: %s",
2746 buffer[0] != '\0' ? buffer : "(null)");
2747 return -1;
2748 }
2749
2750 return 0;
2751 }
2752
2753 bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2754 {
2755 int ret;
2756 struct lxc_list *iterator;
2757 struct lxc_list *network = &handler->conf->network;
2758 /* strlen("/proc/") = 6
2759 * +
2760 * INTTYPE_TO_STRLEN(pid_t)
2761 * +
2762 * strlen("/fd/") = 4
2763 * +
2764 * INTTYPE_TO_STRLEN(int)
2765 * +
2766 * \0
2767 */
2768 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
2769
2770 *netns_path = '\0';
2771
2772 if (handler->nsfd[LXC_NS_NET] < 0) {
2773 DEBUG("Cannot not guarantee safe deletion of network devices. "
2774 "Manual cleanup maybe needed");
2775 return false;
2776 }
2777
2778 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
2779 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
2780 if (ret < 0 || ret >= sizeof(netns_path))
2781 return false;
2782
2783 lxc_list_for_each(iterator, network) {
2784 char *hostveth = NULL;
2785 struct lxc_netdev *netdev = iterator->elem;
2786
2787 /* We can only delete devices whose ifindex we have. If we don't
2788 * have the index it means that we didn't create it.
2789 */
2790 if (!netdev->ifindex)
2791 continue;
2792
2793 if (netdev->type == LXC_NET_PHYS) {
2794 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2795 netdev->link);
2796 if (ret < 0)
2797 WARN("Failed to rename interface with index %d "
2798 "to its initial name \"%s\"",
2799 netdev->ifindex, netdev->link);
2800 else
2801 TRACE("Renamed interface with index %d to its "
2802 "initial name \"%s\"",
2803 netdev->ifindex, netdev->link);
2804
2805 ret = netdev_deconf[netdev->type](handler, netdev);
2806 goto clear_ifindices;
2807 }
2808
2809 ret = netdev_deconf[netdev->type](handler, netdev);
2810 if (ret < 0)
2811 WARN("Failed to deconfigure network device");
2812
2813 if (netdev->type != LXC_NET_VETH)
2814 goto clear_ifindices;
2815
2816 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
2817 goto clear_ifindices;
2818
2819 if (netdev->priv.veth_attr.pair[0] != '\0')
2820 hostveth = netdev->priv.veth_attr.pair;
2821 else
2822 hostveth = netdev->priv.veth_attr.veth1;
2823 if (hostveth[0] == '\0')
2824 goto clear_ifindices;
2825
2826 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2827 handler->name, netdev,
2828 netns_path);
2829 if (ret < 0) {
2830 WARN("Failed to remove port \"%s\" from openvswitch "
2831 "bridge \"%s\"", hostveth, netdev->link);
2832 goto clear_ifindices;
2833 }
2834 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2835 netdev->link);
2836
2837 clear_ifindices:
2838 /* We need to clear any ifindices we recorded so liblxc won't
2839 * have cached stale data which would cause it to fail on reboot
2840 * we're we don't re-read the on-disk config file.
2841 */
2842 netdev->ifindex = 0;
2843 if (netdev->type == LXC_NET_PHYS) {
2844 netdev->priv.phys_attr.ifindex = 0;
2845 } else if (netdev->type == LXC_NET_VETH) {
2846 netdev->priv.veth_attr.veth1[0] = '\0';
2847 netdev->priv.veth_attr.ifindex = 0;
2848 }
2849 }
2850
2851 return true;
2852 }
2853
/*
 * Argument bundle handed (as void *) to the "ip neigh ... proxy" exec
 * wrappers below.
 */
struct ip_proxy_args {
	/* IP address in text form, passed verbatim to the ip command */
	const char *ip;
	/* name of the network device, passed after the "dev" keyword */
	const char *dev;
};
2858
2859 static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2860 {
2861 struct ip_proxy_args *args = data;
2862
2863 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2864 return -1;
2865 }
2866
2867 static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2868 {
2869 struct ip_proxy_args *args = data;
2870
2871 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2872 return -1;
2873 }
2874
2875 static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2876 {
2877 int ret;
2878 char cmd_output[PATH_MAX];
2879 struct ip_proxy_args args = {
2880 .ip = ip,
2881 .dev = dev,
2882 };
2883
2884 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2885 if (ret < 0) {
2886 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2887 return -1;
2888 }
2889
2890 return 0;
2891 }
2892
2893 static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2894 {
2895 int ret;
2896 char cmd_output[PATH_MAX];
2897 struct ip_proxy_args args = {
2898 .ip = ip,
2899 .dev = dev,
2900 };
2901
2902 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2903 if (ret < 0) {
2904 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2905 return -1;
2906 }
2907
2908 return 0;
2909 }
2910
2911 static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2912 struct lxc_list *cur, *next;
2913 struct lxc_inetdev *inet4dev;
2914 struct lxc_inet6dev *inet6dev;
2915 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
2916 int err = 0;
2917 unsigned int lo_ifindex = 0;
2918
2919 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2920 if (!lxc_list_empty(&netdev->ipv4)) {
2921 /* Check for net.ipv4.conf.[link].forwarding=1 */
2922 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2923 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2924 return minus_one_set_errno(EINVAL);
2925 }
2926 }
2927
2928 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2929 if (!lxc_list_empty(&netdev->ipv6)) {
2930 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2931 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2932 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2933 return minus_one_set_errno(EINVAL);
2934 }
2935
2936 /* Check for net.ipv6.conf.[link].forwarding=1 */
2937 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2938 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2939 return minus_one_set_errno(EINVAL);
2940 }
2941 }
2942
2943 /* Perform IPVLAN specific checks. */
2944 if (netdev->type == LXC_NET_IPVLAN) {
2945 /* Check mode is l3s as other modes do not work with l2proxy. */
2946 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2947 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2948 return minus_one_set_errno(EINVAL);
2949 }
2950
2951 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
2952 lo_ifindex = if_nametoindex(loop_device);
2953 if (lo_ifindex == 0) {
2954 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
2955 return minus_one_set_errno(EINVAL);
2956 }
2957 }
2958
2959 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2960 inet4dev = cur->elem;
2961 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2962 return minus_one_set_errno(-errno);
2963
2964 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2965 return minus_one_set_errno(EINVAL);
2966
2967 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2968 if (netdev->type == LXC_NET_IPVLAN) {
2969 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2970 if (err < 0) {
2971 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
2972 return minus_one_set_errno(-err);
2973 }
2974 }
2975 }
2976
2977 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2978 inet6dev = cur->elem;
2979 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2980 return minus_one_set_errno(-errno);
2981
2982 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2983 return minus_one_set_errno(EINVAL);
2984
2985 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2986 if (netdev->type == LXC_NET_IPVLAN) {
2987 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
2988 if (err < 0) {
2989 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
2990 return minus_one_set_errno(-err);
2991 }
2992 }
2993 }
2994
2995 return 0;
2996 }
2997
/*
 * Undo the l2proxy configuration of a single IPv4 address.
 *
 * When @lo_ifindex is non-zero the /32 route to that interface is deleted.
 * When @link is a non-empty name the neighbour proxy entry on it is removed.
 * Both steps are attempted even if the first one fails.
 *
 * Returns 0 when everything succeeded, otherwise -1 with errno set to EINVAL.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex) {
	char addrstr[INET_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* A loopback ifindex means a static route to lo has to go as well. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* With a link device, drop the neighbour proxy entry for this address. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3026
/*
 * Undo the l2proxy configuration of a single IPv6 address.
 *
 * When @lo_ifindex is non-zero the /128 route to that interface is deleted.
 * When @link is a non-empty name the neighbour proxy entry on it is removed.
 * Both steps are attempted even if the first one fails.
 *
 * Returns 0 when everything succeeded, otherwise -1 with errno set to EINVAL.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex) {
	char addrstr[INET6_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* A loopback ifindex means a static route to lo has to go as well. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* With a link device, drop the neighbour proxy entry for this address. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3055
3056 static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
3057 unsigned int lo_ifindex = 0;
3058 unsigned int errCount = 0;
3059 struct lxc_list *cur, *next;
3060 struct lxc_inetdev *inet4dev;
3061 struct lxc_inet6dev *inet6dev;
3062
3063 /* Perform IPVLAN specific checks. */
3064 if (netdev->type == LXC_NET_IPVLAN) {
3065 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3066 lo_ifindex = if_nametoindex(loop_device);
3067 if (lo_ifindex == 0) {
3068 errCount++;
3069 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
3070 }
3071 }
3072
3073 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3074 inet4dev = cur->elem;
3075 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3076 errCount++;
3077 }
3078
3079 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3080 inet6dev = cur->elem;
3081 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3082 errCount++;
3083 }
3084
3085 if (errCount > 0)
3086 return minus_one_set_errno(EINVAL);
3087
3088 return 0;
3089 }
3090
3091 static int lxc_create_network_priv(struct lxc_handler *handler)
3092 {
3093 struct lxc_list *iterator;
3094 struct lxc_list *network = &handler->conf->network;
3095
3096 lxc_list_for_each(iterator, network) {
3097 struct lxc_netdev *netdev = iterator->elem;
3098
3099 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3100 ERROR("Invalid network configuration type %d", netdev->type);
3101 return -1;
3102 }
3103
3104 /* Setup l2proxy entries if enabled and used with a link property */
3105 if (netdev->l2proxy && netdev->link[0] != '\0') {
3106 if (lxc_setup_l2proxy(netdev)) {
3107 ERROR("Failed to setup l2proxy");
3108 return -1;
3109 }
3110 }
3111
3112 if (netdev_conf[netdev->type](handler, netdev)) {
3113 ERROR("Failed to create network device");
3114 return -1;
3115 }
3116 }
3117
3118 return 0;
3119 }
3120
3121 int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
3122 {
3123 pid_t pid = handler->pid;
3124 struct lxc_list *network = &handler->conf->network;
3125 struct lxc_list *iterator;
3126
3127 if (am_guest_unpriv())
3128 return 0;
3129
3130 lxc_list_for_each(iterator, network) {
3131 int ret;
3132 char ifname[IFNAMSIZ];
3133 struct lxc_netdev *netdev = iterator->elem;
3134
3135 if (!netdev->ifindex)
3136 continue;
3137
3138 /* retrieve the name of the interface */
3139 if (!if_indextoname(netdev->ifindex, ifname)) {
3140 ERROR("No interface corresponding to ifindex \"%d\"",
3141 netdev->ifindex);
3142 return -1;
3143 }
3144
3145 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
3146 if (ret) {
3147 errno = -ret;
3148 SYSERROR("Failed to move network device \"%s\" to network namespace %d",
3149 ifname, pid);
3150 return -1;
3151 }
3152
3153 strlcpy(netdev->created_name, ifname, IFNAMSIZ);
3154
3155 DEBUG("Moved network device \"%s\" to network namespace of %d",
3156 netdev->created_name, pid);
3157 }
3158
3159 return 0;
3160 }
3161
3162 static int lxc_create_network_unpriv(struct lxc_handler *handler)
3163 {
3164 int hooks_version = handler->conf->hooks_version;
3165 const char *lxcname = handler->name;
3166 const char *lxcpath = handler->lxcpath;
3167 struct lxc_list *network = &handler->conf->network;
3168 pid_t pid = handler->pid;
3169 struct lxc_list *iterator;
3170
3171 lxc_list_for_each(iterator, network) {
3172 struct lxc_netdev *netdev = iterator->elem;
3173
3174 if (netdev->type == LXC_NET_EMPTY)
3175 continue;
3176
3177 if (netdev->type == LXC_NET_NONE)
3178 continue;
3179
3180 if (netdev->type != LXC_NET_VETH) {
3181 ERROR("Networks of type %s are not supported by unprivileged containers",
3182 lxc_net_type_to_str(netdev->type));
3183 return -1;
3184 }
3185
3186 if (netdev->mtu)
3187 INFO("mtu ignored due to insufficient privilege");
3188
3189 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3190 pid, hooks_version))
3191 return -1;
3192 }
3193
3194 return 0;
3195 }
3196
3197 bool lxc_delete_network_priv(struct lxc_handler *handler)
3198 {
3199 int ret;
3200 struct lxc_list *iterator;
3201 struct lxc_list *network = &handler->conf->network;
3202
3203 lxc_list_for_each(iterator, network) {
3204 char *hostveth = NULL;
3205 struct lxc_netdev *netdev = iterator->elem;
3206
3207 /* We can only delete devices whose ifindex we have. If we don't
3208 * have the index it means that we didn't create it.
3209 */
3210 if (!netdev->ifindex)
3211 continue;
3212
3213 /* Delete l2proxy entries if enabled and used with a link property */
3214 if (netdev->l2proxy && netdev->link[0] != '\0') {
3215 if (lxc_delete_l2proxy(netdev))
3216 WARN("Failed to delete all l2proxy config");
3217 /* Don't return, let the network be cleaned up as normal. */
3218 }
3219
3220 if (netdev->type == LXC_NET_PHYS) {
3221 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3222 if (ret < 0)
3223 WARN("Failed to rename interface with index %d "
3224 "from \"%s\" to its initial name \"%s\"",
3225 netdev->ifindex, netdev->name, netdev->link);
3226 else {
3227 TRACE("Renamed interface with index %d from "
3228 "\"%s\" to its initial name \"%s\"",
3229 netdev->ifindex, netdev->name,
3230 netdev->link);
3231
3232 /* Restore original MTU */
3233 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3234 if (ret < 0) {
3235 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3236 netdev->link, netdev->priv.phys_attr.mtu);
3237 } else {
3238 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3239 netdev->link, netdev->priv.phys_attr.mtu);
3240 }
3241 }
3242
3243 ret = netdev_deconf[netdev->type](handler, netdev);
3244 goto clear_ifindices;
3245 }
3246
3247 ret = netdev_deconf[netdev->type](handler, netdev);
3248 if (ret < 0)
3249 WARN("Failed to deconfigure network device");
3250
3251 /* Recent kernels remove the virtual interfaces when the network
3252 * namespace is destroyed but in case we did not move the
3253 * interface to the network namespace, we have to destroy it.
3254 */
3255 ret = lxc_netdev_delete_by_index(netdev->ifindex);
3256 if (ret < 0) {
3257 if (errno != ENODEV) {
3258 WARN("Failed to remove interface \"%s\" with index %d",
3259 netdev->name[0] != '\0' ? netdev->name : "(null)",
3260 netdev->ifindex);
3261 goto clear_ifindices;
3262 }
3263 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
3264 netdev->name[0] != '\0' ? netdev->name : "(null)",
3265 netdev->ifindex);
3266 }
3267 INFO("Removed interface \"%s\" with index %d",
3268 netdev->name[0] != '\0' ? netdev->name : "(null)",
3269 netdev->ifindex);
3270
3271 if (netdev->type != LXC_NET_VETH)
3272 goto clear_ifindices;
3273
3274 /* Explicitly delete host veth device to prevent lingering
3275 * devices. We had issues in LXD around this.
3276 */
3277 if (netdev->priv.veth_attr.pair[0] != '\0')
3278 hostveth = netdev->priv.veth_attr.pair;
3279 else
3280 hostveth = netdev->priv.veth_attr.veth1;
3281 if (hostveth[0] == '\0')
3282 goto clear_ifindices;
3283
3284 ret = lxc_netdev_delete_by_name(hostveth);
3285 if (ret < 0) {
3286 WARN("Failed to remove interface \"%s\" from \"%s\"",
3287 hostveth, netdev->link);
3288 goto clear_ifindices;
3289 }
3290 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3291
3292 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
3293 netdev->priv.veth_attr.veth1[0] = '\0';
3294 netdev->ifindex = 0;
3295 netdev->priv.veth_attr.ifindex = 0;
3296 goto clear_ifindices;
3297 }
3298
3299 /* Delete the openvswitch port. */
3300 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3301 if (ret < 0)
3302 WARN("Failed to remove port \"%s\" from openvswitch "
3303 "bridge \"%s\"", hostveth, netdev->link);
3304 else
3305 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3306 hostveth, netdev->link);
3307
3308 clear_ifindices:
3309 /* We need to clear any ifindices we recorded so liblxc won't
3310 * have cached stale data which would cause it to fail on reboot
3311 * we're we don't re-read the on-disk config file.
3312 */
3313 netdev->ifindex = 0;
3314 if (netdev->type == LXC_NET_PHYS) {
3315 netdev->priv.phys_attr.ifindex = 0;
3316 } else if (netdev->type == LXC_NET_VETH) {
3317 netdev->priv.veth_attr.veth1[0] = '\0';
3318 netdev->priv.veth_attr.ifindex = 0;
3319 }
3320 }
3321
3322 return true;
3323 }
3324
3325 int lxc_requests_empty_network(struct lxc_handler *handler)
3326 {
3327 struct lxc_list *network = &handler->conf->network;
3328 struct lxc_list *iterator;
3329 bool found_none = false, found_nic = false;
3330
3331 if (lxc_list_empty(network))
3332 return 0;
3333
3334 lxc_list_for_each(iterator, network) {
3335 struct lxc_netdev *netdev = iterator->elem;
3336
3337 if (netdev->type == LXC_NET_NONE)
3338 found_none = true;
3339 else
3340 found_nic = true;
3341 }
3342 if (found_none && !found_nic)
3343 return 1;
3344 return 0;
3345 }
3346
3347 /* try to move physical nics to the init netns */
3348 int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
3349 {
3350 int ret;
3351 int oldfd;
3352 char ifname[IFNAMSIZ];
3353 struct lxc_list *iterator;
3354 int netnsfd = handler->nsfd[LXC_NS_NET];
3355 struct lxc_conf *conf = handler->conf;
3356
3357 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3358 * the parent network namespace. We won't have this capability if we are
3359 * unprivileged.
3360 */
3361 if (!handler->am_root)
3362 return 0;
3363
3364 TRACE("Moving physical network devices back to parent network namespace");
3365
3366 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
3367 if (oldfd < 0) {
3368 SYSERROR("Failed to preserve network namespace");
3369 return -1;
3370 }
3371
3372 ret = setns(netnsfd, CLONE_NEWNET);
3373 if (ret < 0) {
3374 SYSERROR("Failed to enter network namespace");
3375 close(oldfd);
3376 return -1;
3377 }
3378
3379 lxc_list_for_each(iterator, &conf->network) {
3380 struct lxc_netdev *netdev = iterator->elem;
3381
3382 if (netdev->type != LXC_NET_PHYS)
3383 continue;
3384
3385 /* Retrieve the name of the interface in the container's network
3386 * namespace.
3387 */
3388 if (!if_indextoname(netdev->ifindex, ifname)) {
3389 WARN("No interface corresponding to ifindex %d",
3390 netdev->ifindex);
3391 continue;
3392 }
3393
3394 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
3395 if (ret < 0)
3396 WARN("Error moving network device \"%s\" back to "
3397 "network namespace", ifname);
3398 else
3399 TRACE("Moved network device \"%s\" back to network "
3400 "namespace", ifname);
3401 }
3402
3403 ret = setns(oldfd, CLONE_NEWNET);
3404 close(oldfd);
3405 if (ret < 0) {
3406 SYSERROR("Failed to enter network namespace");
3407 return -1;
3408 }
3409
3410 return 0;
3411 }
3412
3413 static int setup_hw_addr(char *hwaddr, const char *ifname)
3414 {
3415 struct sockaddr sockaddr;
3416 struct ifreq ifr;
3417 int ret, fd;
3418
3419 ret = lxc_convert_mac(hwaddr, &sockaddr);
3420 if (ret) {
3421 errno = -ret;
3422 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
3423 return -1;
3424 }
3425
3426 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3427 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3428 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3429
3430 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
3431 if (fd < 0)
3432 return -1;
3433
3434 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
3435 if (ret)
3436 SYSERROR("Failed to perform ioctl");
3437
3438 close(fd);
3439
3440 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3441 ifr.ifr_name);
3442
3443 return ret;
3444 }
3445
3446 static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3447 {
3448 struct lxc_list *iterator;
3449 int err;
3450
3451 lxc_list_for_each(iterator, ip) {
3452 struct lxc_inetdev *inetdev = iterator->elem;
3453
3454 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3455 &inetdev->bcast, inetdev->prefix);
3456 if (err) {
3457 errno = -err;
3458 SYSERROR("Failed to setup ipv4 address for network device "
3459 "with ifindex %d", ifindex);
3460 return -1;
3461 }
3462 }
3463
3464 return 0;
3465 }
3466
3467 static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3468 {
3469 struct lxc_list *iterator;
3470 int err;
3471
3472 lxc_list_for_each(iterator, ip) {
3473 struct lxc_inet6dev *inet6dev = iterator->elem;
3474
3475 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3476 &inet6dev->mcast, &inet6dev->acast,
3477 inet6dev->prefix);
3478 if (err) {
3479 errno = -err;
3480 SYSERROR("Failed to setup ipv6 address for network device "
3481 "with ifindex %d", ifindex);
3482 return -1;
3483 }
3484 }
3485
3486 return 0;
3487 }
3488
3489 static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3490 {
3491 char ifname[IFNAMSIZ];
3492 int err;
3493 char *current_ifname = ifname;
3494 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
3495
3496 /* empty network namespace */
3497 if (!netdev->ifindex) {
3498 if (netdev->flags & IFF_UP) {
3499 err = lxc_netdev_up("lo");
3500 if (err) {
3501 errno = -err;
3502 SYSERROR("Failed to set the loopback network device up");
3503 return -1;
3504 }
3505 }
3506
3507 if (netdev->type == LXC_NET_EMPTY)
3508 return 0;
3509
3510 if (netdev->type == LXC_NET_NONE)
3511 return 0;
3512
3513 netdev->ifindex = if_nametoindex(netdev->created_name);
3514 if (!netdev->ifindex)
3515 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3516 netdev->name ?: "(null)");
3517 }
3518
3519 /* get the new ifindex in case of physical netdev */
3520 if (netdev->type == LXC_NET_PHYS) {
3521 netdev->ifindex = if_nametoindex(netdev->link);
3522 if (!netdev->ifindex) {
3523 ERROR("Failed to get ifindex for network device \"%s\"",
3524 netdev->link);
3525 return -1;
3526 }
3527 }
3528
3529 /* retrieve the name of the interface */
3530 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3531 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3532 netdev->ifindex);
3533 return -1;
3534 }
3535
3536 /* Default: let the system choose an interface name.
3537 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3538 * netlink will replace the format specifier with an appropriate index.
3539 */
3540 if (netdev->name[0] == '\0') {
3541 if (netdev->type == LXC_NET_PHYS)
3542 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
3543 else
3544 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
3545 }
3546
3547 /* rename the interface name */
3548 if (strcmp(current_ifname, netdev->name) != 0) {
3549 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
3550 if (err) {
3551 errno = -err;
3552 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
3553 current_ifname, netdev->name);
3554 return -1;
3555 }
3556
3557 TRACE("Renamed network device from \"%s\" to \"%s\"",
3558 current_ifname, netdev->name);
3559 }
3560
3561 /* Re-read the name of the interface because its name has changed
3562 * and would be automatically allocated by the system
3563 */
3564 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3565 ERROR("Failed get name for network device with ifindex %d",
3566 netdev->ifindex);
3567 return -1;
3568 }
3569
3570 /* Now update the recorded name of the network device to reflect the
3571 * name of the network device in the child's network namespace. We will
3572 * later on send this information back to the parent.
3573 */
3574 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
3575
3576 /* set a mac address */
3577 if (netdev->hwaddr) {
3578 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3579 ERROR("Failed to setup hw address for network device \"%s\"",
3580 current_ifname);
3581 return -1;
3582 }
3583 }
3584
3585 /* setup ipv4 addresses on the interface */
3586 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3587 ERROR("Failed to setup ip addresses for network device \"%s\"",
3588 current_ifname);
3589 return -1;
3590 }
3591
3592 /* setup ipv6 addresses on the interface */
3593 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3594 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
3595 current_ifname);
3596 return -1;
3597 }
3598
3599 /* set the network device up */
3600 if (netdev->flags & IFF_UP) {
3601 err = lxc_netdev_up(current_ifname);
3602 if (err) {
3603 errno = -err;
3604 SYSERROR("Failed to set network device \"%s\" up",
3605 current_ifname);
3606 return -1;
3607 }
3608
3609 /* the network is up, make the loopback up too */
3610 err = lxc_netdev_up("lo");
3611 if (err) {
3612 errno = -err;
3613 SYSERROR("Failed to set the loopback network device up");
3614 return -1;
3615 }
3616 }
3617
3618 /* setup ipv4 gateway on the interface */
3619 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
3620 if (!(netdev->flags & IFF_UP)) {
3621 ERROR("Cannot add ipv4 gateway for network device "
3622 "\"%s\" when not bringing up the interface", current_ifname);
3623 return -1;
3624 }
3625
3626 if (lxc_list_empty(&netdev->ipv4)) {
3627 ERROR("Cannot add ipv4 gateway for network device "
3628 "\"%s\" when not assigning an address", current_ifname);
3629 return -1;
3630 }
3631
3632 /* Setup device route if ipv4_gateway_dev is enabled */
3633 if (netdev->ipv4_gateway_dev) {
3634 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3635 if (err < 0) {
3636 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
3637 current_ifname);
3638 return minus_one_set_errno(-err);
3639 }
3640 } else {
3641 /* Check the gateway address is valid */
3642 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3643 return minus_one_set_errno(errno);
3644
3645 /* Try adding a default route to the gateway address */
3646 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3647 if (err < 0) {
3648 /* If adding the default route fails, this could be because the
3649 * gateway address is in a different subnet to the container's address.
3650 * To work around this, we try adding a static device route to the
3651 * gateway address first, and then try again.
3652 */
3653 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
3654 if (err < 0) {
3655 errno = -err;
3656 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
3657 bufinet4, current_ifname);
3658 return -1;
3659 }
3660
3661 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3662 if (err < 0) {
3663 errno = -err;
3664 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
3665 bufinet4, current_ifname);
3666 return -1;
3667 }
3668 }
3669 }
3670 }
3671
3672 /* setup ipv6 gateway on the interface */
3673 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
3674 if (!(netdev->flags & IFF_UP)) {
3675 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3676 current_ifname);
3677 return -1;
3678 }
3679
3680 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
3681 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3682 current_ifname);
3683 return -1;
3684 }
3685
3686 /* Setup device route if ipv6_gateway_dev is enabled */
3687 if (netdev->ipv6_gateway_dev) {
3688 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3689 if (err < 0) {
3690 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
3691 current_ifname);
3692 return minus_one_set_errno(-err);
3693 }
3694 } else {
3695 /* Check the gateway address is valid */
3696 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3697 return minus_one_set_errno(errno);
3698
3699 /* Try adding a default route to the gateway address */
3700 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3701 if (err < 0) {
3702 /* If adding the default route fails, this could be because the
3703 * gateway address is in a different subnet to the container's address.
3704 * To work around this, we try adding a static device route to the
3705 * gateway address first, and then try again.
3706 */
3707 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
3708 if (err < 0) {
3709 errno = -err;
3710 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
3711 bufinet6, current_ifname);
3712 return -1;
3713 }
3714
3715 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3716 if (err < 0) {
3717 errno = -err;
3718 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
3719 bufinet6, current_ifname);
3720 return -1;
3721 }
3722 }
3723 }
3724 }
3725
3726 DEBUG("Network device \"%s\" has been setup", current_ifname);
3727
3728 return 0;
3729 }
3730
3731 int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3732 struct lxc_list *network)
3733 {
3734 struct lxc_list *iterator;
3735
3736 lxc_list_for_each(iterator, network) {
3737 struct lxc_netdev *netdev = iterator->elem;
3738
3739 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
3740 ERROR("Failed to setup netdev");
3741 return -1;
3742 }
3743 }
3744
3745 if (!lxc_list_empty(network))
3746 INFO("Network has been setup");
3747
3748 return 0;
3749 }
3750
3751 int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
3752 {
3753 struct lxc_list *iterator;
3754 struct lxc_list *network = &handler->conf->network;
3755 int data_sock = handler->data_sock[0];
3756
3757 lxc_list_for_each(iterator, network) {
3758 int ret;
3759 struct lxc_netdev *netdev = iterator->elem;
3760
3761 if (netdev->type != LXC_NET_VETH)
3762 continue;
3763
3764 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
3765 if (ret < 0)
3766 return -1;
3767
3768 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3769 if (ret < 0)
3770 return -1;
3771
3772 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
3773 }
3774
3775 return 0;
3776 }
3777
3778 int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3779 {
3780 struct lxc_list *iterator;
3781 struct lxc_list *network = &handler->conf->network;
3782 int data_sock = handler->data_sock[1];
3783
3784 lxc_list_for_each(iterator, network) {
3785 int ret;
3786 struct lxc_netdev *netdev = iterator->elem;
3787
3788 if (netdev->type != LXC_NET_VETH)
3789 continue;
3790
3791 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
3792 if (ret < 0)
3793 return -1;
3794
3795 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3796 if (ret < 0)
3797 return -1;
3798 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
3799 }
3800
3801 return 0;
3802 }
3803
3804 int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3805 {
3806 struct lxc_list *iterator, *network;
3807 int data_sock = handler->data_sock[0];
3808
3809 if (!handler->am_root)
3810 return 0;
3811
3812 network = &handler->conf->network;
3813 lxc_list_for_each(iterator, network) {
3814 int ret;
3815 struct lxc_netdev *netdev = iterator->elem;
3816
3817 /* Send network device name in the child's namespace to parent. */
3818 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
3819 if (ret < 0)
3820 return -1;
3821
3822 /* Send network device ifindex in the child's namespace to
3823 * parent.
3824 */
3825 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
3826 if (ret < 0)
3827 return -1;
3828 }
3829
3830 if (!lxc_list_empty(network))
3831 TRACE("Sent network device names and ifindices to parent");
3832
3833 return 0;
3834 }
3835
3836 int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3837 {
3838 struct lxc_list *iterator, *network;
3839 int data_sock = handler->data_sock[1];
3840
3841 if (!handler->am_root)
3842 return 0;
3843
3844 network = &handler->conf->network;
3845 lxc_list_for_each(iterator, network) {
3846 int ret;
3847 struct lxc_netdev *netdev = iterator->elem;
3848
3849 /* Receive network device name in the child's namespace to
3850 * parent.
3851 */
3852 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
3853 if (ret < 0)
3854 return -1;
3855
3856 /* Receive network device ifindex in the child's namespace to
3857 * parent.
3858 */
3859 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
3860 if (ret < 0)
3861 return -1;
3862 }
3863
3864 return 0;
3865 }
3866
3867 void lxc_delete_network(struct lxc_handler *handler)
3868 {
3869 bool bret;
3870
3871 if (handler->am_root)
3872 bret = lxc_delete_network_priv(handler);
3873 else
3874 bret = lxc_delete_network_unpriv(handler);
3875 if (!bret)
3876 DEBUG("Failed to delete network devices");
3877 else
3878 DEBUG("Deleted network devices");
3879 }
3880
3881 int lxc_netns_set_nsid(int fd)
3882 {
3883 int ret;
3884 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3885 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3886 NLMSG_ALIGN(1024)];
3887 struct nl_handler nlh;
3888 struct nlmsghdr *hdr;
3889 struct rtgenmsg *msg;
3890 int saved_errno;
3891 const __s32 ns_id = -1;
3892 const __u32 netns_fd = fd;
3893
3894 ret = netlink_open(&nlh, NETLINK_ROUTE);
3895 if (ret < 0)
3896 return -1;
3897
3898 memset(buf, 0, sizeof(buf));
3899
3900 #pragma GCC diagnostic push
3901 #pragma GCC diagnostic ignored "-Wcast-align"
3902 hdr = (struct nlmsghdr *)buf;
3903 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
3904 #pragma GCC diagnostic pop
3905
3906 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3907 hdr->nlmsg_type = RTM_NEWNSID;
3908 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3909 hdr->nlmsg_pid = 0;
3910 hdr->nlmsg_seq = RTM_NEWNSID;
3911 msg->rtgen_family = AF_UNSPEC;
3912
3913 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3914 if (ret < 0)
3915 goto on_error;
3916
3917 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3918 if (ret < 0)
3919 goto on_error;
3920
3921 ret = __netlink_transaction(&nlh, hdr, hdr);
3922
3923 on_error:
3924 saved_errno = errno;
3925 netlink_close(&nlh);
3926 errno = saved_errno;
3927
3928 return ret;
3929 }
3930
/* Index a run of route attributes by type.
 *
 * @tb must have room for @max + 1 entries; it is cleared first. Walking the
 * @len bytes starting at @rta, every attribute whose type is <= @max is
 * stored in tb[type]. If a type occurs more than once, the first occurrence
 * is kept. Attributes with a type above @max are ignored.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	struct rtattr *cur;

	memset(tb, 0, sizeof(*tb) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	/* RTA_NEXT() also decrements len by the aligned attribute length. */
	for (cur = rta; RTA_OK(cur, len); cur = RTA_NEXT(cur, len)) {
		unsigned short kind = cur->rta_type;

		if (kind > max || tb[kind])
			continue;

		tb[kind] = cur;
	}
#pragma GCC diagnostic pop

	return 0;
}
3950
/* Read the signed 32-bit payload of the route attribute @rta.
 *
 * The value is copied out with memcpy() instead of being dereferenced
 * through a cast pointer, so the read does not depend on the payload's
 * alignment or on the declared type of the buffer the attribute lives in.
 *
 * No length check is performed: the caller must make sure @rta carries at
 * least sizeof(__s32) bytes of payload.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 val;

	memcpy(&val, RTA_DATA(rta), sizeof(val));

	return val;
}
3955
/* Given a pointer @r to a struct rtgenmsg payload, yield a pointer to the
 * first struct rtattr behind it, i.e. @r advanced by the netlink-aligned
 * size of struct rtgenmsg. Only defined here if nothing else has already
 * provided the macro.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3960
3961 int lxc_netns_get_nsid(int fd)
3962 {
3963 int ret;
3964 ssize_t len;
3965 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3966 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3967 NLMSG_ALIGN(1024)];
3968 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3969 struct nl_handler nlh;
3970 struct nlmsghdr *hdr;
3971 struct rtgenmsg *msg;
3972 int saved_errno;
3973 __u32 netns_fd = fd;
3974
3975 ret = netlink_open(&nlh, NETLINK_ROUTE);
3976 if (ret < 0)
3977 return -1;
3978
3979 memset(buf, 0, sizeof(buf));
3980
3981 #pragma GCC diagnostic push
3982 #pragma GCC diagnostic ignored "-Wcast-align"
3983 hdr = (struct nlmsghdr *)buf;
3984 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
3985 #pragma GCC diagnostic pop
3986
3987 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3988 hdr->nlmsg_type = RTM_GETNSID;
3989 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3990 hdr->nlmsg_pid = 0;
3991 hdr->nlmsg_seq = RTM_GETNSID;
3992 msg->rtgen_family = AF_UNSPEC;
3993
3994 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3995 if (ret == 0)
3996 ret = __netlink_transaction(&nlh, hdr, hdr);
3997
3998 saved_errno = errno;
3999 netlink_close(&nlh);
4000 errno = saved_errno;
4001 if (ret < 0)
4002 return -1;
4003
4004 errno = EINVAL;
4005 msg = NLMSG_DATA(hdr);
4006 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4007 if (len < 0)
4008 return -1;
4009
4010 #pragma GCC diagnostic push
4011 #pragma GCC diagnostic ignored "-Wcast-align"
4012 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4013 if (tb[__LXC_NETNSA_NSID])
4014 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
4015 #pragma GCC diagnostic pop
4016
4017 return -1;
4018 }
4019
4020 int lxc_create_network(struct lxc_handler *handler)
4021 {
4022 int ret;
4023
4024 /*
4025 * Find gateway addresses from the link device, which is no longer
4026 * accessible inside the container. Do this before creating network
4027 * interfaces, since goto out_delete_net does not work before
4028 * lxc_clone.
4029 */
4030 ret = lxc_find_gateway_addresses(handler);
4031 if (ret) {
4032 ERROR("Failed to find gateway addresses");
4033 return -1;
4034 }
4035
4036 if (handler->am_root) {
4037 ret = lxc_create_network_priv(handler);
4038 if (ret)
4039 return -1;
4040
4041 return lxc_network_move_created_netdev_priv(handler);
4042 }
4043
4044 return lxc_create_network_unpriv(handler);
4045 }