]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/network.c
de8438ad7a99e55eb316bb698ee9262eb993c000
[mirror_lxc.git] / src / lxc / network.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #ifndef _GNU_SOURCE
25 #define _GNU_SOURCE 1
26 #endif
27 #include <arpa/inet.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <linux/netlink.h>
32 #include <linux/rtnetlink.h>
33 #include <linux/sockios.h>
34 #include <net/ethernet.h>
35 #include <net/if.h>
36 #include <net/if_arp.h>
37 #include <netinet/in.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <sys/inotify.h>
42 #include <sys/ioctl.h>
43 #include <sys/param.h>
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/types.h>
47 #include <time.h>
48 #include <unistd.h>
49
50 #include "../include/netns_ifaddrs.h"
51 #include "af_unix.h"
52 #include "conf.h"
53 #include "config.h"
54 #include "file_utils.h"
55 #include "log.h"
56 #include "macro.h"
57 #include "memory_utils.h"
58 #include "network.h"
59 #include "nl.h"
60 #include "raw_syscalls.h"
61 #include "syscall_wrappers.h"
62 #include "utils.h"
63
64 #ifndef HAVE_STRLCPY
65 #include "include/strlcpy.h"
66 #endif
67
/* Log category declaration for this file (arguments: network, lxc). */
lxc_log_define(network, lxc);

/*
 * Common signature of the per-network-type callbacks stored in the
 * netdev_conf[] and netdev_deconf[] tables.
 */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);

/* Name of the loopback network interface. */
static const char loop_device[] = "lo";
72
73 static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
74 {
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116 out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121 }
122
/* Send an RTM_NEWROUTE request for the IPv4 destination @dest/@netmask on @ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_NEWROUTE;

	return lxc_ip_route_dest(request_type, AF_INET, ifindex, dest, netmask);
}
127
/* Send an RTM_NEWROUTE request for the IPv6 destination @dest/@netmask on @ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_NEWROUTE;

	return lxc_ip_route_dest(request_type, AF_INET6, ifindex, dest, netmask);
}
132
/* Send an RTM_DELROUTE request for the IPv4 destination @dest/@netmask on @ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_DELROUTE;

	return lxc_ip_route_dest(request_type, AF_INET, ifindex, dest, netmask);
}
137
/* Send an RTM_DELROUTE request for the IPv6 destination @dest/@netmask on @ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const __u16 request_type = RTM_DELROUTE;

	return lxc_ip_route_dest(request_type, AF_INET6, ifindex, dest, netmask);
}
142
143 static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144 {
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160 }
161
162 static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163 {
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179 }
180
181 static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182 {
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
188 if (netdev->priv.veth_attr.pair[0] != '\0') {
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
217 goto out_delete;
218 }
219
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
222 * of a container */
223 err = setup_private_host_hw_addr(veth1);
224 if (err) {
225 errno = -err;
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
227 goto out_delete;
228 }
229
230 /* Retrieve ifindex of the host's veth device. */
231 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
232 if (!netdev->priv.veth_attr.ifindex) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
234 goto out_delete;
235 }
236
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
240 * on.
241 */
242 netdev->ifindex = if_nametoindex(veth2);
243 if (!netdev->ifindex) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
245 goto out_delete;
246 }
247
248 if (netdev->mtu) {
249 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
250 WARN("Failed to parse mtu");
251 else
252 INFO("Retrieved mtu %d", mtu);
253 } else if (netdev->link[0] != '\0') {
254 bridge_index = if_nametoindex(netdev->link);
255 if (bridge_index) {
256 mtu = netdev_get_mtu(bridge_index);
257 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
258 } else {
259 mtu = netdev_get_mtu(netdev->ifindex);
260 INFO("Retrieved mtu %d from %s", mtu, veth2);
261 }
262 }
263
264 if (mtu) {
265 err = lxc_netdev_set_mtu(veth1, mtu);
266 if (!err)
267 err = lxc_netdev_set_mtu(veth2, mtu);
268
269 if (err) {
270 errno = -err;
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu, veth1, veth2);
273 goto out_delete;
274 }
275 }
276
277 if (netdev->link[0] != '\0') {
278 err = lxc_bridge_attach(netdev->link, veth1);
279 if (err) {
280 errno = -err;
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1, netdev->link);
283 goto out_delete;
284 }
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
286 }
287
288 err = lxc_netdev_up(veth1);
289 if (err) {
290 errno = -err;
291 SYSERROR("Failed to set \"%s\" up", veth1);
292 goto out_delete;
293 }
294
295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
298 goto out_delete;
299 }
300
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
304 goto out_delete;
305 }
306
307 if (netdev->upscript) {
308 char *argv[] = {
309 "veth",
310 netdev->link,
311 veth1,
312 NULL,
313 };
314
315 err = run_script_argv(handler->name,
316 handler->conf->hooks_version, "net",
317 netdev->upscript, "up", argv);
318 if (err < 0)
319 goto out_delete;
320 }
321
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
323 netdev->ifindex);
324
325 return 0;
326
327 out_delete:
328 if (netdev->ifindex != 0)
329 lxc_netdev_delete_by_name(veth1);
330 return -1;
331 }
332
333 static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
334 {
335 char peer[IFNAMSIZ];
336 int err;
337 unsigned int mtu = 0;
338
339 if (netdev->link[0] == '\0') {
340 ERROR("No link for macvlan network device specified");
341 return -1;
342 }
343
344 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
345 if (err < 0 || (size_t)err >= sizeof(peer))
346 return -1;
347
348 if (!lxc_mkifname(peer))
349 return -1;
350
351 err = lxc_macvlan_create(netdev->link, peer,
352 netdev->priv.macvlan_attr.mode);
353 if (err) {
354 errno = -err;
355 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
356 peer, netdev->link);
357 goto on_error;
358 }
359
360 strlcpy(netdev->created_name, peer, IFNAMSIZ);
361
362 netdev->ifindex = if_nametoindex(peer);
363 if (!netdev->ifindex) {
364 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
365 goto on_error;
366 }
367
368 if (netdev->mtu) {
369 err = lxc_safe_uint(netdev->mtu, &mtu);
370 if (err < 0) {
371 errno = -err;
372 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
373 goto on_error;
374 }
375
376 err = lxc_netdev_set_mtu(peer, mtu);
377 if (err < 0) {
378 errno = -err;
379 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
380 goto on_error;
381 }
382 }
383
384 if (netdev->upscript) {
385 char *argv[] = {
386 "macvlan",
387 netdev->link,
388 NULL,
389 };
390
391 err = run_script_argv(handler->name,
392 handler->conf->hooks_version, "net",
393 netdev->upscript, "up", argv);
394 if (err < 0)
395 goto on_error;
396 }
397
398 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
399 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
400
401 return 0;
402
403 on_error:
404 lxc_netdev_delete_by_name(peer);
405 return -1;
406 }
407
408 static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
409 {
410 int err, index, len;
411 struct ifinfomsg *ifi;
412 struct nl_handler nlh;
413 struct rtattr *nest, *nest2;
414 struct nlmsg *answer = NULL, *nlmsg = NULL;
415
416 len = strlen(master);
417 if (len == 1 || len >= IFNAMSIZ)
418 return minus_one_set_errno(EINVAL);
419
420 len = strlen(name);
421 if (len == 1 || len >= IFNAMSIZ)
422 return minus_one_set_errno(EINVAL);
423
424 index = if_nametoindex(master);
425 if (!index)
426 return minus_one_set_errno(EINVAL);
427
428 err = netlink_open(&nlh, NETLINK_ROUTE);
429 if (err)
430 return minus_one_set_errno(-err);
431
432 err = -ENOMEM;
433 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
434 if (!nlmsg)
435 goto out;
436
437 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
438 if (!answer)
439 goto out;
440
441 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
442 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
443
444 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
445 if (!ifi) {
446 goto out;
447 }
448 ifi->ifi_family = AF_UNSPEC;
449
450 err = -EPROTO;
451 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
452 if (!nest)
453 goto out;
454
455 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
456 goto out;
457
458 if (mode) {
459 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
460 if (!nest2)
461 goto out;
462
463 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
464 goto out;
465
466 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
467 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
468 */
469 if (isolation > 0) {
470 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
471 goto out;
472 }
473
474 nla_end_nested(nlmsg, nest2);
475 }
476
477 nla_end_nested(nlmsg, nest);
478
479 if (nla_put_u32(nlmsg, IFLA_LINK, index))
480 goto out;
481
482 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
483 goto out;
484
485 err = netlink_transaction(&nlh, nlmsg, answer);
486 out:
487 netlink_close(&nlh);
488 nlmsg_free(answer);
489 nlmsg_free(nlmsg);
490 if (err < 0)
491 return minus_one_set_errno(-err);
492 return 0;
493 }
494
495 static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
496 {
497 char peerbuf[IFNAMSIZ], *peer;
498 int err;
499 unsigned int mtu = 0;
500
501 if (netdev->link[0] == '\0') {
502 ERROR("No link for ipvlan network device specified");
503 return -1;
504 }
505
506 err = snprintf(peerbuf, sizeof(peerbuf), "ipXXXXXX");
507 if (err < 0 || (size_t)err >= sizeof(peerbuf))
508 return -1;
509
510 peer = lxc_mkifname(peerbuf);
511 if (!peer)
512 return -1;
513
514 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode, netdev->priv.ipvlan_attr.isolation);
515 if (err) {
516 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"", peer, netdev->link);
517 goto on_error;
518 }
519
520 netdev->ifindex = if_nametoindex(peer);
521 if (!netdev->ifindex) {
522 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
523 goto on_error;
524 }
525
526 if (netdev->mtu) {
527 err = lxc_safe_uint(netdev->mtu, &mtu);
528 if (err < 0) {
529 errno = -err;
530 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
531 goto on_error;
532 }
533
534 err = lxc_netdev_set_mtu(peer, mtu);
535 if (err < 0) {
536 errno = -err;
537 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
538 goto on_error;
539 }
540 }
541
542 if (netdev->upscript) {
543 char *argv[] = {
544 "ipvlan",
545 netdev->link,
546 NULL,
547 };
548
549 err = run_script_argv(handler->name,
550 handler->conf->hooks_version, "net",
551 netdev->upscript, "up", argv);
552 if (err < 0)
553 goto on_error;
554 }
555
556 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d",
557 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
558
559 return 0;
560
561 on_error:
562 lxc_netdev_delete_by_name(peer);
563 return -1;
564 }
565
566 static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
567 {
568 char peer[IFNAMSIZ];
569 int err;
570 static uint16_t vlan_cntr = 0;
571 unsigned int mtu = 0;
572
573 if (netdev->link[0] == '\0') {
574 ERROR("No link for vlan network device specified");
575 return -1;
576 }
577
578 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
579 if (err < 0 || (size_t)err >= sizeof(peer))
580 return -1;
581
582 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
583 if (err) {
584 errno = -err;
585 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
586 peer, netdev->link);
587 return -1;
588 }
589
590 netdev->ifindex = if_nametoindex(peer);
591 if (!netdev->ifindex) {
592 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
593 goto on_error;
594 }
595
596 if (netdev->mtu) {
597 err = lxc_safe_uint(netdev->mtu, &mtu);
598 if (err < 0) {
599 errno = -err;
600 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
601 goto on_error;
602 }
603
604 err = lxc_netdev_set_mtu(peer, mtu);
605 if (err) {
606 errno = -err;
607 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
608 goto on_error;
609 }
610 }
611
612 if (netdev->upscript) {
613 char *argv[] = {
614 "vlan",
615 netdev->link,
616 NULL,
617 };
618
619 err = run_script_argv(handler->name,
620 handler->conf->hooks_version, "net",
621 netdev->upscript, "up", argv);
622 if (err < 0) {
623 goto on_error;
624 }
625 }
626
627 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"",
628 peer, netdev->ifindex);
629
630 return 0;
631
632 on_error:
633 lxc_netdev_delete_by_name(peer);
634 return -1;
635 }
636
637 static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
638 {
639 int err, mtu_orig = 0;
640 unsigned int mtu = 0;
641
642 if (netdev->link[0] == '\0') {
643 ERROR("No link for physical interface specified");
644 return -1;
645 }
646
647 /* Note that we're retrieving the container's ifindex in the host's
648 * network namespace because we need it to move the device from the
649 * host's network namespace to the container's network namespace later
650 * on.
651 * Note that netdev->link will contain the name of the physical network
652 * device in the host's namespace.
653 */
654 netdev->ifindex = if_nametoindex(netdev->link);
655 if (!netdev->ifindex) {
656 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
657 return -1;
658 }
659
660 /* Store the ifindex of the host's network device in the host's
661 * namespace.
662 */
663 netdev->priv.phys_attr.ifindex = netdev->ifindex;
664
665 /* Get original device MTU setting and store for restoration after container shutdown. */
666 mtu_orig = netdev_get_mtu(netdev->ifindex);
667 if (mtu_orig < 0) {
668 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
669 return minus_one_set_errno(-mtu_orig);
670 }
671
672 netdev->priv.phys_attr.mtu = mtu_orig;
673
674 if (netdev->mtu) {
675 err = lxc_safe_uint(netdev->mtu, &mtu);
676 if (err < 0) {
677 errno = -err;
678 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
679 return -1;
680 }
681
682 err = lxc_netdev_set_mtu(netdev->link, mtu);
683 if (err < 0) {
684 errno = -err;
685 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
686 return -1;
687 }
688 }
689
690 if (netdev->upscript) {
691 char *argv[] = {
692 "phys",
693 netdev->link,
694 NULL,
695 };
696
697 err = run_script_argv(handler->name,
698 handler->conf->hooks_version, "net",
699 netdev->upscript, "up", argv);
700 if (err < 0) {
701 return -1;
702 }
703 }
704
705 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link, netdev->ifindex);
706
707 return 0;
708 }
709
710 static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
711 {
712 int ret;
713 char *argv[] = {
714 "empty",
715 NULL,
716 };
717
718 netdev->ifindex = 0;
719 if (!netdev->upscript)
720 return 0;
721
722 ret = run_script_argv(handler->name, handler->conf->hooks_version,
723 "net", netdev->upscript, "up", argv);
724 if (ret < 0)
725 return -1;
726
727 return 0;
728 }
729
730 static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
731 {
732 netdev->ifindex = 0;
733 return 0;
734 }
735
736 static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
737 [LXC_NET_VETH] = instantiate_veth,
738 [LXC_NET_MACVLAN] = instantiate_macvlan,
739 [LXC_NET_IPVLAN] = instantiate_ipvlan,
740 [LXC_NET_VLAN] = instantiate_vlan,
741 [LXC_NET_PHYS] = instantiate_phys,
742 [LXC_NET_EMPTY] = instantiate_empty,
743 [LXC_NET_NONE] = instantiate_none,
744 };
745
746 static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
747 {
748 int ret;
749 char *argv[] = {
750 "veth",
751 netdev->link,
752 NULL,
753 NULL,
754 };
755
756 if (!netdev->downscript)
757 return 0;
758
759 if (netdev->priv.veth_attr.pair[0] != '\0')
760 argv[2] = netdev->priv.veth_attr.pair;
761 else
762 argv[2] = netdev->priv.veth_attr.veth1;
763
764 ret = run_script_argv(handler->name,
765 handler->conf->hooks_version, "net",
766 netdev->downscript, "down", argv);
767 if (ret < 0)
768 return -1;
769
770 return 0;
771 }
772
773 static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
774 {
775 int ret;
776 char *argv[] = {
777 "macvlan",
778 netdev->link,
779 NULL,
780 };
781
782 if (!netdev->downscript)
783 return 0;
784
785 ret = run_script_argv(handler->name, handler->conf->hooks_version,
786 "net", netdev->downscript, "down", argv);
787 if (ret < 0)
788 return -1;
789
790 return 0;
791 }
792
793 static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
794 {
795 int ret;
796 char *argv[] = {
797 "ipvlan",
798 netdev->link,
799 NULL,
800 };
801
802 if (!netdev->downscript)
803 return 0;
804
805 ret = run_script_argv(handler->name, handler->conf->hooks_version,
806 "net", netdev->downscript, "down", argv);
807 if (ret < 0)
808 return -1;
809
810 return 0;
811 }
812
813 static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
814 {
815 int ret;
816 char *argv[] = {
817 "vlan",
818 netdev->link,
819 NULL,
820 };
821
822 if (!netdev->downscript)
823 return 0;
824
825 ret = run_script_argv(handler->name, handler->conf->hooks_version,
826 "net", netdev->downscript, "down", argv);
827 if (ret < 0)
828 return -1;
829
830 return 0;
831 }
832
833 static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
834 {
835 int ret;
836 char *argv[] = {
837 "phys",
838 netdev->link,
839 NULL,
840 };
841
842 if (!netdev->downscript)
843 return 0;
844
845 ret = run_script_argv(handler->name, handler->conf->hooks_version,
846 "net", netdev->downscript, "down", argv);
847 if (ret < 0)
848 return -1;
849
850 return 0;
851 }
852
853 static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
854 {
855 int ret;
856 char *argv[] = {
857 "empty",
858 NULL,
859 };
860
861 if (!netdev->downscript)
862 return 0;
863
864 ret = run_script_argv(handler->name, handler->conf->hooks_version,
865 "net", netdev->downscript, "down", argv);
866 if (ret < 0)
867 return -1;
868
869 return 0;
870 }
871
/* Teardown callback for the "none" network type: nothing to do. Always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	/* Both arguments are intentionally unused. */
	return 0;
}
876
877 static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
878 [LXC_NET_VETH] = shutdown_veth,
879 [LXC_NET_MACVLAN] = shutdown_macvlan,
880 [LXC_NET_IPVLAN] = shutdown_ipvlan,
881 [LXC_NET_VLAN] = shutdown_vlan,
882 [LXC_NET_PHYS] = shutdown_phys,
883 [LXC_NET_EMPTY] = shutdown_empty,
884 [LXC_NET_NONE] = shutdown_none,
885 };
886
887 static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
888 {
889 int err;
890 struct nl_handler nlh;
891 struct ifinfomsg *ifi;
892 struct nlmsg *nlmsg = NULL;
893
894 err = netlink_open(&nlh, NETLINK_ROUTE);
895 if (err)
896 return err;
897
898 err = -ENOMEM;
899 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
900 if (!nlmsg)
901 goto out;
902
903 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
904 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
905
906 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
907 if (!ifi)
908 goto out;
909 ifi->ifi_family = AF_UNSPEC;
910 ifi->ifi_index = ifindex;
911
912 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
913 goto out;
914
915 if (ifname != NULL) {
916 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
917 goto out;
918 }
919
920 err = netlink_transaction(&nlh, nlmsg, nlmsg);
921 out:
922 netlink_close(&nlh);
923 nlmsg_free(nlmsg);
924 return err;
925 }
926
927 int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
928 {
929 int err;
930 struct nl_handler nlh;
931 struct ifinfomsg *ifi;
932 struct nlmsg *nlmsg = NULL;
933
934 err = netlink_open(&nlh, NETLINK_ROUTE);
935 if (err)
936 return err;
937
938 err = -ENOMEM;
939 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
940 if (!nlmsg)
941 goto out;
942
943 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
944 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
945
946 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
947 if (!ifi)
948 goto out;
949 ifi->ifi_family = AF_UNSPEC;
950 ifi->ifi_index = ifindex;
951
952 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
953 goto out;
954
955 if (ifname != NULL) {
956 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
957 goto out;
958 }
959
960 err = netlink_transaction(&nlh, nlmsg, nlmsg);
961 out:
962 netlink_close(&nlh);
963 nlmsg_free(nlmsg);
964 return err;
965 }
966
967 /* If we are asked to move a wireless interface, then we must actually move its
968 * phyN device. Detect that condition and return the physname here. The physname
969 * will be passed to lxc_netdev_move_wlan() which will free it when done.
970 */
971 #define PHYSNAME "/sys/class/net/%s/phy80211/name"
972 static char *is_wlan(const char *ifname)
973 {
974 __do_free char *path = NULL;
975 int i, ret;
976 long physlen;
977 size_t len;
978 FILE *f;
979 char *physname = NULL;
980
981 len = strlen(ifname) + strlen(PHYSNAME) - 1;
982 path = must_realloc(NULL, len + 1);
983 ret = snprintf(path, len, PHYSNAME, ifname);
984 if (ret < 0 || (size_t)ret >= len)
985 goto bad;
986
987 f = fopen(path, "r");
988 if (!f)
989 goto bad;
990
991 /* Feh - sb.st_size is always 4096. */
992 fseek(f, 0, SEEK_END);
993 physlen = ftell(f);
994 fseek(f, 0, SEEK_SET);
995 if (physlen < 0) {
996 fclose(f);
997 goto bad;
998 }
999
1000 physname = malloc(physlen + 1);
1001 if (!physname) {
1002 fclose(f);
1003 goto bad;
1004 }
1005
1006 memset(physname, 0, physlen + 1);
1007 ret = fread(physname, 1, physlen, f);
1008 fclose(f);
1009 if (ret < 0)
1010 goto bad;
1011
1012 for (i = 0; i < physlen; i++) {
1013 if (physname[i] == '\n')
1014 physname[i] = '\0';
1015
1016 if (physname[i] == '\0')
1017 break;
1018 }
1019
1020 return physname;
1021
1022 bad:
1023 free(physname);
1024 return NULL;
1025 }
1026
/*
 * Rename the network device @old to @new inside the network namespace of the
 * process @pid.
 *
 * The work is done in a forked child so that the caller's own network
 * namespace is left untouched: the child switches namespace, performs the
 * rename and exits with the result as its exit status.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() for the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child from here on: it must never return into the caller's code,
	 * otherwise a second copy of the calling process would keep running.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1044
/*
 * Move the wireless phy @physname into the network namespace of @pid by
 * running "iw phy <physname> set netns <pid>", then rename @ifname to
 * @newname inside that namespace when @newname is not NULL.
 *
 * @physname is always freed before returning.
 *
 * Returns -1 if "iw" is not found on the path, if fork() fails or if the
 * child does not succeed. Otherwise returns 0, or the result of the rename
 * when one was requested.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	char *iw_path;
	pid_t child;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto done;
	/* Only the existence of the binary matters, not its location. */
	free(iw_path);

	child = fork();
	if (child < 0)
		goto done;

	if (child == 0) {
		char pidstr[30];

		snprintf(pidstr, sizeof(pidstr), "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
		       (char *)NULL);
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child))
		goto done;

	if (newname)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

done:
	free(physname);
	return ret;
}
1084
/*
 * Move the network device @ifname into the network namespace of @pid,
 * optionally renaming it to @newname.
 *
 * Devices for which is_wlan() finds a phy name are handled by
 * lxc_netdev_move_wlan(), all others by lxc_netdev_move_by_index().
 *
 * Returns -EINVAL if @ifname is NULL or cannot be resolved to an ifindex,
 * otherwise the result of the selected helper.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	char *phy;
	int ifindex;

	if (!ifname)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (!ifindex)
		return -EINVAL;

	phy = is_wlan(ifname);
	if (!phy)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	/* lxc_netdev_move_wlan() takes ownership of phy and frees it. */
	return lxc_netdev_move_wlan(phy, ifname, pid, newname);
}
1103
1104 int lxc_netdev_delete_by_index(int ifindex)
1105 {
1106 int err;
1107 struct ifinfomsg *ifi;
1108 struct nl_handler nlh;
1109 struct nlmsg *answer = NULL, *nlmsg = NULL;
1110
1111 err = netlink_open(&nlh, NETLINK_ROUTE);
1112 if (err)
1113 return err;
1114
1115 err = -ENOMEM;
1116 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1117 if (!nlmsg)
1118 goto out;
1119
1120 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1121 if (!answer)
1122 goto out;
1123
1124 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1125 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1126
1127 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1128 if (!ifi)
1129 goto out;
1130 ifi->ifi_family = AF_UNSPEC;
1131 ifi->ifi_index = ifindex;
1132
1133 err = netlink_transaction(&nlh, nlmsg, answer);
1134 out:
1135 netlink_close(&nlh);
1136 nlmsg_free(answer);
1137 nlmsg_free(nlmsg);
1138 return err;
1139 }
1140
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL if the name cannot be resolved to an ifindex, otherwise
 * the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1151
1152 int lxc_netdev_rename_by_index(int ifindex, const char *newname)
1153 {
1154 int err, len;
1155 struct ifinfomsg *ifi;
1156 struct nl_handler nlh;
1157 struct nlmsg *answer = NULL, *nlmsg = NULL;
1158
1159 err = netlink_open(&nlh, NETLINK_ROUTE);
1160 if (err)
1161 return err;
1162
1163 len = strlen(newname);
1164 if (len == 1 || len >= IFNAMSIZ) {
1165 err = -EINVAL;
1166 goto out;
1167 }
1168
1169 err = -ENOMEM;
1170 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1171 if (!nlmsg)
1172 goto out;
1173
1174 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1175 if (!answer)
1176 goto out;
1177
1178 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1179 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1180
1181 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1182 if (!ifi)
1183 goto out;
1184 ifi->ifi_family = AF_UNSPEC;
1185 ifi->ifi_index = ifindex;
1186
1187 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1188 goto out;
1189
1190 err = netlink_transaction(&nlh, nlmsg, answer);
1191 out:
1192 netlink_close(&nlh);
1193 nlmsg_free(answer);
1194 nlmsg_free(nlmsg);
1195 return err;
1196 }
1197
1198 int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1199 {
1200 int len, index;
1201
1202 len = strlen(oldname);
1203 if (len == 1 || len >= IFNAMSIZ)
1204 return -EINVAL;
1205
1206 index = if_nametoindex(oldname);
1207 if (!index)
1208 return -EINVAL;
1209
1210 return lxc_netdev_rename_by_index(index, newname);
1211 }
1212
1213 int netdev_set_flag(const char *name, int flag)
1214 {
1215 int err, index, len;
1216 struct ifinfomsg *ifi;
1217 struct nl_handler nlh;
1218 struct nlmsg *answer = NULL, *nlmsg = NULL;
1219
1220 err = netlink_open(&nlh, NETLINK_ROUTE);
1221 if (err)
1222 return err;
1223
1224 err = -EINVAL;
1225 len = strlen(name);
1226 if (len == 1 || len >= IFNAMSIZ)
1227 goto out;
1228
1229 err = -ENOMEM;
1230 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1231 if (!nlmsg)
1232 goto out;
1233
1234 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1235 if (!answer)
1236 goto out;
1237
1238 err = -EINVAL;
1239 index = if_nametoindex(name);
1240 if (!index)
1241 goto out;
1242
1243 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1244 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1245
1246 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1247 if (!ifi) {
1248 err = -ENOMEM;
1249 goto out;
1250 }
1251 ifi->ifi_family = AF_UNSPEC;
1252 ifi->ifi_index = index;
1253 ifi->ifi_change |= IFF_UP;
1254 ifi->ifi_flags |= flag;
1255
1256 err = netlink_transaction(&nlh, nlmsg, answer);
1257 out:
1258 netlink_close(&nlh);
1259 nlmsg_free(nlmsg);
1260 nlmsg_free(answer);
1261 return err;
1262 }
1263
1264 int netdev_get_flag(const char *name, int *flag)
1265 {
1266 int err, index, len;
1267 struct ifinfomsg *ifi;
1268 struct nl_handler nlh;
1269 struct nlmsg *answer = NULL, *nlmsg = NULL;
1270
1271 if (!name)
1272 return -EINVAL;
1273
1274 err = netlink_open(&nlh, NETLINK_ROUTE);
1275 if (err)
1276 return err;
1277
1278 err = -EINVAL;
1279 len = strlen(name);
1280 if (len == 1 || len >= IFNAMSIZ)
1281 goto out;
1282
1283 err = -ENOMEM;
1284 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1285 if (!nlmsg)
1286 goto out;
1287
1288 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1289 if (!answer)
1290 goto out;
1291
1292 err = -EINVAL;
1293 index = if_nametoindex(name);
1294 if (!index)
1295 goto out;
1296
1297 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1298 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1299
1300 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1301 if (!ifi) {
1302 err = -ENOMEM;
1303 goto out;
1304 }
1305 ifi->ifi_family = AF_UNSPEC;
1306 ifi->ifi_index = index;
1307
1308 err = netlink_transaction(&nlh, nlmsg, answer);
1309 if (err)
1310 goto out;
1311
1312 ifi = NLMSG_DATA(answer->nlmsghdr);
1313
1314 *flag = ifi->ifi_flags;
1315 out:
1316 netlink_close(&nlh);
1317 nlmsg_free(nlmsg);
1318 nlmsg_free(answer);
1319 return err;
1320 }
1321
1322 /*
1323 * \brief Check a interface is up or not.
1324 *
1325 * \param name: name for the interface.
1326 *
1327 * \return int.
1328 * 0 means interface is down.
1329 * 1 means interface is up.
1330 * Others means error happened, and ret-value is the error number.
1331 */
1332 int lxc_netdev_isup(const char *name)
1333 {
1334 int err, flag;
1335
1336 err = netdev_get_flag(name, &flag);
1337 if (err)
1338 return err;
1339
1340 if (flag & IFF_UP)
1341 return 1;
1342
1343 return 0;
1344 }
1345
1346 int netdev_get_mtu(int ifindex)
1347 {
1348 int answer_len, err, res;
1349 struct nl_handler nlh;
1350 struct ifinfomsg *ifi;
1351 struct nlmsghdr *msg;
1352 int readmore = 0, recv_len = 0;
1353 struct nlmsg *answer = NULL, *nlmsg = NULL;
1354
1355 err = netlink_open(&nlh, NETLINK_ROUTE);
1356 if (err)
1357 return err;
1358
1359 err = -ENOMEM;
1360 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1361 if (!nlmsg)
1362 goto out;
1363
1364 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1365 if (!answer)
1366 goto out;
1367
1368 /* Save the answer buffer length, since it will be overwritten
1369 * on the first receive (and we might need to receive more than
1370 * once.
1371 */
1372 answer_len = answer->nlmsghdr->nlmsg_len;
1373
1374 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
1375 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1376
1377 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1378 if (!ifi)
1379 goto out;
1380 ifi->ifi_family = AF_UNSPEC;
1381
1382 /* Send the request for addresses, which returns all addresses
1383 * on all interfaces. */
1384 err = netlink_send(&nlh, nlmsg);
1385 if (err < 0)
1386 goto out;
1387
1388 #pragma GCC diagnostic push
1389 #pragma GCC diagnostic ignored "-Wcast-align"
1390
1391 do {
1392 /* Restore the answer buffer length, it might have been
1393 * overwritten by a previous receive.
1394 */
1395 answer->nlmsghdr->nlmsg_len = answer_len;
1396
1397 /* Get the (next) batch of reply messages */
1398 err = netlink_rcv(&nlh, answer);
1399 if (err < 0)
1400 goto out;
1401
1402 recv_len = err;
1403
1404 /* Satisfy the typing for the netlink macros */
1405 msg = answer->nlmsghdr;
1406
1407 while (NLMSG_OK(msg, recv_len)) {
1408
1409 /* Stop reading if we see an error message */
1410 if (msg->nlmsg_type == NLMSG_ERROR) {
1411 struct nlmsgerr *errmsg =
1412 (struct nlmsgerr *)NLMSG_DATA(msg);
1413 err = errmsg->error;
1414 goto out;
1415 }
1416
1417 /* Stop reading if we see a NLMSG_DONE message */
1418 if (msg->nlmsg_type == NLMSG_DONE) {
1419 readmore = 0;
1420 break;
1421 }
1422
1423 ifi = NLMSG_DATA(msg);
1424 if (ifi->ifi_index == ifindex) {
1425 struct rtattr *rta = IFLA_RTA(ifi);
1426 int attr_len =
1427 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1428 res = 0;
1429 while (RTA_OK(rta, attr_len)) {
1430 /* Found a local address for the
1431 * requested interface, return it.
1432 */
1433 if (rta->rta_type == IFLA_MTU) {
1434 memcpy(&res, RTA_DATA(rta),
1435 sizeof(int));
1436 err = res;
1437 goto out;
1438 }
1439 rta = RTA_NEXT(rta, attr_len);
1440 }
1441 }
1442
1443 /* Keep reading more data from the socket if the last
1444 * message had the NLF_F_MULTI flag set.
1445 */
1446 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1447
1448 /* Look at the next message received in this buffer. */
1449 msg = NLMSG_NEXT(msg, recv_len);
1450 }
1451 } while (readmore);
1452
1453 #pragma GCC diagnostic pop
1454
1455 /* If we end up here, we didn't find any result, so signal an error. */
1456 err = -1;
1457
1458 out:
1459 netlink_close(&nlh);
1460 nlmsg_free(answer);
1461 nlmsg_free(nlmsg);
1462 return err;
1463 }
1464
1465 int lxc_netdev_set_mtu(const char *name, int mtu)
1466 {
1467 int err, index, len;
1468 struct ifinfomsg *ifi;
1469 struct nl_handler nlh;
1470 struct nlmsg *answer = NULL, *nlmsg = NULL;
1471
1472 err = netlink_open(&nlh, NETLINK_ROUTE);
1473 if (err)
1474 return err;
1475
1476 err = -EINVAL;
1477 len = strlen(name);
1478 if (len == 1 || len >= IFNAMSIZ)
1479 goto out;
1480
1481 err = -ENOMEM;
1482 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1483 if (!nlmsg)
1484 goto out;
1485
1486 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1487 if (!answer)
1488 goto out;
1489
1490 err = -EINVAL;
1491 index = if_nametoindex(name);
1492 if (!index)
1493 goto out;
1494
1495 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1496 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1497
1498 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1499 if (!ifi) {
1500 err = -ENOMEM;
1501 goto out;
1502 }
1503 ifi->ifi_family = AF_UNSPEC;
1504 ifi->ifi_index = index;
1505
1506 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1507 goto out;
1508
1509 err = netlink_transaction(&nlh, nlmsg, answer);
1510 out:
1511 netlink_close(&nlh);
1512 nlmsg_free(nlmsg);
1513 nlmsg_free(answer);
1514 return err;
1515 }
1516
1517 int lxc_netdev_up(const char *name)
1518 {
1519 return netdev_set_flag(name, IFF_UP);
1520 }
1521
/* Bring the interface @name down by requesting an empty flag set; returns
 * the result of netdev_set_flag().
 */
int lxc_netdev_down(const char *name)
{
	const int no_flags = 0;

	return netdev_set_flag(name, no_flags);
}
1526
1527 int lxc_veth_create(const char *name1, const char *name2)
1528 {
1529 int err, len;
1530 struct ifinfomsg *ifi;
1531 struct nl_handler nlh;
1532 struct rtattr *nest1, *nest2, *nest3;
1533 struct nlmsg *answer = NULL, *nlmsg = NULL;
1534
1535 err = netlink_open(&nlh, NETLINK_ROUTE);
1536 if (err)
1537 return err;
1538
1539 err = -EINVAL;
1540 len = strlen(name1);
1541 if (len == 1 || len >= IFNAMSIZ)
1542 goto out;
1543
1544 len = strlen(name2);
1545 if (len == 1 || len >= IFNAMSIZ)
1546 goto out;
1547
1548 err = -ENOMEM;
1549 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1550 if (!nlmsg)
1551 goto out;
1552
1553 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1554 if (!answer)
1555 goto out;
1556
1557 nlmsg->nlmsghdr->nlmsg_flags =
1558 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1559 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1560
1561 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1562 if (!ifi)
1563 goto out;
1564 ifi->ifi_family = AF_UNSPEC;
1565
1566 err = -EINVAL;
1567 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1568 if (!nest1)
1569 goto out;
1570
1571 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1572 goto out;
1573
1574 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1575 if (!nest2)
1576 goto out;
1577
1578 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1579 if (!nest3)
1580 goto out;
1581
1582 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1583 if (!ifi) {
1584 err = -ENOMEM;
1585 goto out;
1586 }
1587
1588 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1589 goto out;
1590
1591 nla_end_nested(nlmsg, nest3);
1592 nla_end_nested(nlmsg, nest2);
1593 nla_end_nested(nlmsg, nest1);
1594
1595 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1596 goto out;
1597
1598 err = netlink_transaction(&nlh, nlmsg, answer);
1599 out:
1600 netlink_close(&nlh);
1601 nlmsg_free(answer);
1602 nlmsg_free(nlmsg);
1603 return err;
1604 }
1605
1606 /* TODO: merge with lxc_macvlan_create */
1607 int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
1608 {
1609 int err, len, lindex;
1610 struct ifinfomsg *ifi;
1611 struct nl_handler nlh;
1612 struct rtattr *nest, *nest2;
1613 struct nlmsg *answer = NULL, *nlmsg = NULL;
1614
1615 err = netlink_open(&nlh, NETLINK_ROUTE);
1616 if (err)
1617 return err;
1618
1619 err = -EINVAL;
1620 len = strlen(master);
1621 if (len == 1 || len >= IFNAMSIZ)
1622 goto err3;
1623
1624 len = strlen(name);
1625 if (len == 1 || len >= IFNAMSIZ)
1626 goto err3;
1627
1628 err = -ENOMEM;
1629 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1630 if (!nlmsg)
1631 goto err3;
1632
1633 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1634 if (!answer)
1635 goto err2;
1636
1637 err = -EINVAL;
1638 lindex = if_nametoindex(master);
1639 if (!lindex)
1640 goto err1;
1641
1642 nlmsg->nlmsghdr->nlmsg_flags =
1643 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1644 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1645
1646 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1647 if (!ifi) {
1648 err = -ENOMEM;
1649 goto err1;
1650 }
1651 ifi->ifi_family = AF_UNSPEC;
1652
1653 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1654 if (!nest)
1655 goto err1;
1656
1657 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1658 goto err1;
1659
1660 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1661 if (!nest2)
1662 goto err1;
1663
1664 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1665 goto err1;
1666
1667 nla_end_nested(nlmsg, nest2);
1668 nla_end_nested(nlmsg, nest);
1669
1670 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1671 goto err1;
1672
1673 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1674 goto err1;
1675
1676 err = netlink_transaction(&nlh, nlmsg, answer);
1677 err1:
1678 nlmsg_free(answer);
1679 err2:
1680 nlmsg_free(nlmsg);
1681 err3:
1682 netlink_close(&nlh);
1683 return err;
1684 }
1685
1686 int lxc_macvlan_create(const char *master, const char *name, int mode)
1687 {
1688 int err, index, len;
1689 struct ifinfomsg *ifi;
1690 struct nl_handler nlh;
1691 struct rtattr *nest, *nest2;
1692 struct nlmsg *answer = NULL, *nlmsg = NULL;
1693
1694 err = netlink_open(&nlh, NETLINK_ROUTE);
1695 if (err)
1696 return err;
1697
1698 err = -EINVAL;
1699 len = strlen(master);
1700 if (len == 1 || len >= IFNAMSIZ)
1701 goto out;
1702
1703 len = strlen(name);
1704 if (len == 1 || len >= IFNAMSIZ)
1705 goto out;
1706
1707 err = -ENOMEM;
1708 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1709 if (!nlmsg)
1710 goto out;
1711
1712 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1713 if (!answer)
1714 goto out;
1715
1716 err = -EINVAL;
1717 index = if_nametoindex(master);
1718 if (!index)
1719 goto out;
1720
1721 nlmsg->nlmsghdr->nlmsg_flags =
1722 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1723 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1724
1725 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1726 if (!ifi) {
1727 err = -ENOMEM;
1728 goto out;
1729 }
1730 ifi->ifi_family = AF_UNSPEC;
1731
1732 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1733 if (!nest)
1734 goto out;
1735
1736 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1737 goto out;
1738
1739 if (mode) {
1740 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1741 if (!nest2)
1742 goto out;
1743
1744 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1745 goto out;
1746
1747 nla_end_nested(nlmsg, nest2);
1748 }
1749
1750 nla_end_nested(nlmsg, nest);
1751
1752 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1753 goto out;
1754
1755 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1756 goto out;
1757
1758 err = netlink_transaction(&nlh, nlmsg, answer);
1759 out:
1760 netlink_close(&nlh);
1761 nlmsg_free(answer);
1762 nlmsg_free(nlmsg);
1763 return err;
1764 }
1765
/* Write the string @value to the file at @path.
 *
 * Returns 0 on success, or -errno if the file cannot be opened for writing
 * or lxc_write_nointr() reports a failure.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	/* Capture errno before close() gets a chance to change it. */
	ret = lxc_write_nointr(fd, value, strlen(value)) < 0 ? -errno : 0;

	close(fd);
	return ret;
}
1781
1782 static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
1783 {
1784 int ret;
1785 char path[PATH_MAX];
1786 char buf[1] = "";
1787
1788 if (family != AF_INET && family != AF_INET6)
1789 return minus_one_set_errno(EINVAL);
1790
1791 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1792 family == AF_INET ? "ipv4" : "ipv6", ifname,
1793 "forwarding");
1794 if (ret < 0 || (size_t)ret >= PATH_MAX)
1795 return minus_one_set_errno(E2BIG);
1796
1797 return lxc_read_file_expect(path, buf, 1, "1");
1798 }
1799
1800 static int neigh_proxy_set(const char *ifname, int family, int flag)
1801 {
1802 int ret;
1803 char path[PATH_MAX];
1804
1805 if (family != AF_INET && family != AF_INET6)
1806 return -EINVAL;
1807
1808 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1809 family == AF_INET ? "ipv4" : "ipv6", ifname,
1810 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1811 if (ret < 0 || (size_t)ret >= PATH_MAX)
1812 return -E2BIG;
1813
1814 return proc_sys_net_write(path, flag ? "1" : "0");
1815 }
1816
1817 static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1818 {
1819 int ret;
1820 char path[PATH_MAX];
1821 char buf[1] = "";
1822
1823 if (family != AF_INET && family != AF_INET6)
1824 return minus_one_set_errno(EINVAL);
1825
1826 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1827 family == AF_INET ? "ipv4" : "ipv6", ifname,
1828 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1829 if (ret < 0 || (size_t)ret >= PATH_MAX)
1830 return minus_one_set_errno(E2BIG);
1831
1832 return lxc_read_file_expect(path, buf, 1, "1");
1833 }
1834
/* Enable neighbour proxying on @name for @family; returns the result of
 * neigh_proxy_set().
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1839
/* Disable neighbour proxying on @name for @family; returns the result of
 * neigh_proxy_set().
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int disable = 0;

	return neigh_proxy_set(name, family, disable);
}
1844
/* Parse the textual MAC address @macaddr into @sockaddr.
 *
 * @sockaddr->sa_family is set to ARPHRD_ETHER and the parsed octets are
 * written to @sockaddr->sa_data. Each octet consists of one or two
 * hexadecimal digits (upper or lower case); octets are separated by ':'.
 * Parsing stops at the end of the string or after ETH_ALEN octets, whichever
 * comes first, so a shorter string fills fewer than ETH_ALEN bytes.
 *
 * Returns 0 on success and -EINVAL as soon as an unexpected character is
 * encountered.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	int i = 0;
	unsigned val;
	char c;
	unsigned char *data;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		/* High nibble: a hex digit is mandatory here.
		 *
		 * The <ctype.h> functions require a value representable as
		 * unsigned char, so cast before calling isdigit() to avoid
		 * undefined behaviour on bytes >= 0x80 where char is signed.
		 */
		c = *macaddr++;
		if (isdigit((unsigned char)c))
			val = c - '0';
		else if (c >= 'a' && c <= 'f')
			val = c - 'a' + 10;
		else if (c >= 'A' && c <= 'F')
			val = c - 'A' + 10;
		else
			return -EINVAL;

		/* Low nibble: optional. When the octet has a single digit the
		 * shift is undone so that digit becomes the whole value.
		 */
		val <<= 4;
		c = *macaddr;
		if (isdigit((unsigned char)c))
			val |= c - '0';
		else if (c >= 'a' && c <= 'f')
			val |= c - 'a' + 10;
		else if (c >= 'A' && c <= 'F')
			val |= c - 'A' + 10;
		else if (c == ':' || c == 0)
			val >>= 4;
		else
			return -EINVAL;

		/* Never step past the terminating NUL byte. */
		if (c != 0)
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator that follows a two-digit octet. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1889
1890 static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1891 void *acast, int prefix)
1892 {
1893 int addrlen, err;
1894 struct ifaddrmsg *ifa;
1895 struct nl_handler nlh;
1896 struct nlmsg *answer = NULL, *nlmsg = NULL;
1897
1898 addrlen = family == AF_INET ? sizeof(struct in_addr)
1899 : sizeof(struct in6_addr);
1900
1901 err = netlink_open(&nlh, NETLINK_ROUTE);
1902 if (err)
1903 return err;
1904
1905 err = -ENOMEM;
1906 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1907 if (!nlmsg)
1908 goto out;
1909
1910 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1911 if (!answer)
1912 goto out;
1913
1914 nlmsg->nlmsghdr->nlmsg_flags =
1915 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1916 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1917
1918 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
1919 if (!ifa)
1920 goto out;
1921 ifa->ifa_prefixlen = prefix;
1922 ifa->ifa_index = ifindex;
1923 ifa->ifa_family = family;
1924 ifa->ifa_scope = 0;
1925
1926 err = -EINVAL;
1927 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
1928 goto out;
1929
1930 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
1931 goto out;
1932
1933 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1934 goto out;
1935
1936 /* TODO: multicast, anycast with ipv6 */
1937 err = -EPROTONOSUPPORT;
1938 if (family == AF_INET6 &&
1939 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1940 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1941 goto out;
1942
1943 err = netlink_transaction(&nlh, nlmsg, answer);
1944 out:
1945 netlink_close(&nlh);
1946 nlmsg_free(answer);
1947 nlmsg_free(nlmsg);
1948 return err;
1949 }
1950
/* Add the IPv6 address @addr/@prefix to the interface with index @ifindex.
 * @mcast is handed to ip_addr_add() in the broadcast position and @acast in
 * the anycast position; the result of ip_addr_add() is returned.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	const int family = AF_INET6;

	return ip_addr_add(family, ifindex, addr, mcast, acast, prefix);
}
1957
/* Add the IPv4 address @addr/@prefix with broadcast address @bcast to the
 * interface with index @ifindex. No anycast address is passed on; the
 * result of ip_addr_add() is returned.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	const int family = AF_INET;

	return ip_addr_add(family, ifindex, addr, bcast, NULL, prefix);
}
1963
/* Extract an address from the RTM_NEWADDR message @msg.
 *
 * An IFA_LOCAL attribute is preferred; an IFA_ADDRESS attribute is used as
 * long as no IFA_LOCAL has been seen. If *@res is NULL a buffer for the
 * address is allocated with malloc() and stored in *@res (so @res should be
 * an in_addr** or in6_addr**); a buffer that is already present is
 * overwritten in place.
 *
 * Returns 0 when the message was processed, including the cases where it
 * belongs to another address family or carries no address attribute (*@res
 * is left untouched then), and -1 if an address attribute has an unexpected
 * size or the allocation fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *addrmsg = NLMSG_DATA(msg);
	struct rtattr *attr;
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	if (addrmsg->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Visit every rtattr contained in this message. */
	for (attr = IFA_RTA(addrmsg); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		int is_local = attr->rta_type == IFA_LOCAL;

		if (!is_local && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the right
		 * address size; verify it nevertheless.
		 */
		if (RTA_PAYLOAD(attr) != addrlen)
			return -1;

		/* An earlier IFA_ADDRESS may have allocated the buffer; in
		 * that case it is simply overwritten.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), addrlen);

		/* IFA_LOCAL is the preferred answer, stop looking. */
		if (is_local)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2017
2018 static int ip_addr_get(int family, int ifindex, void **res)
2019 {
2020 int answer_len, err;
2021 struct ifaddrmsg *ifa;
2022 struct nl_handler nlh;
2023 struct nlmsghdr *msg;
2024 int readmore = 0, recv_len = 0;
2025 struct nlmsg *answer = NULL, *nlmsg = NULL;
2026
2027 err = netlink_open(&nlh, NETLINK_ROUTE);
2028 if (err)
2029 return err;
2030
2031 err = -ENOMEM;
2032 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2033 if (!nlmsg)
2034 goto out;
2035
2036 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2037 if (!answer)
2038 goto out;
2039
2040 /* Save the answer buffer length, since it will be overwritten on the
2041 * first receive (and we might need to receive more than once).
2042 */
2043 answer_len = answer->nlmsghdr->nlmsg_len;
2044
2045 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
2046 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
2047
2048 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
2049 if (!ifa)
2050 goto out;
2051 ifa->ifa_family = family;
2052
2053 /* Send the request for addresses, which returns all addresses on all
2054 * interfaces.
2055 */
2056 err = netlink_send(&nlh, nlmsg);
2057 if (err < 0)
2058 goto out;
2059
2060 #pragma GCC diagnostic push
2061 #pragma GCC diagnostic ignored "-Wcast-align"
2062
2063 do {
2064 /* Restore the answer buffer length, it might have been
2065 * overwritten by a previous receive.
2066 */
2067 answer->nlmsghdr->nlmsg_len = answer_len;
2068
2069 /* Get the (next) batch of reply messages. */
2070 err = netlink_rcv(&nlh, answer);
2071 if (err < 0)
2072 goto out;
2073
2074 recv_len = err;
2075 err = 0;
2076
2077 /* Satisfy the typing for the netlink macros. */
2078 msg = answer->nlmsghdr;
2079
2080 while (NLMSG_OK(msg, recv_len)) {
2081 /* Stop reading if we see an error message. */
2082 if (msg->nlmsg_type == NLMSG_ERROR) {
2083 struct nlmsgerr *errmsg =
2084 (struct nlmsgerr *)NLMSG_DATA(msg);
2085 err = errmsg->error;
2086 goto out;
2087 }
2088
2089 /* Stop reading if we see a NLMSG_DONE message. */
2090 if (msg->nlmsg_type == NLMSG_DONE) {
2091 readmore = 0;
2092 break;
2093 }
2094
2095 if (msg->nlmsg_type != RTM_NEWADDR) {
2096 err = -1;
2097 goto out;
2098 }
2099
2100 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2101 if (ifa->ifa_index == ifindex) {
2102 if (ifa_get_local_ip(family, msg, res) < 0) {
2103 err = -1;
2104 goto out;
2105 }
2106
2107 /* Found a result, stop searching. */
2108 if (*res)
2109 goto out;
2110 }
2111
2112 /* Keep reading more data from the socket if the last
2113 * message had the NLF_F_MULTI flag set.
2114 */
2115 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2116
2117 /* Look at the next message received in this buffer. */
2118 msg = NLMSG_NEXT(msg, recv_len);
2119 }
2120 } while (readmore);
2121
2122 #pragma GCC diagnostic pop
2123
2124 /* If we end up here, we didn't find any result, so signal an
2125 * error.
2126 */
2127 err = -1;
2128
2129 out:
2130 netlink_close(&nlh);
2131 nlmsg_free(answer);
2132 nlmsg_free(nlmsg);
2133 return err;
2134 }
2135
/* Look up an IPv6 address of the interface with index @ifindex and store it
 * in *@res; returns the result of ip_addr_get().
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **slot = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, slot);
}
2140
/* Look up an IPv4 address of the interface with index @ifindex and store it
 * in *@res; returns the result of ip_addr_get().
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **slot = (void **)res;

	return ip_addr_get(AF_INET, ifindex, slot);
}
2145
2146 static int ip_gateway_add(int family, int ifindex, void *gw)
2147 {
2148 int addrlen, err;
2149 struct nl_handler nlh;
2150 struct rtmsg *rt;
2151 struct nlmsg *answer = NULL, *nlmsg = NULL;
2152
2153 addrlen = family == AF_INET ? sizeof(struct in_addr)
2154 : sizeof(struct in6_addr);
2155
2156 err = netlink_open(&nlh, NETLINK_ROUTE);
2157 if (err)
2158 return err;
2159
2160 err = -ENOMEM;
2161 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2162 if (!nlmsg)
2163 goto out;
2164
2165 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2166 if (!answer)
2167 goto out;
2168
2169 nlmsg->nlmsghdr->nlmsg_flags =
2170 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2171 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2172
2173 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
2174 if (!rt)
2175 goto out;
2176 rt->rtm_family = family;
2177 rt->rtm_table = RT_TABLE_MAIN;
2178 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2179 rt->rtm_protocol = RTPROT_BOOT;
2180 rt->rtm_type = RTN_UNICAST;
2181 /* "default" destination */
2182 rt->rtm_dst_len = 0;
2183
2184 err = -EINVAL;
2185
2186 /* If gateway address not supplied, then a device route will be created instead */
2187 if (gw != NULL) {
2188 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2189 goto out;
2190 }
2191
2192 /* Adding the interface index enables the use of link-local
2193 * addresses for the gateway.
2194 */
2195 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2196 goto out;
2197
2198 err = netlink_transaction(&nlh, nlmsg, answer);
2199 out:
2200 netlink_close(&nlh);
2201 nlmsg_free(answer);
2202 nlmsg_free(nlmsg);
2203 return err;
2204 }
2205
/* Add an IPv4 default route via @gw on the interface with index @ifindex;
 * returns the result of ip_gateway_add().
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	const int family = AF_INET;

	return ip_gateway_add(family, ifindex, gw);
}
2210
/* Add an IPv6 default route via @gw on the interface with index @ifindex;
 * returns the result of ip_gateway_add().
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	const int family = AF_INET6;

	return ip_gateway_add(family, ifindex, gw);
}

/* Guess whether @bridge is an openvswitch bridge.
 *
 * Returns true only when stat() on "/sys/class/net/<bridge>/bridge" fails
 * with ENOENT. Returns false if that path exists, if stat() fails for any
 * other reason, or if the path does not fit into the local buffer. Note that
 * ENOENT is also what a name without any matching sysfs entry produces.
 */
bool is_ovs_bridge(const char *bridge)
{
	/* 22 == strlen("/sys/class/net/") + strlen("/bridge") */
	char brdirname[22 + IFNAMSIZ + 1] = {0};
	struct stat sb;
	int len;

	len = snprintf(brdirname, sizeof(brdirname), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(brdirname))
		return false;

	if (stat(brdirname, &sb) >= 0)
		return false;

	return errno == ENOENT;
}
2232
/* Arguments handed to the ovs-vsctl exec helpers in this file. */
struct ovs_veth_args {
	/* Name of the openvswitch bridge to operate on. */
	const char *bridge;
	/* Name of the port (network interface) to add or delete. */
	const char *nic;
};
2237
2238 /* Called from a background thread - when nic goes away, remove it from the
2239 * bridge.
2240 */
2241 static int lxc_ovs_delete_port_exec(void *data)
2242 {
2243 struct ovs_veth_args *args = data;
2244
2245 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2246 (char *)NULL);
2247 return -1;
2248 }
2249
2250 int lxc_ovs_delete_port(const char *bridge, const char *nic)
2251 {
2252 int ret;
2253 char cmd_output[PATH_MAX];
2254 struct ovs_veth_args args;
2255
2256 args.bridge = bridge;
2257 args.nic = nic;
2258 ret = run_command(cmd_output, sizeof(cmd_output),
2259 lxc_ovs_delete_port_exec, (void *)&args);
2260 if (ret < 0) {
2261 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2262 "%s", bridge, nic, cmd_output);
2263 return -1;
2264 }
2265
2266 return 0;
2267 }
2268
2269 static int lxc_ovs_attach_bridge_exec(void *data)
2270 {
2271 struct ovs_veth_args *args = data;
2272
2273 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2274 (char *)NULL);
2275 return -1;
2276 }
2277
2278 static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2279 {
2280 int ret;
2281 char cmd_output[PATH_MAX];
2282 struct ovs_veth_args args;
2283
2284 args.bridge = bridge;
2285 args.nic = nic;
2286 ret = run_command(cmd_output, sizeof(cmd_output),
2287 lxc_ovs_attach_bridge_exec, (void *)&args);
2288 if (ret < 0) {
2289 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2290 bridge, nic, cmd_output);
2291 return -1;
2292 }
2293
2294 return 0;
2295 }
2296
2297 int lxc_bridge_attach(const char *bridge, const char *ifname)
2298 {
2299 int err, fd, index;
2300 size_t retlen;
2301 struct ifreq ifr;
2302
2303 if (strlen(ifname) >= IFNAMSIZ)
2304 return -EINVAL;
2305
2306 index = if_nametoindex(ifname);
2307 if (!index)
2308 return -EINVAL;
2309
2310 if (is_ovs_bridge(bridge))
2311 return lxc_ovs_attach_bridge(bridge, ifname);
2312
2313 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
2314 if (fd < 0)
2315 return -errno;
2316
2317 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
2318 if (retlen >= IFNAMSIZ) {
2319 close(fd);
2320 return -E2BIG;
2321 }
2322
2323 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2324 ifr.ifr_ifindex = index;
2325 err = ioctl(fd, SIOCBRADDIF, &ifr);
2326 close(fd);
2327 if (err)
2328 err = -errno;
2329
2330 return err;
2331 }
2332
2333 static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
2334 [LXC_NET_EMPTY] = "empty",
2335 [LXC_NET_VETH] = "veth",
2336 [LXC_NET_MACVLAN] = "macvlan",
2337 [LXC_NET_IPVLAN] = "ipvlan",
2338 [LXC_NET_PHYS] = "phys",
2339 [LXC_NET_VLAN] = "vlan",
2340 [LXC_NET_NONE] = "none",
2341 };
2342
2343 const char *lxc_net_type_to_str(int type)
2344 {
2345 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2346 return NULL;
2347
2348 return lxc_network_types[type];
2349 }
2350
/* Characters used to replace 'X' placeholders in interface name templates. */
static const char padchar[] = "0123456789"
			      "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2352
2353 char *lxc_mkifname(char *template)
2354 {
2355 int ret;
2356 struct netns_ifaddrs *ifa, *ifaddr;
2357 char name[IFNAMSIZ];
2358 bool exists = false;
2359 size_t i = 0;
2360 #ifdef HAVE_RAND_R
2361 unsigned int seed;
2362
2363 seed = randseed(false);
2364 #else
2365
2366 (void)randseed(true);
2367 #endif
2368
2369 if (strlen(template) >= IFNAMSIZ)
2370 return NULL;
2371
2372 /* Get all the network interfaces. */
2373 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2374 if (ret < 0) {
2375 SYSERROR("Failed to get network interfaces");
2376 return NULL;
2377 }
2378
2379 /* Generate random names until we find one that doesn't exist. */
2380 for (;;) {
2381 name[0] = '\0';
2382 (void)strlcpy(name, template, IFNAMSIZ);
2383
2384 exists = false;
2385
2386 for (i = 0; i < strlen(name); i++) {
2387 if (name[i] == 'X') {
2388 #ifdef HAVE_RAND_R
2389 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
2390 #else
2391 name[i] = padchar[rand() % strlen(padchar)];
2392 #endif
2393 }
2394 }
2395
2396 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
2397 if (!strcmp(ifa->ifa_name, name)) {
2398 exists = true;
2399 break;
2400 }
2401 }
2402
2403 if (!exists)
2404 break;
2405 }
2406
2407 netns_freeifaddrs(ifaddr);
2408 (void)strlcpy(template, name, strlen(template) + 1);
2409
2410 return template;
2411 }
2412
/* Overwrite the first byte of the hardware address of the interface @veth1
 * with 0xfe.
 *
 * The current address is read with SIOCGIFHWADDR, its first byte is replaced
 * and the result is written back with SIOCSIFHWADDR.
 *
 * Returns 0 on success, -E2BIG if @veth1 does not fit into an interface
 * name, and -errno if the socket cannot be created or an ioctl fails.
 */
int setup_private_host_hw_addr(char *veth1)
{
	int ret, sockfd;
	struct ifreq ifr;

	/* Validate the name first so that the error path has no socket to
	 * release.
	 */
	ret = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (ret < 0 || (size_t)ret >= IFNAMSIZ)
		return -E2BIG;

	sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sockfd < 0)
		return -errno;

	ret = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (ret < 0) {
		/* Pick up errno before close(), which may overwrite it. */
		ret = -errno;
		goto out;
	}

	ifr.ifr_hwaddr.sa_data[0] = 0xfe;
	ret = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	if (ret < 0)
		ret = -errno;
	else
		ret = 0;

out:
	close(sockfd);
	return ret;
}
2442
2443 int lxc_find_gateway_addresses(struct lxc_handler *handler)
2444 {
2445 struct lxc_list *network = &handler->conf->network;
2446 struct lxc_list *iterator;
2447 struct lxc_netdev *netdev;
2448 int link_index;
2449
2450 lxc_list_for_each(iterator, network) {
2451 netdev = iterator->elem;
2452
2453 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2454 continue;
2455
2456 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2457 ERROR("Automatic gateway detection is only supported "
2458 "for veth and macvlan");
2459 return -1;
2460 }
2461
2462 if (netdev->link[0] == '\0') {
2463 ERROR("Automatic gateway detection needs a link interface");
2464 return -1;
2465 }
2466
2467 link_index = if_nametoindex(netdev->link);
2468 if (!link_index)
2469 return -EINVAL;
2470
2471 if (netdev->ipv4_gateway_auto) {
2472 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2473 ERROR("Failed to automatically find ipv4 gateway "
2474 "address from link interface \"%s\"", netdev->link);
2475 return -1;
2476 }
2477 }
2478
2479 if (netdev->ipv6_gateway_auto) {
2480 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2481 ERROR("Failed to automatically find ipv6 gateway "
2482 "address from link interface \"%s\"", netdev->link);
2483 return -1;
2484 }
2485 }
2486 }
2487
2488 return 0;
2489 }
2490
2491 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2492 static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
2493 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
2494 {
2495 int ret;
2496 pid_t child;
2497 int bytes, pipefd[2];
2498 char *token, *saveptr = NULL;
2499 char netdev_link[IFNAMSIZ];
2500 char buffer[PATH_MAX] = {0};
2501 size_t retlen;
2502
2503 if (netdev->type != LXC_NET_VETH) {
2504 ERROR("Network type %d not support for unprivileged use", netdev->type);
2505 return -1;
2506 }
2507
2508 ret = pipe(pipefd);
2509 if (ret < 0) {
2510 SYSERROR("Failed to create pipe");
2511 return -1;
2512 }
2513
2514 child = fork();
2515 if (child < 0) {
2516 SYSERROR("Failed to create new process");
2517 close(pipefd[0]);
2518 close(pipefd[1]);
2519 return -1;
2520 }
2521
2522 if (child == 0) {
2523 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
2524
2525 close(pipefd[0]);
2526
2527 ret = dup2(pipefd[1], STDOUT_FILENO);
2528 if (ret >= 0)
2529 ret = dup2(pipefd[1], STDERR_FILENO);
2530 close(pipefd[1]);
2531 if (ret < 0) {
2532 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2533 _exit(EXIT_FAILURE);
2534 }
2535
2536 if (netdev->link[0] != '\0')
2537 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
2538 else
2539 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2540 if (retlen >= IFNAMSIZ) {
2541 SYSERROR("Invalid network device name");
2542 _exit(EXIT_FAILURE);
2543 }
2544
2545 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2546 if (ret < 0 || ret >= sizeof(pidstr))
2547 _exit(EXIT_FAILURE);
2548 pidstr[sizeof(pidstr) - 1] = '\0';
2549
2550 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2551 lxcname, pidstr, netdev_link,
2552 netdev->name[0] != '\0' ? netdev->name : "(null)");
2553 if (netdev->name[0] != '\0')
2554 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2555 lxcpath, lxcname, pidstr, "veth", netdev_link,
2556 netdev->name, (char *)NULL);
2557 else
2558 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2559 lxcpath, lxcname, pidstr, "veth", netdev_link,
2560 (char *)NULL);
2561 SYSERROR("Failed to execute lxc-user-nic");
2562 _exit(EXIT_FAILURE);
2563 }
2564
2565 /* close the write-end of the pipe */
2566 close(pipefd[1]);
2567
2568 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
2569 if (bytes < 0) {
2570 SYSERROR("Failed to read from pipe file descriptor");
2571 close(pipefd[0]);
2572 } else {
2573 buffer[bytes - 1] = '\0';
2574 }
2575
2576 ret = wait_for_pid(child);
2577 close(pipefd[0]);
2578 if (ret != 0 || bytes < 0) {
2579 ERROR("lxc-user-nic failed to configure requested network: %s",
2580 buffer[0] != '\0' ? buffer : "(null)");
2581 return -1;
2582 }
2583 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2584
2585 /* netdev->name */
2586 token = strtok_r(buffer, ":", &saveptr);
2587 if (!token) {
2588 ERROR("Failed to parse lxc-user-nic output");
2589 return -1;
2590 }
2591
2592 /*
2593 * lxc-user-nic will take care of proper network device naming. So
2594 * netdev->name and netdev->created_name need to be identical to not
2595 * trigger another rename later on.
2596 */
2597 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2598 if (retlen < IFNAMSIZ)
2599 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2600 if (retlen >= IFNAMSIZ) {
2601 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2602 return -E2BIG;
2603 }
2604
2605 /* netdev->ifindex */
2606 token = strtok_r(NULL, ":", &saveptr);
2607 if (!token) {
2608 ERROR("Failed to parse lxc-user-nic output");
2609 return -1;
2610 }
2611
2612 ret = lxc_safe_int(token, &netdev->ifindex);
2613 if (ret < 0) {
2614 errno = -ret;
2615 SYSERROR("Failed to convert string \"%s\" to integer", token);
2616 return -1;
2617 }
2618
2619 /* netdev->priv.veth_attr.veth1 */
2620 token = strtok_r(NULL, ":", &saveptr);
2621 if (!token) {
2622 ERROR("Failed to parse lxc-user-nic output");
2623 return -1;
2624 }
2625
2626 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2627 if (retlen >= IFNAMSIZ) {
2628 ERROR("Host side veth device name returned by lxc-user-nic is "
2629 "too long");
2630 return -E2BIG;
2631 }
2632
2633 /* netdev->priv.veth_attr.ifindex */
2634 token = strtok_r(NULL, ":", &saveptr);
2635 if (!token) {
2636 ERROR("Failed to parse lxc-user-nic output");
2637 return -1;
2638 }
2639
2640 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
2641 if (ret < 0) {
2642 errno = -ret;
2643 SYSERROR("Failed to convert string \"%s\" to integer", token);
2644 return -1;
2645 }
2646
2647 if (netdev->upscript) {
2648 char *argv[] = {
2649 "veth",
2650 netdev->link,
2651 netdev->priv.veth_attr.veth1,
2652 NULL,
2653 };
2654
2655 ret = run_script_argv(lxcname, hooks_version, "net",
2656 netdev->upscript, "up", argv);
2657 if (ret < 0)
2658 return -1;
2659 }
2660
2661 return 0;
2662 }
2663
2664 static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
2665 struct lxc_netdev *netdev,
2666 const char *netns_path)
2667 {
2668 int bytes, ret;
2669 pid_t child;
2670 int pipefd[2];
2671 char buffer[PATH_MAX] = {0};
2672
2673 if (netdev->type != LXC_NET_VETH) {
2674 ERROR("Network type %d not support for unprivileged use", netdev->type);
2675 return -1;
2676 }
2677
2678 ret = pipe(pipefd);
2679 if (ret < 0) {
2680 SYSERROR("Failed to create pipe");
2681 return -1;
2682 }
2683
2684 child = fork();
2685 if (child < 0) {
2686 SYSERROR("Failed to create new process");
2687 close(pipefd[0]);
2688 close(pipefd[1]);
2689 return -1;
2690 }
2691
2692 if (child == 0) {
2693 char *hostveth;
2694
2695 close(pipefd[0]);
2696
2697 ret = dup2(pipefd[1], STDOUT_FILENO);
2698 if (ret >= 0)
2699 ret = dup2(pipefd[1], STDERR_FILENO);
2700 close(pipefd[1]);
2701 if (ret < 0) {
2702 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2703 _exit(EXIT_FAILURE);
2704 }
2705
2706 if (netdev->priv.veth_attr.pair[0] != '\0')
2707 hostveth = netdev->priv.veth_attr.pair;
2708 else
2709 hostveth = netdev->priv.veth_attr.veth1;
2710 if (hostveth[0] == '\0') {
2711 SYSERROR("Host side veth device name is missing");
2712 _exit(EXIT_FAILURE);
2713 }
2714
2715 if (netdev->link[0] == '\0') {
2716 SYSERROR("Network link for network device \"%s\" is "
2717 "missing", netdev->priv.veth_attr.veth1);
2718 _exit(EXIT_FAILURE);
2719 }
2720
2721 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
2722 lxcname, netns_path, netdev->link, hostveth);
2723 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
2724 lxcname, netns_path, "veth", netdev->link, hostveth,
2725 (char *)NULL);
2726 SYSERROR("Failed to exec lxc-user-nic.");
2727 _exit(EXIT_FAILURE);
2728 }
2729
2730 close(pipefd[1]);
2731
2732 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
2733 if (bytes < 0) {
2734 SYSERROR("Failed to read from pipe file descriptor.");
2735 close(pipefd[0]);
2736 } else {
2737 buffer[bytes - 1] = '\0';
2738 }
2739
2740 ret = wait_for_pid(child);
2741 close(pipefd[0]);
2742 if (ret != 0 || bytes < 0) {
2743 ERROR("lxc-user-nic failed to delete requested network: %s",
2744 buffer[0] != '\0' ? buffer : "(null)");
2745 return -1;
2746 }
2747
2748 return 0;
2749 }
2750
2751 bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2752 {
2753 int ret;
2754 struct lxc_list *iterator;
2755 struct lxc_list *network = &handler->conf->network;
2756 /* strlen("/proc/") = 6
2757 * +
2758 * INTTYPE_TO_STRLEN(pid_t)
2759 * +
2760 * strlen("/fd/") = 4
2761 * +
2762 * INTTYPE_TO_STRLEN(int)
2763 * +
2764 * \0
2765 */
2766 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
2767
2768 *netns_path = '\0';
2769
2770 if (handler->nsfd[LXC_NS_NET] < 0) {
2771 DEBUG("Cannot not guarantee safe deletion of network devices. "
2772 "Manual cleanup maybe needed");
2773 return false;
2774 }
2775
2776 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
2777 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
2778 if (ret < 0 || ret >= sizeof(netns_path))
2779 return false;
2780
2781 lxc_list_for_each(iterator, network) {
2782 char *hostveth = NULL;
2783 struct lxc_netdev *netdev = iterator->elem;
2784
2785 /* We can only delete devices whose ifindex we have. If we don't
2786 * have the index it means that we didn't create it.
2787 */
2788 if (!netdev->ifindex)
2789 continue;
2790
2791 if (netdev->type == LXC_NET_PHYS) {
2792 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2793 netdev->link);
2794 if (ret < 0)
2795 WARN("Failed to rename interface with index %d "
2796 "to its initial name \"%s\"",
2797 netdev->ifindex, netdev->link);
2798 else
2799 TRACE("Renamed interface with index %d to its "
2800 "initial name \"%s\"",
2801 netdev->ifindex, netdev->link);
2802
2803 ret = netdev_deconf[netdev->type](handler, netdev);
2804 goto clear_ifindices;
2805 }
2806
2807 ret = netdev_deconf[netdev->type](handler, netdev);
2808 if (ret < 0)
2809 WARN("Failed to deconfigure network device");
2810
2811 if (netdev->type != LXC_NET_VETH)
2812 goto clear_ifindices;
2813
2814 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
2815 goto clear_ifindices;
2816
2817 if (netdev->priv.veth_attr.pair[0] != '\0')
2818 hostveth = netdev->priv.veth_attr.pair;
2819 else
2820 hostveth = netdev->priv.veth_attr.veth1;
2821 if (hostveth[0] == '\0')
2822 goto clear_ifindices;
2823
2824 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2825 handler->name, netdev,
2826 netns_path);
2827 if (ret < 0) {
2828 WARN("Failed to remove port \"%s\" from openvswitch "
2829 "bridge \"%s\"", hostveth, netdev->link);
2830 goto clear_ifindices;
2831 }
2832 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2833 netdev->link);
2834
2835 clear_ifindices:
2836 /* We need to clear any ifindices we recorded so liblxc won't
2837 * have cached stale data which would cause it to fail on reboot
2838 * we're we don't re-read the on-disk config file.
2839 */
2840 netdev->ifindex = 0;
2841 if (netdev->type == LXC_NET_PHYS) {
2842 netdev->priv.phys_attr.ifindex = 0;
2843 } else if (netdev->type == LXC_NET_VETH) {
2844 netdev->priv.veth_attr.veth1[0] = '\0';
2845 netdev->priv.veth_attr.ifindex = 0;
2846 }
2847 }
2848
2849 return true;
2850 }
2851
/* Arguments handed to the "ip neigh ... proxy" exec wrappers. */
struct ip_proxy_args {
	/* IP address, in text form, of the proxy entry */
	const char *ip;
	/* name of the network device the proxy entry belongs to */
	const char *dev;
};
2856
2857 static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2858 {
2859 struct ip_proxy_args *args = data;
2860
2861 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2862 return -1;
2863 }
2864
2865 static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2866 {
2867 struct ip_proxy_args *args = data;
2868
2869 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2870 return -1;
2871 }
2872
2873 static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2874 {
2875 int ret;
2876 char cmd_output[PATH_MAX];
2877 struct ip_proxy_args args = {
2878 .ip = ip,
2879 .dev = dev,
2880 };
2881
2882 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2883 if (ret < 0) {
2884 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2885 return -1;
2886 }
2887
2888 return 0;
2889 }
2890
2891 static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2892 {
2893 int ret;
2894 char cmd_output[PATH_MAX];
2895 struct ip_proxy_args args = {
2896 .ip = ip,
2897 .dev = dev,
2898 };
2899
2900 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2901 if (ret < 0) {
2902 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2903 return -1;
2904 }
2905
2906 return 0;
2907 }
2908
2909 static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2910 struct lxc_list *cur, *next;
2911 struct lxc_inetdev *inet4dev;
2912 struct lxc_inet6dev *inet6dev;
2913 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
2914 int err = 0;
2915 unsigned int lo_ifindex = 0;
2916
2917 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2918 if (!lxc_list_empty(&netdev->ipv4)) {
2919 /* Check for net.ipv4.conf.[link].forwarding=1 */
2920 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2921 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2922 return minus_one_set_errno(EINVAL);
2923 }
2924 }
2925
2926 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2927 if (!lxc_list_empty(&netdev->ipv6)) {
2928 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2929 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2930 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2931 return minus_one_set_errno(EINVAL);
2932 }
2933
2934 /* Check for net.ipv6.conf.[link].forwarding=1 */
2935 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2936 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2937 return minus_one_set_errno(EINVAL);
2938 }
2939 }
2940
2941 /* Perform IPVLAN specific checks. */
2942 if (netdev->type == LXC_NET_IPVLAN) {
2943 /* Check mode is l3s as other modes do not work with l2proxy. */
2944 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2945 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2946 return minus_one_set_errno(EINVAL);
2947 }
2948
2949 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
2950 lo_ifindex = if_nametoindex(loop_device);
2951 if (lo_ifindex == 0) {
2952 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
2953 return minus_one_set_errno(EINVAL);
2954 }
2955 }
2956
2957 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2958 inet4dev = cur->elem;
2959 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2960 return minus_one_set_errno(-errno);
2961
2962 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2963 return minus_one_set_errno(EINVAL);
2964
2965 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2966 if (netdev->type == LXC_NET_IPVLAN) {
2967 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2968 if (err < 0) {
2969 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
2970 return minus_one_set_errno(-err);
2971 }
2972 }
2973 }
2974
2975 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2976 inet6dev = cur->elem;
2977 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2978 return minus_one_set_errno(-errno);
2979
2980 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2981 return minus_one_set_errno(EINVAL);
2982
2983 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2984 if (netdev->type == LXC_NET_IPVLAN) {
2985 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
2986 if (err < 0) {
2987 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
2988 return minus_one_set_errno(-err);
2989 }
2990 }
2991 }
2992
2993 return 0;
2994 }
2995
/*
 * Remove the l2proxy state belonging to one IPv4 address.
 *
 * @ip:         address to clean up
 * @link:       device holding the neighbour proxy entry; skipped when empty
 * @lo_ifindex: ifindex of the loopback device whose /32 route is removed;
 *              skipped when 0
 *
 * Both removals are attempted even if the first one fails. Returns 0 when
 * everything succeeded, otherwise -1 with errno set to EINVAL.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* Drop the static route to the loopback device, if one was requested. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* Drop the neighbour proxy entry on the link, if a link was given. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3024
/*
 * Remove the l2proxy state belonging to one IPv6 address.
 *
 * @ip:         address to clean up
 * @link:       device holding the neighbour proxy entry; skipped when empty
 * @lo_ifindex: ifindex of the loopback device whose /128 route is removed;
 *              skipped when 0
 *
 * Both removals are attempted even if the first one fails. Returns 0 when
 * everything succeeded, otherwise -1 with errno set to EINVAL.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET6_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* Drop the static route to the loopback device, if one was requested. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* Drop the neighbour proxy entry on the link, if a link was given. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3053
3054 static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
3055 unsigned int lo_ifindex = 0;
3056 unsigned int errCount = 0;
3057 struct lxc_list *cur, *next;
3058 struct lxc_inetdev *inet4dev;
3059 struct lxc_inet6dev *inet6dev;
3060
3061 /* Perform IPVLAN specific checks. */
3062 if (netdev->type == LXC_NET_IPVLAN) {
3063 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3064 lo_ifindex = if_nametoindex(loop_device);
3065 if (lo_ifindex == 0) {
3066 errCount++;
3067 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
3068 }
3069 }
3070
3071 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3072 inet4dev = cur->elem;
3073 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3074 errCount++;
3075 }
3076
3077 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3078 inet6dev = cur->elem;
3079 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3080 errCount++;
3081 }
3082
3083 if (errCount > 0)
3084 return minus_one_set_errno(EINVAL);
3085
3086 return 0;
3087 }
3088
3089 static int lxc_create_network_priv(struct lxc_handler *handler)
3090 {
3091 struct lxc_list *iterator;
3092 struct lxc_list *network = &handler->conf->network;
3093
3094 lxc_list_for_each(iterator, network) {
3095 struct lxc_netdev *netdev = iterator->elem;
3096
3097 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3098 ERROR("Invalid network configuration type %d", netdev->type);
3099 return -1;
3100 }
3101
3102 /* Setup l2proxy entries if enabled and used with a link property */
3103 if (netdev->l2proxy && netdev->link[0] != '\0') {
3104 if (lxc_setup_l2proxy(netdev)) {
3105 ERROR("Failed to setup l2proxy");
3106 return -1;
3107 }
3108 }
3109
3110 if (netdev_conf[netdev->type](handler, netdev)) {
3111 ERROR("Failed to create network device");
3112 return -1;
3113 }
3114 }
3115
3116 return 0;
3117 }
3118
3119 int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
3120 {
3121 pid_t pid = handler->pid;
3122 struct lxc_list *network = &handler->conf->network;
3123 struct lxc_list *iterator;
3124
3125 if (am_guest_unpriv())
3126 return 0;
3127
3128 lxc_list_for_each(iterator, network) {
3129 int ret;
3130 char ifname[IFNAMSIZ];
3131 struct lxc_netdev *netdev = iterator->elem;
3132
3133 if (!netdev->ifindex)
3134 continue;
3135
3136 /* retrieve the name of the interface */
3137 if (!if_indextoname(netdev->ifindex, ifname)) {
3138 ERROR("No interface corresponding to ifindex \"%d\"",
3139 netdev->ifindex);
3140 return -1;
3141 }
3142
3143 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
3144 if (ret) {
3145 errno = -ret;
3146 SYSERROR("Failed to move network device \"%s\" to network namespace %d",
3147 ifname, pid);
3148 return -1;
3149 }
3150
3151 strlcpy(netdev->created_name, ifname, IFNAMSIZ);
3152
3153 DEBUG("Moved network device \"%s\" to network namespace of %d",
3154 netdev->created_name, pid);
3155 }
3156
3157 return 0;
3158 }
3159
3160 static int lxc_create_network_unpriv(struct lxc_handler *handler)
3161 {
3162 int hooks_version = handler->conf->hooks_version;
3163 const char *lxcname = handler->name;
3164 const char *lxcpath = handler->lxcpath;
3165 struct lxc_list *network = &handler->conf->network;
3166 pid_t pid = handler->pid;
3167 struct lxc_list *iterator;
3168
3169 lxc_list_for_each(iterator, network) {
3170 struct lxc_netdev *netdev = iterator->elem;
3171
3172 if (netdev->type == LXC_NET_EMPTY)
3173 continue;
3174
3175 if (netdev->type == LXC_NET_NONE)
3176 continue;
3177
3178 if (netdev->type != LXC_NET_VETH) {
3179 ERROR("Networks of type %s are not supported by unprivileged containers",
3180 lxc_net_type_to_str(netdev->type));
3181 return -1;
3182 }
3183
3184 if (netdev->mtu)
3185 INFO("mtu ignored due to insufficient privilege");
3186
3187 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3188 pid, hooks_version))
3189 return -1;
3190 }
3191
3192 return 0;
3193 }
3194
3195 bool lxc_delete_network_priv(struct lxc_handler *handler)
3196 {
3197 int ret;
3198 struct lxc_list *iterator;
3199 struct lxc_list *network = &handler->conf->network;
3200
3201 lxc_list_for_each(iterator, network) {
3202 char *hostveth = NULL;
3203 struct lxc_netdev *netdev = iterator->elem;
3204
3205 /* We can only delete devices whose ifindex we have. If we don't
3206 * have the index it means that we didn't create it.
3207 */
3208 if (!netdev->ifindex)
3209 continue;
3210
3211 /* Delete l2proxy entries if enabled and used with a link property */
3212 if (netdev->l2proxy && netdev->link[0] != '\0') {
3213 if (lxc_delete_l2proxy(netdev))
3214 WARN("Failed to delete all l2proxy config");
3215 /* Don't return, let the network be cleaned up as normal. */
3216 }
3217
3218 if (netdev->type == LXC_NET_PHYS) {
3219 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3220 if (ret < 0)
3221 WARN("Failed to rename interface with index %d "
3222 "from \"%s\" to its initial name \"%s\"",
3223 netdev->ifindex, netdev->name, netdev->link);
3224 else {
3225 TRACE("Renamed interface with index %d from "
3226 "\"%s\" to its initial name \"%s\"",
3227 netdev->ifindex, netdev->name,
3228 netdev->link);
3229
3230 /* Restore original MTU */
3231 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3232 if (ret < 0) {
3233 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3234 netdev->link, netdev->priv.phys_attr.mtu);
3235 } else {
3236 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3237 netdev->link, netdev->priv.phys_attr.mtu);
3238 }
3239 }
3240
3241 ret = netdev_deconf[netdev->type](handler, netdev);
3242 goto clear_ifindices;
3243 }
3244
3245 ret = netdev_deconf[netdev->type](handler, netdev);
3246 if (ret < 0)
3247 WARN("Failed to deconfigure network device");
3248
3249 /* Recent kernels remove the virtual interfaces when the network
3250 * namespace is destroyed but in case we did not move the
3251 * interface to the network namespace, we have to destroy it.
3252 */
3253 ret = lxc_netdev_delete_by_index(netdev->ifindex);
3254 if (ret < 0) {
3255 if (errno != ENODEV) {
3256 WARN("Failed to remove interface \"%s\" with index %d",
3257 netdev->name[0] != '\0' ? netdev->name : "(null)",
3258 netdev->ifindex);
3259 goto clear_ifindices;
3260 }
3261 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
3262 netdev->name[0] != '\0' ? netdev->name : "(null)",
3263 netdev->ifindex);
3264 }
3265 INFO("Removed interface \"%s\" with index %d",
3266 netdev->name[0] != '\0' ? netdev->name : "(null)",
3267 netdev->ifindex);
3268
3269 if (netdev->type != LXC_NET_VETH)
3270 goto clear_ifindices;
3271
3272 /* Explicitly delete host veth device to prevent lingering
3273 * devices. We had issues in LXD around this.
3274 */
3275 if (netdev->priv.veth_attr.pair[0] != '\0')
3276 hostveth = netdev->priv.veth_attr.pair;
3277 else
3278 hostveth = netdev->priv.veth_attr.veth1;
3279 if (hostveth[0] == '\0')
3280 goto clear_ifindices;
3281
3282 ret = lxc_netdev_delete_by_name(hostveth);
3283 if (ret < 0) {
3284 WARN("Failed to remove interface \"%s\" from \"%s\"",
3285 hostveth, netdev->link);
3286 goto clear_ifindices;
3287 }
3288 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3289
3290 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
3291 netdev->priv.veth_attr.veth1[0] = '\0';
3292 netdev->ifindex = 0;
3293 netdev->priv.veth_attr.ifindex = 0;
3294 goto clear_ifindices;
3295 }
3296
3297 /* Delete the openvswitch port. */
3298 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3299 if (ret < 0)
3300 WARN("Failed to remove port \"%s\" from openvswitch "
3301 "bridge \"%s\"", hostveth, netdev->link);
3302 else
3303 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3304 hostveth, netdev->link);
3305
3306 clear_ifindices:
3307 /* We need to clear any ifindices we recorded so liblxc won't
3308 * have cached stale data which would cause it to fail on reboot
3309 * we're we don't re-read the on-disk config file.
3310 */
3311 netdev->ifindex = 0;
3312 if (netdev->type == LXC_NET_PHYS) {
3313 netdev->priv.phys_attr.ifindex = 0;
3314 } else if (netdev->type == LXC_NET_VETH) {
3315 netdev->priv.veth_attr.veth1[0] = '\0';
3316 netdev->priv.veth_attr.ifindex = 0;
3317 }
3318 }
3319
3320 return true;
3321 }
3322
3323 int lxc_requests_empty_network(struct lxc_handler *handler)
3324 {
3325 struct lxc_list *network = &handler->conf->network;
3326 struct lxc_list *iterator;
3327 bool found_none = false, found_nic = false;
3328
3329 if (lxc_list_empty(network))
3330 return 0;
3331
3332 lxc_list_for_each(iterator, network) {
3333 struct lxc_netdev *netdev = iterator->elem;
3334
3335 if (netdev->type == LXC_NET_NONE)
3336 found_none = true;
3337 else
3338 found_nic = true;
3339 }
3340 if (found_none && !found_nic)
3341 return 1;
3342 return 0;
3343 }
3344
3345 /* try to move physical nics to the init netns */
3346 int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
3347 {
3348 int ret;
3349 int oldfd;
3350 char ifname[IFNAMSIZ];
3351 struct lxc_list *iterator;
3352 int netnsfd = handler->nsfd[LXC_NS_NET];
3353 struct lxc_conf *conf = handler->conf;
3354
3355 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3356 * the parent network namespace. We won't have this capability if we are
3357 * unprivileged.
3358 */
3359 if (!handler->am_root)
3360 return 0;
3361
3362 TRACE("Moving physical network devices back to parent network namespace");
3363
3364 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
3365 if (oldfd < 0) {
3366 SYSERROR("Failed to preserve network namespace");
3367 return -1;
3368 }
3369
3370 ret = setns(netnsfd, CLONE_NEWNET);
3371 if (ret < 0) {
3372 SYSERROR("Failed to enter network namespace");
3373 close(oldfd);
3374 return -1;
3375 }
3376
3377 lxc_list_for_each(iterator, &conf->network) {
3378 struct lxc_netdev *netdev = iterator->elem;
3379
3380 if (netdev->type != LXC_NET_PHYS)
3381 continue;
3382
3383 /* Retrieve the name of the interface in the container's network
3384 * namespace.
3385 */
3386 if (!if_indextoname(netdev->ifindex, ifname)) {
3387 WARN("No interface corresponding to ifindex %d",
3388 netdev->ifindex);
3389 continue;
3390 }
3391
3392 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
3393 if (ret < 0)
3394 WARN("Error moving network device \"%s\" back to "
3395 "network namespace", ifname);
3396 else
3397 TRACE("Moved network device \"%s\" back to network "
3398 "namespace", ifname);
3399 }
3400
3401 ret = setns(oldfd, CLONE_NEWNET);
3402 close(oldfd);
3403 if (ret < 0) {
3404 SYSERROR("Failed to enter network namespace");
3405 return -1;
3406 }
3407
3408 return 0;
3409 }
3410
3411 static int setup_hw_addr(char *hwaddr, const char *ifname)
3412 {
3413 struct sockaddr sockaddr;
3414 struct ifreq ifr;
3415 int ret, fd;
3416
3417 ret = lxc_convert_mac(hwaddr, &sockaddr);
3418 if (ret) {
3419 errno = -ret;
3420 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
3421 return -1;
3422 }
3423
3424 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3425 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3426 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3427
3428 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
3429 if (fd < 0)
3430 return -1;
3431
3432 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
3433 if (ret)
3434 SYSERROR("Failed to perform ioctl");
3435
3436 close(fd);
3437
3438 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3439 ifr.ifr_name);
3440
3441 return ret;
3442 }
3443
3444 static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3445 {
3446 struct lxc_list *iterator;
3447 int err;
3448
3449 lxc_list_for_each(iterator, ip) {
3450 struct lxc_inetdev *inetdev = iterator->elem;
3451
3452 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3453 &inetdev->bcast, inetdev->prefix);
3454 if (err) {
3455 errno = -err;
3456 SYSERROR("Failed to setup ipv4 address for network device "
3457 "with ifindex %d", ifindex);
3458 return -1;
3459 }
3460 }
3461
3462 return 0;
3463 }
3464
3465 static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3466 {
3467 struct lxc_list *iterator;
3468 int err;
3469
3470 lxc_list_for_each(iterator, ip) {
3471 struct lxc_inet6dev *inet6dev = iterator->elem;
3472
3473 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3474 &inet6dev->mcast, &inet6dev->acast,
3475 inet6dev->prefix);
3476 if (err) {
3477 errno = -err;
3478 SYSERROR("Failed to setup ipv6 address for network device "
3479 "with ifindex %d", ifindex);
3480 return -1;
3481 }
3482 }
3483
3484 return 0;
3485 }
3486
3487 static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3488 {
3489 char ifname[IFNAMSIZ];
3490 int err;
3491 char *current_ifname = ifname;
3492 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
3493
3494 /* empty network namespace */
3495 if (!netdev->ifindex) {
3496 if (netdev->flags & IFF_UP) {
3497 err = lxc_netdev_up("lo");
3498 if (err) {
3499 errno = -err;
3500 SYSERROR("Failed to set the loopback network device up");
3501 return -1;
3502 }
3503 }
3504
3505 if (netdev->type == LXC_NET_EMPTY)
3506 return 0;
3507
3508 if (netdev->type == LXC_NET_NONE)
3509 return 0;
3510
3511 netdev->ifindex = if_nametoindex(netdev->created_name);
3512 if (!netdev->ifindex)
3513 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3514 netdev->name ?: "(null)");
3515 }
3516
3517 /* get the new ifindex in case of physical netdev */
3518 if (netdev->type == LXC_NET_PHYS) {
3519 netdev->ifindex = if_nametoindex(netdev->link);
3520 if (!netdev->ifindex) {
3521 ERROR("Failed to get ifindex for network device \"%s\"",
3522 netdev->link);
3523 return -1;
3524 }
3525 }
3526
3527 /* retrieve the name of the interface */
3528 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3529 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3530 netdev->ifindex);
3531 return -1;
3532 }
3533
3534 /* Default: let the system choose an interface name.
3535 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3536 * netlink will replace the format specifier with an appropriate index.
3537 */
3538 if (netdev->name[0] == '\0') {
3539 if (netdev->type == LXC_NET_PHYS)
3540 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
3541 else
3542 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
3543 }
3544
3545 /* rename the interface name */
3546 if (strcmp(current_ifname, netdev->name) != 0) {
3547 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
3548 if (err) {
3549 errno = -err;
3550 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
3551 current_ifname, netdev->name);
3552 return -1;
3553 }
3554
3555 TRACE("Renamed network device from \"%s\" to \"%s\"",
3556 current_ifname, netdev->name);
3557 }
3558
3559 /* Re-read the name of the interface because its name has changed
3560 * and would be automatically allocated by the system
3561 */
3562 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3563 ERROR("Failed get name for network device with ifindex %d",
3564 netdev->ifindex);
3565 return -1;
3566 }
3567
3568 /* Now update the recorded name of the network device to reflect the
3569 * name of the network device in the child's network namespace. We will
3570 * later on send this information back to the parent.
3571 */
3572 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
3573
3574 /* set a mac address */
3575 if (netdev->hwaddr) {
3576 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3577 ERROR("Failed to setup hw address for network device \"%s\"",
3578 current_ifname);
3579 return -1;
3580 }
3581 }
3582
3583 /* setup ipv4 addresses on the interface */
3584 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3585 ERROR("Failed to setup ip addresses for network device \"%s\"",
3586 current_ifname);
3587 return -1;
3588 }
3589
3590 /* setup ipv6 addresses on the interface */
3591 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3592 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
3593 current_ifname);
3594 return -1;
3595 }
3596
3597 /* set the network device up */
3598 if (netdev->flags & IFF_UP) {
3599 err = lxc_netdev_up(current_ifname);
3600 if (err) {
3601 errno = -err;
3602 SYSERROR("Failed to set network device \"%s\" up",
3603 current_ifname);
3604 return -1;
3605 }
3606
3607 /* the network is up, make the loopback up too */
3608 err = lxc_netdev_up("lo");
3609 if (err) {
3610 errno = -err;
3611 SYSERROR("Failed to set the loopback network device up");
3612 return -1;
3613 }
3614 }
3615
3616 /* setup ipv4 gateway on the interface */
3617 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
3618 if (!(netdev->flags & IFF_UP)) {
3619 ERROR("Cannot add ipv4 gateway for network device "
3620 "\"%s\" when not bringing up the interface", current_ifname);
3621 return -1;
3622 }
3623
3624 if (lxc_list_empty(&netdev->ipv4)) {
3625 ERROR("Cannot add ipv4 gateway for network device "
3626 "\"%s\" when not assigning an address", current_ifname);
3627 return -1;
3628 }
3629
3630 /* Setup device route if ipv4_gateway_dev is enabled */
3631 if (netdev->ipv4_gateway_dev) {
3632 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3633 if (err < 0) {
3634 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
3635 current_ifname);
3636 return minus_one_set_errno(-err);
3637 }
3638 } else {
3639 /* Check the gateway address is valid */
3640 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3641 return minus_one_set_errno(errno);
3642
3643 /* Try adding a default route to the gateway address */
3644 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3645 if (err < 0) {
3646 /* If adding the default route fails, this could be because the
3647 * gateway address is in a different subnet to the container's address.
3648 * To work around this, we try adding a static device route to the
3649 * gateway address first, and then try again.
3650 */
3651 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
3652 if (err < 0) {
3653 errno = -err;
3654 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
3655 bufinet4, current_ifname);
3656 return -1;
3657 }
3658
3659 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3660 if (err < 0) {
3661 errno = -err;
3662 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
3663 bufinet4, current_ifname);
3664 return -1;
3665 }
3666 }
3667 }
3668 }
3669
3670 /* setup ipv6 gateway on the interface */
3671 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
3672 if (!(netdev->flags & IFF_UP)) {
3673 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3674 current_ifname);
3675 return -1;
3676 }
3677
3678 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
3679 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3680 current_ifname);
3681 return -1;
3682 }
3683
3684 /* Setup device route if ipv6_gateway_dev is enabled */
3685 if (netdev->ipv6_gateway_dev) {
3686 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3687 if (err < 0) {
3688 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
3689 current_ifname);
3690 return minus_one_set_errno(-err);
3691 }
3692 } else {
3693 /* Check the gateway address is valid */
3694 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3695 return minus_one_set_errno(errno);
3696
3697 /* Try adding a default route to the gateway address */
3698 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3699 if (err < 0) {
3700 /* If adding the default route fails, this could be because the
3701 * gateway address is in a different subnet to the container's address.
3702 * To work around this, we try adding a static device route to the
3703 * gateway address first, and then try again.
3704 */
3705 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
3706 if (err < 0) {
3707 errno = -err;
3708 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
3709 bufinet6, current_ifname);
3710 return -1;
3711 }
3712
3713 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3714 if (err < 0) {
3715 errno = -err;
3716 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
3717 bufinet6, current_ifname);
3718 return -1;
3719 }
3720 }
3721 }
3722 }
3723
3724 DEBUG("Network device \"%s\" has been setup", current_ifname);
3725
3726 return 0;
3727 }
3728
3729 int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3730 struct lxc_list *network)
3731 {
3732 struct lxc_list *iterator;
3733
3734 lxc_list_for_each(iterator, network) {
3735 struct lxc_netdev *netdev = iterator->elem;
3736
3737 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
3738 ERROR("Failed to setup netdev");
3739 return -1;
3740 }
3741 }
3742
3743 if (!lxc_list_empty(network))
3744 INFO("Network has been setup");
3745
3746 return 0;
3747 }
3748
3749 int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
3750 {
3751 struct lxc_list *iterator;
3752 struct lxc_list *network = &handler->conf->network;
3753 int data_sock = handler->data_sock[0];
3754
3755 lxc_list_for_each(iterator, network) {
3756 int ret;
3757 struct lxc_netdev *netdev = iterator->elem;
3758
3759 if (netdev->type != LXC_NET_VETH)
3760 continue;
3761
3762 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
3763 if (ret < 0)
3764 return -1;
3765
3766 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3767 if (ret < 0)
3768 return -1;
3769
3770 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
3771 }
3772
3773 return 0;
3774 }
3775
3776 int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3777 {
3778 struct lxc_list *iterator;
3779 struct lxc_list *network = &handler->conf->network;
3780 int data_sock = handler->data_sock[1];
3781
3782 lxc_list_for_each(iterator, network) {
3783 int ret;
3784 struct lxc_netdev *netdev = iterator->elem;
3785
3786 if (netdev->type != LXC_NET_VETH)
3787 continue;
3788
3789 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
3790 if (ret < 0)
3791 return -1;
3792
3793 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3794 if (ret < 0)
3795 return -1;
3796 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
3797 }
3798
3799 return 0;
3800 }
3801
3802 int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3803 {
3804 struct lxc_list *iterator, *network;
3805 int data_sock = handler->data_sock[0];
3806
3807 if (!handler->am_root)
3808 return 0;
3809
3810 network = &handler->conf->network;
3811 lxc_list_for_each(iterator, network) {
3812 int ret;
3813 struct lxc_netdev *netdev = iterator->elem;
3814
3815 /* Send network device name in the child's namespace to parent. */
3816 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
3817 if (ret < 0)
3818 return -1;
3819
3820 /* Send network device ifindex in the child's namespace to
3821 * parent.
3822 */
3823 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
3824 if (ret < 0)
3825 return -1;
3826 }
3827
3828 if (!lxc_list_empty(network))
3829 TRACE("Sent network device names and ifindices to parent");
3830
3831 return 0;
3832 }
3833
3834 int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3835 {
3836 struct lxc_list *iterator, *network;
3837 int data_sock = handler->data_sock[1];
3838
3839 if (!handler->am_root)
3840 return 0;
3841
3842 network = &handler->conf->network;
3843 lxc_list_for_each(iterator, network) {
3844 int ret;
3845 struct lxc_netdev *netdev = iterator->elem;
3846
3847 /* Receive network device name in the child's namespace to
3848 * parent.
3849 */
3850 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
3851 if (ret < 0)
3852 return -1;
3853
3854 /* Receive network device ifindex in the child's namespace to
3855 * parent.
3856 */
3857 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
3858 if (ret < 0)
3859 return -1;
3860 }
3861
3862 return 0;
3863 }
3864
3865 void lxc_delete_network(struct lxc_handler *handler)
3866 {
3867 bool bret;
3868
3869 if (handler->am_root)
3870 bret = lxc_delete_network_priv(handler);
3871 else
3872 bret = lxc_delete_network_unpriv(handler);
3873 if (!bret)
3874 DEBUG("Failed to delete network devices");
3875 else
3876 DEBUG("Deleted network devices");
3877 }
3878
3879 int lxc_netns_set_nsid(int fd)
3880 {
3881 int ret;
3882 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3883 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3884 NLMSG_ALIGN(1024)];
3885 struct nl_handler nlh;
3886 struct nlmsghdr *hdr;
3887 struct rtgenmsg *msg;
3888 int saved_errno;
3889 const __s32 ns_id = -1;
3890 const __u32 netns_fd = fd;
3891
3892 ret = netlink_open(&nlh, NETLINK_ROUTE);
3893 if (ret < 0)
3894 return -1;
3895
3896 memset(buf, 0, sizeof(buf));
3897
3898 #pragma GCC diagnostic push
3899 #pragma GCC diagnostic ignored "-Wcast-align"
3900 hdr = (struct nlmsghdr *)buf;
3901 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
3902 #pragma GCC diagnostic pop
3903
3904 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3905 hdr->nlmsg_type = RTM_NEWNSID;
3906 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3907 hdr->nlmsg_pid = 0;
3908 hdr->nlmsg_seq = RTM_NEWNSID;
3909 msg->rtgen_family = AF_UNSPEC;
3910
3911 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3912 if (ret < 0)
3913 goto on_error;
3914
3915 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3916 if (ret < 0)
3917 goto on_error;
3918
3919 ret = __netlink_transaction(&nlh, hdr, hdr);
3920
3921 on_error:
3922 saved_errno = errno;
3923 netlink_close(&nlh);
3924 errno = saved_errno;
3925
3926 return ret;
3927 }
3928
/*
 * Index a run of routing attributes by attribute type.
 *
 * @tb  : table with room for @max + 1 entries; cleared first, then
 *        tb[type] is pointed at the first attribute of each type seen.
 * @max : highest attribute type to record; larger types are ignored.
 * @rta : first attribute of the run.
 * @len : number of bytes available starting at @rta.
 *
 * Later attributes that repeat an already recorded type are ignored.
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, (max + 1) * sizeof(tb[0]));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short kind = rta->rta_type;

		/* Skip out-of-range types and keep the first hit per type. */
		if (kind > max || tb[kind])
			continue;

		tb[kind] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
3948
/*
 * Read the payload of @rta as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() rather than dereferencing a cast
 * pointer, so the read neither drops the const qualifier on @rta nor
 * relies on the payload being suitably typed or aligned for a direct
 * __s32 load.
 *
 * The caller must ensure the attribute carries at least sizeof(__s32)
 * bytes of payload.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
3953
/*
 * Given a pointer @r to a struct rtgenmsg, yield a pointer to the first
 * routing attribute, which starts NLMSG_ALIGN(sizeof(struct rtgenmsg))
 * bytes after @r. Only defined here if no definition is already in scope.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3958
3959 int lxc_netns_get_nsid(int fd)
3960 {
3961 int ret;
3962 ssize_t len;
3963 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3964 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3965 NLMSG_ALIGN(1024)];
3966 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3967 struct nl_handler nlh;
3968 struct nlmsghdr *hdr;
3969 struct rtgenmsg *msg;
3970 int saved_errno;
3971 __u32 netns_fd = fd;
3972
3973 ret = netlink_open(&nlh, NETLINK_ROUTE);
3974 if (ret < 0)
3975 return -1;
3976
3977 memset(buf, 0, sizeof(buf));
3978
3979 #pragma GCC diagnostic push
3980 #pragma GCC diagnostic ignored "-Wcast-align"
3981 hdr = (struct nlmsghdr *)buf;
3982 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
3983 #pragma GCC diagnostic pop
3984
3985 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3986 hdr->nlmsg_type = RTM_GETNSID;
3987 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3988 hdr->nlmsg_pid = 0;
3989 hdr->nlmsg_seq = RTM_GETNSID;
3990 msg->rtgen_family = AF_UNSPEC;
3991
3992 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3993 if (ret == 0)
3994 ret = __netlink_transaction(&nlh, hdr, hdr);
3995
3996 saved_errno = errno;
3997 netlink_close(&nlh);
3998 errno = saved_errno;
3999 if (ret < 0)
4000 return -1;
4001
4002 errno = EINVAL;
4003 msg = NLMSG_DATA(hdr);
4004 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4005 if (len < 0)
4006 return -1;
4007
4008 #pragma GCC diagnostic push
4009 #pragma GCC diagnostic ignored "-Wcast-align"
4010 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4011 if (tb[__LXC_NETNSA_NSID])
4012 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
4013 #pragma GCC diagnostic pop
4014
4015 return -1;
4016 }
4017
4018 int lxc_create_network(struct lxc_handler *handler)
4019 {
4020 int ret;
4021
4022 /*
4023 * Find gateway addresses from the link device, which is no longer
4024 * accessible inside the container. Do this before creating network
4025 * interfaces, since goto out_delete_net does not work before
4026 * lxc_clone.
4027 */
4028 ret = lxc_find_gateway_addresses(handler);
4029 if (ret) {
4030 ERROR("Failed to find gateway addresses");
4031 return -1;
4032 }
4033
4034 if (handler->am_root) {
4035 ret = lxc_create_network_priv(handler);
4036 if (ret)
4037 return -1;
4038
4039 return lxc_network_move_created_netdev_priv(handler);
4040 }
4041
4042 return lxc_create_network_unpriv(handler);
4043 }