]> git.proxmox.com Git - mirror_lxc.git/blob - src/lxc/network.c
network: simplify instantiate_macvlan()
[mirror_lxc.git] / src / lxc / network.c
1 /*
2 * lxc: linux Container library
3 *
4 * (C) Copyright IBM Corp. 2007, 2008
5 *
6 * Authors:
7 * Daniel Lezcano <daniel.lezcano at free.fr>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 */
23
24 #ifndef _GNU_SOURCE
25 #define _GNU_SOURCE 1
26 #endif
27 #include <arpa/inet.h>
28 #include <ctype.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <linux/netlink.h>
32 #include <linux/rtnetlink.h>
33 #include <linux/sockios.h>
34 #include <net/ethernet.h>
35 #include <net/if.h>
36 #include <net/if_arp.h>
37 #include <netinet/in.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <sys/inotify.h>
42 #include <sys/ioctl.h>
43 #include <sys/param.h>
44 #include <sys/socket.h>
45 #include <sys/stat.h>
46 #include <sys/types.h>
47 #include <time.h>
48 #include <unistd.h>
49
50 #include "../include/netns_ifaddrs.h"
51 #include "af_unix.h"
52 #include "conf.h"
53 #include "config.h"
54 #include "file_utils.h"
55 #include "log.h"
56 #include "macro.h"
57 #include "memory_utils.h"
58 #include "network.h"
59 #include "nl.h"
60 #include "raw_syscalls.h"
61 #include "syscall_wrappers.h"
62 #include "utils.h"
63
64 #ifndef HAVE_STRLCPY
65 #include "include/strlcpy.h"
66 #endif
67
/* Declare the log category used by the logging macros in this file. */
lxc_log_define(network, lxc);

/* Name of the loopback interface. */
static const char loop_device[] = "lo";

/* Signature shared by the per-type network setup and teardown callbacks. */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
72
73 static int lxc_ip_route_dest(__u16 nlmsg_type, int family, int ifindex, void *dest, unsigned int netmask)
74 {
75 int addrlen, err;
76 struct nl_handler nlh;
77 struct rtmsg *rt;
78 struct nlmsg *answer = NULL, *nlmsg = NULL;
79
80 addrlen = family == AF_INET ? sizeof(struct in_addr)
81 : sizeof(struct in6_addr);
82
83 err = netlink_open(&nlh, NETLINK_ROUTE);
84 if (err)
85 return err;
86
87 err = -ENOMEM;
88 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
89 if (!nlmsg)
90 goto out;
91
92 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
93 if (!answer)
94 goto out;
95
96 nlmsg->nlmsghdr->nlmsg_flags =
97 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
98 nlmsg->nlmsghdr->nlmsg_type = nlmsg_type;
99
100 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
101 if (!rt)
102 goto out;
103 rt->rtm_family = family;
104 rt->rtm_table = RT_TABLE_MAIN;
105 rt->rtm_scope = RT_SCOPE_LINK;
106 rt->rtm_protocol = RTPROT_BOOT;
107 rt->rtm_type = RTN_UNICAST;
108 rt->rtm_dst_len = netmask;
109
110 err = -EINVAL;
111 if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
112 goto out;
113 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
114 goto out;
115 err = netlink_transaction(&nlh, nlmsg, answer);
116 out:
117 netlink_close(&nlh);
118 nlmsg_free(answer);
119 nlmsg_free(nlmsg);
120 return err;
121 }
122
/* Add an IPv4 route to dest/netmask on the device with index ifindex. */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET, ifindex, dest, netmask);
	return ret;
}
127
/* Add an IPv6 route to dest/netmask on the device with index ifindex. */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET6, ifindex, dest, netmask);
	return ret;
}
132
/* Delete the IPv4 route to dest/netmask on the device with index ifindex. */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET, ifindex, dest, netmask);
	return ret;
}
137
/* Delete the IPv6 route to dest/netmask on the device with index ifindex. */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	int ret;

	ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET6, ifindex, dest, netmask);
	return ret;
}
142
143 static int lxc_setup_ipv4_routes(struct lxc_list *ip, int ifindex)
144 {
145 struct lxc_list *iterator;
146 int err;
147
148 lxc_list_for_each(iterator, ip) {
149 struct lxc_inetdev *inetdev = iterator->elem;
150
151 err = lxc_ipv4_dest_add(ifindex, &inetdev->addr, inetdev->prefix);
152 if (err) {
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex);
155 return minus_one_set_errno(-err);
156 }
157 }
158
159 return 0;
160 }
161
162 static int lxc_setup_ipv6_routes(struct lxc_list *ip, int ifindex)
163 {
164 struct lxc_list *iterator;
165 int err;
166
167 lxc_list_for_each(iterator, ip) {
168 struct lxc_inet6dev *inet6dev = iterator->elem;
169
170 err = lxc_ipv6_dest_add(ifindex, &inet6dev->addr, inet6dev->prefix);
171 if (err) {
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex);
174 return minus_one_set_errno(-err);
175 }
176 }
177
178 return 0;
179 }
180
181 static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
182 {
183 int bridge_index, err;
184 char *veth1, *veth2;
185 char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
186 unsigned int mtu = 0;
187
188 if (netdev->priv.veth_attr.pair[0] != '\0') {
189 veth1 = netdev->priv.veth_attr.pair;
190 if (handler->conf->reboot)
191 lxc_netdev_delete_by_name(veth1);
192 } else {
193 err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
194 if (err < 0 || (size_t)err >= sizeof(veth1buf))
195 return -1;
196
197 veth1 = lxc_mkifname(veth1buf);
198 if (!veth1)
199 return -1;
200
201 /* store away for deconf */
202 memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
203 }
204
205 err = snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
206 if (err < 0 || (size_t)err >= sizeof(veth2buf))
207 return -1;
208
209 veth2 = lxc_mkifname(veth2buf);
210 if (!veth2)
211 goto out_delete;
212
213 err = lxc_veth_create(veth1, veth2);
214 if (err) {
215 errno = -err;
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1, veth2);
217 goto out_delete;
218 }
219
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
222 * of a container */
223 err = setup_private_host_hw_addr(veth1);
224 if (err) {
225 errno = -err;
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1);
227 goto out_delete;
228 }
229
230 /* Retrieve ifindex of the host's veth device. */
231 netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
232 if (!netdev->priv.veth_attr.ifindex) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
234 goto out_delete;
235 }
236
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
240 * on.
241 */
242 netdev->ifindex = if_nametoindex(veth2);
243 if (!netdev->ifindex) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
245 goto out_delete;
246 }
247
248 if (netdev->mtu) {
249 if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
250 WARN("Failed to parse mtu");
251 else
252 INFO("Retrieved mtu %d", mtu);
253 } else if (netdev->link[0] != '\0') {
254 bridge_index = if_nametoindex(netdev->link);
255 if (bridge_index) {
256 mtu = netdev_get_mtu(bridge_index);
257 INFO("Retrieved mtu %d from %s", mtu, netdev->link);
258 } else {
259 mtu = netdev_get_mtu(netdev->ifindex);
260 INFO("Retrieved mtu %d from %s", mtu, veth2);
261 }
262 }
263
264 if (mtu) {
265 err = lxc_netdev_set_mtu(veth1, mtu);
266 if (!err)
267 err = lxc_netdev_set_mtu(veth2, mtu);
268
269 if (err) {
270 errno = -err;
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu, veth1, veth2);
273 goto out_delete;
274 }
275 }
276
277 if (netdev->link[0] != '\0') {
278 err = lxc_bridge_attach(netdev->link, veth1);
279 if (err) {
280 errno = -err;
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1, netdev->link);
283 goto out_delete;
284 }
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
286 }
287
288 err = lxc_netdev_up(veth1);
289 if (err) {
290 errno = -err;
291 SYSERROR("Failed to set \"%s\" up", veth1);
292 goto out_delete;
293 }
294
295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev->priv.veth_attr.ipv4_routes, netdev->priv.veth_attr.ifindex)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1);
298 goto out_delete;
299 }
300
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev->priv.veth_attr.ipv6_routes, netdev->priv.veth_attr.ifindex)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1);
304 goto out_delete;
305 }
306
307 if (netdev->upscript) {
308 char *argv[] = {
309 "veth",
310 netdev->link,
311 veth1,
312 NULL,
313 };
314
315 err = run_script_argv(handler->name,
316 handler->conf->hooks_version, "net",
317 netdev->upscript, "up", argv);
318 if (err < 0)
319 goto out_delete;
320 }
321
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
323 netdev->ifindex);
324
325 return 0;
326
327 out_delete:
328 if (netdev->ifindex != 0)
329 lxc_netdev_delete_by_name(veth1);
330 return -1;
331 }
332
333 static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
334 {
335 char peer[IFNAMSIZ];
336 int err;
337 unsigned int mtu = 0;
338
339 if (netdev->link[0] == '\0') {
340 ERROR("No link for macvlan network device specified");
341 return -1;
342 }
343
344 err = snprintf(peer, sizeof(peer), "mcXXXXXX");
345 if (err < 0 || (size_t)err >= sizeof(peer))
346 return -1;
347
348 if (!lxc_mkifname(peer))
349 return -1;
350
351 err = lxc_macvlan_create(netdev->link, peer,
352 netdev->priv.macvlan_attr.mode);
353 if (err) {
354 errno = -err;
355 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
356 peer, netdev->link);
357 goto on_error;
358 }
359
360 netdev->ifindex = if_nametoindex(peer);
361 if (!netdev->ifindex) {
362 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
363 goto on_error;
364 }
365
366 if (netdev->mtu) {
367 err = lxc_safe_uint(netdev->mtu, &mtu);
368 if (err < 0) {
369 errno = -err;
370 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
371 goto on_error;
372 }
373
374 err = lxc_netdev_set_mtu(peer, mtu);
375 if (err < 0) {
376 errno = -err;
377 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
378 goto on_error;
379 }
380 }
381
382 if (netdev->upscript) {
383 char *argv[] = {
384 "macvlan",
385 netdev->link,
386 NULL,
387 };
388
389 err = run_script_argv(handler->name,
390 handler->conf->hooks_version, "net",
391 netdev->upscript, "up", argv);
392 if (err < 0)
393 goto on_error;
394 }
395
396 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
397 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
398
399 return 0;
400
401 on_error:
402 lxc_netdev_delete_by_name(peer);
403 return -1;
404 }
405
406 static int lxc_ipvlan_create(const char *master, const char *name, int mode, int isolation)
407 {
408 int err, index, len;
409 struct ifinfomsg *ifi;
410 struct nl_handler nlh;
411 struct rtattr *nest, *nest2;
412 struct nlmsg *answer = NULL, *nlmsg = NULL;
413
414 len = strlen(master);
415 if (len == 1 || len >= IFNAMSIZ)
416 return minus_one_set_errno(EINVAL);
417
418 len = strlen(name);
419 if (len == 1 || len >= IFNAMSIZ)
420 return minus_one_set_errno(EINVAL);
421
422 index = if_nametoindex(master);
423 if (!index)
424 return minus_one_set_errno(EINVAL);
425
426 err = netlink_open(&nlh, NETLINK_ROUTE);
427 if (err)
428 return minus_one_set_errno(-err);
429
430 err = -ENOMEM;
431 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
432 if (!nlmsg)
433 goto out;
434
435 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
436 if (!answer)
437 goto out;
438
439 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
440 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
441
442 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
443 if (!ifi) {
444 goto out;
445 }
446 ifi->ifi_family = AF_UNSPEC;
447
448 err = -EPROTO;
449 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
450 if (!nest)
451 goto out;
452
453 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "ipvlan"))
454 goto out;
455
456 if (mode) {
457 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
458 if (!nest2)
459 goto out;
460
461 if (nla_put_u32(nlmsg, IFLA_IPVLAN_MODE, mode))
462 goto out;
463
464 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
465 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
466 */
467 if (isolation > 0) {
468 if (nla_put_u16(nlmsg, IFLA_IPVLAN_ISOLATION, isolation))
469 goto out;
470 }
471
472 nla_end_nested(nlmsg, nest2);
473 }
474
475 nla_end_nested(nlmsg, nest);
476
477 if (nla_put_u32(nlmsg, IFLA_LINK, index))
478 goto out;
479
480 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
481 goto out;
482
483 err = netlink_transaction(&nlh, nlmsg, answer);
484 out:
485 netlink_close(&nlh);
486 nlmsg_free(answer);
487 nlmsg_free(nlmsg);
488 if (err < 0)
489 return minus_one_set_errno(-err);
490 return 0;
491 }
492
493 static int instantiate_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
494 {
495 char peerbuf[IFNAMSIZ], *peer;
496 int err;
497 unsigned int mtu = 0;
498
499 if (netdev->link[0] == '\0') {
500 ERROR("No link for ipvlan network device specified");
501 return -1;
502 }
503
504 err = snprintf(peerbuf, sizeof(peerbuf), "ipXXXXXX");
505 if (err < 0 || (size_t)err >= sizeof(peerbuf))
506 return -1;
507
508 peer = lxc_mkifname(peerbuf);
509 if (!peer)
510 return -1;
511
512 err = lxc_ipvlan_create(netdev->link, peer, netdev->priv.ipvlan_attr.mode, netdev->priv.ipvlan_attr.isolation);
513 if (err) {
514 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"", peer, netdev->link);
515 goto on_error;
516 }
517
518 netdev->ifindex = if_nametoindex(peer);
519 if (!netdev->ifindex) {
520 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
521 goto on_error;
522 }
523
524 if (netdev->mtu) {
525 err = lxc_safe_uint(netdev->mtu, &mtu);
526 if (err < 0) {
527 errno = -err;
528 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
529 goto on_error;
530 }
531
532 err = lxc_netdev_set_mtu(peer, mtu);
533 if (err < 0) {
534 errno = -err;
535 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
536 goto on_error;
537 }
538 }
539
540 if (netdev->upscript) {
541 char *argv[] = {
542 "ipvlan",
543 netdev->link,
544 NULL,
545 };
546
547 err = run_script_argv(handler->name,
548 handler->conf->hooks_version, "net",
549 netdev->upscript, "up", argv);
550 if (err < 0)
551 goto on_error;
552 }
553
554 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d",
555 peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
556
557 return 0;
558
559 on_error:
560 lxc_netdev_delete_by_name(peer);
561 return -1;
562 }
563
564 static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
565 {
566 char peer[IFNAMSIZ];
567 int err;
568 static uint16_t vlan_cntr = 0;
569 unsigned int mtu = 0;
570
571 if (netdev->link[0] == '\0') {
572 ERROR("No link for vlan network device specified");
573 return -1;
574 }
575
576 err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
577 if (err < 0 || (size_t)err >= sizeof(peer))
578 return -1;
579
580 err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
581 if (err) {
582 errno = -err;
583 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
584 peer, netdev->link);
585 return -1;
586 }
587
588 netdev->ifindex = if_nametoindex(peer);
589 if (!netdev->ifindex) {
590 ERROR("Failed to retrieve ifindex for \"%s\"", peer);
591 goto on_error;
592 }
593
594 if (netdev->mtu) {
595 err = lxc_safe_uint(netdev->mtu, &mtu);
596 if (err < 0) {
597 errno = -err;
598 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
599 goto on_error;
600 }
601
602 err = lxc_netdev_set_mtu(peer, mtu);
603 if (err) {
604 errno = -err;
605 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, peer);
606 goto on_error;
607 }
608 }
609
610 if (netdev->upscript) {
611 char *argv[] = {
612 "vlan",
613 netdev->link,
614 NULL,
615 };
616
617 err = run_script_argv(handler->name,
618 handler->conf->hooks_version, "net",
619 netdev->upscript, "up", argv);
620 if (err < 0) {
621 goto on_error;
622 }
623 }
624
625 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"",
626 peer, netdev->ifindex);
627
628 return 0;
629
630 on_error:
631 lxc_netdev_delete_by_name(peer);
632 return -1;
633 }
634
635 static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
636 {
637 int err, mtu_orig = 0;
638 unsigned int mtu = 0;
639
640 if (netdev->link[0] == '\0') {
641 ERROR("No link for physical interface specified");
642 return -1;
643 }
644
645 /* Note that we're retrieving the container's ifindex in the host's
646 * network namespace because we need it to move the device from the
647 * host's network namespace to the container's network namespace later
648 * on.
649 * Note that netdev->link will contain the name of the physical network
650 * device in the host's namespace.
651 */
652 netdev->ifindex = if_nametoindex(netdev->link);
653 if (!netdev->ifindex) {
654 ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
655 return -1;
656 }
657
658 /* Store the ifindex of the host's network device in the host's
659 * namespace.
660 */
661 netdev->priv.phys_attr.ifindex = netdev->ifindex;
662
663 /* Get original device MTU setting and store for restoration after container shutdown. */
664 mtu_orig = netdev_get_mtu(netdev->ifindex);
665 if (mtu_orig < 0) {
666 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev->link);
667 return minus_one_set_errno(-mtu_orig);
668 }
669
670 netdev->priv.phys_attr.mtu = mtu_orig;
671
672 if (netdev->mtu) {
673 err = lxc_safe_uint(netdev->mtu, &mtu);
674 if (err < 0) {
675 errno = -err;
676 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
677 return -1;
678 }
679
680 err = lxc_netdev_set_mtu(netdev->link, mtu);
681 if (err < 0) {
682 errno = -err;
683 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev->mtu, netdev->link);
684 return -1;
685 }
686 }
687
688 if (netdev->upscript) {
689 char *argv[] = {
690 "phys",
691 netdev->link,
692 NULL,
693 };
694
695 err = run_script_argv(handler->name,
696 handler->conf->hooks_version, "net",
697 netdev->upscript, "up", argv);
698 if (err < 0) {
699 return -1;
700 }
701 }
702
703 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev->link, netdev->ifindex);
704
705 return 0;
706 }
707
708 static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
709 {
710 int ret;
711 char *argv[] = {
712 "empty",
713 NULL,
714 };
715
716 netdev->ifindex = 0;
717 if (!netdev->upscript)
718 return 0;
719
720 ret = run_script_argv(handler->name, handler->conf->hooks_version,
721 "net", netdev->upscript, "up", argv);
722 if (ret < 0)
723 return -1;
724
725 return 0;
726 }
727
728 static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
729 {
730 netdev->ifindex = 0;
731 return 0;
732 }
733
734 static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
735 [LXC_NET_VETH] = instantiate_veth,
736 [LXC_NET_MACVLAN] = instantiate_macvlan,
737 [LXC_NET_IPVLAN] = instantiate_ipvlan,
738 [LXC_NET_VLAN] = instantiate_vlan,
739 [LXC_NET_PHYS] = instantiate_phys,
740 [LXC_NET_EMPTY] = instantiate_empty,
741 [LXC_NET_NONE] = instantiate_none,
742 };
743
744 static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
745 {
746 int ret;
747 char *argv[] = {
748 "veth",
749 netdev->link,
750 NULL,
751 NULL,
752 };
753
754 if (!netdev->downscript)
755 return 0;
756
757 if (netdev->priv.veth_attr.pair[0] != '\0')
758 argv[2] = netdev->priv.veth_attr.pair;
759 else
760 argv[2] = netdev->priv.veth_attr.veth1;
761
762 ret = run_script_argv(handler->name,
763 handler->conf->hooks_version, "net",
764 netdev->downscript, "down", argv);
765 if (ret < 0)
766 return -1;
767
768 return 0;
769 }
770
771 static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
772 {
773 int ret;
774 char *argv[] = {
775 "macvlan",
776 netdev->link,
777 NULL,
778 };
779
780 if (!netdev->downscript)
781 return 0;
782
783 ret = run_script_argv(handler->name, handler->conf->hooks_version,
784 "net", netdev->downscript, "down", argv);
785 if (ret < 0)
786 return -1;
787
788 return 0;
789 }
790
791 static int shutdown_ipvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
792 {
793 int ret;
794 char *argv[] = {
795 "ipvlan",
796 netdev->link,
797 NULL,
798 };
799
800 if (!netdev->downscript)
801 return 0;
802
803 ret = run_script_argv(handler->name, handler->conf->hooks_version,
804 "net", netdev->downscript, "down", argv);
805 if (ret < 0)
806 return -1;
807
808 return 0;
809 }
810
811 static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
812 {
813 int ret;
814 char *argv[] = {
815 "vlan",
816 netdev->link,
817 NULL,
818 };
819
820 if (!netdev->downscript)
821 return 0;
822
823 ret = run_script_argv(handler->name, handler->conf->hooks_version,
824 "net", netdev->downscript, "down", argv);
825 if (ret < 0)
826 return -1;
827
828 return 0;
829 }
830
831 static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
832 {
833 int ret;
834 char *argv[] = {
835 "phys",
836 netdev->link,
837 NULL,
838 };
839
840 if (!netdev->downscript)
841 return 0;
842
843 ret = run_script_argv(handler->name, handler->conf->hooks_version,
844 "net", netdev->downscript, "down", argv);
845 if (ret < 0)
846 return -1;
847
848 return 0;
849 }
850
851 static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
852 {
853 int ret;
854 char *argv[] = {
855 "empty",
856 NULL,
857 };
858
859 if (!netdev->downscript)
860 return 0;
861
862 ret = run_script_argv(handler->name, handler->conf->hooks_version,
863 "net", netdev->downscript, "down", argv);
864 if (ret < 0)
865 return -1;
866
867 return 0;
868 }
869
/* Nothing to tear down for "none" networks; always returns 0. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
874
875 static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
876 [LXC_NET_VETH] = shutdown_veth,
877 [LXC_NET_MACVLAN] = shutdown_macvlan,
878 [LXC_NET_IPVLAN] = shutdown_ipvlan,
879 [LXC_NET_VLAN] = shutdown_vlan,
880 [LXC_NET_PHYS] = shutdown_phys,
881 [LXC_NET_EMPTY] = shutdown_empty,
882 [LXC_NET_NONE] = shutdown_none,
883 };
884
885 static int lxc_netdev_move_by_index_fd(int ifindex, int fd, const char *ifname)
886 {
887 int err;
888 struct nl_handler nlh;
889 struct ifinfomsg *ifi;
890 struct nlmsg *nlmsg = NULL;
891
892 err = netlink_open(&nlh, NETLINK_ROUTE);
893 if (err)
894 return err;
895
896 err = -ENOMEM;
897 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
898 if (!nlmsg)
899 goto out;
900
901 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
902 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
903
904 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
905 if (!ifi)
906 goto out;
907 ifi->ifi_family = AF_UNSPEC;
908 ifi->ifi_index = ifindex;
909
910 if (nla_put_u32(nlmsg, IFLA_NET_NS_FD, fd))
911 goto out;
912
913 if (ifname != NULL) {
914 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
915 goto out;
916 }
917
918 err = netlink_transaction(&nlh, nlmsg, nlmsg);
919 out:
920 netlink_close(&nlh);
921 nlmsg_free(nlmsg);
922 return err;
923 }
924
925 int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
926 {
927 int err;
928 struct nl_handler nlh;
929 struct ifinfomsg *ifi;
930 struct nlmsg *nlmsg = NULL;
931
932 err = netlink_open(&nlh, NETLINK_ROUTE);
933 if (err)
934 return err;
935
936 err = -ENOMEM;
937 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
938 if (!nlmsg)
939 goto out;
940
941 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
942 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
943
944 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
945 if (!ifi)
946 goto out;
947 ifi->ifi_family = AF_UNSPEC;
948 ifi->ifi_index = ifindex;
949
950 if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
951 goto out;
952
953 if (ifname != NULL) {
954 if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
955 goto out;
956 }
957
958 err = netlink_transaction(&nlh, nlmsg, nlmsg);
959 out:
960 netlink_close(&nlh);
961 nlmsg_free(nlmsg);
962 return err;
963 }
964
965 /* If we are asked to move a wireless interface, then we must actually move its
966 * phyN device. Detect that condition and return the physname here. The physname
967 * will be passed to lxc_netdev_move_wlan() which will free it when done.
968 */
969 #define PHYSNAME "/sys/class/net/%s/phy80211/name"
970 static char *is_wlan(const char *ifname)
971 {
972 __do_free char *path = NULL;
973 int i, ret;
974 long physlen;
975 size_t len;
976 FILE *f;
977 char *physname = NULL;
978
979 len = strlen(ifname) + strlen(PHYSNAME) - 1;
980 path = must_realloc(NULL, len + 1);
981 ret = snprintf(path, len, PHYSNAME, ifname);
982 if (ret < 0 || (size_t)ret >= len)
983 goto bad;
984
985 f = fopen(path, "r");
986 if (!f)
987 goto bad;
988
989 /* Feh - sb.st_size is always 4096. */
990 fseek(f, 0, SEEK_END);
991 physlen = ftell(f);
992 fseek(f, 0, SEEK_SET);
993 if (physlen < 0) {
994 fclose(f);
995 goto bad;
996 }
997
998 physname = malloc(physlen + 1);
999 if (!physname) {
1000 fclose(f);
1001 goto bad;
1002 }
1003
1004 memset(physname, 0, physlen + 1);
1005 ret = fread(physname, 1, physlen, f);
1006 fclose(f);
1007 if (ret < 0)
1008 goto bad;
1009
1010 for (i = 0; i < physlen; i++) {
1011 if (physname[i] == '\n')
1012 physname[i] = '\0';
1013
1014 if (physname[i] == '\0')
1015 break;
1016 }
1017
1018 return physname;
1019
1020 bad:
1021 free(physname);
1022 return NULL;
1023 }
1024
/*
 * Rename the network device old to new inside the network namespace of the
 * process pid.
 *
 * A child process is forked which switches into that namespace and performs
 * the rename; the parent waits for it.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() for the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/* Child: it must never return into the caller's code, otherwise a
	 * second copy of the calling process would keep running. Report
	 * failure through the exit status instead.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new));
}
1042
/*
 * Move the wireless phy physname into the network namespace of pid by
 * running "iw phy <physname> set netns <pid>" in a child process and, when
 * newname is not NULL, rename ifname to newname inside that namespace.
 *
 * physname is always freed before returning.
 *
 * Returns 0 on success; -1 if "iw" is not found, fork() fails or the child
 * does not succeed; otherwise the result of the rename.
 */
static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
				const char *newname)
{
	int ret = -1;
	char *iw_path;
	pid_t child;

	/* Move phyN into the container. TODO - do this using netlink.
	 * However, IIUC this involves a bit more complicated work to talk to
	 * the 80211 module, so for now just call out to iw.
	 */
	iw_path = on_path("iw", NULL);
	if (!iw_path)
		goto out_free;
	/* Only the existence of iw matters; the path itself is not used. */
	free(iw_path);

	child = fork();
	if (child < 0)
		goto out_free;

	if (child == 0) {
		char pidbuf[30];

		sprintf(pidbuf, "%d", pid);
		execlp("iw", "iw", "phy", physname, "set", "netns", pidbuf,
		       (char *)NULL);
		_exit(EXIT_FAILURE);
	}

	if (wait_for_pid(child))
		goto out_free;

	if (newname)
		ret = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
	else
		ret = 0;

out_free:
	free(physname);
	return ret;
}
1082
/*
 * Move the device ifname into the network namespace of pid, optionally
 * renaming it to newname.
 *
 * When is_wlan() reports a phy name for the device, the move goes through
 * lxc_netdev_move_wlan(); otherwise through lxc_netdev_move_by_index().
 *
 * Returns -EINVAL if ifname is NULL or has no ifindex, otherwise the result
 * of the chosen move function.
 */
int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
{
	char *phy;
	int ifindex;

	if (!ifname)
		return -EINVAL;

	ifindex = if_nametoindex(ifname);
	if (!ifindex)
		return -EINVAL;

	phy = is_wlan(ifname);
	if (!phy)
		return lxc_netdev_move_by_index(ifindex, pid, newname);

	return lxc_netdev_move_wlan(phy, ifname, pid, newname);
}
1101
1102 int lxc_netdev_delete_by_index(int ifindex)
1103 {
1104 int err;
1105 struct ifinfomsg *ifi;
1106 struct nl_handler nlh;
1107 struct nlmsg *answer = NULL, *nlmsg = NULL;
1108
1109 err = netlink_open(&nlh, NETLINK_ROUTE);
1110 if (err)
1111 return err;
1112
1113 err = -ENOMEM;
1114 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1115 if (!nlmsg)
1116 goto out;
1117
1118 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1119 if (!answer)
1120 goto out;
1121
1122 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1123 nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
1124
1125 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1126 if (!ifi)
1127 goto out;
1128 ifi->ifi_family = AF_UNSPEC;
1129 ifi->ifi_index = ifindex;
1130
1131 err = netlink_transaction(&nlh, nlmsg, answer);
1132 out:
1133 netlink_close(&nlh);
1134 nlmsg_free(answer);
1135 nlmsg_free(nlmsg);
1136 return err;
1137 }
1138
/*
 * Delete the network device called name.
 *
 * Returns -EINVAL if name has no ifindex, otherwise the result of
 * lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1149
1150 int lxc_netdev_rename_by_index(int ifindex, const char *newname)
1151 {
1152 int err, len;
1153 struct ifinfomsg *ifi;
1154 struct nl_handler nlh;
1155 struct nlmsg *answer = NULL, *nlmsg = NULL;
1156
1157 err = netlink_open(&nlh, NETLINK_ROUTE);
1158 if (err)
1159 return err;
1160
1161 len = strlen(newname);
1162 if (len == 1 || len >= IFNAMSIZ) {
1163 err = -EINVAL;
1164 goto out;
1165 }
1166
1167 err = -ENOMEM;
1168 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1169 if (!nlmsg)
1170 goto out;
1171
1172 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1173 if (!answer)
1174 goto out;
1175
1176 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
1177 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1178
1179 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1180 if (!ifi)
1181 goto out;
1182 ifi->ifi_family = AF_UNSPEC;
1183 ifi->ifi_index = ifindex;
1184
1185 if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
1186 goto out;
1187
1188 err = netlink_transaction(&nlh, nlmsg, answer);
1189 out:
1190 netlink_close(&nlh);
1191 nlmsg_free(answer);
1192 nlmsg_free(nlmsg);
1193 return err;
1194 }
1195
1196 int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
1197 {
1198 int len, index;
1199
1200 len = strlen(oldname);
1201 if (len == 1 || len >= IFNAMSIZ)
1202 return -EINVAL;
1203
1204 index = if_nametoindex(oldname);
1205 if (!index)
1206 return -EINVAL;
1207
1208 return lxc_netdev_rename_by_index(index, newname);
1209 }
1210
1211 int netdev_set_flag(const char *name, int flag)
1212 {
1213 int err, index, len;
1214 struct ifinfomsg *ifi;
1215 struct nl_handler nlh;
1216 struct nlmsg *answer = NULL, *nlmsg = NULL;
1217
1218 err = netlink_open(&nlh, NETLINK_ROUTE);
1219 if (err)
1220 return err;
1221
1222 err = -EINVAL;
1223 len = strlen(name);
1224 if (len == 1 || len >= IFNAMSIZ)
1225 goto out;
1226
1227 err = -ENOMEM;
1228 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1229 if (!nlmsg)
1230 goto out;
1231
1232 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1233 if (!answer)
1234 goto out;
1235
1236 err = -EINVAL;
1237 index = if_nametoindex(name);
1238 if (!index)
1239 goto out;
1240
1241 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1242 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1243
1244 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1245 if (!ifi) {
1246 err = -ENOMEM;
1247 goto out;
1248 }
1249 ifi->ifi_family = AF_UNSPEC;
1250 ifi->ifi_index = index;
1251 ifi->ifi_change |= IFF_UP;
1252 ifi->ifi_flags |= flag;
1253
1254 err = netlink_transaction(&nlh, nlmsg, answer);
1255 out:
1256 netlink_close(&nlh);
1257 nlmsg_free(nlmsg);
1258 nlmsg_free(answer);
1259 return err;
1260 }
1261
1262 int netdev_get_flag(const char *name, int *flag)
1263 {
1264 int err, index, len;
1265 struct ifinfomsg *ifi;
1266 struct nl_handler nlh;
1267 struct nlmsg *answer = NULL, *nlmsg = NULL;
1268
1269 if (!name)
1270 return -EINVAL;
1271
1272 err = netlink_open(&nlh, NETLINK_ROUTE);
1273 if (err)
1274 return err;
1275
1276 err = -EINVAL;
1277 len = strlen(name);
1278 if (len == 1 || len >= IFNAMSIZ)
1279 goto out;
1280
1281 err = -ENOMEM;
1282 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1283 if (!nlmsg)
1284 goto out;
1285
1286 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1287 if (!answer)
1288 goto out;
1289
1290 err = -EINVAL;
1291 index = if_nametoindex(name);
1292 if (!index)
1293 goto out;
1294
1295 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
1296 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1297
1298 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1299 if (!ifi) {
1300 err = -ENOMEM;
1301 goto out;
1302 }
1303 ifi->ifi_family = AF_UNSPEC;
1304 ifi->ifi_index = index;
1305
1306 err = netlink_transaction(&nlh, nlmsg, answer);
1307 if (err)
1308 goto out;
1309
1310 ifi = NLMSG_DATA(answer->nlmsghdr);
1311
1312 *flag = ifi->ifi_flags;
1313 out:
1314 netlink_close(&nlh);
1315 nlmsg_free(nlmsg);
1316 nlmsg_free(answer);
1317 return err;
1318 }
1319
1320 /*
1321 * \brief Check a interface is up or not.
1322 *
1323 * \param name: name for the interface.
1324 *
1325 * \return int.
1326 * 0 means interface is down.
1327 * 1 means interface is up.
1328 * Others means error happened, and ret-value is the error number.
1329 */
1330 int lxc_netdev_isup(const char *name)
1331 {
1332 int err, flag;
1333
1334 err = netdev_get_flag(name, &flag);
1335 if (err)
1336 return err;
1337
1338 if (flag & IFF_UP)
1339 return 1;
1340
1341 return 0;
1342 }
1343
1344 int netdev_get_mtu(int ifindex)
1345 {
1346 int answer_len, err, res;
1347 struct nl_handler nlh;
1348 struct ifinfomsg *ifi;
1349 struct nlmsghdr *msg;
1350 int readmore = 0, recv_len = 0;
1351 struct nlmsg *answer = NULL, *nlmsg = NULL;
1352
1353 err = netlink_open(&nlh, NETLINK_ROUTE);
1354 if (err)
1355 return err;
1356
1357 err = -ENOMEM;
1358 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1359 if (!nlmsg)
1360 goto out;
1361
1362 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1363 if (!answer)
1364 goto out;
1365
1366 /* Save the answer buffer length, since it will be overwritten
1367 * on the first receive (and we might need to receive more than
1368 * once.
1369 */
1370 answer_len = answer->nlmsghdr->nlmsg_len;
1371
1372 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
1373 nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
1374
1375 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1376 if (!ifi)
1377 goto out;
1378 ifi->ifi_family = AF_UNSPEC;
1379
1380 /* Send the request for addresses, which returns all addresses
1381 * on all interfaces. */
1382 err = netlink_send(&nlh, nlmsg);
1383 if (err < 0)
1384 goto out;
1385
1386 #pragma GCC diagnostic push
1387 #pragma GCC diagnostic ignored "-Wcast-align"
1388
1389 do {
1390 /* Restore the answer buffer length, it might have been
1391 * overwritten by a previous receive.
1392 */
1393 answer->nlmsghdr->nlmsg_len = answer_len;
1394
1395 /* Get the (next) batch of reply messages */
1396 err = netlink_rcv(&nlh, answer);
1397 if (err < 0)
1398 goto out;
1399
1400 recv_len = err;
1401
1402 /* Satisfy the typing for the netlink macros */
1403 msg = answer->nlmsghdr;
1404
1405 while (NLMSG_OK(msg, recv_len)) {
1406
1407 /* Stop reading if we see an error message */
1408 if (msg->nlmsg_type == NLMSG_ERROR) {
1409 struct nlmsgerr *errmsg =
1410 (struct nlmsgerr *)NLMSG_DATA(msg);
1411 err = errmsg->error;
1412 goto out;
1413 }
1414
1415 /* Stop reading if we see a NLMSG_DONE message */
1416 if (msg->nlmsg_type == NLMSG_DONE) {
1417 readmore = 0;
1418 break;
1419 }
1420
1421 ifi = NLMSG_DATA(msg);
1422 if (ifi->ifi_index == ifindex) {
1423 struct rtattr *rta = IFLA_RTA(ifi);
1424 int attr_len =
1425 msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
1426 res = 0;
1427 while (RTA_OK(rta, attr_len)) {
1428 /* Found a local address for the
1429 * requested interface, return it.
1430 */
1431 if (rta->rta_type == IFLA_MTU) {
1432 memcpy(&res, RTA_DATA(rta),
1433 sizeof(int));
1434 err = res;
1435 goto out;
1436 }
1437 rta = RTA_NEXT(rta, attr_len);
1438 }
1439 }
1440
1441 /* Keep reading more data from the socket if the last
1442 * message had the NLF_F_MULTI flag set.
1443 */
1444 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
1445
1446 /* Look at the next message received in this buffer. */
1447 msg = NLMSG_NEXT(msg, recv_len);
1448 }
1449 } while (readmore);
1450
1451 #pragma GCC diagnostic pop
1452
1453 /* If we end up here, we didn't find any result, so signal an error. */
1454 err = -1;
1455
1456 out:
1457 netlink_close(&nlh);
1458 nlmsg_free(answer);
1459 nlmsg_free(nlmsg);
1460 return err;
1461 }
1462
1463 int lxc_netdev_set_mtu(const char *name, int mtu)
1464 {
1465 int err, index, len;
1466 struct ifinfomsg *ifi;
1467 struct nl_handler nlh;
1468 struct nlmsg *answer = NULL, *nlmsg = NULL;
1469
1470 err = netlink_open(&nlh, NETLINK_ROUTE);
1471 if (err)
1472 return err;
1473
1474 err = -EINVAL;
1475 len = strlen(name);
1476 if (len == 1 || len >= IFNAMSIZ)
1477 goto out;
1478
1479 err = -ENOMEM;
1480 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1481 if (!nlmsg)
1482 goto out;
1483
1484 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1485 if (!answer)
1486 goto out;
1487
1488 err = -EINVAL;
1489 index = if_nametoindex(name);
1490 if (!index)
1491 goto out;
1492
1493 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
1494 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1495
1496 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1497 if (!ifi) {
1498 err = -ENOMEM;
1499 goto out;
1500 }
1501 ifi->ifi_family = AF_UNSPEC;
1502 ifi->ifi_index = index;
1503
1504 if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
1505 goto out;
1506
1507 err = netlink_transaction(&nlh, nlmsg, answer);
1508 out:
1509 netlink_close(&nlh);
1510 nlmsg_free(nlmsg);
1511 nlmsg_free(answer);
1512 return err;
1513 }
1514
1515 int lxc_netdev_up(const char *name)
1516 {
1517 return netdev_set_flag(name, IFF_UP);
1518 }
1519
/* Call netdev_set_flag() with an empty flag value for the interface called
 * name; returns its result.
 */
int lxc_netdev_down(const char *name)
{
	int ret;

	ret = netdev_set_flag(name, 0);
	return ret;
}
1524
1525 int lxc_veth_create(const char *name1, const char *name2)
1526 {
1527 int err, len;
1528 struct ifinfomsg *ifi;
1529 struct nl_handler nlh;
1530 struct rtattr *nest1, *nest2, *nest3;
1531 struct nlmsg *answer = NULL, *nlmsg = NULL;
1532
1533 err = netlink_open(&nlh, NETLINK_ROUTE);
1534 if (err)
1535 return err;
1536
1537 err = -EINVAL;
1538 len = strlen(name1);
1539 if (len == 1 || len >= IFNAMSIZ)
1540 goto out;
1541
1542 len = strlen(name2);
1543 if (len == 1 || len >= IFNAMSIZ)
1544 goto out;
1545
1546 err = -ENOMEM;
1547 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1548 if (!nlmsg)
1549 goto out;
1550
1551 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1552 if (!answer)
1553 goto out;
1554
1555 nlmsg->nlmsghdr->nlmsg_flags =
1556 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1557 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1558
1559 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1560 if (!ifi)
1561 goto out;
1562 ifi->ifi_family = AF_UNSPEC;
1563
1564 err = -EINVAL;
1565 nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1566 if (!nest1)
1567 goto out;
1568
1569 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
1570 goto out;
1571
1572 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1573 if (!nest2)
1574 goto out;
1575
1576 nest3 = nla_begin_nested(nlmsg, VETH_INFO_PEER);
1577 if (!nest3)
1578 goto out;
1579
1580 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1581 if (!ifi) {
1582 err = -ENOMEM;
1583 goto out;
1584 }
1585
1586 if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
1587 goto out;
1588
1589 nla_end_nested(nlmsg, nest3);
1590 nla_end_nested(nlmsg, nest2);
1591 nla_end_nested(nlmsg, nest1);
1592
1593 if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
1594 goto out;
1595
1596 err = netlink_transaction(&nlh, nlmsg, answer);
1597 out:
1598 netlink_close(&nlh);
1599 nlmsg_free(answer);
1600 nlmsg_free(nlmsg);
1601 return err;
1602 }
1603
1604 /* TODO: merge with lxc_macvlan_create */
1605 int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
1606 {
1607 int err, len, lindex;
1608 struct ifinfomsg *ifi;
1609 struct nl_handler nlh;
1610 struct rtattr *nest, *nest2;
1611 struct nlmsg *answer = NULL, *nlmsg = NULL;
1612
1613 err = netlink_open(&nlh, NETLINK_ROUTE);
1614 if (err)
1615 return err;
1616
1617 err = -EINVAL;
1618 len = strlen(master);
1619 if (len == 1 || len >= IFNAMSIZ)
1620 goto err3;
1621
1622 len = strlen(name);
1623 if (len == 1 || len >= IFNAMSIZ)
1624 goto err3;
1625
1626 err = -ENOMEM;
1627 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1628 if (!nlmsg)
1629 goto err3;
1630
1631 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1632 if (!answer)
1633 goto err2;
1634
1635 err = -EINVAL;
1636 lindex = if_nametoindex(master);
1637 if (!lindex)
1638 goto err1;
1639
1640 nlmsg->nlmsghdr->nlmsg_flags =
1641 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1642 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1643
1644 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1645 if (!ifi) {
1646 err = -ENOMEM;
1647 goto err1;
1648 }
1649 ifi->ifi_family = AF_UNSPEC;
1650
1651 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1652 if (!nest)
1653 goto err1;
1654
1655 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "vlan"))
1656 goto err1;
1657
1658 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1659 if (!nest2)
1660 goto err1;
1661
1662 if (nla_put_u16(nlmsg, IFLA_VLAN_ID, vlanid))
1663 goto err1;
1664
1665 nla_end_nested(nlmsg, nest2);
1666 nla_end_nested(nlmsg, nest);
1667
1668 if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
1669 goto err1;
1670
1671 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1672 goto err1;
1673
1674 err = netlink_transaction(&nlh, nlmsg, answer);
1675 err1:
1676 nlmsg_free(answer);
1677 err2:
1678 nlmsg_free(nlmsg);
1679 err3:
1680 netlink_close(&nlh);
1681 return err;
1682 }
1683
1684 int lxc_macvlan_create(const char *master, const char *name, int mode)
1685 {
1686 int err, index, len;
1687 struct ifinfomsg *ifi;
1688 struct nl_handler nlh;
1689 struct rtattr *nest, *nest2;
1690 struct nlmsg *answer = NULL, *nlmsg = NULL;
1691
1692 err = netlink_open(&nlh, NETLINK_ROUTE);
1693 if (err)
1694 return err;
1695
1696 err = -EINVAL;
1697 len = strlen(master);
1698 if (len == 1 || len >= IFNAMSIZ)
1699 goto out;
1700
1701 len = strlen(name);
1702 if (len == 1 || len >= IFNAMSIZ)
1703 goto out;
1704
1705 err = -ENOMEM;
1706 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1707 if (!nlmsg)
1708 goto out;
1709
1710 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1711 if (!answer)
1712 goto out;
1713
1714 err = -EINVAL;
1715 index = if_nametoindex(master);
1716 if (!index)
1717 goto out;
1718
1719 nlmsg->nlmsghdr->nlmsg_flags =
1720 NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
1721 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
1722
1723 ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
1724 if (!ifi) {
1725 err = -ENOMEM;
1726 goto out;
1727 }
1728 ifi->ifi_family = AF_UNSPEC;
1729
1730 nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
1731 if (!nest)
1732 goto out;
1733
1734 if (nla_put_string(nlmsg, IFLA_INFO_KIND, "macvlan"))
1735 goto out;
1736
1737 if (mode) {
1738 nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
1739 if (!nest2)
1740 goto out;
1741
1742 if (nla_put_u32(nlmsg, IFLA_MACVLAN_MODE, mode))
1743 goto out;
1744
1745 nla_end_nested(nlmsg, nest2);
1746 }
1747
1748 nla_end_nested(nlmsg, nest);
1749
1750 if (nla_put_u32(nlmsg, IFLA_LINK, index))
1751 goto out;
1752
1753 if (nla_put_string(nlmsg, IFLA_IFNAME, name))
1754 goto out;
1755
1756 err = netlink_transaction(&nlh, nlmsg, answer);
1757 out:
1758 netlink_close(&nlh);
1759 nlmsg_free(answer);
1760 nlmsg_free(nlmsg);
1761 return err;
1762 }
1763
/* Write the string value to the file at path.
 *
 * Returns 0 on success, or -errno if the file cannot be opened for writing
 * or lxc_write_nointr() reports a failure.
 */
static int proc_sys_net_write(const char *path, const char *value)
{
	int ret = 0;
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -errno;

	/* errno is sampled before close() gets a chance to change it. */
	if (lxc_write_nointr(fd, value, strlen(value)) < 0)
		ret = -errno;

	close(fd);
	return ret;
}
1779
1780 static int lxc_is_ip_forwarding_enabled(const char *ifname, int family)
1781 {
1782 int ret;
1783 char path[PATH_MAX];
1784 char buf[1] = "";
1785
1786 if (family != AF_INET && family != AF_INET6)
1787 return minus_one_set_errno(EINVAL);
1788
1789 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1790 family == AF_INET ? "ipv4" : "ipv6", ifname,
1791 "forwarding");
1792 if (ret < 0 || (size_t)ret >= PATH_MAX)
1793 return minus_one_set_errno(E2BIG);
1794
1795 return lxc_read_file_expect(path, buf, 1, "1");
1796 }
1797
1798 static int neigh_proxy_set(const char *ifname, int family, int flag)
1799 {
1800 int ret;
1801 char path[PATH_MAX];
1802
1803 if (family != AF_INET && family != AF_INET6)
1804 return -EINVAL;
1805
1806 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1807 family == AF_INET ? "ipv4" : "ipv6", ifname,
1808 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1809 if (ret < 0 || (size_t)ret >= PATH_MAX)
1810 return -E2BIG;
1811
1812 return proc_sys_net_write(path, flag ? "1" : "0");
1813 }
1814
1815 static int lxc_is_ip_neigh_proxy_enabled(const char *ifname, int family)
1816 {
1817 int ret;
1818 char path[PATH_MAX];
1819 char buf[1] = "";
1820
1821 if (family != AF_INET && family != AF_INET6)
1822 return minus_one_set_errno(EINVAL);
1823
1824 ret = snprintf(path, PATH_MAX, "/proc/sys/net/%s/conf/%s/%s",
1825 family == AF_INET ? "ipv4" : "ipv6", ifname,
1826 family == AF_INET ? "proxy_arp" : "proxy_ndp");
1827 if (ret < 0 || (size_t)ret >= PATH_MAX)
1828 return minus_one_set_errno(E2BIG);
1829
1830 return lxc_read_file_expect(path, buf, 1, "1");
1831 }
1832
/* Enable neighbour proxying on the interface called name for the given
 * address family; returns the result of neigh_proxy_set().
 */
int lxc_neigh_proxy_on(const char *name, int family)
{
	const int enable = 1;

	return neigh_proxy_set(name, family, enable);
}
1837
/* Disable neighbour proxying on the interface called name for the given
 * address family; returns the result of neigh_proxy_set().
 */
int lxc_neigh_proxy_off(const char *name, int family)
{
	const int enable = 0;

	return neigh_proxy_set(name, family, enable);
}
1842
/* Return the value (0-15) of the hexadecimal digit c, or -1 if c is not a
 * hexadecimal digit.
 */
static int mac_hex_value(char c)
{
	/* <ctype.h> functions need a value representable as unsigned char;
	 * passing a negative plain char is undefined behaviour.
	 */
	if (isdigit((unsigned char)c))
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/* Parse a textual MAC address into sockaddr.
 *
 * macaddr:  octets written as one or two hexadecimal digits (upper or lower
 *           case), optionally separated by ':'.
 * sockaddr: sa_family is set to ARPHRD_ETHER and up to ETH_ALEN parsed
 *           octets are written to the start of sa_data.
 *
 * Parsing stops at the end of the string or after ETH_ALEN octets,
 * whichever comes first; fewer than ETH_ALEN octets is not treated as an
 * error.
 *
 * Returns 0 on success and -EINVAL when a character that is neither a
 * hexadecimal digit nor a valid separator is encountered.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i = 0;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while (*macaddr != '\0' && i < ETH_ALEN) {
		unsigned int val;
		int hi, lo;
		char c;

		/* The first digit of an octet is mandatory. */
		hi = mac_hex_value(*macaddr++);
		if (hi < 0)
			return -EINVAL;

		/* The second digit is optional: "a:" and a trailing "a"
		 * both describe the octet 0x0a.
		 */
		c = *macaddr;
		lo = mac_hex_value(c);
		if (lo >= 0)
			val = ((unsigned int)hi << 4) | (unsigned int)lo;
		else if (c == ':' || c == '\0')
			val = (unsigned int)hi;
		else
			return -EINVAL;

		/* Consume the second digit or the separator just examined. */
		if (c != '\0')
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		/* Skip the separator that follows a two-digit octet. */
		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1887
1888 static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
1889 void *acast, int prefix)
1890 {
1891 int addrlen, err;
1892 struct ifaddrmsg *ifa;
1893 struct nl_handler nlh;
1894 struct nlmsg *answer = NULL, *nlmsg = NULL;
1895
1896 addrlen = family == AF_INET ? sizeof(struct in_addr)
1897 : sizeof(struct in6_addr);
1898
1899 err = netlink_open(&nlh, NETLINK_ROUTE);
1900 if (err)
1901 return err;
1902
1903 err = -ENOMEM;
1904 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
1905 if (!nlmsg)
1906 goto out;
1907
1908 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
1909 if (!answer)
1910 goto out;
1911
1912 nlmsg->nlmsghdr->nlmsg_flags =
1913 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
1914 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
1915
1916 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
1917 if (!ifa)
1918 goto out;
1919 ifa->ifa_prefixlen = prefix;
1920 ifa->ifa_index = ifindex;
1921 ifa->ifa_family = family;
1922 ifa->ifa_scope = 0;
1923
1924 err = -EINVAL;
1925 if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
1926 goto out;
1927
1928 if (nla_put_buffer(nlmsg, IFA_ADDRESS, addr, addrlen))
1929 goto out;
1930
1931 if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
1932 goto out;
1933
1934 /* TODO: multicast, anycast with ipv6 */
1935 err = -EPROTONOSUPPORT;
1936 if (family == AF_INET6 &&
1937 (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
1938 memcmp(acast, &in6addr_any, sizeof(in6addr_any))))
1939 goto out;
1940
1941 err = netlink_transaction(&nlh, nlmsg, answer);
1942 out:
1943 netlink_close(&nlh);
1944 nlmsg_free(answer);
1945 nlmsg_free(nlmsg);
1946 return err;
1947 }
1948
/* Add the IPv6 address addr/prefix to the interface with index ifindex.
 * mcast and acast are forwarded to ip_addr_add() as its bcast and acast
 * arguments; returns the result of ip_addr_add().
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	int ret;

	ret = ip_addr_add(AF_INET6, ifindex, addr, mcast, acast, prefix);
	return ret;
}
1955
/* Add the IPv4 address addr/prefix with broadcast address bcast to the
 * interface with index ifindex; no anycast address is passed on. Returns
 * the result of ip_addr_add().
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	int ret;

	ret = ip_addr_add(AF_INET, ifindex, addr, bcast, NULL, prefix);
	return ret;
}
1961
/* Extract an address from the given RTM_NEWADDR message: IFA_LOCAL if the
 * message has one, otherwise the last IFA_ADDRESS seen. The address is
 * copied into *res, which is allocated here when it is still NULL (so res
 * should be an in_addr** or in6_addr**).
 *
 * Returns 0 when the message was processed (also when it belongs to a
 * different family or holds no address, in which case *res is untouched)
 * and -1 when an address attribute has an unexpected size or the
 * allocation fails.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	struct rtattr *attr;
	int want;

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		want = sizeof(struct in_addr);
	else
		want = sizeof(struct in6_addr);

	/* Walk all rtattr's attached to this message. */
	for (attr = IFA_RTA(ifa); RTA_OK(attr, remaining);
	     attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL && attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * payload size; verify it anyway before copying.
		 */
		if (RTA_PAYLOAD(attr) != want)
			return -1;

		/* An earlier IFA_ADDRESS may already have filled *res; it
		 * is simply overwritten.
		 */
		if (*res == NULL) {
			*res = malloc(want);
			if (*res == NULL)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), want);

		/* IFA_LOCAL wins, nothing further to look for. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2015
2016 static int ip_addr_get(int family, int ifindex, void **res)
2017 {
2018 int answer_len, err;
2019 struct ifaddrmsg *ifa;
2020 struct nl_handler nlh;
2021 struct nlmsghdr *msg;
2022 int readmore = 0, recv_len = 0;
2023 struct nlmsg *answer = NULL, *nlmsg = NULL;
2024
2025 err = netlink_open(&nlh, NETLINK_ROUTE);
2026 if (err)
2027 return err;
2028
2029 err = -ENOMEM;
2030 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2031 if (!nlmsg)
2032 goto out;
2033
2034 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2035 if (!answer)
2036 goto out;
2037
2038 /* Save the answer buffer length, since it will be overwritten on the
2039 * first receive (and we might need to receive more than once).
2040 */
2041 answer_len = answer->nlmsghdr->nlmsg_len;
2042
2043 nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
2044 nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
2045
2046 ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
2047 if (!ifa)
2048 goto out;
2049 ifa->ifa_family = family;
2050
2051 /* Send the request for addresses, which returns all addresses on all
2052 * interfaces.
2053 */
2054 err = netlink_send(&nlh, nlmsg);
2055 if (err < 0)
2056 goto out;
2057
2058 #pragma GCC diagnostic push
2059 #pragma GCC diagnostic ignored "-Wcast-align"
2060
2061 do {
2062 /* Restore the answer buffer length, it might have been
2063 * overwritten by a previous receive.
2064 */
2065 answer->nlmsghdr->nlmsg_len = answer_len;
2066
2067 /* Get the (next) batch of reply messages. */
2068 err = netlink_rcv(&nlh, answer);
2069 if (err < 0)
2070 goto out;
2071
2072 recv_len = err;
2073 err = 0;
2074
2075 /* Satisfy the typing for the netlink macros. */
2076 msg = answer->nlmsghdr;
2077
2078 while (NLMSG_OK(msg, recv_len)) {
2079 /* Stop reading if we see an error message. */
2080 if (msg->nlmsg_type == NLMSG_ERROR) {
2081 struct nlmsgerr *errmsg =
2082 (struct nlmsgerr *)NLMSG_DATA(msg);
2083 err = errmsg->error;
2084 goto out;
2085 }
2086
2087 /* Stop reading if we see a NLMSG_DONE message. */
2088 if (msg->nlmsg_type == NLMSG_DONE) {
2089 readmore = 0;
2090 break;
2091 }
2092
2093 if (msg->nlmsg_type != RTM_NEWADDR) {
2094 err = -1;
2095 goto out;
2096 }
2097
2098 ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
2099 if (ifa->ifa_index == ifindex) {
2100 if (ifa_get_local_ip(family, msg, res) < 0) {
2101 err = -1;
2102 goto out;
2103 }
2104
2105 /* Found a result, stop searching. */
2106 if (*res)
2107 goto out;
2108 }
2109
2110 /* Keep reading more data from the socket if the last
2111 * message had the NLF_F_MULTI flag set.
2112 */
2113 readmore = (msg->nlmsg_flags & NLM_F_MULTI);
2114
2115 /* Look at the next message received in this buffer. */
2116 msg = NLMSG_NEXT(msg, recv_len);
2117 }
2118 } while (readmore);
2119
2120 #pragma GCC diagnostic pop
2121
2122 /* If we end up here, we didn't find any result, so signal an
2123 * error.
2124 */
2125 err = -1;
2126
2127 out:
2128 netlink_close(&nlh);
2129 nlmsg_free(answer);
2130 nlmsg_free(nlmsg);
2131 return err;
2132 }
2133
/* Fetch an IPv6 address of the interface with index ifindex into *res;
 * returns the result of ip_addr_get().
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, out);
}
2138
/* Fetch an IPv4 address of the interface with index ifindex into *res;
 * returns the result of ip_addr_get().
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **out = (void **)res;

	return ip_addr_get(AF_INET, ifindex, out);
}
2143
2144 static int ip_gateway_add(int family, int ifindex, void *gw)
2145 {
2146 int addrlen, err;
2147 struct nl_handler nlh;
2148 struct rtmsg *rt;
2149 struct nlmsg *answer = NULL, *nlmsg = NULL;
2150
2151 addrlen = family == AF_INET ? sizeof(struct in_addr)
2152 : sizeof(struct in6_addr);
2153
2154 err = netlink_open(&nlh, NETLINK_ROUTE);
2155 if (err)
2156 return err;
2157
2158 err = -ENOMEM;
2159 nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
2160 if (!nlmsg)
2161 goto out;
2162
2163 answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
2164 if (!answer)
2165 goto out;
2166
2167 nlmsg->nlmsghdr->nlmsg_flags =
2168 NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
2169 nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
2170
2171 rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
2172 if (!rt)
2173 goto out;
2174 rt->rtm_family = family;
2175 rt->rtm_table = RT_TABLE_MAIN;
2176 rt->rtm_scope = RT_SCOPE_UNIVERSE;
2177 rt->rtm_protocol = RTPROT_BOOT;
2178 rt->rtm_type = RTN_UNICAST;
2179 /* "default" destination */
2180 rt->rtm_dst_len = 0;
2181
2182 err = -EINVAL;
2183
2184 /* If gateway address not supplied, then a device route will be created instead */
2185 if (gw != NULL) {
2186 if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
2187 goto out;
2188 }
2189
2190 /* Adding the interface index enables the use of link-local
2191 * addresses for the gateway.
2192 */
2193 if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
2194 goto out;
2195
2196 err = netlink_transaction(&nlh, nlmsg, answer);
2197 out:
2198 netlink_close(&nlh);
2199 nlmsg_free(answer);
2200 nlmsg_free(nlmsg);
2201 return err;
2202 }
2203
/* Add an IPv4 default route via gw on the interface with index ifindex;
 * returns the result of ip_gateway_add().
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	int ret;

	ret = ip_gateway_add(AF_INET, ifindex, gw);
	return ret;
}
2208
/* Add an IPv6 default route via gw on the interface with index ifindex;
 * returns the result of ip_gateway_add().
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	int ret;

	ret = ip_gateway_add(AF_INET6, ifindex, gw);
	return ret;
}
/* Decide whether bridge should be handled as an openvswitch bridge.
 *
 * The answer is true exactly when stat() on /sys/class/net/<bridge>/bridge
 * fails with ENOENT. It is false when the path exists, when stat() fails
 * for another reason, or when the path does not fit into the buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	char brdirname[22 + IFNAMSIZ + 1] = {0};
	struct stat sb;
	int len;

	len = snprintf(brdirname, sizeof(brdirname), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(brdirname))
		return false;

	if (stat(brdirname, &sb) >= 0)
		return false;

	return errno == ENOENT;
}
2230
/* Arguments handed to the ovs-vsctl exec callbacks. */
struct ovs_veth_args {
	/* Bridge name, passed to ovs-vsctl as the bridge argument. */
	const char *bridge;
	/* Interface name, passed to ovs-vsctl as the port argument. */
	const char *nic;
};
2235
2236 /* Called from a background thread - when nic goes away, remove it from the
2237 * bridge.
2238 */
2239 static int lxc_ovs_delete_port_exec(void *data)
2240 {
2241 struct ovs_veth_args *args = data;
2242
2243 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
2244 (char *)NULL);
2245 return -1;
2246 }
2247
2248 int lxc_ovs_delete_port(const char *bridge, const char *nic)
2249 {
2250 int ret;
2251 char cmd_output[PATH_MAX];
2252 struct ovs_veth_args args;
2253
2254 args.bridge = bridge;
2255 args.nic = nic;
2256 ret = run_command(cmd_output, sizeof(cmd_output),
2257 lxc_ovs_delete_port_exec, (void *)&args);
2258 if (ret < 0) {
2259 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2260 "%s", bridge, nic, cmd_output);
2261 return -1;
2262 }
2263
2264 return 0;
2265 }
2266
2267 static int lxc_ovs_attach_bridge_exec(void *data)
2268 {
2269 struct ovs_veth_args *args = data;
2270
2271 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
2272 (char *)NULL);
2273 return -1;
2274 }
2275
2276 static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
2277 {
2278 int ret;
2279 char cmd_output[PATH_MAX];
2280 struct ovs_veth_args args;
2281
2282 args.bridge = bridge;
2283 args.nic = nic;
2284 ret = run_command(cmd_output, sizeof(cmd_output),
2285 lxc_ovs_attach_bridge_exec, (void *)&args);
2286 if (ret < 0) {
2287 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2288 bridge, nic, cmd_output);
2289 return -1;
2290 }
2291
2292 return 0;
2293 }
2294
2295 int lxc_bridge_attach(const char *bridge, const char *ifname)
2296 {
2297 int err, fd, index;
2298 size_t retlen;
2299 struct ifreq ifr;
2300
2301 if (strlen(ifname) >= IFNAMSIZ)
2302 return -EINVAL;
2303
2304 index = if_nametoindex(ifname);
2305 if (!index)
2306 return -EINVAL;
2307
2308 if (is_ovs_bridge(bridge))
2309 return lxc_ovs_attach_bridge(bridge, ifname);
2310
2311 fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
2312 if (fd < 0)
2313 return -errno;
2314
2315 retlen = strlcpy(ifr.ifr_name, bridge, IFNAMSIZ);
2316 if (retlen >= IFNAMSIZ) {
2317 close(fd);
2318 return -E2BIG;
2319 }
2320
2321 ifr.ifr_name[IFNAMSIZ - 1] = '\0';
2322 ifr.ifr_ifindex = index;
2323 err = ioctl(fd, SIOCBRADDIF, &ifr);
2324 close(fd);
2325 if (err)
2326 err = -errno;
2327
2328 return err;
2329 }
2330
2331 static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
2332 [LXC_NET_EMPTY] = "empty",
2333 [LXC_NET_VETH] = "veth",
2334 [LXC_NET_MACVLAN] = "macvlan",
2335 [LXC_NET_IPVLAN] = "ipvlan",
2336 [LXC_NET_PHYS] = "phys",
2337 [LXC_NET_VLAN] = "vlan",
2338 [LXC_NET_NONE] = "none",
2339 };
2340
2341 const char *lxc_net_type_to_str(int type)
2342 {
2343 if (type < 0 || type > LXC_NET_MAXCONFTYPE)
2344 return NULL;
2345
2346 return lxc_network_types[type];
2347 }
2348
2349 static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2350
2351 char *lxc_mkifname(char *template)
2352 {
2353 int ret;
2354 struct netns_ifaddrs *ifa, *ifaddr;
2355 char name[IFNAMSIZ];
2356 bool exists = false;
2357 size_t i = 0;
2358 #ifdef HAVE_RAND_R
2359 unsigned int seed;
2360
2361 seed = randseed(false);
2362 #else
2363
2364 (void)randseed(true);
2365 #endif
2366
2367 if (strlen(template) >= IFNAMSIZ)
2368 return NULL;
2369
2370 /* Get all the network interfaces. */
2371 ret = netns_getifaddrs(&ifaddr, -1, &(bool){false});
2372 if (ret < 0) {
2373 SYSERROR("Failed to get network interfaces");
2374 return NULL;
2375 }
2376
2377 /* Generate random names until we find one that doesn't exist. */
2378 for (;;) {
2379 name[0] = '\0';
2380 (void)strlcpy(name, template, IFNAMSIZ);
2381
2382 exists = false;
2383
2384 for (i = 0; i < strlen(name); i++) {
2385 if (name[i] == 'X') {
2386 #ifdef HAVE_RAND_R
2387 name[i] = padchar[rand_r(&seed) % strlen(padchar)];
2388 #else
2389 name[i] = padchar[rand() % strlen(padchar)];
2390 #endif
2391 }
2392 }
2393
2394 for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
2395 if (!strcmp(ifa->ifa_name, name)) {
2396 exists = true;
2397 break;
2398 }
2399 }
2400
2401 if (!exists)
2402 break;
2403 }
2404
2405 netns_freeifaddrs(ifaddr);
2406 (void)strlcpy(template, name, strlen(template) + 1);
2407
2408 return template;
2409 }
2410
/* Rewrite the first byte of the hardware address of the interface veth1 to
 * 0xfe.
 *
 * The current address is read with SIOCGIFHWADDR, its first byte is
 * replaced and the result is written back with SIOCSIFHWADDR.
 *
 * Returns 0 on success, -E2BIG if veth1 does not fit into IFNAMSIZ, or
 * -errno if the socket cannot be created or one of the ioctls fails.
 */
int setup_private_host_hw_addr(char *veth1)
{
	int err, saved_errno, sockfd;
	struct ifreq ifr;

	/* Validate the name before a socket exists, so the failure path has
	 * nothing to clean up.
	 */
	err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
	if (err < 0 || (size_t)err >= IFNAMSIZ)
		return -E2BIG;

	sockfd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
	if (sockfd < 0)
		return -errno;

	err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
	if (err >= 0) {
		ifr.ifr_hwaddr.sa_data[0] = 0xfe;
		err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
	}

	/* Keep the ioctl error: close() may overwrite errno. */
	saved_errno = errno;
	close(sockfd);
	if (err < 0)
		return -saved_errno;

	return 0;
}
2440
2441 int lxc_find_gateway_addresses(struct lxc_handler *handler)
2442 {
2443 struct lxc_list *network = &handler->conf->network;
2444 struct lxc_list *iterator;
2445 struct lxc_netdev *netdev;
2446 int link_index;
2447
2448 lxc_list_for_each(iterator, network) {
2449 netdev = iterator->elem;
2450
2451 if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
2452 continue;
2453
2454 if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
2455 ERROR("Automatic gateway detection is only supported "
2456 "for veth and macvlan");
2457 return -1;
2458 }
2459
2460 if (netdev->link[0] == '\0') {
2461 ERROR("Automatic gateway detection needs a link interface");
2462 return -1;
2463 }
2464
2465 link_index = if_nametoindex(netdev->link);
2466 if (!link_index)
2467 return -EINVAL;
2468
2469 if (netdev->ipv4_gateway_auto) {
2470 if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
2471 ERROR("Failed to automatically find ipv4 gateway "
2472 "address from link interface \"%s\"", netdev->link);
2473 return -1;
2474 }
2475 }
2476
2477 if (netdev->ipv6_gateway_auto) {
2478 if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
2479 ERROR("Failed to automatically find ipv6 gateway "
2480 "address from link interface \"%s\"", netdev->link);
2481 return -1;
2482 }
2483 }
2484 }
2485
2486 return 0;
2487 }
2488
2489 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2490 static int lxc_create_network_unpriv_exec(const char *lxcpath, const char *lxcname,
2491 struct lxc_netdev *netdev, pid_t pid, unsigned int hooks_version)
2492 {
2493 int ret;
2494 pid_t child;
2495 int bytes, pipefd[2];
2496 char *token, *saveptr = NULL;
2497 char netdev_link[IFNAMSIZ];
2498 char buffer[PATH_MAX] = {0};
2499 size_t retlen;
2500
2501 if (netdev->type != LXC_NET_VETH) {
2502 ERROR("Network type %d not support for unprivileged use", netdev->type);
2503 return -1;
2504 }
2505
2506 ret = pipe(pipefd);
2507 if (ret < 0) {
2508 SYSERROR("Failed to create pipe");
2509 return -1;
2510 }
2511
2512 child = fork();
2513 if (child < 0) {
2514 SYSERROR("Failed to create new process");
2515 close(pipefd[0]);
2516 close(pipefd[1]);
2517 return -1;
2518 }
2519
2520 if (child == 0) {
2521 char pidstr[INTTYPE_TO_STRLEN(pid_t)];
2522
2523 close(pipefd[0]);
2524
2525 ret = dup2(pipefd[1], STDOUT_FILENO);
2526 if (ret >= 0)
2527 ret = dup2(pipefd[1], STDERR_FILENO);
2528 close(pipefd[1]);
2529 if (ret < 0) {
2530 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2531 _exit(EXIT_FAILURE);
2532 }
2533
2534 if (netdev->link[0] != '\0')
2535 retlen = strlcpy(netdev_link, netdev->link, IFNAMSIZ);
2536 else
2537 retlen = strlcpy(netdev_link, "none", IFNAMSIZ);
2538 if (retlen >= IFNAMSIZ) {
2539 SYSERROR("Invalid network device name");
2540 _exit(EXIT_FAILURE);
2541 }
2542
2543 ret = snprintf(pidstr, sizeof(pidstr), "%d", pid);
2544 if (ret < 0 || ret >= sizeof(pidstr))
2545 _exit(EXIT_FAILURE);
2546 pidstr[sizeof(pidstr) - 1] = '\0';
2547
2548 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
2549 lxcname, pidstr, netdev_link,
2550 netdev->name[0] != '\0' ? netdev->name : "(null)");
2551 if (netdev->name[0] != '\0')
2552 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2553 lxcpath, lxcname, pidstr, "veth", netdev_link,
2554 netdev->name, (char *)NULL);
2555 else
2556 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
2557 lxcpath, lxcname, pidstr, "veth", netdev_link,
2558 (char *)NULL);
2559 SYSERROR("Failed to execute lxc-user-nic");
2560 _exit(EXIT_FAILURE);
2561 }
2562
2563 /* close the write-end of the pipe */
2564 close(pipefd[1]);
2565
2566 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
2567 if (bytes < 0) {
2568 SYSERROR("Failed to read from pipe file descriptor");
2569 close(pipefd[0]);
2570 } else {
2571 buffer[bytes - 1] = '\0';
2572 }
2573
2574 ret = wait_for_pid(child);
2575 close(pipefd[0]);
2576 if (ret != 0 || bytes < 0) {
2577 ERROR("lxc-user-nic failed to configure requested network: %s",
2578 buffer[0] != '\0' ? buffer : "(null)");
2579 return -1;
2580 }
2581 TRACE("Received output \"%s\" from lxc-user-nic", buffer);
2582
2583 /* netdev->name */
2584 token = strtok_r(buffer, ":", &saveptr);
2585 if (!token) {
2586 ERROR("Failed to parse lxc-user-nic output");
2587 return -1;
2588 }
2589
2590 /*
2591 * lxc-user-nic will take care of proper network device naming. So
2592 * netdev->name and netdev->created_name need to be identical to not
2593 * trigger another rename later on.
2594 */
2595 retlen = strlcpy(netdev->name, token, IFNAMSIZ);
2596 if (retlen < IFNAMSIZ)
2597 retlen = strlcpy(netdev->created_name, token, IFNAMSIZ);
2598 if (retlen >= IFNAMSIZ) {
2599 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2600 return -E2BIG;
2601 }
2602
2603 /* netdev->ifindex */
2604 token = strtok_r(NULL, ":", &saveptr);
2605 if (!token) {
2606 ERROR("Failed to parse lxc-user-nic output");
2607 return -1;
2608 }
2609
2610 ret = lxc_safe_int(token, &netdev->ifindex);
2611 if (ret < 0) {
2612 errno = -ret;
2613 SYSERROR("Failed to convert string \"%s\" to integer", token);
2614 return -1;
2615 }
2616
2617 /* netdev->priv.veth_attr.veth1 */
2618 token = strtok_r(NULL, ":", &saveptr);
2619 if (!token) {
2620 ERROR("Failed to parse lxc-user-nic output");
2621 return -1;
2622 }
2623
2624 retlen = strlcpy(netdev->priv.veth_attr.veth1, token, IFNAMSIZ);
2625 if (retlen >= IFNAMSIZ) {
2626 ERROR("Host side veth device name returned by lxc-user-nic is "
2627 "too long");
2628 return -E2BIG;
2629 }
2630
2631 /* netdev->priv.veth_attr.ifindex */
2632 token = strtok_r(NULL, ":", &saveptr);
2633 if (!token) {
2634 ERROR("Failed to parse lxc-user-nic output");
2635 return -1;
2636 }
2637
2638 ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
2639 if (ret < 0) {
2640 errno = -ret;
2641 SYSERROR("Failed to convert string \"%s\" to integer", token);
2642 return -1;
2643 }
2644
2645 if (netdev->upscript) {
2646 char *argv[] = {
2647 "veth",
2648 netdev->link,
2649 netdev->priv.veth_attr.veth1,
2650 NULL,
2651 };
2652
2653 ret = run_script_argv(lxcname, hooks_version, "net",
2654 netdev->upscript, "up", argv);
2655 if (ret < 0)
2656 return -1;
2657 }
2658
2659 return 0;
2660 }
2661
2662 static int lxc_delete_network_unpriv_exec(const char *lxcpath, const char *lxcname,
2663 struct lxc_netdev *netdev,
2664 const char *netns_path)
2665 {
2666 int bytes, ret;
2667 pid_t child;
2668 int pipefd[2];
2669 char buffer[PATH_MAX] = {0};
2670
2671 if (netdev->type != LXC_NET_VETH) {
2672 ERROR("Network type %d not support for unprivileged use", netdev->type);
2673 return -1;
2674 }
2675
2676 ret = pipe(pipefd);
2677 if (ret < 0) {
2678 SYSERROR("Failed to create pipe");
2679 return -1;
2680 }
2681
2682 child = fork();
2683 if (child < 0) {
2684 SYSERROR("Failed to create new process");
2685 close(pipefd[0]);
2686 close(pipefd[1]);
2687 return -1;
2688 }
2689
2690 if (child == 0) {
2691 char *hostveth;
2692
2693 close(pipefd[0]);
2694
2695 ret = dup2(pipefd[1], STDOUT_FILENO);
2696 if (ret >= 0)
2697 ret = dup2(pipefd[1], STDERR_FILENO);
2698 close(pipefd[1]);
2699 if (ret < 0) {
2700 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2701 _exit(EXIT_FAILURE);
2702 }
2703
2704 if (netdev->priv.veth_attr.pair[0] != '\0')
2705 hostveth = netdev->priv.veth_attr.pair;
2706 else
2707 hostveth = netdev->priv.veth_attr.veth1;
2708 if (hostveth[0] == '\0') {
2709 SYSERROR("Host side veth device name is missing");
2710 _exit(EXIT_FAILURE);
2711 }
2712
2713 if (netdev->link[0] == '\0') {
2714 SYSERROR("Network link for network device \"%s\" is "
2715 "missing", netdev->priv.veth_attr.veth1);
2716 _exit(EXIT_FAILURE);
2717 }
2718
2719 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
2720 lxcname, netns_path, netdev->link, hostveth);
2721 execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
2722 lxcname, netns_path, "veth", netdev->link, hostveth,
2723 (char *)NULL);
2724 SYSERROR("Failed to exec lxc-user-nic.");
2725 _exit(EXIT_FAILURE);
2726 }
2727
2728 close(pipefd[1]);
2729
2730 bytes = lxc_read_nointr(pipefd[0], &buffer, PATH_MAX);
2731 if (bytes < 0) {
2732 SYSERROR("Failed to read from pipe file descriptor.");
2733 close(pipefd[0]);
2734 } else {
2735 buffer[bytes - 1] = '\0';
2736 }
2737
2738 ret = wait_for_pid(child);
2739 close(pipefd[0]);
2740 if (ret != 0 || bytes < 0) {
2741 ERROR("lxc-user-nic failed to delete requested network: %s",
2742 buffer[0] != '\0' ? buffer : "(null)");
2743 return -1;
2744 }
2745
2746 return 0;
2747 }
2748
2749 bool lxc_delete_network_unpriv(struct lxc_handler *handler)
2750 {
2751 int ret;
2752 struct lxc_list *iterator;
2753 struct lxc_list *network = &handler->conf->network;
2754 /* strlen("/proc/") = 6
2755 * +
2756 * INTTYPE_TO_STRLEN(pid_t)
2757 * +
2758 * strlen("/fd/") = 4
2759 * +
2760 * INTTYPE_TO_STRLEN(int)
2761 * +
2762 * \0
2763 */
2764 char netns_path[6 + INTTYPE_TO_STRLEN(pid_t) + 4 + INTTYPE_TO_STRLEN(int) + 1];
2765
2766 *netns_path = '\0';
2767
2768 if (handler->nsfd[LXC_NS_NET] < 0) {
2769 DEBUG("Cannot not guarantee safe deletion of network devices. "
2770 "Manual cleanup maybe needed");
2771 return false;
2772 }
2773
2774 ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
2775 lxc_raw_getpid(), handler->nsfd[LXC_NS_NET]);
2776 if (ret < 0 || ret >= sizeof(netns_path))
2777 return false;
2778
2779 lxc_list_for_each(iterator, network) {
2780 char *hostveth = NULL;
2781 struct lxc_netdev *netdev = iterator->elem;
2782
2783 /* We can only delete devices whose ifindex we have. If we don't
2784 * have the index it means that we didn't create it.
2785 */
2786 if (!netdev->ifindex)
2787 continue;
2788
2789 if (netdev->type == LXC_NET_PHYS) {
2790 ret = lxc_netdev_rename_by_index(netdev->ifindex,
2791 netdev->link);
2792 if (ret < 0)
2793 WARN("Failed to rename interface with index %d "
2794 "to its initial name \"%s\"",
2795 netdev->ifindex, netdev->link);
2796 else
2797 TRACE("Renamed interface with index %d to its "
2798 "initial name \"%s\"",
2799 netdev->ifindex, netdev->link);
2800
2801 ret = netdev_deconf[netdev->type](handler, netdev);
2802 goto clear_ifindices;
2803 }
2804
2805 ret = netdev_deconf[netdev->type](handler, netdev);
2806 if (ret < 0)
2807 WARN("Failed to deconfigure network device");
2808
2809 if (netdev->type != LXC_NET_VETH)
2810 goto clear_ifindices;
2811
2812 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link))
2813 goto clear_ifindices;
2814
2815 if (netdev->priv.veth_attr.pair[0] != '\0')
2816 hostveth = netdev->priv.veth_attr.pair;
2817 else
2818 hostveth = netdev->priv.veth_attr.veth1;
2819 if (hostveth[0] == '\0')
2820 goto clear_ifindices;
2821
2822 ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
2823 handler->name, netdev,
2824 netns_path);
2825 if (ret < 0) {
2826 WARN("Failed to remove port \"%s\" from openvswitch "
2827 "bridge \"%s\"", hostveth, netdev->link);
2828 goto clear_ifindices;
2829 }
2830 INFO("Removed interface \"%s\" from \"%s\"", hostveth,
2831 netdev->link);
2832
2833 clear_ifindices:
2834 /* We need to clear any ifindices we recorded so liblxc won't
2835 * have cached stale data which would cause it to fail on reboot
2836 * we're we don't re-read the on-disk config file.
2837 */
2838 netdev->ifindex = 0;
2839 if (netdev->type == LXC_NET_PHYS) {
2840 netdev->priv.phys_attr.ifindex = 0;
2841 } else if (netdev->type == LXC_NET_VETH) {
2842 netdev->priv.veth_attr.veth1[0] = '\0';
2843 netdev->priv.veth_attr.ifindex = 0;
2844 }
2845 }
2846
2847 return true;
2848 }
2849
/* Arguments handed to the "ip neigh ... proxy" exec wrappers. */
struct ip_proxy_args {
	/* Address, passed to ip(8) after the "proxy" keyword. */
	const char *ip;
	/* Device name, passed to ip(8) after the "dev" keyword. */
	const char *dev;
};
2854
2855 static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data)
2856 {
2857 struct ip_proxy_args *args = data;
2858
2859 execlp("ip", "ip", "neigh", "add", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2860 return -1;
2861 }
2862
2863 static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data)
2864 {
2865 struct ip_proxy_args *args = data;
2866
2867 execlp("ip", "ip", "neigh", "flush", "proxy", args->ip, "dev", args->dev, (char *)NULL);
2868 return -1;
2869 }
2870
2871 static int lxc_add_ip_neigh_proxy(const char *ip, const char *dev)
2872 {
2873 int ret;
2874 char cmd_output[PATH_MAX];
2875 struct ip_proxy_args args = {
2876 .ip = ip,
2877 .dev = dev,
2878 };
2879
2880 ret = run_command(cmd_output, sizeof(cmd_output), lxc_add_ip_neigh_proxy_exec_wrapper, &args);
2881 if (ret < 0) {
2882 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2883 return -1;
2884 }
2885
2886 return 0;
2887 }
2888
2889 static int lxc_del_ip_neigh_proxy(const char *ip, const char *dev)
2890 {
2891 int ret;
2892 char cmd_output[PATH_MAX];
2893 struct ip_proxy_args args = {
2894 .ip = ip,
2895 .dev = dev,
2896 };
2897
2898 ret = run_command(cmd_output, sizeof(cmd_output), lxc_del_ip_neigh_proxy_exec_wrapper, &args);
2899 if (ret < 0) {
2900 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip, dev, cmd_output);
2901 return -1;
2902 }
2903
2904 return 0;
2905 }
2906
2907 static int lxc_setup_l2proxy(struct lxc_netdev *netdev) {
2908 struct lxc_list *cur, *next;
2909 struct lxc_inetdev *inet4dev;
2910 struct lxc_inet6dev *inet6dev;
2911 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
2912 int err = 0;
2913 unsigned int lo_ifindex = 0;
2914
2915 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2916 if (!lxc_list_empty(&netdev->ipv4)) {
2917 /* Check for net.ipv4.conf.[link].forwarding=1 */
2918 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET) < 0) {
2919 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev->link);
2920 return minus_one_set_errno(EINVAL);
2921 }
2922 }
2923
2924 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2925 if (!lxc_list_empty(&netdev->ipv6)) {
2926 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2927 if (lxc_is_ip_neigh_proxy_enabled(netdev->link, AF_INET6) < 0) {
2928 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev->link);
2929 return minus_one_set_errno(EINVAL);
2930 }
2931
2932 /* Check for net.ipv6.conf.[link].forwarding=1 */
2933 if (lxc_is_ip_forwarding_enabled(netdev->link, AF_INET6) < 0) {
2934 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev->link);
2935 return minus_one_set_errno(EINVAL);
2936 }
2937 }
2938
2939 /* Perform IPVLAN specific checks. */
2940 if (netdev->type == LXC_NET_IPVLAN) {
2941 /* Check mode is l3s as other modes do not work with l2proxy. */
2942 if (netdev->priv.ipvlan_attr.mode != IPVLAN_MODE_L3S) {
2943 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev->link);
2944 return minus_one_set_errno(EINVAL);
2945 }
2946
2947 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
2948 lo_ifindex = if_nametoindex(loop_device);
2949 if (lo_ifindex == 0) {
2950 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
2951 return minus_one_set_errno(EINVAL);
2952 }
2953 }
2954
2955 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
2956 inet4dev = cur->elem;
2957 if (!inet_ntop(AF_INET, &inet4dev->addr, bufinet4, sizeof(bufinet4)))
2958 return minus_one_set_errno(-errno);
2959
2960 if (lxc_add_ip_neigh_proxy(bufinet4, netdev->link) < 0)
2961 return minus_one_set_errno(EINVAL);
2962
2963 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2964 if (netdev->type == LXC_NET_IPVLAN) {
2965 err = lxc_ipv4_dest_add(lo_ifindex, &inet4dev->addr, 32);
2966 if (err < 0) {
2967 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4, loop_device);
2968 return minus_one_set_errno(-err);
2969 }
2970 }
2971 }
2972
2973 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
2974 inet6dev = cur->elem;
2975 if (!inet_ntop(AF_INET6, &inet6dev->addr, bufinet6, sizeof(bufinet6)))
2976 return minus_one_set_errno(-errno);
2977
2978 if (lxc_add_ip_neigh_proxy(bufinet6, netdev->link) < 0)
2979 return minus_one_set_errno(EINVAL);
2980
2981 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2982 if (netdev->type == LXC_NET_IPVLAN) {
2983 err = lxc_ipv6_dest_add(lo_ifindex, &inet6dev->addr, 128);
2984 if (err < 0) {
2985 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6, loop_device);
2986 return minus_one_set_errno(-err);
2987 }
2988 }
2989 }
2990
2991 return 0;
2992 }
2993
/*
 * Remove the l2proxy configuration for a single IPv4 address.
 *
 * If @lo_ifindex is non-zero the /32 route to @ip on that interface is
 * deleted; if @link is non-empty the neighbour proxy entry for @ip on @link is
 * removed. Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success, -1 with errno set to EINVAL if anything failed.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* Drop the static route to the loopback device when an ifindex was given. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		failed = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* Drop the neighbour proxy entry when a link was given. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3022
/*
 * Remove the l2proxy configuration for a single IPv6 address.
 *
 * If @lo_ifindex is non-zero the /128 route to @ip on that interface is
 * deleted; if @link is non-empty the neighbour proxy entry for @ip on @link is
 * removed. Both steps are attempted even if the first one fails.
 *
 * Returns 0 on success, -1 with errno set to EINVAL if anything failed.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char addrstr[INET6_ADDRSTRLEN];
	bool failed = false;

	if (!inet_ntop(AF_INET6, ip, addrstr, sizeof(addrstr))) {
		SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* Drop the static route to the loopback device when an ifindex was given. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		failed = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", addrstr, lo_ifindex);
	}

	/* Drop the neighbour proxy entry when a link was given. */
	if (link[0] != '\0' && lxc_del_ip_neigh_proxy(addrstr, link) < 0)
		failed = true;

	if (failed)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3051
3052 static int lxc_delete_l2proxy(struct lxc_netdev *netdev) {
3053 unsigned int lo_ifindex = 0;
3054 unsigned int errCount = 0;
3055 struct lxc_list *cur, *next;
3056 struct lxc_inetdev *inet4dev;
3057 struct lxc_inet6dev *inet6dev;
3058
3059 /* Perform IPVLAN specific checks. */
3060 if (netdev->type == LXC_NET_IPVLAN) {
3061 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3062 lo_ifindex = if_nametoindex(loop_device);
3063 if (lo_ifindex == 0) {
3064 errCount++;
3065 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device);
3066 }
3067 }
3068
3069 lxc_list_for_each_safe(cur, &netdev->ipv4, next) {
3070 inet4dev = cur->elem;
3071 if (lxc_delete_ipv4_l2proxy(&inet4dev->addr, netdev->link, lo_ifindex) < 0)
3072 errCount++;
3073 }
3074
3075 lxc_list_for_each_safe(cur, &netdev->ipv6, next) {
3076 inet6dev = cur->elem;
3077 if (lxc_delete_ipv6_l2proxy(&inet6dev->addr, netdev->link, lo_ifindex) < 0)
3078 errCount++;
3079 }
3080
3081 if (errCount > 0)
3082 return minus_one_set_errno(EINVAL);
3083
3084 return 0;
3085 }
3086
3087 static int lxc_create_network_priv(struct lxc_handler *handler)
3088 {
3089 struct lxc_list *iterator;
3090 struct lxc_list *network = &handler->conf->network;
3091
3092 lxc_list_for_each(iterator, network) {
3093 struct lxc_netdev *netdev = iterator->elem;
3094
3095 if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
3096 ERROR("Invalid network configuration type %d", netdev->type);
3097 return -1;
3098 }
3099
3100 /* Setup l2proxy entries if enabled and used with a link property */
3101 if (netdev->l2proxy && netdev->link[0] != '\0') {
3102 if (lxc_setup_l2proxy(netdev)) {
3103 ERROR("Failed to setup l2proxy");
3104 return -1;
3105 }
3106 }
3107
3108 if (netdev_conf[netdev->type](handler, netdev)) {
3109 ERROR("Failed to create network device");
3110 return -1;
3111 }
3112 }
3113
3114 return 0;
3115 }
3116
3117 int lxc_network_move_created_netdev_priv(struct lxc_handler *handler)
3118 {
3119 pid_t pid = handler->pid;
3120 struct lxc_list *network = &handler->conf->network;
3121 struct lxc_list *iterator;
3122
3123 if (am_guest_unpriv())
3124 return 0;
3125
3126 lxc_list_for_each(iterator, network) {
3127 int ret;
3128 char ifname[IFNAMSIZ];
3129 struct lxc_netdev *netdev = iterator->elem;
3130
3131 if (!netdev->ifindex)
3132 continue;
3133
3134 /* retrieve the name of the interface */
3135 if (!if_indextoname(netdev->ifindex, ifname)) {
3136 ERROR("No interface corresponding to ifindex \"%d\"",
3137 netdev->ifindex);
3138 return -1;
3139 }
3140
3141 ret = lxc_netdev_move_by_name(ifname, pid, NULL);
3142 if (ret) {
3143 errno = -ret;
3144 SYSERROR("Failed to move network device \"%s\" to network namespace %d",
3145 ifname, pid);
3146 return -1;
3147 }
3148
3149 strlcpy(netdev->created_name, ifname, IFNAMSIZ);
3150
3151 DEBUG("Moved network device \"%s\" to network namespace of %d",
3152 netdev->created_name, pid);
3153 }
3154
3155 return 0;
3156 }
3157
3158 static int lxc_create_network_unpriv(struct lxc_handler *handler)
3159 {
3160 int hooks_version = handler->conf->hooks_version;
3161 const char *lxcname = handler->name;
3162 const char *lxcpath = handler->lxcpath;
3163 struct lxc_list *network = &handler->conf->network;
3164 pid_t pid = handler->pid;
3165 struct lxc_list *iterator;
3166
3167 lxc_list_for_each(iterator, network) {
3168 struct lxc_netdev *netdev = iterator->elem;
3169
3170 if (netdev->type == LXC_NET_EMPTY)
3171 continue;
3172
3173 if (netdev->type == LXC_NET_NONE)
3174 continue;
3175
3176 if (netdev->type != LXC_NET_VETH) {
3177 ERROR("Networks of type %s are not supported by unprivileged containers",
3178 lxc_net_type_to_str(netdev->type));
3179 return -1;
3180 }
3181
3182 if (netdev->mtu)
3183 INFO("mtu ignored due to insufficient privilege");
3184
3185 if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev,
3186 pid, hooks_version))
3187 return -1;
3188 }
3189
3190 return 0;
3191 }
3192
3193 bool lxc_delete_network_priv(struct lxc_handler *handler)
3194 {
3195 int ret;
3196 struct lxc_list *iterator;
3197 struct lxc_list *network = &handler->conf->network;
3198
3199 lxc_list_for_each(iterator, network) {
3200 char *hostveth = NULL;
3201 struct lxc_netdev *netdev = iterator->elem;
3202
3203 /* We can only delete devices whose ifindex we have. If we don't
3204 * have the index it means that we didn't create it.
3205 */
3206 if (!netdev->ifindex)
3207 continue;
3208
3209 /* Delete l2proxy entries if enabled and used with a link property */
3210 if (netdev->l2proxy && netdev->link[0] != '\0') {
3211 if (lxc_delete_l2proxy(netdev))
3212 WARN("Failed to delete all l2proxy config");
3213 /* Don't return, let the network be cleaned up as normal. */
3214 }
3215
3216 if (netdev->type == LXC_NET_PHYS) {
3217 ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
3218 if (ret < 0)
3219 WARN("Failed to rename interface with index %d "
3220 "from \"%s\" to its initial name \"%s\"",
3221 netdev->ifindex, netdev->name, netdev->link);
3222 else {
3223 TRACE("Renamed interface with index %d from "
3224 "\"%s\" to its initial name \"%s\"",
3225 netdev->ifindex, netdev->name,
3226 netdev->link);
3227
3228 /* Restore original MTU */
3229 ret = lxc_netdev_set_mtu(netdev->link, netdev->priv.phys_attr.mtu);
3230 if (ret < 0) {
3231 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3232 netdev->link, netdev->priv.phys_attr.mtu);
3233 } else {
3234 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3235 netdev->link, netdev->priv.phys_attr.mtu);
3236 }
3237 }
3238
3239 ret = netdev_deconf[netdev->type](handler, netdev);
3240 goto clear_ifindices;
3241 }
3242
3243 ret = netdev_deconf[netdev->type](handler, netdev);
3244 if (ret < 0)
3245 WARN("Failed to deconfigure network device");
3246
3247 /* Recent kernels remove the virtual interfaces when the network
3248 * namespace is destroyed but in case we did not move the
3249 * interface to the network namespace, we have to destroy it.
3250 */
3251 ret = lxc_netdev_delete_by_index(netdev->ifindex);
3252 if (ret < 0) {
3253 if (errno != ENODEV) {
3254 WARN("Failed to remove interface \"%s\" with index %d",
3255 netdev->name[0] != '\0' ? netdev->name : "(null)",
3256 netdev->ifindex);
3257 goto clear_ifindices;
3258 }
3259 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
3260 netdev->name[0] != '\0' ? netdev->name : "(null)",
3261 netdev->ifindex);
3262 }
3263 INFO("Removed interface \"%s\" with index %d",
3264 netdev->name[0] != '\0' ? netdev->name : "(null)",
3265 netdev->ifindex);
3266
3267 if (netdev->type != LXC_NET_VETH)
3268 goto clear_ifindices;
3269
3270 /* Explicitly delete host veth device to prevent lingering
3271 * devices. We had issues in LXD around this.
3272 */
3273 if (netdev->priv.veth_attr.pair[0] != '\0')
3274 hostveth = netdev->priv.veth_attr.pair;
3275 else
3276 hostveth = netdev->priv.veth_attr.veth1;
3277 if (hostveth[0] == '\0')
3278 goto clear_ifindices;
3279
3280 ret = lxc_netdev_delete_by_name(hostveth);
3281 if (ret < 0) {
3282 WARN("Failed to remove interface \"%s\" from \"%s\"",
3283 hostveth, netdev->link);
3284 goto clear_ifindices;
3285 }
3286 INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
3287
3288 if (netdev->link[0] == '\0' || !is_ovs_bridge(netdev->link)) {
3289 netdev->priv.veth_attr.veth1[0] = '\0';
3290 netdev->ifindex = 0;
3291 netdev->priv.veth_attr.ifindex = 0;
3292 goto clear_ifindices;
3293 }
3294
3295 /* Delete the openvswitch port. */
3296 ret = lxc_ovs_delete_port(netdev->link, hostveth);
3297 if (ret < 0)
3298 WARN("Failed to remove port \"%s\" from openvswitch "
3299 "bridge \"%s\"", hostveth, netdev->link);
3300 else
3301 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3302 hostveth, netdev->link);
3303
3304 clear_ifindices:
3305 /* We need to clear any ifindices we recorded so liblxc won't
3306 * have cached stale data which would cause it to fail on reboot
3307 * we're we don't re-read the on-disk config file.
3308 */
3309 netdev->ifindex = 0;
3310 if (netdev->type == LXC_NET_PHYS) {
3311 netdev->priv.phys_attr.ifindex = 0;
3312 } else if (netdev->type == LXC_NET_VETH) {
3313 netdev->priv.veth_attr.veth1[0] = '\0';
3314 netdev->priv.veth_attr.ifindex = 0;
3315 }
3316 }
3317
3318 return true;
3319 }
3320
3321 int lxc_requests_empty_network(struct lxc_handler *handler)
3322 {
3323 struct lxc_list *network = &handler->conf->network;
3324 struct lxc_list *iterator;
3325 bool found_none = false, found_nic = false;
3326
3327 if (lxc_list_empty(network))
3328 return 0;
3329
3330 lxc_list_for_each(iterator, network) {
3331 struct lxc_netdev *netdev = iterator->elem;
3332
3333 if (netdev->type == LXC_NET_NONE)
3334 found_none = true;
3335 else
3336 found_nic = true;
3337 }
3338 if (found_none && !found_nic)
3339 return 1;
3340 return 0;
3341 }
3342
3343 /* try to move physical nics to the init netns */
3344 int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
3345 {
3346 int ret;
3347 int oldfd;
3348 char ifname[IFNAMSIZ];
3349 struct lxc_list *iterator;
3350 int netnsfd = handler->nsfd[LXC_NS_NET];
3351 struct lxc_conf *conf = handler->conf;
3352
3353 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3354 * the parent network namespace. We won't have this capability if we are
3355 * unprivileged.
3356 */
3357 if (!handler->am_root)
3358 return 0;
3359
3360 TRACE("Moving physical network devices back to parent network namespace");
3361
3362 oldfd = lxc_preserve_ns(handler->monitor_pid, "net");
3363 if (oldfd < 0) {
3364 SYSERROR("Failed to preserve network namespace");
3365 return -1;
3366 }
3367
3368 ret = setns(netnsfd, CLONE_NEWNET);
3369 if (ret < 0) {
3370 SYSERROR("Failed to enter network namespace");
3371 close(oldfd);
3372 return -1;
3373 }
3374
3375 lxc_list_for_each(iterator, &conf->network) {
3376 struct lxc_netdev *netdev = iterator->elem;
3377
3378 if (netdev->type != LXC_NET_PHYS)
3379 continue;
3380
3381 /* Retrieve the name of the interface in the container's network
3382 * namespace.
3383 */
3384 if (!if_indextoname(netdev->ifindex, ifname)) {
3385 WARN("No interface corresponding to ifindex %d",
3386 netdev->ifindex);
3387 continue;
3388 }
3389
3390 ret = lxc_netdev_move_by_index_fd(netdev->ifindex, oldfd, netdev->link);
3391 if (ret < 0)
3392 WARN("Error moving network device \"%s\" back to "
3393 "network namespace", ifname);
3394 else
3395 TRACE("Moved network device \"%s\" back to network "
3396 "namespace", ifname);
3397 }
3398
3399 ret = setns(oldfd, CLONE_NEWNET);
3400 close(oldfd);
3401 if (ret < 0) {
3402 SYSERROR("Failed to enter network namespace");
3403 return -1;
3404 }
3405
3406 return 0;
3407 }
3408
3409 static int setup_hw_addr(char *hwaddr, const char *ifname)
3410 {
3411 struct sockaddr sockaddr;
3412 struct ifreq ifr;
3413 int ret, fd;
3414
3415 ret = lxc_convert_mac(hwaddr, &sockaddr);
3416 if (ret) {
3417 errno = -ret;
3418 SYSERROR("Mac address \"%s\" conversion failed", hwaddr);
3419 return -1;
3420 }
3421
3422 memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
3423 ifr.ifr_name[IFNAMSIZ-1] = '\0';
3424 memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
3425
3426 fd = socket(AF_INET, SOCK_DGRAM | SOCK_CLOEXEC, 0);
3427 if (fd < 0)
3428 return -1;
3429
3430 ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
3431 if (ret)
3432 SYSERROR("Failed to perform ioctl");
3433
3434 close(fd);
3435
3436 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
3437 ifr.ifr_name);
3438
3439 return ret;
3440 }
3441
3442 static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
3443 {
3444 struct lxc_list *iterator;
3445 int err;
3446
3447 lxc_list_for_each(iterator, ip) {
3448 struct lxc_inetdev *inetdev = iterator->elem;
3449
3450 err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
3451 &inetdev->bcast, inetdev->prefix);
3452 if (err) {
3453 errno = -err;
3454 SYSERROR("Failed to setup ipv4 address for network device "
3455 "with ifindex %d", ifindex);
3456 return -1;
3457 }
3458 }
3459
3460 return 0;
3461 }
3462
3463 static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
3464 {
3465 struct lxc_list *iterator;
3466 int err;
3467
3468 lxc_list_for_each(iterator, ip) {
3469 struct lxc_inet6dev *inet6dev = iterator->elem;
3470
3471 err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
3472 &inet6dev->mcast, &inet6dev->acast,
3473 inet6dev->prefix);
3474 if (err) {
3475 errno = -err;
3476 SYSERROR("Failed to setup ipv6 address for network device "
3477 "with ifindex %d", ifindex);
3478 return -1;
3479 }
3480 }
3481
3482 return 0;
3483 }
3484
3485 static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
3486 {
3487 char ifname[IFNAMSIZ];
3488 int err;
3489 char *current_ifname = ifname;
3490 char bufinet4[INET_ADDRSTRLEN], bufinet6[INET6_ADDRSTRLEN];
3491
3492 /* empty network namespace */
3493 if (!netdev->ifindex) {
3494 if (netdev->flags & IFF_UP) {
3495 err = lxc_netdev_up("lo");
3496 if (err) {
3497 errno = -err;
3498 SYSERROR("Failed to set the loopback network device up");
3499 return -1;
3500 }
3501 }
3502
3503 if (netdev->type == LXC_NET_EMPTY)
3504 return 0;
3505
3506 if (netdev->type == LXC_NET_NONE)
3507 return 0;
3508
3509 netdev->ifindex = if_nametoindex(netdev->created_name);
3510 if (!netdev->ifindex)
3511 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3512 netdev->name ?: "(null)");
3513 }
3514
3515 /* get the new ifindex in case of physical netdev */
3516 if (netdev->type == LXC_NET_PHYS) {
3517 netdev->ifindex = if_nametoindex(netdev->link);
3518 if (!netdev->ifindex) {
3519 ERROR("Failed to get ifindex for network device \"%s\"",
3520 netdev->link);
3521 return -1;
3522 }
3523 }
3524
3525 /* retrieve the name of the interface */
3526 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3527 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3528 netdev->ifindex);
3529 return -1;
3530 }
3531
3532 /* Default: let the system choose an interface name.
3533 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3534 * netlink will replace the format specifier with an appropriate index.
3535 */
3536 if (netdev->name[0] == '\0') {
3537 if (netdev->type == LXC_NET_PHYS)
3538 (void)strlcpy(netdev->name, netdev->link, IFNAMSIZ);
3539 else
3540 (void)strlcpy(netdev->name, "eth%d", IFNAMSIZ);
3541 }
3542
3543 /* rename the interface name */
3544 if (strcmp(current_ifname, netdev->name) != 0) {
3545 err = lxc_netdev_rename_by_name(current_ifname, netdev->name);
3546 if (err) {
3547 errno = -err;
3548 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
3549 current_ifname, netdev->name);
3550 return -1;
3551 }
3552
3553 TRACE("Renamed network device from \"%s\" to \"%s\"",
3554 current_ifname, netdev->name);
3555 }
3556
3557 /* Re-read the name of the interface because its name has changed
3558 * and would be automatically allocated by the system
3559 */
3560 if (!if_indextoname(netdev->ifindex, current_ifname)) {
3561 ERROR("Failed get name for network device with ifindex %d",
3562 netdev->ifindex);
3563 return -1;
3564 }
3565
3566 /* Now update the recorded name of the network device to reflect the
3567 * name of the network device in the child's network namespace. We will
3568 * later on send this information back to the parent.
3569 */
3570 (void)strlcpy(netdev->name, current_ifname, IFNAMSIZ);
3571
3572 /* set a mac address */
3573 if (netdev->hwaddr) {
3574 if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
3575 ERROR("Failed to setup hw address for network device \"%s\"",
3576 current_ifname);
3577 return -1;
3578 }
3579 }
3580
3581 /* setup ipv4 addresses on the interface */
3582 if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
3583 ERROR("Failed to setup ip addresses for network device \"%s\"",
3584 current_ifname);
3585 return -1;
3586 }
3587
3588 /* setup ipv6 addresses on the interface */
3589 if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
3590 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
3591 current_ifname);
3592 return -1;
3593 }
3594
3595 /* set the network device up */
3596 if (netdev->flags & IFF_UP) {
3597 err = lxc_netdev_up(current_ifname);
3598 if (err) {
3599 errno = -err;
3600 SYSERROR("Failed to set network device \"%s\" up",
3601 current_ifname);
3602 return -1;
3603 }
3604
3605 /* the network is up, make the loopback up too */
3606 err = lxc_netdev_up("lo");
3607 if (err) {
3608 errno = -err;
3609 SYSERROR("Failed to set the loopback network device up");
3610 return -1;
3611 }
3612 }
3613
3614 /* setup ipv4 gateway on the interface */
3615 if (netdev->ipv4_gateway || netdev->ipv4_gateway_dev) {
3616 if (!(netdev->flags & IFF_UP)) {
3617 ERROR("Cannot add ipv4 gateway for network device "
3618 "\"%s\" when not bringing up the interface", current_ifname);
3619 return -1;
3620 }
3621
3622 if (lxc_list_empty(&netdev->ipv4)) {
3623 ERROR("Cannot add ipv4 gateway for network device "
3624 "\"%s\" when not assigning an address", current_ifname);
3625 return -1;
3626 }
3627
3628 /* Setup device route if ipv4_gateway_dev is enabled */
3629 if (netdev->ipv4_gateway_dev) {
3630 err = lxc_ipv4_gateway_add(netdev->ifindex, NULL);
3631 if (err < 0) {
3632 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
3633 current_ifname);
3634 return minus_one_set_errno(-err);
3635 }
3636 } else {
3637 /* Check the gateway address is valid */
3638 if (!inet_ntop(AF_INET, netdev->ipv4_gateway, bufinet4, sizeof(bufinet4)))
3639 return minus_one_set_errno(errno);
3640
3641 /* Try adding a default route to the gateway address */
3642 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3643 if (err < 0) {
3644 /* If adding the default route fails, this could be because the
3645 * gateway address is in a different subnet to the container's address.
3646 * To work around this, we try adding a static device route to the
3647 * gateway address first, and then try again.
3648 */
3649 err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway, 32);
3650 if (err < 0) {
3651 errno = -err;
3652 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
3653 bufinet4, current_ifname);
3654 return -1;
3655 }
3656
3657 err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
3658 if (err < 0) {
3659 errno = -err;
3660 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
3661 bufinet4, current_ifname);
3662 return -1;
3663 }
3664 }
3665 }
3666 }
3667
3668 /* setup ipv6 gateway on the interface */
3669 if (netdev->ipv6_gateway || netdev->ipv6_gateway_dev) {
3670 if (!(netdev->flags & IFF_UP)) {
3671 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3672 current_ifname);
3673 return -1;
3674 }
3675
3676 if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
3677 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3678 current_ifname);
3679 return -1;
3680 }
3681
3682 /* Setup device route if ipv6_gateway_dev is enabled */
3683 if (netdev->ipv6_gateway_dev) {
3684 err = lxc_ipv6_gateway_add(netdev->ifindex, NULL);
3685 if (err < 0) {
3686 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
3687 current_ifname);
3688 return minus_one_set_errno(-err);
3689 }
3690 } else {
3691 /* Check the gateway address is valid */
3692 if (!inet_ntop(AF_INET6, netdev->ipv6_gateway, bufinet6, sizeof(bufinet6)))
3693 return minus_one_set_errno(errno);
3694
3695 /* Try adding a default route to the gateway address */
3696 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3697 if (err < 0) {
3698 /* If adding the default route fails, this could be because the
3699 * gateway address is in a different subnet to the container's address.
3700 * To work around this, we try adding a static device route to the
3701 * gateway address first, and then try again.
3702 */
3703 err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway, 128);
3704 if (err < 0) {
3705 errno = -err;
3706 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
3707 bufinet6, current_ifname);
3708 return -1;
3709 }
3710
3711 err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
3712 if (err < 0) {
3713 errno = -err;
3714 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
3715 bufinet6, current_ifname);
3716 return -1;
3717 }
3718 }
3719 }
3720 }
3721
3722 DEBUG("Network device \"%s\" has been setup", current_ifname);
3723
3724 return 0;
3725 }
3726
3727 int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
3728 struct lxc_list *network)
3729 {
3730 struct lxc_list *iterator;
3731
3732 lxc_list_for_each(iterator, network) {
3733 struct lxc_netdev *netdev = iterator->elem;
3734
3735 if (lxc_setup_netdev_in_child_namespaces(netdev)) {
3736 ERROR("Failed to setup netdev");
3737 return -1;
3738 }
3739 }
3740
3741 if (!lxc_list_empty(network))
3742 INFO("Network has been setup");
3743
3744 return 0;
3745 }
3746
3747 int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
3748 {
3749 struct lxc_list *iterator;
3750 struct lxc_list *network = &handler->conf->network;
3751 int data_sock = handler->data_sock[0];
3752
3753 lxc_list_for_each(iterator, network) {
3754 int ret;
3755 struct lxc_netdev *netdev = iterator->elem;
3756
3757 if (netdev->type != LXC_NET_VETH)
3758 continue;
3759
3760 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
3761 if (ret < 0)
3762 return -1;
3763
3764 ret = lxc_send_nointr(data_sock, netdev->created_name, IFNAMSIZ, MSG_NOSIGNAL);
3765 if (ret < 0)
3766 return -1;
3767
3768 TRACE("Sent network device name \"%s\" to child", netdev->created_name);
3769 }
3770
3771 return 0;
3772 }
3773
3774 int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
3775 {
3776 struct lxc_list *iterator;
3777 struct lxc_list *network = &handler->conf->network;
3778 int data_sock = handler->data_sock[1];
3779
3780 lxc_list_for_each(iterator, network) {
3781 int ret;
3782 struct lxc_netdev *netdev = iterator->elem;
3783
3784 if (netdev->type != LXC_NET_VETH)
3785 continue;
3786
3787 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
3788 if (ret < 0)
3789 return -1;
3790
3791 ret = lxc_recv_nointr(data_sock, netdev->created_name, IFNAMSIZ, 0);
3792 if (ret < 0)
3793 return -1;
3794 TRACE("Received network device name \"%s\" from parent", netdev->created_name);
3795 }
3796
3797 return 0;
3798 }
3799
3800 int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
3801 {
3802 struct lxc_list *iterator, *network;
3803 int data_sock = handler->data_sock[0];
3804
3805 if (!handler->am_root)
3806 return 0;
3807
3808 network = &handler->conf->network;
3809 lxc_list_for_each(iterator, network) {
3810 int ret;
3811 struct lxc_netdev *netdev = iterator->elem;
3812
3813 /* Send network device name in the child's namespace to parent. */
3814 ret = lxc_send_nointr(data_sock, netdev->name, IFNAMSIZ, MSG_NOSIGNAL);
3815 if (ret < 0)
3816 return -1;
3817
3818 /* Send network device ifindex in the child's namespace to
3819 * parent.
3820 */
3821 ret = lxc_send_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), MSG_NOSIGNAL);
3822 if (ret < 0)
3823 return -1;
3824 }
3825
3826 if (!lxc_list_empty(network))
3827 TRACE("Sent network device names and ifindices to parent");
3828
3829 return 0;
3830 }
3831
3832 int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
3833 {
3834 struct lxc_list *iterator, *network;
3835 int data_sock = handler->data_sock[1];
3836
3837 if (!handler->am_root)
3838 return 0;
3839
3840 network = &handler->conf->network;
3841 lxc_list_for_each(iterator, network) {
3842 int ret;
3843 struct lxc_netdev *netdev = iterator->elem;
3844
3845 /* Receive network device name in the child's namespace to
3846 * parent.
3847 */
3848 ret = lxc_recv_nointr(data_sock, netdev->name, IFNAMSIZ, 0);
3849 if (ret < 0)
3850 return -1;
3851
3852 /* Receive network device ifindex in the child's namespace to
3853 * parent.
3854 */
3855 ret = lxc_recv_nointr(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
3856 if (ret < 0)
3857 return -1;
3858 }
3859
3860 return 0;
3861 }
3862
3863 void lxc_delete_network(struct lxc_handler *handler)
3864 {
3865 bool bret;
3866
3867 if (handler->am_root)
3868 bret = lxc_delete_network_priv(handler);
3869 else
3870 bret = lxc_delete_network_unpriv(handler);
3871 if (!bret)
3872 DEBUG("Failed to delete network devices");
3873 else
3874 DEBUG("Deleted network devices");
3875 }
3876
3877 int lxc_netns_set_nsid(int fd)
3878 {
3879 int ret;
3880 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3881 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3882 NLMSG_ALIGN(1024)];
3883 struct nl_handler nlh;
3884 struct nlmsghdr *hdr;
3885 struct rtgenmsg *msg;
3886 int saved_errno;
3887 const __s32 ns_id = -1;
3888 const __u32 netns_fd = fd;
3889
3890 ret = netlink_open(&nlh, NETLINK_ROUTE);
3891 if (ret < 0)
3892 return -1;
3893
3894 memset(buf, 0, sizeof(buf));
3895
3896 #pragma GCC diagnostic push
3897 #pragma GCC diagnostic ignored "-Wcast-align"
3898 hdr = (struct nlmsghdr *)buf;
3899 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
3900 #pragma GCC diagnostic pop
3901
3902 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3903 hdr->nlmsg_type = RTM_NEWNSID;
3904 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3905 hdr->nlmsg_pid = 0;
3906 hdr->nlmsg_seq = RTM_NEWNSID;
3907 msg->rtgen_family = AF_UNSPEC;
3908
3909 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3910 if (ret < 0)
3911 goto on_error;
3912
3913 ret = addattr(hdr, 1024, __LXC_NETNSA_NSID, &ns_id, sizeof(ns_id));
3914 if (ret < 0)
3915 goto on_error;
3916
3917 ret = __netlink_transaction(&nlh, hdr, hdr);
3918
3919 on_error:
3920 saved_errno = errno;
3921 netlink_close(&nlh);
3922 errno = saved_errno;
3923
3924 return ret;
3925 }
3926
/*
 * Index a run of routing attributes by type.
 *
 * @tb   table with room for @max + 1 entries; it is cleared first
 * @max  highest attribute type to record
 * @rta  first attribute of the run
 * @len  number of bytes available starting at @rta
 *
 * Walks the attributes for as long as RTA_OK() accepts them. For each
 * attribute whose type is <= @max, tb[type] is pointed at it unless that slot
 * is already filled, so the first occurrence of a type wins. Attributes with
 * a larger type are ignored.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	/* RTA_NEXT() also subtracts the consumed bytes from len. */
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short attr_type = rta->rta_type;

		if (attr_type > max || tb[attr_type])
			continue;

		tb[attr_type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
3946
/*
 * Return the payload of @rta interpreted as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() instead of being read through a cast
 * pointer: attribute payloads live inside a raw byte buffer, so dereferencing
 * the payload as an __s32 would depend on that buffer's alignment and on the
 * compiler tolerating the type-punned access.
 *
 * The caller must make sure the attribute carries at least sizeof(__s32)
 * bytes of payload; rta_len is not checked here.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
3951
/*
 * NETNS_RTA(r): given a pointer to a struct rtgenmsg, yield a pointer to the
 * first routing attribute, located NLMSG_ALIGN(sizeof(struct rtgenmsg)) bytes
 * past it. Only defined here if nothing else has defined it already.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3956
3957 int lxc_netns_get_nsid(int fd)
3958 {
3959 int ret;
3960 ssize_t len;
3961 char buf[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
3962 NLMSG_ALIGN(sizeof(struct rtgenmsg)) +
3963 NLMSG_ALIGN(1024)];
3964 struct rtattr *tb[__LXC_NETNSA_MAX + 1];
3965 struct nl_handler nlh;
3966 struct nlmsghdr *hdr;
3967 struct rtgenmsg *msg;
3968 int saved_errno;
3969 __u32 netns_fd = fd;
3970
3971 ret = netlink_open(&nlh, NETLINK_ROUTE);
3972 if (ret < 0)
3973 return -1;
3974
3975 memset(buf, 0, sizeof(buf));
3976
3977 #pragma GCC diagnostic push
3978 #pragma GCC diagnostic ignored "-Wcast-align"
3979 hdr = (struct nlmsghdr *)buf;
3980 msg = (struct rtgenmsg *)NLMSG_DATA(hdr);
3981 #pragma GCC diagnostic pop
3982
3983 hdr->nlmsg_len = NLMSG_LENGTH(sizeof(*msg));
3984 hdr->nlmsg_type = RTM_GETNSID;
3985 hdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
3986 hdr->nlmsg_pid = 0;
3987 hdr->nlmsg_seq = RTM_GETNSID;
3988 msg->rtgen_family = AF_UNSPEC;
3989
3990 ret = addattr(hdr, 1024, __LXC_NETNSA_FD, &netns_fd, sizeof(netns_fd));
3991 if (ret == 0)
3992 ret = __netlink_transaction(&nlh, hdr, hdr);
3993
3994 saved_errno = errno;
3995 netlink_close(&nlh);
3996 errno = saved_errno;
3997 if (ret < 0)
3998 return -1;
3999
4000 errno = EINVAL;
4001 msg = NLMSG_DATA(hdr);
4002 len = hdr->nlmsg_len - NLMSG_SPACE(sizeof(*msg));
4003 if (len < 0)
4004 return -1;
4005
4006 #pragma GCC diagnostic push
4007 #pragma GCC diagnostic ignored "-Wcast-align"
4008 parse_rtattr(tb, __LXC_NETNSA_MAX, NETNS_RTA(msg), len);
4009 if (tb[__LXC_NETNSA_NSID])
4010 return rta_getattr_s32(tb[__LXC_NETNSA_NSID]);
4011 #pragma GCC diagnostic pop
4012
4013 return -1;
4014 }
4015
4016 int lxc_create_network(struct lxc_handler *handler)
4017 {
4018 int ret;
4019
4020 /*
4021 * Find gateway addresses from the link device, which is no longer
4022 * accessible inside the container. Do this before creating network
4023 * interfaces, since goto out_delete_net does not work before
4024 * lxc_clone.
4025 */
4026 ret = lxc_find_gateway_addresses(handler);
4027 if (ret) {
4028 ERROR("Failed to find gateway addresses");
4029 return -1;
4030 }
4031
4032 if (handler->am_root) {
4033 ret = lxc_create_network_priv(handler);
4034 if (ret)
4035 return -1;
4036
4037 return lxc_network_move_created_netdev_priv(handler);
4038 }
4039
4040 return lxc_create_network_unpriv(handler);
4041 }