2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include <arpa/inet.h>
31 #include <linux/netlink.h>
32 #include <linux/rtnetlink.h>
33 #include <linux/sockios.h>
34 #include <net/ethernet.h>
36 #include <net/if_arp.h>
37 #include <netinet/in.h>
41 #include <sys/inotify.h>
42 #include <sys/ioctl.h>
43 #include <sys/param.h>
44 #include <sys/socket.h>
46 #include <sys/types.h>
50 #include "../include/netns_ifaddrs.h"
54 #include "file_utils.h"
57 #include "memory_utils.h"
60 #include "raw_syscalls.h"
61 #include "syscall_wrappers.h"
65 #include "include/strlcpy.h"
68 lxc_log_define(network
, lxc
);
70 typedef int (*instantiate_cb
)(struct lxc_handler
*, struct lxc_netdev
*);
71 static const char loDev
[] = "lo";
73 static int lxc_ip_route_dest(__u16 nlmsg_type
, int family
, int ifindex
, void *dest
, unsigned int netmask
)
76 struct nl_handler nlh
;
78 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
80 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
81 : sizeof(struct in6_addr
);
83 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
88 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
92 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
96 nlmsg
->nlmsghdr
->nlmsg_flags
=
97 NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
98 nlmsg
->nlmsghdr
->nlmsg_type
= nlmsg_type
;
100 rt
= nlmsg_reserve(nlmsg
, sizeof(struct rtmsg
));
103 rt
->rtm_family
= family
;
104 rt
->rtm_table
= RT_TABLE_MAIN
;
105 rt
->rtm_scope
= RT_SCOPE_LINK
;
106 rt
->rtm_protocol
= RTPROT_BOOT
;
107 rt
->rtm_type
= RTN_UNICAST
;
108 rt
->rtm_dst_len
= netmask
;
111 if (nla_put_buffer(nlmsg
, RTA_DST
, dest
, addrlen
))
113 if (nla_put_u32(nlmsg
, RTA_OIF
, ifindex
))
115 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
123 static int lxc_ipv4_dest_add(int ifindex
, struct in_addr
*dest
, unsigned int netmask
)
125 return lxc_ip_route_dest(RTM_NEWROUTE
, AF_INET
, ifindex
, dest
, netmask
);
128 static int lxc_ipv6_dest_add(int ifindex
, struct in6_addr
*dest
, unsigned int netmask
)
130 return lxc_ip_route_dest(RTM_NEWROUTE
, AF_INET6
, ifindex
, dest
, netmask
);
133 static int lxc_ipv4_dest_del(int ifindex
, struct in_addr
*dest
, unsigned int netmask
)
135 return lxc_ip_route_dest(RTM_DELROUTE
, AF_INET
, ifindex
, dest
, netmask
);
138 static int lxc_ipv6_dest_del(int ifindex
, struct in6_addr
*dest
, unsigned int netmask
)
140 return lxc_ip_route_dest(RTM_DELROUTE
, AF_INET6
, ifindex
, dest
, netmask
);
143 static int lxc_setup_ipv4_routes(struct lxc_list
*ip
, int ifindex
)
145 struct lxc_list
*iterator
;
148 lxc_list_for_each(iterator
, ip
) {
149 struct lxc_inetdev
*inetdev
= iterator
->elem
;
151 err
= lxc_ipv4_dest_add(ifindex
, &inetdev
->addr
, inetdev
->prefix
);
153 SYSERROR("Failed to setup ipv4 route for network device "
154 "with ifindex %d", ifindex
);
155 return minus_one_set_errno(-err
);
162 static int lxc_setup_ipv6_routes(struct lxc_list
*ip
, int ifindex
)
164 struct lxc_list
*iterator
;
167 lxc_list_for_each(iterator
, ip
) {
168 struct lxc_inet6dev
*inet6dev
= iterator
->elem
;
170 err
= lxc_ipv6_dest_add(ifindex
, &inet6dev
->addr
, inet6dev
->prefix
);
172 SYSERROR("Failed to setup ipv6 route for network device "
173 "with ifindex %d", ifindex
);
174 return minus_one_set_errno(-err
);
181 static int instantiate_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
183 int bridge_index
, err
;
185 char veth1buf
[IFNAMSIZ
], veth2buf
[IFNAMSIZ
];
186 unsigned int mtu
= 0;
188 if (netdev
->priv
.veth_attr
.pair
[0] != '\0') {
189 veth1
= netdev
->priv
.veth_attr
.pair
;
190 if (handler
->conf
->reboot
)
191 lxc_netdev_delete_by_name(veth1
);
193 err
= snprintf(veth1buf
, sizeof(veth1buf
), "vethXXXXXX");
194 if (err
< 0 || (size_t)err
>= sizeof(veth1buf
))
197 veth1
= lxc_mkifname(veth1buf
);
201 /* store away for deconf */
202 memcpy(netdev
->priv
.veth_attr
.veth1
, veth1
, IFNAMSIZ
);
205 err
= snprintf(veth2buf
, sizeof(veth2buf
), "vethXXXXXX");
206 if (err
< 0 || (size_t)err
>= sizeof(veth2buf
))
209 veth2
= lxc_mkifname(veth2buf
);
213 err
= lxc_veth_create(veth1
, veth2
);
216 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1
, veth2
);
220 /* changing the high byte of the mac address to 0xfe, the bridge interface
221 * will always keep the host's mac address and not take the mac address
223 err
= setup_private_host_hw_addr(veth1
);
226 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1
);
230 /* Retrieve ifindex of the host's veth device. */
231 netdev
->priv
.veth_attr
.ifindex
= if_nametoindex(veth1
);
232 if (!netdev
->priv
.veth_attr
.ifindex
) {
233 ERROR("Failed to retrieve ifindex for \"%s\"", veth1
);
237 /* Note that we're retrieving the container's ifindex in the host's
238 * network namespace because we need it to move the device from the
239 * host's network namespace to the container's network namespace later
242 netdev
->ifindex
= if_nametoindex(veth2
);
243 if (!netdev
->ifindex
) {
244 ERROR("Failed to retrieve ifindex for \"%s\"", veth2
);
249 if (lxc_safe_uint(netdev
->mtu
, &mtu
) < 0)
250 WARN("Failed to parse mtu");
252 INFO("Retrieved mtu %d", mtu
);
253 } else if (netdev
->link
[0] != '\0') {
254 bridge_index
= if_nametoindex(netdev
->link
);
256 mtu
= netdev_get_mtu(bridge_index
);
257 INFO("Retrieved mtu %d from %s", mtu
, netdev
->link
);
259 mtu
= netdev_get_mtu(netdev
->ifindex
);
260 INFO("Retrieved mtu %d from %s", mtu
, veth2
);
265 err
= lxc_netdev_set_mtu(veth1
, mtu
);
267 err
= lxc_netdev_set_mtu(veth2
, mtu
);
271 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
272 "and \"%s\"", mtu
, veth1
, veth2
);
277 if (netdev
->link
[0] != '\0') {
278 err
= lxc_bridge_attach(netdev
->link
, veth1
);
281 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
282 veth1
, netdev
->link
);
285 INFO("Attached \"%s\" to bridge \"%s\"", veth1
, netdev
->link
);
288 err
= lxc_netdev_up(veth1
);
291 SYSERROR("Failed to set \"%s\" up", veth1
);
295 /* setup ipv4 routes on the host interface */
296 if (lxc_setup_ipv4_routes(&netdev
->priv
.veth_attr
.ipv4_routes
, netdev
->priv
.veth_attr
.ifindex
)) {
297 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1
);
301 /* setup ipv6 routes on the host interface */
302 if (lxc_setup_ipv6_routes(&netdev
->priv
.veth_attr
.ipv6_routes
, netdev
->priv
.veth_attr
.ifindex
)) {
303 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1
);
307 if (netdev
->upscript
) {
315 err
= run_script_argv(handler
->name
,
316 handler
->conf
->hooks_version
, "net",
317 netdev
->upscript
, "up", argv
);
322 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1
, veth2
,
328 if (netdev
->ifindex
!= 0)
329 lxc_netdev_delete_by_name(veth1
);
333 static int instantiate_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
335 char peerbuf
[IFNAMSIZ
], *peer
;
337 unsigned int mtu
= 0;
339 if (netdev
->link
[0] == '\0') {
340 ERROR("No link for macvlan network device specified");
344 err
= snprintf(peerbuf
, sizeof(peerbuf
), "mcXXXXXX");
345 if (err
< 0 || (size_t)err
>= sizeof(peerbuf
))
348 peer
= lxc_mkifname(peerbuf
);
352 err
= lxc_macvlan_create(netdev
->link
, peer
,
353 netdev
->priv
.macvlan_attr
.mode
);
356 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
361 netdev
->ifindex
= if_nametoindex(peer
);
362 if (!netdev
->ifindex
) {
363 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
368 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
371 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
375 err
= lxc_netdev_set_mtu(peer
, mtu
);
378 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
383 if (netdev
->upscript
) {
390 err
= run_script_argv(handler
->name
,
391 handler
->conf
->hooks_version
, "net",
392 netdev
->upscript
, "up", argv
);
397 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
398 peer
, netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
403 lxc_netdev_delete_by_name(peer
);
407 static int lxc_ipvlan_create(const char *master
, const char *name
, int mode
, int isolation
)
410 struct ifinfomsg
*ifi
;
411 struct nl_handler nlh
;
412 struct rtattr
*nest
, *nest2
;
413 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
415 len
= strlen(master
);
416 if (len
== 1 || len
>= IFNAMSIZ
)
417 return minus_one_set_errno(EINVAL
);
420 if (len
== 1 || len
>= IFNAMSIZ
)
421 return minus_one_set_errno(EINVAL
);
423 index
= if_nametoindex(master
);
425 return minus_one_set_errno(EINVAL
);
427 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
429 return minus_one_set_errno(-err
);
432 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
436 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
440 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
441 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
443 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
447 ifi
->ifi_family
= AF_UNSPEC
;
450 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
454 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "ipvlan"))
458 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
462 if (nla_put_u32(nlmsg
, IFLA_IPVLAN_MODE
, mode
))
465 /* if_link.h does not define the isolation flag value for bridge mode so we define it as 0
466 * and only send mode if mode >0 as default mode is bridge anyway according to ipvlan docs.
469 if (nla_put_u16(nlmsg
, IFLA_IPVLAN_ISOLATION
, isolation
))
473 nla_end_nested(nlmsg
, nest2
);
476 nla_end_nested(nlmsg
, nest
);
478 if (nla_put_u32(nlmsg
, IFLA_LINK
, index
))
481 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
484 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
490 return minus_one_set_errno(-err
);
494 static int instantiate_ipvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
496 char peerbuf
[IFNAMSIZ
], *peer
;
498 unsigned int mtu
= 0;
500 if (netdev
->link
[0] == '\0') {
501 ERROR("No link for ipvlan network device specified");
505 err
= snprintf(peerbuf
, sizeof(peerbuf
), "ipXXXXXX");
506 if (err
< 0 || (size_t)err
>= sizeof(peerbuf
))
509 peer
= lxc_mkifname(peerbuf
);
513 err
= lxc_ipvlan_create(netdev
->link
, peer
, netdev
->priv
.ipvlan_attr
.mode
, netdev
->priv
.ipvlan_attr
.isolation
);
515 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"", peer
, netdev
->link
);
519 netdev
->ifindex
= if_nametoindex(peer
);
520 if (!netdev
->ifindex
) {
521 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
526 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
529 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
533 err
= lxc_netdev_set_mtu(peer
, mtu
);
536 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
541 if (netdev
->upscript
) {
548 err
= run_script_argv(handler
->name
,
549 handler
->conf
->hooks_version
, "net",
550 netdev
->upscript
, "up", argv
);
555 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d",
556 peer
, netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
561 lxc_netdev_delete_by_name(peer
);
565 static int instantiate_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
569 static uint16_t vlan_cntr
= 0;
570 unsigned int mtu
= 0;
572 if (netdev
->link
[0] == '\0') {
573 ERROR("No link for vlan network device specified");
577 err
= snprintf(peer
, sizeof(peer
), "vlan%d-%d", netdev
->priv
.vlan_attr
.vid
, vlan_cntr
++);
578 if (err
< 0 || (size_t)err
>= sizeof(peer
))
581 err
= lxc_vlan_create(netdev
->link
, peer
, netdev
->priv
.vlan_attr
.vid
);
584 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
589 netdev
->ifindex
= if_nametoindex(peer
);
590 if (!netdev
->ifindex
) {
591 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
596 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
599 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
603 err
= lxc_netdev_set_mtu(peer
, mtu
);
606 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
611 if (netdev
->upscript
) {
618 err
= run_script_argv(handler
->name
,
619 handler
->conf
->hooks_version
, "net",
620 netdev
->upscript
, "up", argv
);
626 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"",
627 peer
, netdev
->ifindex
);
632 lxc_netdev_delete_by_name(peer
);
636 static int instantiate_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
638 int err
, mtu_orig
= 0;
639 unsigned int mtu
= 0;
641 if (netdev
->link
[0] == '\0') {
642 ERROR("No link for physical interface specified");
646 /* Note that we're retrieving the container's ifindex in the host's
647 * network namespace because we need it to move the device from the
648 * host's network namespace to the container's network namespace later
650 * Note that netdev->link will contain the name of the physical network
651 * device in the host's namespace.
653 netdev
->ifindex
= if_nametoindex(netdev
->link
);
654 if (!netdev
->ifindex
) {
655 ERROR("Failed to retrieve ifindex for \"%s\"", netdev
->link
);
659 /* Store the ifindex of the host's network device in the host's
662 netdev
->priv
.phys_attr
.ifindex
= netdev
->ifindex
;
664 /* Get original device MTU setting and store for restoration after container shutdown. */
665 mtu_orig
= netdev_get_mtu(netdev
->ifindex
);
667 SYSERROR("Failed to get original mtu for interface \"%s\"", netdev
->link
);
668 return minus_one_set_errno(-mtu_orig
);
671 netdev
->priv
.phys_attr
.mtu
= mtu_orig
;
674 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
677 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, netdev
->link
);
681 err
= lxc_netdev_set_mtu(netdev
->link
, mtu
);
684 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, netdev
->link
);
689 if (netdev
->upscript
) {
696 err
= run_script_argv(handler
->name
,
697 handler
->conf
->hooks_version
, "net",
698 netdev
->upscript
, "up", argv
);
704 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev
->link
, netdev
->ifindex
);
709 static int instantiate_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
718 if (!netdev
->upscript
)
721 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
722 "net", netdev
->upscript
, "up", argv
);
729 static int instantiate_none(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
735 static instantiate_cb netdev_conf
[LXC_NET_MAXCONFTYPE
+ 1] = {
736 [LXC_NET_VETH
] = instantiate_veth
,
737 [LXC_NET_MACVLAN
] = instantiate_macvlan
,
738 [LXC_NET_IPVLAN
] = instantiate_ipvlan
,
739 [LXC_NET_VLAN
] = instantiate_vlan
,
740 [LXC_NET_PHYS
] = instantiate_phys
,
741 [LXC_NET_EMPTY
] = instantiate_empty
,
742 [LXC_NET_NONE
] = instantiate_none
,
745 static int shutdown_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
755 if (!netdev
->downscript
)
758 if (netdev
->priv
.veth_attr
.pair
[0] != '\0')
759 argv
[2] = netdev
->priv
.veth_attr
.pair
;
761 argv
[2] = netdev
->priv
.veth_attr
.veth1
;
763 ret
= run_script_argv(handler
->name
,
764 handler
->conf
->hooks_version
, "net",
765 netdev
->downscript
, "down", argv
);
772 static int shutdown_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
781 if (!netdev
->downscript
)
784 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
785 "net", netdev
->downscript
, "down", argv
);
792 static int shutdown_ipvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
801 if (!netdev
->downscript
)
804 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
805 "net", netdev
->downscript
, "down", argv
);
812 static int shutdown_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
821 if (!netdev
->downscript
)
824 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
825 "net", netdev
->downscript
, "down", argv
);
832 static int shutdown_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
841 if (!netdev
->downscript
)
844 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
845 "net", netdev
->downscript
, "down", argv
);
852 static int shutdown_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
860 if (!netdev
->downscript
)
863 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
864 "net", netdev
->downscript
, "down", argv
);
871 static int shutdown_none(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
876 static instantiate_cb netdev_deconf
[LXC_NET_MAXCONFTYPE
+ 1] = {
877 [LXC_NET_VETH
] = shutdown_veth
,
878 [LXC_NET_MACVLAN
] = shutdown_macvlan
,
879 [LXC_NET_IPVLAN
] = shutdown_ipvlan
,
880 [LXC_NET_VLAN
] = shutdown_vlan
,
881 [LXC_NET_PHYS
] = shutdown_phys
,
882 [LXC_NET_EMPTY
] = shutdown_empty
,
883 [LXC_NET_NONE
] = shutdown_none
,
886 static int lxc_netdev_move_by_index_fd(int ifindex
, int fd
, const char *ifname
)
889 struct nl_handler nlh
;
890 struct ifinfomsg
*ifi
;
891 struct nlmsg
*nlmsg
= NULL
;
893 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
898 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
902 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
903 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
905 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
908 ifi
->ifi_family
= AF_UNSPEC
;
909 ifi
->ifi_index
= ifindex
;
911 if (nla_put_u32(nlmsg
, IFLA_NET_NS_FD
, fd
))
914 if (ifname
!= NULL
) {
915 if (nla_put_string(nlmsg
, IFLA_IFNAME
, ifname
))
919 err
= netlink_transaction(&nlh
, nlmsg
, nlmsg
);
926 int lxc_netdev_move_by_index(int ifindex
, pid_t pid
, const char *ifname
)
929 struct nl_handler nlh
;
930 struct ifinfomsg
*ifi
;
931 struct nlmsg
*nlmsg
= NULL
;
933 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
938 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
942 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
943 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
945 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
948 ifi
->ifi_family
= AF_UNSPEC
;
949 ifi
->ifi_index
= ifindex
;
951 if (nla_put_u32(nlmsg
, IFLA_NET_NS_PID
, pid
))
954 if (ifname
!= NULL
) {
955 if (nla_put_string(nlmsg
, IFLA_IFNAME
, ifname
))
959 err
= netlink_transaction(&nlh
, nlmsg
, nlmsg
);
966 /* If we are asked to move a wireless interface, then we must actually move its
967 * phyN device. Detect that condition and return the physname here. The physname
968 * will be passed to lxc_netdev_move_wlan() which will free it when done.
970 #define PHYSNAME "/sys/class/net/%s/phy80211/name"
971 static char *is_wlan(const char *ifname
)
973 __do_free
char *path
= NULL
;
978 char *physname
= NULL
;
980 len
= strlen(ifname
) + strlen(PHYSNAME
) - 1;
981 path
= must_realloc(NULL
, len
+ 1);
982 ret
= snprintf(path
, len
, PHYSNAME
, ifname
);
983 if (ret
< 0 || (size_t)ret
>= len
)
986 f
= fopen(path
, "r");
990 /* Feh - sb.st_size is always 4096. */
991 fseek(f
, 0, SEEK_END
);
993 fseek(f
, 0, SEEK_SET
);
999 physname
= malloc(physlen
+ 1);
1005 memset(physname
, 0, physlen
+ 1);
1006 ret
= fread(physname
, 1, physlen
, f
);
1011 for (i
= 0; i
< physlen
; i
++) {
1012 if (physname
[i
] == '\n')
1015 if (physname
[i
] == '\0')
1026 static int lxc_netdev_rename_by_name_in_netns(pid_t pid
, const char *old
,
1036 return wait_for_pid(fpid
);
1038 if (!switch_to_ns(pid
, "net"))
1041 _exit(lxc_netdev_rename_by_name(old
, new));
1044 static int lxc_netdev_move_wlan(char *physname
, const char *ifname
, pid_t pid
,
1045 const char *newname
)
1051 /* Move phyN into the container. TODO - do this using netlink.
1052 * However, IIUC this involves a bit more complicated work to talk to
1053 * the 80211 module, so for now just call out to iw.
1055 cmd
= on_path("iw", NULL
);
1066 sprintf(pidstr
, "%d", pid
);
1067 execlp("iw", "iw", "phy", physname
, "set", "netns", pidstr
,
1069 _exit(EXIT_FAILURE
);
1072 if (wait_for_pid(fpid
))
1077 err
= lxc_netdev_rename_by_name_in_netns(pid
, ifname
, newname
);
1084 int lxc_netdev_move_by_name(const char *ifname
, pid_t pid
, const char* newname
)
1092 index
= if_nametoindex(ifname
);
1096 physname
= is_wlan(ifname
);
1098 return lxc_netdev_move_wlan(physname
, ifname
, pid
, newname
);
1100 return lxc_netdev_move_by_index(index
, pid
, newname
);
1103 int lxc_netdev_delete_by_index(int ifindex
)
1106 struct ifinfomsg
*ifi
;
1107 struct nl_handler nlh
;
1108 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1110 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1115 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1119 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1123 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
;
1124 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_DELLINK
;
1126 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1129 ifi
->ifi_family
= AF_UNSPEC
;
1130 ifi
->ifi_index
= ifindex
;
1132 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1134 netlink_close(&nlh
);
1140 int lxc_netdev_delete_by_name(const char *name
)
1144 index
= if_nametoindex(name
);
1148 return lxc_netdev_delete_by_index(index
);
1151 int lxc_netdev_rename_by_index(int ifindex
, const char *newname
)
1154 struct ifinfomsg
*ifi
;
1155 struct nl_handler nlh
;
1156 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1158 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1162 len
= strlen(newname
);
1163 if (len
== 1 || len
>= IFNAMSIZ
) {
1169 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1173 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1177 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
;
1178 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1180 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1183 ifi
->ifi_family
= AF_UNSPEC
;
1184 ifi
->ifi_index
= ifindex
;
1186 if (nla_put_string(nlmsg
, IFLA_IFNAME
, newname
))
1189 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1191 netlink_close(&nlh
);
1197 int lxc_netdev_rename_by_name(const char *oldname
, const char *newname
)
1201 len
= strlen(oldname
);
1202 if (len
== 1 || len
>= IFNAMSIZ
)
1205 index
= if_nametoindex(oldname
);
1209 return lxc_netdev_rename_by_index(index
, newname
);
1212 int netdev_set_flag(const char *name
, int flag
)
1214 int err
, index
, len
;
1215 struct ifinfomsg
*ifi
;
1216 struct nl_handler nlh
;
1217 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1219 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1225 if (len
== 1 || len
>= IFNAMSIZ
)
1229 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1233 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1238 index
= if_nametoindex(name
);
1242 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
1243 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1245 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1250 ifi
->ifi_family
= AF_UNSPEC
;
1251 ifi
->ifi_index
= index
;
1252 ifi
->ifi_change
|= IFF_UP
;
1253 ifi
->ifi_flags
|= flag
;
1255 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1257 netlink_close(&nlh
);
1263 int netdev_get_flag(const char *name
, int *flag
)
1265 int err
, index
, len
;
1266 struct ifinfomsg
*ifi
;
1267 struct nl_handler nlh
;
1268 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1273 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1279 if (len
== 1 || len
>= IFNAMSIZ
)
1283 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1287 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1292 index
= if_nametoindex(name
);
1296 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
;
1297 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETLINK
;
1299 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1304 ifi
->ifi_family
= AF_UNSPEC
;
1305 ifi
->ifi_index
= index
;
1307 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1311 ifi
= NLMSG_DATA(answer
->nlmsghdr
);
1313 *flag
= ifi
->ifi_flags
;
1315 netlink_close(&nlh
);
1322 * \brief Check a interface is up or not.
1324 * \param name: name for the interface.
1327 * 0 means interface is down.
1328 * 1 means interface is up.
1329 * Others means error happened, and ret-value is the error number.
1331 int lxc_netdev_isup(const char *name
)
1335 err
= netdev_get_flag(name
, &flag
);
1345 int netdev_get_mtu(int ifindex
)
1347 int answer_len
, err
, res
;
1348 struct nl_handler nlh
;
1349 struct ifinfomsg
*ifi
;
1350 struct nlmsghdr
*msg
;
1351 int readmore
= 0, recv_len
= 0;
1352 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1354 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1359 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1363 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1367 /* Save the answer buffer length, since it will be overwritten
1368 * on the first receive (and we might need to receive more than
1371 answer_len
= answer
->nlmsghdr
->nlmsg_len
;
1373 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
1374 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETLINK
;
1376 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1379 ifi
->ifi_family
= AF_UNSPEC
;
1381 /* Send the request for addresses, which returns all addresses
1382 * on all interfaces. */
1383 err
= netlink_send(&nlh
, nlmsg
);
1387 #pragma GCC diagnostic push
1388 #pragma GCC diagnostic ignored "-Wcast-align"
1391 /* Restore the answer buffer length, it might have been
1392 * overwritten by a previous receive.
1394 answer
->nlmsghdr
->nlmsg_len
= answer_len
;
1396 /* Get the (next) batch of reply messages */
1397 err
= netlink_rcv(&nlh
, answer
);
1403 /* Satisfy the typing for the netlink macros */
1404 msg
= answer
->nlmsghdr
;
1406 while (NLMSG_OK(msg
, recv_len
)) {
1408 /* Stop reading if we see an error message */
1409 if (msg
->nlmsg_type
== NLMSG_ERROR
) {
1410 struct nlmsgerr
*errmsg
=
1411 (struct nlmsgerr
*)NLMSG_DATA(msg
);
1412 err
= errmsg
->error
;
1416 /* Stop reading if we see a NLMSG_DONE message */
1417 if (msg
->nlmsg_type
== NLMSG_DONE
) {
1422 ifi
= NLMSG_DATA(msg
);
1423 if (ifi
->ifi_index
== ifindex
) {
1424 struct rtattr
*rta
= IFLA_RTA(ifi
);
1426 msg
->nlmsg_len
- NLMSG_LENGTH(sizeof(*ifi
));
1428 while (RTA_OK(rta
, attr_len
)) {
1429 /* Found a local address for the
1430 * requested interface, return it.
1432 if (rta
->rta_type
== IFLA_MTU
) {
1433 memcpy(&res
, RTA_DATA(rta
),
1438 rta
= RTA_NEXT(rta
, attr_len
);
1442 /* Keep reading more data from the socket if the last
1443 * message had the NLF_F_MULTI flag set.
1445 readmore
= (msg
->nlmsg_flags
& NLM_F_MULTI
);
1447 /* Look at the next message received in this buffer. */
1448 msg
= NLMSG_NEXT(msg
, recv_len
);
1452 #pragma GCC diagnostic pop
1454 /* If we end up here, we didn't find any result, so signal an error. */
1458 netlink_close(&nlh
);
1464 int lxc_netdev_set_mtu(const char *name
, int mtu
)
1466 int err
, index
, len
;
1467 struct ifinfomsg
*ifi
;
1468 struct nl_handler nlh
;
1469 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1471 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1477 if (len
== 1 || len
>= IFNAMSIZ
)
1481 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1485 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1490 index
= if_nametoindex(name
);
1494 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
1495 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1497 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1502 ifi
->ifi_family
= AF_UNSPEC
;
1503 ifi
->ifi_index
= index
;
1505 if (nla_put_u32(nlmsg
, IFLA_MTU
, mtu
))
1508 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1510 netlink_close(&nlh
);
1516 int lxc_netdev_up(const char *name
)
1518 return netdev_set_flag(name
, IFF_UP
);
1521 int lxc_netdev_down(const char *name
)
1523 return netdev_set_flag(name
, 0);
1526 int lxc_veth_create(const char *name1
, const char *name2
)
1529 struct ifinfomsg
*ifi
;
1530 struct nl_handler nlh
;
1531 struct rtattr
*nest1
, *nest2
, *nest3
;
1532 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1534 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1539 len
= strlen(name1
);
1540 if (len
== 1 || len
>= IFNAMSIZ
)
1543 len
= strlen(name2
);
1544 if (len
== 1 || len
>= IFNAMSIZ
)
1548 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1552 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1556 nlmsg
->nlmsghdr
->nlmsg_flags
=
1557 NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
1558 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1560 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1563 ifi
->ifi_family
= AF_UNSPEC
;
1566 nest1
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
1570 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "veth"))
1573 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
1577 nest3
= nla_begin_nested(nlmsg
, VETH_INFO_PEER
);
1581 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1587 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name2
))
1590 nla_end_nested(nlmsg
, nest3
);
1591 nla_end_nested(nlmsg
, nest2
);
1592 nla_end_nested(nlmsg
, nest1
);
1594 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name1
))
1597 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1599 netlink_close(&nlh
);
1605 /* TODO: merge with lxc_macvlan_create */
1606 int lxc_vlan_create(const char *master
, const char *name
, unsigned short vlanid
)
1608 int err
, len
, lindex
;
1609 struct ifinfomsg
*ifi
;
1610 struct nl_handler nlh
;
1611 struct rtattr
*nest
, *nest2
;
1612 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1614 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1619 len
= strlen(master
);
1620 if (len
== 1 || len
>= IFNAMSIZ
)
1624 if (len
== 1 || len
>= IFNAMSIZ
)
1628 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1632 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1637 lindex
= if_nametoindex(master
);
1641 nlmsg
->nlmsghdr
->nlmsg_flags
=
1642 NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
1643 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1645 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1650 ifi
->ifi_family
= AF_UNSPEC
;
1652 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
1656 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "vlan"))
1659 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
1663 if (nla_put_u16(nlmsg
, IFLA_VLAN_ID
, vlanid
))
1666 nla_end_nested(nlmsg
, nest2
);
1667 nla_end_nested(nlmsg
, nest
);
1669 if (nla_put_u32(nlmsg
, IFLA_LINK
, lindex
))
1672 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
1675 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1681 netlink_close(&nlh
);
1685 int lxc_macvlan_create(const char *master
, const char *name
, int mode
)
1687 int err
, index
, len
;
1688 struct ifinfomsg
*ifi
;
1689 struct nl_handler nlh
;
1690 struct rtattr
*nest
, *nest2
;
1691 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1693 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1698 len
= strlen(master
);
1699 if (len
== 1 || len
>= IFNAMSIZ
)
1703 if (len
== 1 || len
>= IFNAMSIZ
)
1707 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1711 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1716 index
= if_nametoindex(master
);
1720 nlmsg
->nlmsghdr
->nlmsg_flags
=
1721 NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
1722 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1724 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1729 ifi
->ifi_family
= AF_UNSPEC
;
1731 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
1735 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "macvlan"))
1739 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
1743 if (nla_put_u32(nlmsg
, IFLA_MACVLAN_MODE
, mode
))
1746 nla_end_nested(nlmsg
, nest2
);
1749 nla_end_nested(nlmsg
, nest
);
1751 if (nla_put_u32(nlmsg
, IFLA_LINK
, index
))
1754 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
1757 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1759 netlink_close(&nlh
);
1765 static int proc_sys_net_write(const char *path
, const char *value
)
1770 fd
= open(path
, O_WRONLY
);
1774 if (lxc_write_nointr(fd
, value
, strlen(value
)) < 0)
1781 static int lxc_is_ip_forwarding_enabled(const char *ifname
, int family
)
1784 char path
[PATH_MAX
];
1787 if (family
!= AF_INET
&& family
!= AF_INET6
)
1788 return minus_one_set_errno(EINVAL
);
1790 ret
= snprintf(path
, PATH_MAX
, "/proc/sys/net/%s/conf/%s/%s",
1791 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
1793 if (ret
< 0 || (size_t)ret
>= PATH_MAX
)
1794 return minus_one_set_errno(E2BIG
);
1796 return lxc_read_file_expect(path
, buf
, 1, "1");
1799 static int neigh_proxy_set(const char *ifname
, int family
, int flag
)
1802 char path
[PATH_MAX
];
1804 if (family
!= AF_INET
&& family
!= AF_INET6
)
1807 ret
= snprintf(path
, PATH_MAX
, "/proc/sys/net/%s/conf/%s/%s",
1808 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
1809 family
== AF_INET
? "proxy_arp" : "proxy_ndp");
1810 if (ret
< 0 || (size_t)ret
>= PATH_MAX
)
1813 return proc_sys_net_write(path
, flag
? "1" : "0");
1816 static int lxc_is_ip_neigh_proxy_enabled(const char *ifname
, int family
)
1819 char path
[PATH_MAX
];
1822 if (family
!= AF_INET
&& family
!= AF_INET6
)
1823 return minus_one_set_errno(EINVAL
);
1825 ret
= snprintf(path
, PATH_MAX
, "/proc/sys/net/%s/conf/%s/%s",
1826 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
1827 family
== AF_INET
? "proxy_arp" : "proxy_ndp");
1828 if (ret
< 0 || (size_t)ret
>= PATH_MAX
)
1829 return minus_one_set_errno(E2BIG
);
1831 return lxc_read_file_expect(path
, buf
, 1, "1");
/* Enable neighbour proxying for @family on interface @name. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	return neigh_proxy_set(name, family, 1);
}
/* Disable neighbour proxying for @family on interface @name. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	return neigh_proxy_set(name, family, 0);
}
/* Parse a textual hardware address such as "00:16:3e:ab:cd:ef" into
 * @sockaddr.
 *
 * Each octet may be written with one or two hex digits (either case) and
 * octets are separated by ':'. At most ETH_ALEN octets are consumed.
 * sa_family is set to ARPHRD_ETHER and the octets are stored in sa_data.
 *
 * Returns 0 on success or -EINVAL on an unexpected character.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	int i = 0;
	unsigned val;
	char c;
	unsigned char *data;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	while ((*macaddr != '\0') && (i < ETH_ALEN)) {
		/* High nibble. The cast keeps isdigit() defined for bytes
		 * that are negative as plain char.
		 */
		c = *macaddr++;
		if (isdigit((unsigned char)c))
			val = c - '0';
		else if (c >= 'a' && c <= 'f')
			val = c - 'a' + 10;
		else if (c >= 'A' && c <= 'F')
			val = c - 'A' + 10;
		else
			return -EINVAL;

		val <<= 4;

		/* Low nibble, or a separator/end if the octet had one digit. */
		c = *macaddr;
		if (isdigit((unsigned char)c))
			val |= c - '0';
		else if (c >= 'a' && c <= 'f')
			val |= c - 'a' + 10;
		else if (c >= 'A' && c <= 'F')
			val |= c - 'A' + 10;
		else if (c == ':' || c == 0)
			val >>= 4;
		else
			return -EINVAL;

		if (c != 0)
			macaddr++;

		*data++ = (unsigned char)(val & 0377);
		i++;

		if (*macaddr == ':')
			macaddr++;
	}

	return 0;
}
1889 static int ip_addr_add(int family
, int ifindex
, void *addr
, void *bcast
,
1890 void *acast
, int prefix
)
1893 struct ifaddrmsg
*ifa
;
1894 struct nl_handler nlh
;
1895 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1897 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
1898 : sizeof(struct in6_addr
);
1900 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1905 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1909 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1913 nlmsg
->nlmsghdr
->nlmsg_flags
=
1914 NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
1915 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWADDR
;
1917 ifa
= nlmsg_reserve(nlmsg
, sizeof(struct ifaddrmsg
));
1920 ifa
->ifa_prefixlen
= prefix
;
1921 ifa
->ifa_index
= ifindex
;
1922 ifa
->ifa_family
= family
;
1926 if (nla_put_buffer(nlmsg
, IFA_LOCAL
, addr
, addrlen
))
1929 if (nla_put_buffer(nlmsg
, IFA_ADDRESS
, addr
, addrlen
))
1932 if (nla_put_buffer(nlmsg
, IFA_BROADCAST
, bcast
, addrlen
))
1935 /* TODO: multicast, anycast with ipv6 */
1936 err
= -EPROTONOSUPPORT
;
1937 if (family
== AF_INET6
&&
1938 (memcmp(bcast
, &in6addr_any
, sizeof(in6addr_any
)) ||
1939 memcmp(acast
, &in6addr_any
, sizeof(in6addr_any
))))
1942 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1944 netlink_close(&nlh
);
/* Add IPv6 address @addr/@prefix to the interface with index @ifindex.
 * @mcast is forwarded in the broadcast slot of ip_addr_add().
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	return ip_addr_add(AF_INET6, ifindex, addr, mcast, acast, prefix);
}
/* Add IPv4 address @addr/@prefix with broadcast address @bcast to the
 * interface with index @ifindex.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	return ip_addr_add(AF_INET, ifindex, addr, bcast, NULL, prefix);
}
/* Find an IFA_LOCAL (or IFA_ADDRESS if no IFA_LOCAL is present) address in
 * the given RTM_NEWADDR message. Allocates memory for the address and stores
 * that pointer in *res (so res should be an in_addr** or in6_addr**).
 *
 * *res must be NULL or point to a buffer from an earlier call: an existing
 * buffer is reused rather than reallocated. A message whose family differs
 * from @family is ignored.
 *
 * Returns 0 on success (including "nothing found") and -1 on a malformed
 * attribute or allocation failure.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	int addrlen;
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	struct rtattr *rta = IFA_RTA(ifa);
	int attr_len = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));

	if (ifa->ifa_family != family)
		return 0;

	addrlen = family == AF_INET ? sizeof(struct in_addr)
				    : sizeof(struct in6_addr);

	/* Loop over the rtattr's in this message */
	while (RTA_OK(rta, attr_len)) {
		/* Found a local address for the requested interface,
		 * return it.
		 */
		if (rta->rta_type == IFA_LOCAL ||
		    rta->rta_type == IFA_ADDRESS) {
			/* Sanity check. The family check above should make sure
			 * the address length is correct, but check here just in
			 * case.
			 */
			if (RTA_PAYLOAD(rta) != addrlen)
				return -1;

			/* We might have found an IFA_ADDRESS before, which we
			 * now overwrite with an IFA_LOCAL.
			 */
			if (!*res) {
				*res = malloc(addrlen);
				if (!*res)
					return -1;
			}

			memcpy(*res, RTA_DATA(rta), addrlen);
			if (rta->rta_type == IFA_LOCAL)
				break;
		}

		rta = RTA_NEXT(rta, attr_len);
	}

	return 0;
}

#pragma GCC diagnostic pop
2017 static int ip_addr_get(int family
, int ifindex
, void **res
)
2019 int answer_len
, err
;
2020 struct ifaddrmsg
*ifa
;
2021 struct nl_handler nlh
;
2022 struct nlmsghdr
*msg
;
2023 int readmore
= 0, recv_len
= 0;
2024 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2026 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
2031 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2035 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2039 /* Save the answer buffer length, since it will be overwritten on the
2040 * first receive (and we might need to receive more than once).
2042 answer_len
= answer
->nlmsghdr
->nlmsg_len
;
2044 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ROOT
;
2045 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETADDR
;
2047 ifa
= nlmsg_reserve(nlmsg
, sizeof(struct ifaddrmsg
));
2050 ifa
->ifa_family
= family
;
2052 /* Send the request for addresses, which returns all addresses on all
2055 err
= netlink_send(&nlh
, nlmsg
);
2059 #pragma GCC diagnostic push
2060 #pragma GCC diagnostic ignored "-Wcast-align"
2063 /* Restore the answer buffer length, it might have been
2064 * overwritten by a previous receive.
2066 answer
->nlmsghdr
->nlmsg_len
= answer_len
;
2068 /* Get the (next) batch of reply messages. */
2069 err
= netlink_rcv(&nlh
, answer
);
2076 /* Satisfy the typing for the netlink macros. */
2077 msg
= answer
->nlmsghdr
;
2079 while (NLMSG_OK(msg
, recv_len
)) {
2080 /* Stop reading if we see an error message. */
2081 if (msg
->nlmsg_type
== NLMSG_ERROR
) {
2082 struct nlmsgerr
*errmsg
=
2083 (struct nlmsgerr
*)NLMSG_DATA(msg
);
2084 err
= errmsg
->error
;
2088 /* Stop reading if we see a NLMSG_DONE message. */
2089 if (msg
->nlmsg_type
== NLMSG_DONE
) {
2094 if (msg
->nlmsg_type
!= RTM_NEWADDR
) {
2099 ifa
= (struct ifaddrmsg
*)NLMSG_DATA(msg
);
2100 if (ifa
->ifa_index
== ifindex
) {
2101 if (ifa_get_local_ip(family
, msg
, res
) < 0) {
2106 /* Found a result, stop searching. */
2111 /* Keep reading more data from the socket if the last
2112 * message had the NLF_F_MULTI flag set.
2114 readmore
= (msg
->nlmsg_flags
& NLM_F_MULTI
);
2116 /* Look at the next message received in this buffer. */
2117 msg
= NLMSG_NEXT(msg
, recv_len
);
2121 #pragma GCC diagnostic pop
2123 /* If we end up here, we didn't find any result, so signal an
2129 netlink_close(&nlh
);
/* Fetch an IPv6 address of the interface with index @ifindex into a newly
 * allocated buffer stored in *res.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	return ip_addr_get(AF_INET6, ifindex, (void **)res);
}
/* Fetch an IPv4 address of the interface with index @ifindex into a newly
 * allocated buffer stored in *res.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	return ip_addr_get(AF_INET, ifindex, (void **)res);
}
2145 static int ip_gateway_add(int family
, int ifindex
, void *gw
)
2148 struct nl_handler nlh
;
2150 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2152 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
2153 : sizeof(struct in6_addr
);
2155 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
2160 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2164 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2168 nlmsg
->nlmsghdr
->nlmsg_flags
=
2169 NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
2170 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWROUTE
;
2172 rt
= nlmsg_reserve(nlmsg
, sizeof(struct rtmsg
));
2175 rt
->rtm_family
= family
;
2176 rt
->rtm_table
= RT_TABLE_MAIN
;
2177 rt
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2178 rt
->rtm_protocol
= RTPROT_BOOT
;
2179 rt
->rtm_type
= RTN_UNICAST
;
2180 /* "default" destination */
2181 rt
->rtm_dst_len
= 0;
2185 /* If gateway address not supplied, then a device route will be created instead */
2187 if (nla_put_buffer(nlmsg
, RTA_GATEWAY
, gw
, addrlen
))
2191 /* Adding the interface index enables the use of link-local
2192 * addresses for the gateway.
2194 if (nla_put_u32(nlmsg
, RTA_OIF
, ifindex
))
2197 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
2199 netlink_close(&nlh
);
/* Add an IPv4 default route via @gw on the interface with index @ifindex. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	return ip_gateway_add(AF_INET, ifindex, gw);
}
/* Add an IPv6 default route via @gw on the interface with index @ifindex. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	return ip_gateway_add(AF_INET6, ifindex, gw);
}
/* Heuristic check for an openvswitch bridge.
 *
 * Returns true when /sys/class/net/@bridge/bridge does not exist (stat()
 * fails with ENOENT). Returns false when the entry exists, when stat() fails
 * for any other reason, or when @bridge is too long for the path buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	int ret;
	struct stat sb;
	char brdirname[22 + IFNAMSIZ + 1] = {0};

	ret = snprintf(brdirname, 22 + IFNAMSIZ + 1, "/sys/class/net/%s/bridge",
		       bridge);
	if (ret < 0 || (size_t)ret >= 22 + IFNAMSIZ + 1)
		return false;

	ret = stat(brdirname, &sb);
	if (ret < 0 && errno == ENOENT)
		return true;

	return false;
}
/* Arguments handed to the ovs-vsctl exec helpers through run_command(). */
struct ovs_veth_args {
	const char *bridge;
	const char *nic;
};
2237 /* Called from a background thread - when nic goes away, remove it from the
2240 static int lxc_ovs_delete_port_exec(void *data
)
2242 struct ovs_veth_args
*args
= data
;
2244 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args
->bridge
, args
->nic
,
2249 int lxc_ovs_delete_port(const char *bridge
, const char *nic
)
2252 char cmd_output
[PATH_MAX
];
2253 struct ovs_veth_args args
;
2255 args
.bridge
= bridge
;
2257 ret
= run_command(cmd_output
, sizeof(cmd_output
),
2258 lxc_ovs_delete_port_exec
, (void *)&args
);
2260 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
2261 "%s", bridge
, nic
, cmd_output
);
2268 static int lxc_ovs_attach_bridge_exec(void *data
)
2270 struct ovs_veth_args
*args
= data
;
2272 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args
->bridge
, args
->nic
,
2277 static int lxc_ovs_attach_bridge(const char *bridge
, const char *nic
)
2280 char cmd_output
[PATH_MAX
];
2281 struct ovs_veth_args args
;
2283 args
.bridge
= bridge
;
2285 ret
= run_command(cmd_output
, sizeof(cmd_output
),
2286 lxc_ovs_attach_bridge_exec
, (void *)&args
);
2288 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
2289 bridge
, nic
, cmd_output
);
2296 int lxc_bridge_attach(const char *bridge
, const char *ifname
)
2302 if (strlen(ifname
) >= IFNAMSIZ
)
2305 index
= if_nametoindex(ifname
);
2309 if (is_ovs_bridge(bridge
))
2310 return lxc_ovs_attach_bridge(bridge
, ifname
);
2312 fd
= socket(AF_INET
, SOCK_STREAM
| SOCK_CLOEXEC
, 0);
2316 retlen
= strlcpy(ifr
.ifr_name
, bridge
, IFNAMSIZ
);
2317 if (retlen
>= IFNAMSIZ
) {
2322 ifr
.ifr_name
[IFNAMSIZ
- 1] = '\0';
2323 ifr
.ifr_ifindex
= index
;
2324 err
= ioctl(fd
, SIOCBRADDIF
, &ifr
);
2332 static const char *const lxc_network_types
[LXC_NET_MAXCONFTYPE
+ 1] = {
2333 [LXC_NET_EMPTY
] = "empty",
2334 [LXC_NET_VETH
] = "veth",
2335 [LXC_NET_MACVLAN
] = "macvlan",
2336 [LXC_NET_IPVLAN
] = "ipvlan",
2337 [LXC_NET_PHYS
] = "phys",
2338 [LXC_NET_VLAN
] = "vlan",
2339 [LXC_NET_NONE
] = "none",
2342 const char *lxc_net_type_to_str(int type
)
2344 if (type
< 0 || type
> LXC_NET_MAXCONFTYPE
)
2347 return lxc_network_types
[type
];
/* Alphabet used by lxc_mkifname() to replace 'X' placeholders. */
static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
2352 char *lxc_mkifname(char *template)
2355 struct netns_ifaddrs
*ifa
, *ifaddr
;
2356 char name
[IFNAMSIZ
];
2357 bool exists
= false;
2362 seed
= randseed(false);
2365 (void)randseed(true);
2368 if (strlen(template) >= IFNAMSIZ
)
2371 /* Get all the network interfaces. */
2372 ret
= netns_getifaddrs(&ifaddr
, -1, &(bool){false});
2374 SYSERROR("Failed to get network interfaces");
2378 /* Generate random names until we find one that doesn't exist. */
2381 (void)strlcpy(name
, template, IFNAMSIZ
);
2385 for (i
= 0; i
< strlen(name
); i
++) {
2386 if (name
[i
] == 'X') {
2388 name
[i
] = padchar
[rand_r(&seed
) % strlen(padchar
)];
2390 name
[i
] = padchar
[rand() % strlen(padchar
)];
2395 for (ifa
= ifaddr
; ifa
!= NULL
; ifa
= ifa
->ifa_next
) {
2396 if (!strcmp(ifa
->ifa_name
, name
)) {
2406 netns_freeifaddrs(ifaddr
);
2407 (void)strlcpy(template, name
, strlen(template) + 1);
2412 int setup_private_host_hw_addr(char *veth1
)
2417 sockfd
= socket(AF_INET
, SOCK_DGRAM
| SOCK_CLOEXEC
, 0);
2421 err
= snprintf((char *)ifr
.ifr_name
, IFNAMSIZ
, "%s", veth1
);
2422 if (err
< 0 || (size_t)err
>= IFNAMSIZ
) {
2427 err
= ioctl(sockfd
, SIOCGIFHWADDR
, &ifr
);
2433 ifr
.ifr_hwaddr
.sa_data
[0] = 0xfe;
2434 err
= ioctl(sockfd
, SIOCSIFHWADDR
, &ifr
);
2442 int lxc_find_gateway_addresses(struct lxc_handler
*handler
)
2444 struct lxc_list
*network
= &handler
->conf
->network
;
2445 struct lxc_list
*iterator
;
2446 struct lxc_netdev
*netdev
;
2449 lxc_list_for_each(iterator
, network
) {
2450 netdev
= iterator
->elem
;
2452 if (!netdev
->ipv4_gateway_auto
&& !netdev
->ipv6_gateway_auto
)
2455 if (netdev
->type
!= LXC_NET_VETH
&& netdev
->type
!= LXC_NET_MACVLAN
) {
2456 ERROR("Automatic gateway detection is only supported "
2457 "for veth and macvlan");
2461 if (netdev
->link
[0] == '\0') {
2462 ERROR("Automatic gateway detection needs a link interface");
2466 link_index
= if_nametoindex(netdev
->link
);
2470 if (netdev
->ipv4_gateway_auto
) {
2471 if (lxc_ipv4_addr_get(link_index
, &netdev
->ipv4_gateway
)) {
2472 ERROR("Failed to automatically find ipv4 gateway "
2473 "address from link interface \"%s\"", netdev
->link
);
2478 if (netdev
->ipv6_gateway_auto
) {
2479 if (lxc_ipv6_addr_get(link_index
, &netdev
->ipv6_gateway
)) {
2480 ERROR("Failed to automatically find ipv6 gateway "
2481 "address from link interface \"%s\"", netdev
->link
);
2490 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2491 static int lxc_create_network_unpriv_exec(const char *lxcpath
, const char *lxcname
,
2492 struct lxc_netdev
*netdev
, pid_t pid
, unsigned int hooks_version
)
2496 int bytes
, pipefd
[2];
2497 char *token
, *saveptr
= NULL
;
2498 char netdev_link
[IFNAMSIZ
];
2499 char buffer
[PATH_MAX
] = {0};
2502 if (netdev
->type
!= LXC_NET_VETH
) {
2503 ERROR("Network type %d not support for unprivileged use", netdev
->type
);
2509 SYSERROR("Failed to create pipe");
2515 SYSERROR("Failed to create new process");
2522 char pidstr
[INTTYPE_TO_STRLEN(pid_t
)];
2526 ret
= dup2(pipefd
[1], STDOUT_FILENO
);
2528 ret
= dup2(pipefd
[1], STDERR_FILENO
);
2531 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2532 _exit(EXIT_FAILURE
);
2535 if (netdev
->link
[0] != '\0')
2536 retlen
= strlcpy(netdev_link
, netdev
->link
, IFNAMSIZ
);
2538 retlen
= strlcpy(netdev_link
, "none", IFNAMSIZ
);
2539 if (retlen
>= IFNAMSIZ
) {
2540 SYSERROR("Invalid network device name");
2541 _exit(EXIT_FAILURE
);
2544 ret
= snprintf(pidstr
, sizeof(pidstr
), "%d", pid
);
2545 if (ret
< 0 || ret
>= sizeof(pidstr
))
2546 _exit(EXIT_FAILURE
);
2547 pidstr
[sizeof(pidstr
) - 1] = '\0';
2549 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath
,
2550 lxcname
, pidstr
, netdev_link
,
2551 netdev
->name
[0] != '\0' ? netdev
->name
: "(null)");
2552 if (netdev
->name
[0] != '\0')
2553 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "create",
2554 lxcpath
, lxcname
, pidstr
, "veth", netdev_link
,
2555 netdev
->name
, (char *)NULL
);
2557 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "create",
2558 lxcpath
, lxcname
, pidstr
, "veth", netdev_link
,
2560 SYSERROR("Failed to execute lxc-user-nic");
2561 _exit(EXIT_FAILURE
);
2564 /* close the write-end of the pipe */
2567 bytes
= lxc_read_nointr(pipefd
[0], &buffer
, PATH_MAX
);
2569 SYSERROR("Failed to read from pipe file descriptor");
2572 buffer
[bytes
- 1] = '\0';
2575 ret
= wait_for_pid(child
);
2577 if (ret
!= 0 || bytes
< 0) {
2578 ERROR("lxc-user-nic failed to configure requested network: %s",
2579 buffer
[0] != '\0' ? buffer
: "(null)");
2582 TRACE("Received output \"%s\" from lxc-user-nic", buffer
);
2585 token
= strtok_r(buffer
, ":", &saveptr
);
2587 ERROR("Failed to parse lxc-user-nic output");
2592 * lxc-user-nic will take care of proper network device naming. So
2593 * netdev->name and netdev->created_name need to be identical to not
2594 * trigger another rename later on.
2596 retlen
= strlcpy(netdev
->name
, token
, IFNAMSIZ
);
2597 if (retlen
< IFNAMSIZ
)
2598 retlen
= strlcpy(netdev
->created_name
, token
, IFNAMSIZ
);
2599 if (retlen
>= IFNAMSIZ
) {
2600 ERROR("Container side veth device name returned by lxc-user-nic is too long");
2604 /* netdev->ifindex */
2605 token
= strtok_r(NULL
, ":", &saveptr
);
2607 ERROR("Failed to parse lxc-user-nic output");
2611 ret
= lxc_safe_int(token
, &netdev
->ifindex
);
2614 SYSERROR("Failed to convert string \"%s\" to integer", token
);
2618 /* netdev->priv.veth_attr.veth1 */
2619 token
= strtok_r(NULL
, ":", &saveptr
);
2621 ERROR("Failed to parse lxc-user-nic output");
2625 retlen
= strlcpy(netdev
->priv
.veth_attr
.veth1
, token
, IFNAMSIZ
);
2626 if (retlen
>= IFNAMSIZ
) {
2627 ERROR("Host side veth device name returned by lxc-user-nic is "
2632 /* netdev->priv.veth_attr.ifindex */
2633 token
= strtok_r(NULL
, ":", &saveptr
);
2635 ERROR("Failed to parse lxc-user-nic output");
2639 ret
= lxc_safe_int(token
, &netdev
->priv
.veth_attr
.ifindex
);
2642 SYSERROR("Failed to convert string \"%s\" to integer", token
);
2646 if (netdev
->upscript
) {
2650 netdev
->priv
.veth_attr
.veth1
,
2654 ret
= run_script_argv(lxcname
, hooks_version
, "net",
2655 netdev
->upscript
, "up", argv
);
2663 static int lxc_delete_network_unpriv_exec(const char *lxcpath
, const char *lxcname
,
2664 struct lxc_netdev
*netdev
,
2665 const char *netns_path
)
2670 char buffer
[PATH_MAX
] = {0};
2672 if (netdev
->type
!= LXC_NET_VETH
) {
2673 ERROR("Network type %d not support for unprivileged use", netdev
->type
);
2679 SYSERROR("Failed to create pipe");
2685 SYSERROR("Failed to create new process");
2696 ret
= dup2(pipefd
[1], STDOUT_FILENO
);
2698 ret
= dup2(pipefd
[1], STDERR_FILENO
);
2701 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2702 _exit(EXIT_FAILURE
);
2705 if (netdev
->priv
.veth_attr
.pair
[0] != '\0')
2706 hostveth
= netdev
->priv
.veth_attr
.pair
;
2708 hostveth
= netdev
->priv
.veth_attr
.veth1
;
2709 if (hostveth
[0] == '\0') {
2710 SYSERROR("Host side veth device name is missing");
2711 _exit(EXIT_FAILURE
);
2714 if (netdev
->link
[0] == '\0') {
2715 SYSERROR("Network link for network device \"%s\" is "
2716 "missing", netdev
->priv
.veth_attr
.veth1
);
2717 _exit(EXIT_FAILURE
);
2720 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath
,
2721 lxcname
, netns_path
, netdev
->link
, hostveth
);
2722 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "delete", lxcpath
,
2723 lxcname
, netns_path
, "veth", netdev
->link
, hostveth
,
2725 SYSERROR("Failed to exec lxc-user-nic.");
2726 _exit(EXIT_FAILURE
);
2731 bytes
= lxc_read_nointr(pipefd
[0], &buffer
, PATH_MAX
);
2733 SYSERROR("Failed to read from pipe file descriptor.");
2736 buffer
[bytes
- 1] = '\0';
2739 ret
= wait_for_pid(child
);
2741 if (ret
!= 0 || bytes
< 0) {
2742 ERROR("lxc-user-nic failed to delete requested network: %s",
2743 buffer
[0] != '\0' ? buffer
: "(null)");
2750 bool lxc_delete_network_unpriv(struct lxc_handler
*handler
)
2753 struct lxc_list
*iterator
;
2754 struct lxc_list
*network
= &handler
->conf
->network
;
2755 /* strlen("/proc/") = 6
2757 * INTTYPE_TO_STRLEN(pid_t)
2759 * strlen("/fd/") = 4
2761 * INTTYPE_TO_STRLEN(int)
2765 char netns_path
[6 + INTTYPE_TO_STRLEN(pid_t
) + 4 + INTTYPE_TO_STRLEN(int) + 1];
2769 if (handler
->nsfd
[LXC_NS_NET
] < 0) {
2770 DEBUG("Cannot not guarantee safe deletion of network devices. "
2771 "Manual cleanup maybe needed");
2775 ret
= snprintf(netns_path
, sizeof(netns_path
), "/proc/%d/fd/%d",
2776 lxc_raw_getpid(), handler
->nsfd
[LXC_NS_NET
]);
2777 if (ret
< 0 || ret
>= sizeof(netns_path
))
2780 lxc_list_for_each(iterator
, network
) {
2781 char *hostveth
= NULL
;
2782 struct lxc_netdev
*netdev
= iterator
->elem
;
2784 /* We can only delete devices whose ifindex we have. If we don't
2785 * have the index it means that we didn't create it.
2787 if (!netdev
->ifindex
)
2790 if (netdev
->type
== LXC_NET_PHYS
) {
2791 ret
= lxc_netdev_rename_by_index(netdev
->ifindex
,
2794 WARN("Failed to rename interface with index %d "
2795 "to its initial name \"%s\"",
2796 netdev
->ifindex
, netdev
->link
);
2798 TRACE("Renamed interface with index %d to its "
2799 "initial name \"%s\"",
2800 netdev
->ifindex
, netdev
->link
);
2802 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
2803 goto clear_ifindices
;
2806 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
2808 WARN("Failed to deconfigure network device");
2810 if (netdev
->type
!= LXC_NET_VETH
)
2811 goto clear_ifindices
;
2813 if (netdev
->link
[0] == '\0' || !is_ovs_bridge(netdev
->link
))
2814 goto clear_ifindices
;
2816 if (netdev
->priv
.veth_attr
.pair
[0] != '\0')
2817 hostveth
= netdev
->priv
.veth_attr
.pair
;
2819 hostveth
= netdev
->priv
.veth_attr
.veth1
;
2820 if (hostveth
[0] == '\0')
2821 goto clear_ifindices
;
2823 ret
= lxc_delete_network_unpriv_exec(handler
->lxcpath
,
2824 handler
->name
, netdev
,
2827 WARN("Failed to remove port \"%s\" from openvswitch "
2828 "bridge \"%s\"", hostveth
, netdev
->link
);
2829 goto clear_ifindices
;
2831 INFO("Removed interface \"%s\" from \"%s\"", hostveth
,
2835 /* We need to clear any ifindices we recorded so liblxc won't
2836 * have cached stale data which would cause it to fail on reboot
2837 * we're we don't re-read the on-disk config file.
2839 netdev
->ifindex
= 0;
2840 if (netdev
->type
== LXC_NET_PHYS
) {
2841 netdev
->priv
.phys_attr
.ifindex
= 0;
2842 } else if (netdev
->type
== LXC_NET_VETH
) {
2843 netdev
->priv
.veth_attr
.veth1
[0] = '\0';
2844 netdev
->priv
.veth_attr
.ifindex
= 0;
/* Arguments handed to the "ip neigh ... proxy" exec wrappers. */
struct ip_proxy_args {
	const char *ip;
	const char *dev;
};
2856 static int lxc_add_ip_neigh_proxy_exec_wrapper(void *data
)
2858 struct ip_proxy_args
*args
= data
;
2860 execlp("ip", "ip", "neigh", "add", "proxy", args
->ip
, "dev", args
->dev
, (char *)NULL
);
2864 static int lxc_del_ip_neigh_proxy_exec_wrapper(void *data
)
2866 struct ip_proxy_args
*args
= data
;
2868 execlp("ip", "ip", "neigh", "flush", "proxy", args
->ip
, "dev", args
->dev
, (char *)NULL
);
2872 static int lxc_add_ip_neigh_proxy(const char *ip
, const char *dev
)
2875 char cmd_output
[PATH_MAX
];
2876 struct ip_proxy_args args
= {
2881 ret
= run_command(cmd_output
, sizeof(cmd_output
), lxc_add_ip_neigh_proxy_exec_wrapper
, &args
);
2883 ERROR("Failed to add ip proxy \"%s\" to dev \"%s\": %s", ip
, dev
, cmd_output
);
2890 static int lxc_del_ip_neigh_proxy(const char *ip
, const char *dev
)
2893 char cmd_output
[PATH_MAX
];
2894 struct ip_proxy_args args
= {
2899 ret
= run_command(cmd_output
, sizeof(cmd_output
), lxc_del_ip_neigh_proxy_exec_wrapper
, &args
);
2901 ERROR("Failed to delete ip proxy \"%s\" to dev \"%s\": %s", ip
, dev
, cmd_output
);
2908 static int lxc_setup_l2proxy(struct lxc_netdev
*netdev
) {
2909 struct lxc_list
*cur
, *next
;
2910 struct lxc_inetdev
*inet4dev
;
2911 struct lxc_inet6dev
*inet6dev
;
2912 char bufinet4
[INET_ADDRSTRLEN
], bufinet6
[INET6_ADDRSTRLEN
];
2914 unsigned int lo_ifindex
= 0;
2916 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
2917 if (!lxc_list_empty(&netdev
->ipv4
)) {
2918 /* Check for net.ipv4.conf.[link].forwarding=1 */
2919 if (lxc_is_ip_forwarding_enabled(netdev
->link
, AF_INET
) < 0) {
2920 ERROR("Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev
->link
);
2921 return minus_one_set_errno(EINVAL
);
2925 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
2926 if (!lxc_list_empty(&netdev
->ipv6
)) {
2927 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
2928 if (lxc_is_ip_neigh_proxy_enabled(netdev
->link
, AF_INET6
) < 0) {
2929 ERROR("Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev
->link
);
2930 return minus_one_set_errno(EINVAL
);
2933 /* Check for net.ipv6.conf.[link].forwarding=1 */
2934 if (lxc_is_ip_forwarding_enabled(netdev
->link
, AF_INET6
) < 0) {
2935 ERROR("Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev
->link
);
2936 return minus_one_set_errno(EINVAL
);
2940 /* Perform IPVLAN specific checks. */
2941 if (netdev
->type
== LXC_NET_IPVLAN
) {
2942 /* Check mode is l3s as other modes do not work with l2proxy. */
2943 if (netdev
->priv
.ipvlan_attr
.mode
!= IPVLAN_MODE_L3S
) {
2944 ERROR("Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev
->link
);
2945 return minus_one_set_errno(EINVAL
);
2948 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
2949 lo_ifindex
= if_nametoindex(loDev
);
2950 if (lo_ifindex
== 0) {
2951 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loDev
);
2952 return minus_one_set_errno(EINVAL
);
2956 lxc_list_for_each_safe(cur
, &netdev
->ipv4
, next
) {
2957 inet4dev
= cur
->elem
;
2958 if (!inet_ntop(AF_INET
, &inet4dev
->addr
, bufinet4
, sizeof(bufinet4
)))
2959 return minus_one_set_errno(-errno
);
2961 if (lxc_add_ip_neigh_proxy(bufinet4
, netdev
->link
) < 0)
2962 return minus_one_set_errno(EINVAL
);
2964 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2965 if (netdev
->type
== LXC_NET_IPVLAN
) {
2966 err
= lxc_ipv4_dest_add(lo_ifindex
, &inet4dev
->addr
, 32);
2968 ERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4
, loDev
);
2969 return minus_one_set_errno(-err
);
2974 lxc_list_for_each_safe(cur
, &netdev
->ipv6
, next
) {
2975 inet6dev
= cur
->elem
;
2976 if (!inet_ntop(AF_INET6
, &inet6dev
->addr
, bufinet6
, sizeof(bufinet6
)))
2977 return minus_one_set_errno(-errno
);
2979 if (lxc_add_ip_neigh_proxy(bufinet6
, netdev
->link
) < 0)
2980 return minus_one_set_errno(EINVAL
);
2982 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
2983 if (netdev
->type
== LXC_NET_IPVLAN
) {
2984 err
= lxc_ipv6_dest_add(lo_ifindex
, &inet6dev
->addr
, 128);
2986 ERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6
, loDev
);
2987 return minus_one_set_errno(-err
);
/* Undo the l2proxy setup for one IPv4 address.
 *
 * Removes the /32 route on @lo_ifindex when it is non-zero and the neighbour
 * proxy entry on @link when it is non-empty. Both steps are attempted even
 * if the first one fails.
 *
 * Returns 0 on success and -1 with errno set to EINVAL if anything failed.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex) {
	char bufinet4[INET_ADDRSTRLEN];
	unsigned int errCount = 0;

	if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4))) {
		SYSERROR("Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* If a local-loopback ifindex supplied remove the static route to the lo device. */
	if (lo_ifindex > 0) {
		if (lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
			errCount++;
			ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
		}
	}

	/* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
	if (link[0] != '\0') {
		if (lxc_del_ip_neigh_proxy(bufinet4, link) < 0)
			errCount++;
	}

	if (errCount > 0)
		return minus_one_set_errno(EINVAL);

	return 0;
}
/* Undo the l2proxy setup for one IPv6 address.
 *
 * Removes the /128 route on @lo_ifindex when it is non-zero and the neighbour
 * proxy entry on @link when it is non-empty. Both steps are attempted even
 * if the first one fails.
 *
 * Returns 0 on success and -1 with errno set to EINVAL if anything failed.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex) {
	char bufinet6[INET6_ADDRSTRLEN];
	unsigned int errCount = 0;

	if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6))) {
		SYSERROR("Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);
		return minus_one_set_errno(EINVAL);
	}

	/* If a local-loopback ifindex supplied remove the static route to the lo device. */
	if (lo_ifindex > 0) {
		if (lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
			errCount++;
			ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
		}
	}

	/* If link is supplied remove the IP neigh proxy entry for this IP on the device. */
	if (link[0] != '\0') {
		if (lxc_del_ip_neigh_proxy(bufinet6, link) < 0)
			errCount++;
	}

	if (errCount > 0)
		return minus_one_set_errno(EINVAL);

	return 0;
}
3053 static int lxc_delete_l2proxy(struct lxc_netdev
*netdev
) {
3054 unsigned int lo_ifindex
= 0;
3055 unsigned int errCount
= 0;
3056 struct lxc_list
*cur
, *next
;
3057 struct lxc_inetdev
*inet4dev
;
3058 struct lxc_inet6dev
*inet6dev
;
3060 /* Perform IPVLAN specific checks. */
3061 if (netdev
->type
== LXC_NET_IPVLAN
) {
3062 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3063 lo_ifindex
= if_nametoindex(loDev
);
3064 if (lo_ifindex
== 0) {
3066 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loDev
);
3070 lxc_list_for_each_safe(cur
, &netdev
->ipv4
, next
) {
3071 inet4dev
= cur
->elem
;
3072 if (lxc_delete_ipv4_l2proxy(&inet4dev
->addr
, netdev
->link
, lo_ifindex
) < 0)
3076 lxc_list_for_each_safe(cur
, &netdev
->ipv6
, next
) {
3077 inet6dev
= cur
->elem
;
3078 if (lxc_delete_ipv6_l2proxy(&inet6dev
->addr
, netdev
->link
, lo_ifindex
) < 0)
3083 return minus_one_set_errno(EINVAL
);
3088 static int lxc_create_network_priv(struct lxc_handler
*handler
)
3090 struct lxc_list
*iterator
;
3091 struct lxc_list
*network
= &handler
->conf
->network
;
3093 lxc_list_for_each(iterator
, network
) {
3094 struct lxc_netdev
*netdev
= iterator
->elem
;
3096 if (netdev
->type
< 0 || netdev
->type
> LXC_NET_MAXCONFTYPE
) {
3097 ERROR("Invalid network configuration type %d", netdev
->type
);
3101 /* Setup l2proxy entries if enabled and used with a link property */
3102 if (netdev
->l2proxy
&& netdev
->link
[0] != '\0') {
3103 if (lxc_setup_l2proxy(netdev
)) {
3104 ERROR("Failed to setup l2proxy");
3109 if (netdev_conf
[netdev
->type
](handler
, netdev
)) {
3110 ERROR("Failed to create network device");
3118 int lxc_network_move_created_netdev_priv(struct lxc_handler
*handler
)
3120 pid_t pid
= handler
->pid
;
3121 struct lxc_list
*network
= &handler
->conf
->network
;
3122 struct lxc_list
*iterator
;
3124 if (am_guest_unpriv())
3127 lxc_list_for_each(iterator
, network
) {
3129 char ifname
[IFNAMSIZ
];
3130 struct lxc_netdev
*netdev
= iterator
->elem
;
3132 if (!netdev
->ifindex
)
3135 /* retrieve the name of the interface */
3136 if (!if_indextoname(netdev
->ifindex
, ifname
)) {
3137 ERROR("No interface corresponding to ifindex \"%d\"",
3142 ret
= lxc_netdev_move_by_name(ifname
, pid
, NULL
);
3145 SYSERROR("Failed to move network device \"%s\" to network namespace %d",
3150 strlcpy(netdev
->created_name
, ifname
, IFNAMSIZ
);
3152 DEBUG("Moved network device \"%s\" to network namespace of %d",
3153 netdev
->created_name
, pid
);
3159 static int lxc_create_network_unpriv(struct lxc_handler
*handler
)
3161 int hooks_version
= handler
->conf
->hooks_version
;
3162 const char *lxcname
= handler
->name
;
3163 const char *lxcpath
= handler
->lxcpath
;
3164 struct lxc_list
*network
= &handler
->conf
->network
;
3165 pid_t pid
= handler
->pid
;
3166 struct lxc_list
*iterator
;
3168 lxc_list_for_each(iterator
, network
) {
3169 struct lxc_netdev
*netdev
= iterator
->elem
;
3171 if (netdev
->type
== LXC_NET_EMPTY
)
3174 if (netdev
->type
== LXC_NET_NONE
)
3177 if (netdev
->type
!= LXC_NET_VETH
) {
3178 ERROR("Networks of type %s are not supported by unprivileged containers",
3179 lxc_net_type_to_str(netdev
->type
));
3184 INFO("mtu ignored due to insufficient privilege");
3186 if (lxc_create_network_unpriv_exec(lxcpath
, lxcname
, netdev
,
3187 pid
, hooks_version
))
3194 bool lxc_delete_network_priv(struct lxc_handler
*handler
)
3197 struct lxc_list
*iterator
;
3198 struct lxc_list
*network
= &handler
->conf
->network
;
3200 lxc_list_for_each(iterator
, network
) {
3201 char *hostveth
= NULL
;
3202 struct lxc_netdev
*netdev
= iterator
->elem
;
3204 /* We can only delete devices whose ifindex we have. If we don't
3205 * have the index it means that we didn't create it.
3207 if (!netdev
->ifindex
)
3210 /* Delete l2proxy entries if enabled and used with a link property */
3211 if (netdev
->l2proxy
&& netdev
->link
[0] != '\0') {
3212 if (lxc_delete_l2proxy(netdev
))
3213 WARN("Failed to delete all l2proxy config");
3214 /* Don't return, let the network be cleaned up as normal. */
3217 if (netdev
->type
== LXC_NET_PHYS
) {
3218 ret
= lxc_netdev_rename_by_index(netdev
->ifindex
, netdev
->link
);
3220 WARN("Failed to rename interface with index %d "
3221 "from \"%s\" to its initial name \"%s\"",
3222 netdev
->ifindex
, netdev
->name
, netdev
->link
);
3224 TRACE("Renamed interface with index %d from "
3225 "\"%s\" to its initial name \"%s\"",
3226 netdev
->ifindex
, netdev
->name
,
3229 /* Restore original MTU */
3230 ret
= lxc_netdev_set_mtu(netdev
->link
, netdev
->priv
.phys_attr
.mtu
);
3232 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3233 netdev
->link
, netdev
->priv
.phys_attr
.mtu
);
3235 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3236 netdev
->link
, netdev
->priv
.phys_attr
.mtu
);
3240 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
3241 goto clear_ifindices
;
3244 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
3246 WARN("Failed to deconfigure network device");
3248 /* Recent kernels remove the virtual interfaces when the network
3249 * namespace is destroyed but in case we did not move the
3250 * interface to the network namespace, we have to destroy it.
3252 ret
= lxc_netdev_delete_by_index(netdev
->ifindex
);
3254 if (errno
!= ENODEV
) {
3255 WARN("Failed to remove interface \"%s\" with index %d",
3256 netdev
->name
[0] != '\0' ? netdev
->name
: "(null)",
3258 goto clear_ifindices
;
3260 INFO("Interface \"%s\" with index %d already deleted or existing in different network namespace",
3261 netdev
->name
[0] != '\0' ? netdev
->name
: "(null)",
3264 INFO("Removed interface \"%s\" with index %d",
3265 netdev
->name
[0] != '\0' ? netdev
->name
: "(null)",
3268 if (netdev
->type
!= LXC_NET_VETH
)
3269 goto clear_ifindices
;
3271 /* Explicitly delete host veth device to prevent lingering
3272 * devices. We had issues in LXD around this.
3274 if (netdev
->priv
.veth_attr
.pair
[0] != '\0')
3275 hostveth
= netdev
->priv
.veth_attr
.pair
;
3277 hostveth
= netdev
->priv
.veth_attr
.veth1
;
3278 if (hostveth
[0] == '\0')
3279 goto clear_ifindices
;
3281 ret
= lxc_netdev_delete_by_name(hostveth
);
3283 WARN("Failed to remove interface \"%s\" from \"%s\"",
3284 hostveth
, netdev
->link
);
3285 goto clear_ifindices
;
3287 INFO("Removed interface \"%s\" from \"%s\"", hostveth
, netdev
->link
);
3289 if (netdev
->link
[0] == '\0' || !is_ovs_bridge(netdev
->link
)) {
3290 netdev
->priv
.veth_attr
.veth1
[0] = '\0';
3291 netdev
->ifindex
= 0;
3292 netdev
->priv
.veth_attr
.ifindex
= 0;
3293 goto clear_ifindices
;
3296 /* Delete the openvswitch port. */
3297 ret
= lxc_ovs_delete_port(netdev
->link
, hostveth
);
3299 WARN("Failed to remove port \"%s\" from openvswitch "
3300 "bridge \"%s\"", hostveth
, netdev
->link
);
3302 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
3303 hostveth
, netdev
->link
);
3306 /* We need to clear any ifindices we recorded so liblxc won't
3307 * have cached stale data which would cause it to fail on reboot
3308 * we're we don't re-read the on-disk config file.
3310 netdev
->ifindex
= 0;
3311 if (netdev
->type
== LXC_NET_PHYS
) {
3312 netdev
->priv
.phys_attr
.ifindex
= 0;
3313 } else if (netdev
->type
== LXC_NET_VETH
) {
3314 netdev
->priv
.veth_attr
.veth1
[0] = '\0';
3315 netdev
->priv
.veth_attr
.ifindex
= 0;
3322 int lxc_requests_empty_network(struct lxc_handler
*handler
)
3324 struct lxc_list
*network
= &handler
->conf
->network
;
3325 struct lxc_list
*iterator
;
3326 bool found_none
= false, found_nic
= false;
3328 if (lxc_list_empty(network
))
3331 lxc_list_for_each(iterator
, network
) {
3332 struct lxc_netdev
*netdev
= iterator
->elem
;
3334 if (netdev
->type
== LXC_NET_NONE
)
3339 if (found_none
&& !found_nic
)
3344 /* try to move physical nics to the init netns */
3345 int lxc_restore_phys_nics_to_netns(struct lxc_handler
*handler
)
3349 char ifname
[IFNAMSIZ
];
3350 struct lxc_list
*iterator
;
3351 int netnsfd
= handler
->nsfd
[LXC_NS_NET
];
3352 struct lxc_conf
*conf
= handler
->conf
;
3354 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3355 * the parent network namespace. We won't have this capability if we are
3358 if (!handler
->am_root
)
3361 TRACE("Moving physical network devices back to parent network namespace");
3363 oldfd
= lxc_preserve_ns(handler
->monitor_pid
, "net");
3365 SYSERROR("Failed to preserve network namespace");
3369 ret
= setns(netnsfd
, CLONE_NEWNET
);
3371 SYSERROR("Failed to enter network namespace");
3376 lxc_list_for_each(iterator
, &conf
->network
) {
3377 struct lxc_netdev
*netdev
= iterator
->elem
;
3379 if (netdev
->type
!= LXC_NET_PHYS
)
3382 /* Retrieve the name of the interface in the container's network
3385 if (!if_indextoname(netdev
->ifindex
, ifname
)) {
3386 WARN("No interface corresponding to ifindex %d",
3391 ret
= lxc_netdev_move_by_index_fd(netdev
->ifindex
, oldfd
, netdev
->link
);
3393 WARN("Error moving network device \"%s\" back to "
3394 "network namespace", ifname
);
3396 TRACE("Moved network device \"%s\" back to network "
3397 "namespace", ifname
);
3400 ret
= setns(oldfd
, CLONE_NEWNET
);
3403 SYSERROR("Failed to enter network namespace");
3410 static int setup_hw_addr(char *hwaddr
, const char *ifname
)
3412 struct sockaddr sockaddr
;
3416 ret
= lxc_convert_mac(hwaddr
, &sockaddr
);
3419 SYSERROR("Mac address \"%s\" conversion failed", hwaddr
);
3423 memcpy(ifr
.ifr_name
, ifname
, IFNAMSIZ
);
3424 ifr
.ifr_name
[IFNAMSIZ
-1] = '\0';
3425 memcpy((char *) &ifr
.ifr_hwaddr
, (char *) &sockaddr
, sizeof(sockaddr
));
3427 fd
= socket(AF_INET
, SOCK_DGRAM
| SOCK_CLOEXEC
, 0);
3431 ret
= ioctl(fd
, SIOCSIFHWADDR
, &ifr
);
3433 SYSERROR("Failed to perform ioctl");
3437 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr
,
3443 static int setup_ipv4_addr(struct lxc_list
*ip
, int ifindex
)
3445 struct lxc_list
*iterator
;
3448 lxc_list_for_each(iterator
, ip
) {
3449 struct lxc_inetdev
*inetdev
= iterator
->elem
;
3451 err
= lxc_ipv4_addr_add(ifindex
, &inetdev
->addr
,
3452 &inetdev
->bcast
, inetdev
->prefix
);
3455 SYSERROR("Failed to setup ipv4 address for network device "
3456 "with ifindex %d", ifindex
);
3464 static int setup_ipv6_addr(struct lxc_list
*ip
, int ifindex
)
3466 struct lxc_list
*iterator
;
3469 lxc_list_for_each(iterator
, ip
) {
3470 struct lxc_inet6dev
*inet6dev
= iterator
->elem
;
3472 err
= lxc_ipv6_addr_add(ifindex
, &inet6dev
->addr
,
3473 &inet6dev
->mcast
, &inet6dev
->acast
,
3477 SYSERROR("Failed to setup ipv6 address for network device "
3478 "with ifindex %d", ifindex
);
3486 static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev
*netdev
)
3488 char ifname
[IFNAMSIZ
];
3490 char *current_ifname
= ifname
;
3491 char bufinet4
[INET_ADDRSTRLEN
], bufinet6
[INET6_ADDRSTRLEN
];
3493 /* empty network namespace */
3494 if (!netdev
->ifindex
) {
3495 if (netdev
->flags
& IFF_UP
) {
3496 err
= lxc_netdev_up("lo");
3499 SYSERROR("Failed to set the loopback network device up");
3504 if (netdev
->type
== LXC_NET_EMPTY
)
3507 if (netdev
->type
== LXC_NET_NONE
)
3510 netdev
->ifindex
= if_nametoindex(netdev
->created_name
);
3511 if (!netdev
->ifindex
)
3512 SYSERROR("Failed to retrieve ifindex for network device with name %s",
3513 netdev
->name
?: "(null)");
3516 /* get the new ifindex in case of physical netdev */
3517 if (netdev
->type
== LXC_NET_PHYS
) {
3518 netdev
->ifindex
= if_nametoindex(netdev
->link
);
3519 if (!netdev
->ifindex
) {
3520 ERROR("Failed to get ifindex for network device \"%s\"",
3526 /* retrieve the name of the interface */
3527 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
3528 SYSERROR("Failed to retrieve name for network device with ifindex %d",
3533 /* Default: let the system choose an interface name.
3534 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
3535 * netlink will replace the format specifier with an appropriate index.
3537 if (netdev
->name
[0] == '\0') {
3538 if (netdev
->type
== LXC_NET_PHYS
)
3539 (void)strlcpy(netdev
->name
, netdev
->link
, IFNAMSIZ
);
3541 (void)strlcpy(netdev
->name
, "eth%d", IFNAMSIZ
);
3544 /* rename the interface name */
3545 if (strcmp(current_ifname
, netdev
->name
) != 0) {
3546 err
= lxc_netdev_rename_by_name(current_ifname
, netdev
->name
);
3549 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
3550 current_ifname
, netdev
->name
);
3554 TRACE("Renamed network device from \"%s\" to \"%s\"",
3555 current_ifname
, netdev
->name
);
3558 /* Re-read the name of the interface because its name has changed
3559 * and would be automatically allocated by the system
3561 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
3562 ERROR("Failed get name for network device with ifindex %d",
3567 /* Now update the recorded name of the network device to reflect the
3568 * name of the network device in the child's network namespace. We will
3569 * later on send this information back to the parent.
3571 (void)strlcpy(netdev
->name
, current_ifname
, IFNAMSIZ
);
3573 /* set a mac address */
3574 if (netdev
->hwaddr
) {
3575 if (setup_hw_addr(netdev
->hwaddr
, current_ifname
)) {
3576 ERROR("Failed to setup hw address for network device \"%s\"",
3582 /* setup ipv4 addresses on the interface */
3583 if (setup_ipv4_addr(&netdev
->ipv4
, netdev
->ifindex
)) {
3584 ERROR("Failed to setup ip addresses for network device \"%s\"",
3589 /* setup ipv6 addresses on the interface */
3590 if (setup_ipv6_addr(&netdev
->ipv6
, netdev
->ifindex
)) {
3591 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
3596 /* set the network device up */
3597 if (netdev
->flags
& IFF_UP
) {
3598 err
= lxc_netdev_up(current_ifname
);
3601 SYSERROR("Failed to set network device \"%s\" up",
3606 /* the network is up, make the loopback up too */
3607 err
= lxc_netdev_up("lo");
3610 SYSERROR("Failed to set the loopback network device up");
3615 /* setup ipv4 gateway on the interface */
3616 if (netdev
->ipv4_gateway
|| netdev
->ipv4_gateway_dev
) {
3617 if (!(netdev
->flags
& IFF_UP
)) {
3618 ERROR("Cannot add ipv4 gateway for network device "
3619 "\"%s\" when not bringing up the interface", current_ifname
);
3623 if (lxc_list_empty(&netdev
->ipv4
)) {
3624 ERROR("Cannot add ipv4 gateway for network device "
3625 "\"%s\" when not assigning an address", current_ifname
);
3629 /* Setup device route if ipv4_gateway_dev is enabled */
3630 if (netdev
->ipv4_gateway_dev
) {
3631 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, NULL
);
3633 SYSERROR("Failed to setup ipv4 gateway to network device \"%s\"",
3635 return minus_one_set_errno(-err
);
3638 /* Check the gateway address is valid */
3639 if (!inet_ntop(AF_INET
, netdev
->ipv4_gateway
, bufinet4
, sizeof(bufinet4
)))
3640 return minus_one_set_errno(errno
);
3642 /* Try adding a default route to the gateway address */
3643 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
3645 /* If adding the default route fails, this could be because the
3646 * gateway address is in a different subnet to the container's address.
3647 * To work around this, we try adding a static device route to the
3648 * gateway address first, and then try again.
3650 err
= lxc_ipv4_dest_add(netdev
->ifindex
, netdev
->ipv4_gateway
, 32);
3653 SYSERROR("Failed to add ipv4 dest \"%s\" for network device \"%s\"",
3654 bufinet4
, current_ifname
);
3658 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
3661 SYSERROR("Failed to setup ipv4 gateway \"%s\" for network device \"%s\"",
3662 bufinet4
, current_ifname
);
3669 /* setup ipv6 gateway on the interface */
3670 if (netdev
->ipv6_gateway
|| netdev
->ipv6_gateway_dev
) {
3671 if (!(netdev
->flags
& IFF_UP
)) {
3672 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface",
3677 if (lxc_list_empty(&netdev
->ipv6
) && !IN6_IS_ADDR_LINKLOCAL(netdev
->ipv6_gateway
)) {
3678 ERROR("Cannot add ipv6 gateway for network device \"%s\" when not assigning an address",
3683 /* Setup device route if ipv6_gateway_dev is enabled */
3684 if (netdev
->ipv6_gateway_dev
) {
3685 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, NULL
);
3687 SYSERROR("Failed to setup ipv6 gateway to network device \"%s\"",
3689 return minus_one_set_errno(-err
);
3692 /* Check the gateway address is valid */
3693 if (!inet_ntop(AF_INET6
, netdev
->ipv6_gateway
, bufinet6
, sizeof(bufinet6
)))
3694 return minus_one_set_errno(errno
);
3696 /* Try adding a default route to the gateway address */
3697 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
3699 /* If adding the default route fails, this could be because the
3700 * gateway address is in a different subnet to the container's address.
3701 * To work around this, we try adding a static device route to the
3702 * gateway address first, and then try again.
3704 err
= lxc_ipv6_dest_add(netdev
->ifindex
, netdev
->ipv6_gateway
, 128);
3707 SYSERROR("Failed to add ipv6 dest \"%s\" for network device \"%s\"",
3708 bufinet6
, current_ifname
);
3712 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
3715 SYSERROR("Failed to setup ipv6 gateway \"%s\" for network device \"%s\"",
3716 bufinet6
, current_ifname
);
3723 DEBUG("Network device \"%s\" has been setup", current_ifname
);
3728 int lxc_setup_network_in_child_namespaces(const struct lxc_conf
*conf
,
3729 struct lxc_list
*network
)
3731 struct lxc_list
*iterator
;
3733 lxc_list_for_each(iterator
, network
) {
3734 struct lxc_netdev
*netdev
= iterator
->elem
;
3736 if (lxc_setup_netdev_in_child_namespaces(netdev
)) {
3737 ERROR("Failed to setup netdev");
3742 if (!lxc_list_empty(network
))
3743 INFO("Network has been setup");
3748 int lxc_network_send_veth_names_to_child(struct lxc_handler
*handler
)
3750 struct lxc_list
*iterator
;
3751 struct lxc_list
*network
= &handler
->conf
->network
;
3752 int data_sock
= handler
->data_sock
[0];
3754 lxc_list_for_each(iterator
, network
) {
3756 struct lxc_netdev
*netdev
= iterator
->elem
;
3758 if (netdev
->type
!= LXC_NET_VETH
)
3761 ret
= lxc_send_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, MSG_NOSIGNAL
);
3765 ret
= lxc_send_nointr(data_sock
, netdev
->created_name
, IFNAMSIZ
, MSG_NOSIGNAL
);
3769 TRACE("Sent network device name \"%s\" to child", netdev
->created_name
);
3775 int lxc_network_recv_veth_names_from_parent(struct lxc_handler
*handler
)
3777 struct lxc_list
*iterator
;
3778 struct lxc_list
*network
= &handler
->conf
->network
;
3779 int data_sock
= handler
->data_sock
[1];
3781 lxc_list_for_each(iterator
, network
) {
3783 struct lxc_netdev
*netdev
= iterator
->elem
;
3785 if (netdev
->type
!= LXC_NET_VETH
)
3788 ret
= lxc_recv_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, 0);
3792 ret
= lxc_recv_nointr(data_sock
, netdev
->created_name
, IFNAMSIZ
, 0);
3795 TRACE("Received network device name \"%s\" from parent", netdev
->created_name
);
3801 int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler
*handler
)
3803 struct lxc_list
*iterator
, *network
;
3804 int data_sock
= handler
->data_sock
[0];
3806 if (!handler
->am_root
)
3809 network
= &handler
->conf
->network
;
3810 lxc_list_for_each(iterator
, network
) {
3812 struct lxc_netdev
*netdev
= iterator
->elem
;
3814 /* Send network device name in the child's namespace to parent. */
3815 ret
= lxc_send_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, MSG_NOSIGNAL
);
3819 /* Send network device ifindex in the child's namespace to
3822 ret
= lxc_send_nointr(data_sock
, &netdev
->ifindex
, sizeof(netdev
->ifindex
), MSG_NOSIGNAL
);
3827 if (!lxc_list_empty(network
))
3828 TRACE("Sent network device names and ifindices to parent");
3833 int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler
*handler
)
3835 struct lxc_list
*iterator
, *network
;
3836 int data_sock
= handler
->data_sock
[1];
3838 if (!handler
->am_root
)
3841 network
= &handler
->conf
->network
;
3842 lxc_list_for_each(iterator
, network
) {
3844 struct lxc_netdev
*netdev
= iterator
->elem
;
3846 /* Receive network device name in the child's namespace to
3849 ret
= lxc_recv_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, 0);
3853 /* Receive network device ifindex in the child's namespace to
3856 ret
= lxc_recv_nointr(data_sock
, &netdev
->ifindex
, sizeof(netdev
->ifindex
), 0);
3864 void lxc_delete_network(struct lxc_handler
*handler
)
3868 if (handler
->am_root
)
3869 bret
= lxc_delete_network_priv(handler
);
3871 bret
= lxc_delete_network_unpriv(handler
);
3873 DEBUG("Failed to delete network devices");
3875 DEBUG("Deleted network devices");
3878 int lxc_netns_set_nsid(int fd
)
3881 char buf
[NLMSG_ALIGN(sizeof(struct nlmsghdr
)) +
3882 NLMSG_ALIGN(sizeof(struct rtgenmsg
)) +
3884 struct nl_handler nlh
;
3885 struct nlmsghdr
*hdr
;
3886 struct rtgenmsg
*msg
;
3888 const __s32 ns_id
= -1;
3889 const __u32 netns_fd
= fd
;
3891 ret
= netlink_open(&nlh
, NETLINK_ROUTE
);
3895 memset(buf
, 0, sizeof(buf
));
3897 #pragma GCC diagnostic push
3898 #pragma GCC diagnostic ignored "-Wcast-align"
3899 hdr
= (struct nlmsghdr
*)buf
;
3900 msg
= (struct rtgenmsg
*)NLMSG_DATA(hdr
);
3901 #pragma GCC diagnostic pop
3903 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(*msg
));
3904 hdr
->nlmsg_type
= RTM_NEWNSID
;
3905 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
3907 hdr
->nlmsg_seq
= RTM_NEWNSID
;
3908 msg
->rtgen_family
= AF_UNSPEC
;
3910 ret
= addattr(hdr
, 1024, __LXC_NETNSA_FD
, &netns_fd
, sizeof(netns_fd
));
3914 ret
= addattr(hdr
, 1024, __LXC_NETNSA_NSID
, &ns_id
, sizeof(ns_id
));
3918 ret
= __netlink_transaction(&nlh
, hdr
, hdr
);
3921 saved_errno
= errno
;
3922 netlink_close(&nlh
);
3923 errno
= saved_errno
;
/*
 * Index a chain of rtnetlink attributes by type.
 *
 * @tb  : output table with room for @max + 1 entries; it is zeroed first
 * @max : highest attribute type that is recorded
 * @rta : first attribute of the chain
 * @len : number of bytes available starting at @rta
 *
 * For each attribute type <= @max the first occurrence is stored in
 * tb[type]; later duplicates and larger types are ignored. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	while (RTA_OK(rta, len)) {
		unsigned short type = rta->rta_type;

		if ((type <= max) && (!tb[type]))
			tb[type] = rta;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
		/* RTA_NEXT also reduces len by the aligned attribute size. */
		rta = RTA_NEXT(rta, len);
#pragma GCC diagnostic pop
	}

	return 0;
}
/*
 * Return the payload of @rta interpreted as a signed 32 bit integer.
 *
 * The caller must make sure the attribute carries at least sizeof(__s32)
 * bytes of payload. The value is copied out with memcpy() so that no
 * assumption about the alignment of the payload is needed.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, RTA_DATA(rta), sizeof(value));

	return value;
}
/*
 * Address of the first attribute that follows the (aligned) struct rtgenmsg
 * located at @r. Only defined here when no header already provides it.
 */
#ifndef NETNS_RTA
#define NETNS_RTA(r) \
	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
#endif
3958 int lxc_netns_get_nsid(int fd
)
3962 char buf
[NLMSG_ALIGN(sizeof(struct nlmsghdr
)) +
3963 NLMSG_ALIGN(sizeof(struct rtgenmsg
)) +
3965 struct rtattr
*tb
[__LXC_NETNSA_MAX
+ 1];
3966 struct nl_handler nlh
;
3967 struct nlmsghdr
*hdr
;
3968 struct rtgenmsg
*msg
;
3970 __u32 netns_fd
= fd
;
3972 ret
= netlink_open(&nlh
, NETLINK_ROUTE
);
3976 memset(buf
, 0, sizeof(buf
));
3978 #pragma GCC diagnostic push
3979 #pragma GCC diagnostic ignored "-Wcast-align"
3980 hdr
= (struct nlmsghdr
*)buf
;
3981 msg
= (struct rtgenmsg
*)NLMSG_DATA(hdr
);
3982 #pragma GCC diagnostic pop
3984 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(*msg
));
3985 hdr
->nlmsg_type
= RTM_GETNSID
;
3986 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
3988 hdr
->nlmsg_seq
= RTM_GETNSID
;
3989 msg
->rtgen_family
= AF_UNSPEC
;
3991 ret
= addattr(hdr
, 1024, __LXC_NETNSA_FD
, &netns_fd
, sizeof(netns_fd
));
3993 ret
= __netlink_transaction(&nlh
, hdr
, hdr
);
3995 saved_errno
= errno
;
3996 netlink_close(&nlh
);
3997 errno
= saved_errno
;
4002 msg
= NLMSG_DATA(hdr
);
4003 len
= hdr
->nlmsg_len
- NLMSG_SPACE(sizeof(*msg
));
4007 #pragma GCC diagnostic push
4008 #pragma GCC diagnostic ignored "-Wcast-align"
4009 parse_rtattr(tb
, __LXC_NETNSA_MAX
, NETNS_RTA(msg
), len
);
4010 if (tb
[__LXC_NETNSA_NSID
])
4011 return rta_getattr_s32(tb
[__LXC_NETNSA_NSID
]);
4012 #pragma GCC diagnostic pop
4017 int lxc_create_network(struct lxc_handler
*handler
)
4022 * Find gateway addresses from the link device, which is no longer
4023 * accessible inside the container. Do this before creating network
4024 * interfaces, since goto out_delete_net does not work before
4027 ret
= lxc_find_gateway_addresses(handler
);
4029 ERROR("Failed to find gateway addresses");
4033 if (handler
->am_root
) {
4034 ret
= lxc_create_network_priv(handler
);
4038 return lxc_network_move_created_netdev_priv(handler
);
4041 return lxc_create_network_unpriv(handler
);