2 * lxc: linux Container library
4 * (C) Copyright IBM Corp. 2007, 2008
7 * Daniel Lezcano <daniel.lezcano at free.fr>
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include <arpa/inet.h>
31 #include <linux/netlink.h>
32 #include <linux/rtnetlink.h>
33 #include <linux/sockios.h>
34 #include <net/ethernet.h>
36 #include <net/if_arp.h>
37 #include <netinet/in.h>
41 #include <sys/inotify.h>
42 #include <sys/ioctl.h>
43 #include <sys/param.h>
44 #include <sys/socket.h>
46 #include <sys/types.h>
50 #include "../include/netns_ifaddrs.h"
54 #include "file_utils.h"
59 #include "raw_syscalls.h"
60 #include "syscall_wrappers.h"
64 #include "include/strlcpy.h"
67 lxc_log_define(network
, lxc
);
69 typedef int (*instantiate_cb
)(struct lxc_handler
*, struct lxc_netdev
*);
71 static int instantiate_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
73 int bridge_index
, err
;
75 char veth1buf
[IFNAMSIZ
], veth2buf
[IFNAMSIZ
];
78 if (netdev
->priv
.veth_attr
.pair
[0] != '\0') {
79 veth1
= netdev
->priv
.veth_attr
.pair
;
80 if (handler
->conf
->reboot
)
81 lxc_netdev_delete_by_name(veth1
);
83 err
= snprintf(veth1buf
, sizeof(veth1buf
), "vethXXXXXX");
84 if (err
< 0 || (size_t)err
>= sizeof(veth1buf
))
87 veth1
= lxc_mkifname(veth1buf
);
91 /* store away for deconf */
92 memcpy(netdev
->priv
.veth_attr
.veth1
, veth1
, IFNAMSIZ
);
95 err
= snprintf(veth2buf
, sizeof(veth2buf
), "vethXXXXXX");
96 if (err
< 0 || (size_t)err
>= sizeof(veth2buf
))
99 veth2
= lxc_mkifname(veth2buf
);
103 err
= lxc_veth_create(veth1
, veth2
);
106 SYSERROR("Failed to create veth pair \"%s\" and \"%s\"", veth1
, veth2
);
110 /* changing the high byte of the mac address to 0xfe, the bridge interface
111 * will always keep the host's mac address and not take the mac address
113 err
= setup_private_host_hw_addr(veth1
);
116 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1
);
120 /* Retrieve ifindex of the host's veth device. */
121 netdev
->priv
.veth_attr
.ifindex
= if_nametoindex(veth1
);
122 if (!netdev
->priv
.veth_attr
.ifindex
) {
123 ERROR("Failed to retrieve ifindex for \"%s\"", veth1
);
127 /* Note that we're retrieving the container's ifindex in the host's
128 * network namespace because we need it to move the device from the
129 * host's network namespace to the container's network namespace later
132 netdev
->ifindex
= if_nametoindex(veth2
);
133 if (!netdev
->ifindex
) {
134 ERROR("Failed to retrieve ifindex for \"%s\"", veth2
);
139 if (lxc_safe_uint(netdev
->mtu
, &mtu
) < 0)
140 WARN("Failed to parse mtu");
142 INFO("Retrieved mtu %d", mtu
);
143 } else if (netdev
->link
[0] != '\0') {
144 bridge_index
= if_nametoindex(netdev
->link
);
146 mtu
= netdev_get_mtu(bridge_index
);
147 INFO("Retrieved mtu %d from %s", mtu
, netdev
->link
);
149 mtu
= netdev_get_mtu(netdev
->ifindex
);
150 INFO("Retrieved mtu %d from %s", mtu
, veth2
);
155 err
= lxc_netdev_set_mtu(veth1
, mtu
);
157 err
= lxc_netdev_set_mtu(veth2
, mtu
);
161 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
162 "and \"%s\"", mtu
, veth1
, veth2
);
167 if (netdev
->link
[0] != '\0') {
168 err
= lxc_bridge_attach(netdev
->link
, veth1
);
171 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"",
172 veth1
, netdev
->link
);
175 INFO("Attached \"%s\" to bridge \"%s\"", veth1
, netdev
->link
);
178 err
= lxc_netdev_up(veth1
);
181 SYSERROR("Failed to set \"%s\" up", veth1
);
185 if (netdev
->upscript
) {
193 err
= run_script_argv(handler
->name
,
194 handler
->conf
->hooks_version
, "net",
195 netdev
->upscript
, "up", argv
);
200 DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1
, veth2
,
206 if (netdev
->ifindex
!= 0)
207 lxc_netdev_delete_by_name(veth1
);
211 static int instantiate_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
213 char peerbuf
[IFNAMSIZ
], *peer
;
216 if (netdev
->link
[0] == '\0') {
217 ERROR("No link for macvlan network device specified");
221 err
= snprintf(peerbuf
, sizeof(peerbuf
), "mcXXXXXX");
222 if (err
< 0 || (size_t)err
>= sizeof(peerbuf
))
225 peer
= lxc_mkifname(peerbuf
);
229 err
= lxc_macvlan_create(netdev
->link
, peer
,
230 netdev
->priv
.macvlan_attr
.mode
);
233 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
238 netdev
->ifindex
= if_nametoindex(peer
);
239 if (!netdev
->ifindex
) {
240 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
244 if (netdev
->upscript
) {
251 err
= run_script_argv(handler
->name
,
252 handler
->conf
->hooks_version
, "net",
253 netdev
->upscript
, "up", argv
);
258 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
259 peer
, netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
264 lxc_netdev_delete_by_name(peer
);
268 static int instantiate_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
272 static uint16_t vlan_cntr
= 0;
273 unsigned int mtu
= 0;
275 if (netdev
->link
[0] == '\0') {
276 ERROR("No link for vlan network device specified");
280 err
= snprintf(peer
, sizeof(peer
), "vlan%d-%d", netdev
->priv
.vlan_attr
.vid
, vlan_cntr
++);
281 if (err
< 0 || (size_t)err
>= sizeof(peer
))
284 err
= lxc_vlan_create(netdev
->link
, peer
, netdev
->priv
.vlan_attr
.vid
);
287 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
292 netdev
->ifindex
= if_nametoindex(peer
);
293 if (!netdev
->ifindex
) {
294 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
295 lxc_netdev_delete_by_name(peer
);
299 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\" (vlan1000)",
300 peer
, netdev
->ifindex
);
302 if (lxc_safe_uint(netdev
->mtu
, &mtu
) < 0) {
303 ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".",
305 netdev
->name
[0] != '\0' ? netdev
->name
: "(null)");
309 err
= lxc_netdev_set_mtu(peer
, mtu
);
312 SYSERROR("Failed to set mtu \"%s\" for \"%s\"",
314 lxc_netdev_delete_by_name(peer
);
322 static int instantiate_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
331 if (netdev
->link
[0] == '\0') {
332 ERROR("No link for physical interface specified");
336 /* Note that we're retrieving the container's ifindex in the host's
337 * network namespace because we need it to move the device from the
338 * host's network namespace to the container's network namespace later
340 * Note that netdev->link will contain the name of the physical network
341 * device in the host's namespace.
343 netdev
->ifindex
= if_nametoindex(netdev
->link
);
344 if (!netdev
->ifindex
) {
345 ERROR("Failed to retrieve ifindex for \"%s\"", netdev
->link
);
349 /* Store the ifindex of the host's network device in the host's
352 netdev
->priv
.phys_attr
.ifindex
= netdev
->ifindex
;
354 if (!netdev
->upscript
)
357 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
358 "net", netdev
->upscript
, "up", argv
);
365 static int instantiate_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
374 if (!netdev
->upscript
)
377 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
378 "net", netdev
->upscript
, "up", argv
);
385 static int instantiate_none(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
391 static instantiate_cb netdev_conf
[LXC_NET_MAXCONFTYPE
+ 1] = {
392 [LXC_NET_VETH
] = instantiate_veth
,
393 [LXC_NET_MACVLAN
] = instantiate_macvlan
,
394 [LXC_NET_VLAN
] = instantiate_vlan
,
395 [LXC_NET_PHYS
] = instantiate_phys
,
396 [LXC_NET_EMPTY
] = instantiate_empty
,
397 [LXC_NET_NONE
] = instantiate_none
,
400 static int shutdown_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
410 if (!netdev
->downscript
)
413 if (netdev
->priv
.veth_attr
.pair
[0] != '\0')
414 argv
[2] = netdev
->priv
.veth_attr
.pair
;
416 argv
[2] = netdev
->priv
.veth_attr
.veth1
;
418 ret
= run_script_argv(handler
->name
,
419 handler
->conf
->hooks_version
, "net",
420 netdev
->downscript
, "down", argv
);
427 static int shutdown_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
436 if (!netdev
->downscript
)
439 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
440 "net", netdev
->downscript
, "down", argv
);
447 static int shutdown_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
452 static int shutdown_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
461 if (!netdev
->downscript
)
464 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
465 "net", netdev
->downscript
, "down", argv
);
472 static int shutdown_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
480 if (!netdev
->downscript
)
483 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
484 "net", netdev
->downscript
, "down", argv
);
491 static int shutdown_none(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
496 static instantiate_cb netdev_deconf
[LXC_NET_MAXCONFTYPE
+ 1] = {
497 [LXC_NET_VETH
] = shutdown_veth
,
498 [LXC_NET_MACVLAN
] = shutdown_macvlan
,
499 [LXC_NET_VLAN
] = shutdown_vlan
,
500 [LXC_NET_PHYS
] = shutdown_phys
,
501 [LXC_NET_EMPTY
] = shutdown_empty
,
502 [LXC_NET_NONE
] = shutdown_none
,
505 int lxc_netdev_move_by_index(int ifindex
, pid_t pid
, const char *ifname
)
508 struct nl_handler nlh
;
509 struct ifinfomsg
*ifi
;
510 struct nlmsg
*nlmsg
= NULL
;
512 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
517 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
521 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
522 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
524 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
527 ifi
->ifi_family
= AF_UNSPEC
;
528 ifi
->ifi_index
= ifindex
;
530 if (nla_put_u32(nlmsg
, IFLA_NET_NS_PID
, pid
))
533 if (ifname
!= NULL
) {
534 if (nla_put_string(nlmsg
, IFLA_IFNAME
, ifname
))
538 err
= netlink_transaction(&nlh
, nlmsg
, nlmsg
);
545 /* If we are asked to move a wireless interface, then we must actually move its
546 * phyN device. Detect that condition and return the physname here. The physname
547 * will be passed to lxc_netdev_move_wlan() which will free it when done.
549 #define PHYSNAME "/sys/class/net/%s/phy80211/name"
550 static char *is_wlan(const char *ifname
)
557 char *physname
= NULL
;
559 len
= strlen(ifname
) + strlen(PHYSNAME
) - 1;
560 path
= alloca(len
+ 1);
561 ret
= snprintf(path
, len
, PHYSNAME
, ifname
);
562 if (ret
< 0 || (size_t)ret
>= len
)
565 f
= fopen(path
, "r");
569 /* Feh - sb.st_size is always 4096. */
570 fseek(f
, 0, SEEK_END
);
572 fseek(f
, 0, SEEK_SET
);
578 physname
= malloc(physlen
+ 1);
584 memset(physname
, 0, physlen
+ 1);
585 ret
= fread(physname
, 1, physlen
, f
);
590 for (i
= 0; i
< physlen
; i
++) {
591 if (physname
[i
] == '\n')
594 if (physname
[i
] == '\0')
605 static int lxc_netdev_rename_by_name_in_netns(pid_t pid
, const char *old
,
615 return wait_for_pid(fpid
);
617 if (!switch_to_ns(pid
, "net"))
620 _exit(lxc_netdev_rename_by_name(old
, new));
623 static int lxc_netdev_move_wlan(char *physname
, const char *ifname
, pid_t pid
,
630 /* Move phyN into the container. TODO - do this using netlink.
631 * However, IIUC this involves a bit more complicated work to talk to
632 * the 80211 module, so for now just call out to iw.
634 cmd
= on_path("iw", NULL
);
645 sprintf(pidstr
, "%d", pid
);
646 execlp("iw", "iw", "phy", physname
, "set", "netns", pidstr
,
651 if (wait_for_pid(fpid
))
656 err
= lxc_netdev_rename_by_name_in_netns(pid
, ifname
, newname
);
663 int lxc_netdev_move_by_name(const char *ifname
, pid_t pid
, const char* newname
)
671 index
= if_nametoindex(ifname
);
675 physname
= is_wlan(ifname
);
677 return lxc_netdev_move_wlan(physname
, ifname
, pid
, newname
);
679 return lxc_netdev_move_by_index(index
, pid
, newname
);
682 int lxc_netdev_delete_by_index(int ifindex
)
685 struct ifinfomsg
*ifi
;
686 struct nl_handler nlh
;
687 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
689 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
694 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
698 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
702 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
;
703 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_DELLINK
;
705 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
708 ifi
->ifi_family
= AF_UNSPEC
;
709 ifi
->ifi_index
= ifindex
;
711 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
719 int lxc_netdev_delete_by_name(const char *name
)
723 index
= if_nametoindex(name
);
727 return lxc_netdev_delete_by_index(index
);
730 int lxc_netdev_rename_by_index(int ifindex
, const char *newname
)
733 struct ifinfomsg
*ifi
;
734 struct nl_handler nlh
;
735 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
737 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
741 len
= strlen(newname
);
742 if (len
== 1 || len
>= IFNAMSIZ
)
746 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
750 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
754 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
;
755 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
757 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
760 ifi
->ifi_family
= AF_UNSPEC
;
761 ifi
->ifi_index
= ifindex
;
763 if (nla_put_string(nlmsg
, IFLA_IFNAME
, newname
))
766 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
774 int lxc_netdev_rename_by_name(const char *oldname
, const char *newname
)
778 len
= strlen(oldname
);
779 if (len
== 1 || len
>= IFNAMSIZ
)
782 index
= if_nametoindex(oldname
);
786 return lxc_netdev_rename_by_index(index
, newname
);
789 int netdev_set_flag(const char *name
, int flag
)
792 struct ifinfomsg
*ifi
;
793 struct nl_handler nlh
;
794 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
796 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
802 if (len
== 1 || len
>= IFNAMSIZ
)
806 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
810 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
815 index
= if_nametoindex(name
);
819 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
820 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
822 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
827 ifi
->ifi_family
= AF_UNSPEC
;
828 ifi
->ifi_index
= index
;
829 ifi
->ifi_change
|= IFF_UP
;
830 ifi
->ifi_flags
|= flag
;
832 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
840 int netdev_get_flag(const char *name
, int *flag
)
843 struct ifinfomsg
*ifi
;
844 struct nl_handler nlh
;
845 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
850 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
856 if (len
== 1 || len
>= IFNAMSIZ
)
860 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
864 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
869 index
= if_nametoindex(name
);
873 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
;
874 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETLINK
;
876 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
881 ifi
->ifi_family
= AF_UNSPEC
;
882 ifi
->ifi_index
= index
;
884 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
888 ifi
= NLMSG_DATA(answer
->nlmsghdr
);
890 *flag
= ifi
->ifi_flags
;
899 * \brief Check whether an interface is up or not.
901 * \param name: name for the interface.
904 * 0 means interface is down.
905 * 1 means interface is up.
906 * Others means error happened, and ret-value is the error number.
908 int lxc_netdev_isup(const char *name
)
912 err
= netdev_get_flag(name
, &flag
);
922 int netdev_get_mtu(int ifindex
)
924 int answer_len
, err
, res
;
925 struct nl_handler nlh
;
926 struct ifinfomsg
*ifi
;
927 struct nlmsghdr
*msg
;
928 int readmore
= 0, recv_len
= 0;
929 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
931 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
936 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
940 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
944 /* Save the answer buffer length, since it will be overwritten
945 * on the first receive (and we might need to receive more than
948 answer_len
= answer
->nlmsghdr
->nlmsg_len
;
950 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
951 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETLINK
;
953 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
956 ifi
->ifi_family
= AF_UNSPEC
;
958 /* Send the link dump request, which returns the link
959 * information of all interfaces. */
960 err
= netlink_send(&nlh
, nlmsg
);
964 #pragma GCC diagnostic push
965 #pragma GCC diagnostic ignored "-Wcast-align"
968 /* Restore the answer buffer length, it might have been
969 * overwritten by a previous receive.
971 answer
->nlmsghdr
->nlmsg_len
= answer_len
;
973 /* Get the (next) batch of reply messages */
974 err
= netlink_rcv(&nlh
, answer
);
980 /* Satisfy the typing for the netlink macros */
981 msg
= answer
->nlmsghdr
;
983 while (NLMSG_OK(msg
, recv_len
)) {
985 /* Stop reading if we see an error message */
986 if (msg
->nlmsg_type
== NLMSG_ERROR
) {
987 struct nlmsgerr
*errmsg
=
988 (struct nlmsgerr
*)NLMSG_DATA(msg
);
993 /* Stop reading if we see a NLMSG_DONE message */
994 if (msg
->nlmsg_type
== NLMSG_DONE
) {
999 ifi
= NLMSG_DATA(msg
);
1000 if (ifi
->ifi_index
== ifindex
) {
1001 struct rtattr
*rta
= IFLA_RTA(ifi
);
1003 msg
->nlmsg_len
- NLMSG_LENGTH(sizeof(*ifi
));
1005 while (RTA_OK(rta
, attr_len
)) {
1006 /* Found the MTU attribute for the
1007 * requested interface, return it.
1009 if (rta
->rta_type
== IFLA_MTU
) {
1010 memcpy(&res
, RTA_DATA(rta
),
1015 rta
= RTA_NEXT(rta
, attr_len
);
1019 /* Keep reading more data from the socket if the last
1020 * message had the NLM_F_MULTI flag set.
1022 readmore
= (msg
->nlmsg_flags
& NLM_F_MULTI
);
1024 /* Look at the next message received in this buffer. */
1025 msg
= NLMSG_NEXT(msg
, recv_len
);
1029 #pragma GCC diagnostic pop
1031 /* If we end up here, we didn't find any result, so signal an error. */
1035 netlink_close(&nlh
);
1041 int lxc_netdev_set_mtu(const char *name
, int mtu
)
1043 int err
, index
, len
;
1044 struct ifinfomsg
*ifi
;
1045 struct nl_handler nlh
;
1046 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1048 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1054 if (len
== 1 || len
>= IFNAMSIZ
)
1058 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1062 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1067 index
= if_nametoindex(name
);
1071 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
1072 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1074 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1079 ifi
->ifi_family
= AF_UNSPEC
;
1080 ifi
->ifi_index
= index
;
1082 if (nla_put_u32(nlmsg
, IFLA_MTU
, mtu
))
1085 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1087 netlink_close(&nlh
);
1093 int lxc_netdev_up(const char *name
)
1095 return netdev_set_flag(name
, IFF_UP
);
1098 int lxc_netdev_down(const char *name
)
1100 return netdev_set_flag(name
, 0);
1103 int lxc_veth_create(const char *name1
, const char *name2
)
1106 struct ifinfomsg
*ifi
;
1107 struct nl_handler nlh
;
1108 struct rtattr
*nest1
, *nest2
, *nest3
;
1109 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1111 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1116 len
= strlen(name1
);
1117 if (len
== 1 || len
>= IFNAMSIZ
)
1120 len
= strlen(name2
);
1121 if (len
== 1 || len
>= IFNAMSIZ
)
1125 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1129 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1133 nlmsg
->nlmsghdr
->nlmsg_flags
=
1134 NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
1135 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1137 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1140 ifi
->ifi_family
= AF_UNSPEC
;
1143 nest1
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
1147 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "veth"))
1150 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
1154 nest3
= nla_begin_nested(nlmsg
, VETH_INFO_PEER
);
1158 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1164 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name2
))
1167 nla_end_nested(nlmsg
, nest3
);
1168 nla_end_nested(nlmsg
, nest2
);
1169 nla_end_nested(nlmsg
, nest1
);
1171 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name1
))
1174 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1176 netlink_close(&nlh
);
1182 /* TODO: merge with lxc_macvlan_create */
1183 int lxc_vlan_create(const char *master
, const char *name
, unsigned short vlanid
)
1185 int err
, len
, lindex
;
1186 struct ifinfomsg
*ifi
;
1187 struct nl_handler nlh
;
1188 struct rtattr
*nest
, *nest2
;
1189 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1191 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1196 len
= strlen(master
);
1197 if (len
== 1 || len
>= IFNAMSIZ
)
1201 if (len
== 1 || len
>= IFNAMSIZ
)
1205 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1209 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1214 lindex
= if_nametoindex(master
);
1218 nlmsg
->nlmsghdr
->nlmsg_flags
=
1219 NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
1220 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1222 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1227 ifi
->ifi_family
= AF_UNSPEC
;
1229 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
1233 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "vlan"))
1236 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
1240 if (nla_put_u16(nlmsg
, IFLA_VLAN_ID
, vlanid
))
1243 nla_end_nested(nlmsg
, nest2
);
1244 nla_end_nested(nlmsg
, nest
);
1246 if (nla_put_u32(nlmsg
, IFLA_LINK
, lindex
))
1249 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
1252 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1258 netlink_close(&nlh
);
1262 int lxc_macvlan_create(const char *master
, const char *name
, int mode
)
1264 int err
, index
, len
;
1265 struct ifinfomsg
*ifi
;
1266 struct nl_handler nlh
;
1267 struct rtattr
*nest
, *nest2
;
1268 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1270 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1275 len
= strlen(master
);
1276 if (len
== 1 || len
>= IFNAMSIZ
)
1280 if (len
== 1 || len
>= IFNAMSIZ
)
1284 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1288 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1293 index
= if_nametoindex(master
);
1297 nlmsg
->nlmsghdr
->nlmsg_flags
=
1298 NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
1299 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1301 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1306 ifi
->ifi_family
= AF_UNSPEC
;
1308 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
1312 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "macvlan"))
1316 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
1320 if (nla_put_u32(nlmsg
, IFLA_MACVLAN_MODE
, mode
))
1323 nla_end_nested(nlmsg
, nest2
);
1326 nla_end_nested(nlmsg
, nest
);
1328 if (nla_put_u32(nlmsg
, IFLA_LINK
, index
))
1331 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
1334 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1336 netlink_close(&nlh
);
1342 static int proc_sys_net_write(const char *path
, const char *value
)
1347 fd
= open(path
, O_WRONLY
);
1351 if (lxc_write_nointr(fd
, value
, strlen(value
)) < 0)
1358 static int neigh_proxy_set(const char *ifname
, int family
, int flag
)
1361 char path
[PATH_MAX
];
1363 if (family
!= AF_INET
&& family
!= AF_INET6
)
1366 ret
= snprintf(path
, PATH_MAX
, "/proc/sys/net/%s/conf/%s/%s",
1367 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
1368 family
== AF_INET
? "proxy_arp" : "proxy_ndp");
1369 if (ret
< 0 || (size_t)ret
>= PATH_MAX
)
1372 return proc_sys_net_write(path
, flag
? "1" : "0");
1375 int lxc_neigh_proxy_on(const char *name
, int family
)
1377 return neigh_proxy_set(name
, family
, 1);
1380 int lxc_neigh_proxy_off(const char *name
, int family
)
1382 return neigh_proxy_set(name
, family
, 0);
1385 int lxc_convert_mac(char *macaddr
, struct sockaddr
*sockaddr
)
1390 unsigned char *data
;
1392 sockaddr
->sa_family
= ARPHRD_ETHER
;
1393 data
= (unsigned char *)sockaddr
->sa_data
;
1395 while ((*macaddr
!= '\0') && (i
< ETH_ALEN
)) {
1399 else if (c
>= 'a' && c
<= 'f')
1401 else if (c
>= 'A' && c
<= 'F')
1410 else if (c
>= 'a' && c
<= 'f')
1411 val
|= c
- 'a' + 10;
1412 else if (c
>= 'A' && c
<= 'F')
1413 val
|= c
- 'A' + 10;
1414 else if (c
== ':' || c
== 0)
1420 *data
++ = (unsigned char)(val
& 0377);
1423 if (*macaddr
== ':')
1430 static int ip_addr_add(int family
, int ifindex
, void *addr
, void *bcast
,
1431 void *acast
, int prefix
)
1434 struct ifaddrmsg
*ifa
;
1435 struct nl_handler nlh
;
1436 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1438 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
1439 : sizeof(struct in6_addr
);
1441 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1446 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1450 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1454 nlmsg
->nlmsghdr
->nlmsg_flags
=
1455 NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
1456 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWADDR
;
1458 ifa
= nlmsg_reserve(nlmsg
, sizeof(struct ifaddrmsg
));
1461 ifa
->ifa_prefixlen
= prefix
;
1462 ifa
->ifa_index
= ifindex
;
1463 ifa
->ifa_family
= family
;
1467 if (nla_put_buffer(nlmsg
, IFA_LOCAL
, addr
, addrlen
))
1470 if (nla_put_buffer(nlmsg
, IFA_ADDRESS
, addr
, addrlen
))
1473 if (nla_put_buffer(nlmsg
, IFA_BROADCAST
, bcast
, addrlen
))
1476 /* TODO: multicast, anycast with ipv6 */
1477 err
= -EPROTONOSUPPORT
;
1478 if (family
== AF_INET6
&&
1479 (memcmp(bcast
, &in6addr_any
, sizeof(in6addr_any
)) ||
1480 memcmp(acast
, &in6addr_any
, sizeof(in6addr_any
))))
1483 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1485 netlink_close(&nlh
);
1491 int lxc_ipv6_addr_add(int ifindex
, struct in6_addr
*addr
,
1492 struct in6_addr
*mcast
, struct in6_addr
*acast
,
1495 return ip_addr_add(AF_INET6
, ifindex
, addr
, mcast
, acast
, prefix
);
1498 int lxc_ipv4_addr_add(int ifindex
, struct in_addr
*addr
, struct in_addr
*bcast
,
1501 return ip_addr_add(AF_INET
, ifindex
, addr
, bcast
, NULL
, prefix
);
1504 /* Find an IFA_LOCAL (or IFA_ADDRESS if no IFA_LOCAL is present) address from
1505 * the given RTM_NEWADDR message. Allocates memory for the address and stores
1506 * that pointer in *res (so res should be an in_addr** or in6_addr**).
1508 #pragma GCC diagnostic push
1509 #pragma GCC diagnostic ignored "-Wcast-align"
1511 static int ifa_get_local_ip(int family
, struct nlmsghdr
*msg
, void **res
)
1514 struct ifaddrmsg
*ifa
= NLMSG_DATA(msg
);
1515 struct rtattr
*rta
= IFA_RTA(ifa
);
1516 int attr_len
= NLMSG_PAYLOAD(msg
, sizeof(struct ifaddrmsg
));
1518 if (ifa
->ifa_family
!= family
)
1521 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
1522 : sizeof(struct in6_addr
);
1524 /* Loop over the rtattr's in this message */
1525 while (RTA_OK(rta
, attr_len
)) {
1526 /* Found a local address for the requested interface,
1529 if (rta
->rta_type
== IFA_LOCAL
||
1530 rta
->rta_type
== IFA_ADDRESS
) {
1531 /* Sanity check. The family check above should make sure
1532 * the address length is correct, but check here just in
1535 if (RTA_PAYLOAD(rta
) != addrlen
)
1538 /* We might have found an IFA_ADDRESS before, which we
1539 * now overwrite with an IFA_LOCAL.
1542 *res
= malloc(addrlen
);
1547 memcpy(*res
, RTA_DATA(rta
), addrlen
);
1548 if (rta
->rta_type
== IFA_LOCAL
)
1551 rta
= RTA_NEXT(rta
, attr_len
);
1556 #pragma GCC diagnostic pop
1558 static int ip_addr_get(int family
, int ifindex
, void **res
)
1560 int answer_len
, err
;
1561 struct ifaddrmsg
*ifa
;
1562 struct nl_handler nlh
;
1563 struct nlmsghdr
*msg
;
1564 int readmore
= 0, recv_len
= 0;
1565 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1567 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1572 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1576 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1580 /* Save the answer buffer length, since it will be overwritten on the
1581 * first receive (and we might need to receive more than once).
1583 answer_len
= answer
->nlmsghdr
->nlmsg_len
;
1585 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ROOT
;
1586 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETADDR
;
1588 ifa
= nlmsg_reserve(nlmsg
, sizeof(struct ifaddrmsg
));
1591 ifa
->ifa_family
= family
;
1593 /* Send the request for addresses, which returns all addresses on all
1596 err
= netlink_send(&nlh
, nlmsg
);
1600 #pragma GCC diagnostic push
1601 #pragma GCC diagnostic ignored "-Wcast-align"
1604 /* Restore the answer buffer length, it might have been
1605 * overwritten by a previous receive.
1607 answer
->nlmsghdr
->nlmsg_len
= answer_len
;
1609 /* Get the (next) batch of reply messages. */
1610 err
= netlink_rcv(&nlh
, answer
);
1617 /* Satisfy the typing for the netlink macros. */
1618 msg
= answer
->nlmsghdr
;
1620 while (NLMSG_OK(msg
, recv_len
)) {
1621 /* Stop reading if we see an error message. */
1622 if (msg
->nlmsg_type
== NLMSG_ERROR
) {
1623 struct nlmsgerr
*errmsg
=
1624 (struct nlmsgerr
*)NLMSG_DATA(msg
);
1625 err
= errmsg
->error
;
1629 /* Stop reading if we see a NLMSG_DONE message. */
1630 if (msg
->nlmsg_type
== NLMSG_DONE
) {
1635 if (msg
->nlmsg_type
!= RTM_NEWADDR
) {
1640 ifa
= (struct ifaddrmsg
*)NLMSG_DATA(msg
);
1641 if (ifa
->ifa_index
== ifindex
) {
1642 if (ifa_get_local_ip(family
, msg
, res
) < 0) {
1647 /* Found a result, stop searching. */
1652 /* Keep reading more data from the socket if the last
1653 * message had the NLM_F_MULTI flag set.
1655 readmore
= (msg
->nlmsg_flags
& NLM_F_MULTI
);
1657 /* Look at the next message received in this buffer. */
1658 msg
= NLMSG_NEXT(msg
, recv_len
);
1662 #pragma GCC diagnostic pop
1664 /* If we end up here, we didn't find any result, so signal an
1670 netlink_close(&nlh
);
1676 int lxc_ipv6_addr_get(int ifindex
, struct in6_addr
**res
)
1678 return ip_addr_get(AF_INET6
, ifindex
, (void **)res
);
1681 int lxc_ipv4_addr_get(int ifindex
, struct in_addr
**res
)
1683 return ip_addr_get(AF_INET
, ifindex
, (void **)res
);
1686 static int ip_gateway_add(int family
, int ifindex
, void *gw
)
1689 struct nl_handler nlh
;
1691 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1693 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
1694 : sizeof(struct in6_addr
);
1696 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1701 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1705 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1709 nlmsg
->nlmsghdr
->nlmsg_flags
=
1710 NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
1711 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWROUTE
;
1713 rt
= nlmsg_reserve(nlmsg
, sizeof(struct rtmsg
));
1716 rt
->rtm_family
= family
;
1717 rt
->rtm_table
= RT_TABLE_MAIN
;
1718 rt
->rtm_scope
= RT_SCOPE_UNIVERSE
;
1719 rt
->rtm_protocol
= RTPROT_BOOT
;
1720 rt
->rtm_type
= RTN_UNICAST
;
1721 /* "default" destination */
1722 rt
->rtm_dst_len
= 0;
1725 if (nla_put_buffer(nlmsg
, RTA_GATEWAY
, gw
, addrlen
))
1728 /* Adding the interface index enables the use of link-local
1729 * addresses for the gateway.
1731 if (nla_put_u32(nlmsg
, RTA_OIF
, ifindex
))
1734 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1736 netlink_close(&nlh
);
1742 int lxc_ipv4_gateway_add(int ifindex
, struct in_addr
*gw
)
1744 return ip_gateway_add(AF_INET
, ifindex
, gw
);
1747 int lxc_ipv6_gateway_add(int ifindex
, struct in6_addr
*gw
)
1749 return ip_gateway_add(AF_INET6
, ifindex
, gw
);
1752 static int ip_route_dest_add(int family
, int ifindex
, void *dest
)
1755 struct nl_handler nlh
;
1757 struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1759 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
1760 : sizeof(struct in6_addr
);
1762 err
= netlink_open(&nlh
, NETLINK_ROUTE
);
1767 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1771 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1775 nlmsg
->nlmsghdr
->nlmsg_flags
=
1776 NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
1777 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWROUTE
;
1779 rt
= nlmsg_reserve(nlmsg
, sizeof(struct rtmsg
));
1782 rt
->rtm_family
= family
;
1783 rt
->rtm_table
= RT_TABLE_MAIN
;
1784 rt
->rtm_scope
= RT_SCOPE_LINK
;
1785 rt
->rtm_protocol
= RTPROT_BOOT
;
1786 rt
->rtm_type
= RTN_UNICAST
;
1787 rt
->rtm_dst_len
= addrlen
* 8;
1790 if (nla_put_buffer(nlmsg
, RTA_DST
, dest
, addrlen
))
1792 if (nla_put_u32(nlmsg
, RTA_OIF
, ifindex
))
1794 err
= netlink_transaction(&nlh
, nlmsg
, answer
);
1796 netlink_close(&nlh
);
/* Add a route to the single IPv4 address @dest through the interface with
 * index @ifindex. Forwards to ip_route_dest_add() with AF_INET and hands its
 * result back unchanged.
 */
int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest)
{
	int err;

	err = ip_route_dest_add(AF_INET, ifindex, dest);

	return err;
}
/* Add a route to the single IPv6 address @dest through the interface with
 * index @ifindex. Forwards to ip_route_dest_add() with AF_INET6 and hands its
 * result back unchanged.
 */
int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest)
{
	int err;

	err = ip_route_dest_add(AF_INET6, ifindex, dest);

	return err;
}
1812 bool is_ovs_bridge(const char *bridge
)
1816 char brdirname
[22 + IFNAMSIZ
+ 1] = {0};
1818 ret
= snprintf(brdirname
, 22 + IFNAMSIZ
+ 1, "/sys/class/net/%s/bridge",
1820 if (ret
< 0 || (size_t)ret
>= 22 + IFNAMSIZ
+ 1)
1823 ret
= stat(brdirname
, &sb
);
1824 if (ret
< 0 && errno
== ENOENT
)
1830 struct ovs_veth_args
{
1835 /* Called from a background thread - when nic goes away, remove it from the
1838 static int lxc_ovs_delete_port_exec(void *data
)
1840 struct ovs_veth_args
*args
= data
;
1842 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args
->bridge
, args
->nic
,
1847 int lxc_ovs_delete_port(const char *bridge
, const char *nic
)
1850 char cmd_output
[PATH_MAX
];
1851 struct ovs_veth_args args
;
1853 args
.bridge
= bridge
;
1855 ret
= run_command(cmd_output
, sizeof(cmd_output
),
1856 lxc_ovs_delete_port_exec
, (void *)&args
);
1858 ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
1859 "%s", bridge
, nic
, cmd_output
);
1866 static int lxc_ovs_attach_bridge_exec(void *data
)
1868 struct ovs_veth_args
*args
= data
;
1870 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args
->bridge
, args
->nic
,
1875 static int lxc_ovs_attach_bridge(const char *bridge
, const char *nic
)
1878 char cmd_output
[PATH_MAX
];
1879 struct ovs_veth_args args
;
1881 args
.bridge
= bridge
;
1883 ret
= run_command(cmd_output
, sizeof(cmd_output
),
1884 lxc_ovs_attach_bridge_exec
, (void *)&args
);
1886 ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
1887 bridge
, nic
, cmd_output
);
1894 int lxc_bridge_attach(const char *bridge
, const char *ifname
)
1900 if (strlen(ifname
) >= IFNAMSIZ
)
1903 index
= if_nametoindex(ifname
);
1907 if (is_ovs_bridge(bridge
))
1908 return lxc_ovs_attach_bridge(bridge
, ifname
);
1910 fd
= socket(AF_INET
, SOCK_STREAM
, 0);
1914 retlen
= strlcpy(ifr
.ifr_name
, bridge
, IFNAMSIZ
);
1915 if (retlen
>= IFNAMSIZ
) {
1920 ifr
.ifr_name
[IFNAMSIZ
- 1] = '\0';
1921 ifr
.ifr_ifindex
= index
;
1922 err
= ioctl(fd
, SIOCBRADDIF
, &ifr
);
1930 static const char *const lxc_network_types
[LXC_NET_MAXCONFTYPE
+ 1] = {
1931 [LXC_NET_EMPTY
] = "empty",
1932 [LXC_NET_VETH
] = "veth",
1933 [LXC_NET_MACVLAN
] = "macvlan",
1934 [LXC_NET_PHYS
] = "phys",
1935 [LXC_NET_VLAN
] = "vlan",
1936 [LXC_NET_NONE
] = "none",
1939 const char *lxc_net_type_to_str(int type
)
1941 if (type
< 0 || type
> LXC_NET_MAXCONFTYPE
)
1944 return lxc_network_types
[type
];
1947 static const char padchar
[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
1949 char *lxc_mkifname(char *template)
1952 struct netns_ifaddrs
*ifa
, *ifaddr
;
1953 char name
[IFNAMSIZ
];
1954 bool exists
= false;
1959 seed
= randseed(false);
1962 (void)randseed(true);
1965 if (strlen(template) >= IFNAMSIZ
)
1968 /* Get all the network interfaces. */
1969 ret
= netns_getifaddrs(&ifaddr
, -1, &(bool){false});
1971 SYSERROR("Failed to get network interfaces");
1975 /* Generate random names until we find one that doesn't exist. */
1978 (void)strlcpy(name
, template, IFNAMSIZ
);
1982 for (i
= 0; i
< strlen(name
); i
++) {
1983 if (name
[i
] == 'X') {
1985 name
[i
] = padchar
[rand_r(&seed
) % (strlen(padchar
) - 1)];
1987 name
[i
] = padchar
[rand() % (strlen(padchar
) - 1)];
1992 for (ifa
= ifaddr
; ifa
!= NULL
; ifa
= ifa
->ifa_next
) {
1993 if (!strcmp(ifa
->ifa_name
, name
)) {
2003 netns_freeifaddrs(ifaddr
);
2004 (void)strlcpy(template, name
, strlen(template) + 1);
2009 int setup_private_host_hw_addr(char *veth1
)
2014 sockfd
= socket(AF_INET
, SOCK_DGRAM
, 0);
2018 err
= snprintf((char *)ifr
.ifr_name
, IFNAMSIZ
, "%s", veth1
);
2019 if (err
< 0 || (size_t)err
>= IFNAMSIZ
) {
2024 err
= ioctl(sockfd
, SIOCGIFHWADDR
, &ifr
);
2030 ifr
.ifr_hwaddr
.sa_data
[0] = 0xfe;
2031 err
= ioctl(sockfd
, SIOCSIFHWADDR
, &ifr
);
2039 int lxc_find_gateway_addresses(struct lxc_handler
*handler
)
2041 struct lxc_list
*network
= &handler
->conf
->network
;
2042 struct lxc_list
*iterator
;
2043 struct lxc_netdev
*netdev
;
2046 lxc_list_for_each(iterator
, network
) {
2047 netdev
= iterator
->elem
;
2049 if (!netdev
->ipv4_gateway_auto
&& !netdev
->ipv6_gateway_auto
)
2052 if (netdev
->type
!= LXC_NET_VETH
&& netdev
->type
!= LXC_NET_MACVLAN
) {
2053 ERROR("Automatic gateway detection is only supported "
2054 "for veth and macvlan");
2058 if (netdev
->link
[0] == '\0') {
2059 ERROR("Automatic gateway detection needs a link interface");
2063 link_index
= if_nametoindex(netdev
->link
);
2067 if (netdev
->ipv4_gateway_auto
) {
2068 if (lxc_ipv4_addr_get(link_index
, &netdev
->ipv4_gateway
)) {
2069 ERROR("Failed to automatically find ipv4 gateway "
2070 "address from link interface \"%s\"", netdev
->link
);
2075 if (netdev
->ipv6_gateway_auto
) {
2076 if (lxc_ipv6_addr_get(link_index
, &netdev
->ipv6_gateway
)) {
2077 ERROR("Failed to automatically find ipv6 gateway "
2078 "address from link interface \"%s\"", netdev
->link
);
2087 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2088 static int lxc_create_network_unpriv_exec(const char *lxcpath
, const char *lxcname
,
2089 struct lxc_netdev
*netdev
, pid_t pid
, unsigned int hooks_version
)
2093 int bytes
, pipefd
[2];
2094 char *token
, *saveptr
= NULL
;
2095 char netdev_link
[IFNAMSIZ
];
2096 char buffer
[PATH_MAX
] = {0};
2099 if (netdev
->type
!= LXC_NET_VETH
) {
2100 ERROR("Network type %d not support for unprivileged use", netdev
->type
);
2106 SYSERROR("Failed to create pipe");
2112 SYSERROR("Failed to create new process");
2121 char pidstr
[INTTYPE_TO_STRLEN(pid_t
)];
2125 ret
= dup2(pipefd
[1], STDOUT_FILENO
);
2127 ret
= dup2(pipefd
[1], STDERR_FILENO
);
2130 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2131 _exit(EXIT_FAILURE
);
2134 if (netdev
->link
[0] != '\0')
2135 retlen
= strlcpy(netdev_link
, netdev
->link
, IFNAMSIZ
);
2137 retlen
= strlcpy(netdev_link
, "none", IFNAMSIZ
);
2138 if (retlen
>= IFNAMSIZ
) {
2139 SYSERROR("Invalid network device name");
2140 _exit(EXIT_FAILURE
);
2143 ret
= snprintf(pidstr
, sizeof(pidstr
), "%d", pid
);
2144 if (ret
< 0 || ret
>= sizeof(pidstr
))
2145 _exit(EXIT_FAILURE
);
2146 pidstr
[sizeof(pidstr
) - 1] = '\0';
2148 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath
,
2149 lxcname
, pidstr
, netdev_link
,
2150 netdev
->name
[0] != '\0' ? netdev
->name
: "(null)");
2151 if (netdev
->name
[0] != '\0')
2152 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "create",
2153 lxcpath
, lxcname
, pidstr
, "veth", netdev_link
,
2154 netdev
->name
, (char *)NULL
);
2156 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "create",
2157 lxcpath
, lxcname
, pidstr
, "veth", netdev_link
,
2159 SYSERROR("Failed to execute lxc-user-nic");
2160 _exit(EXIT_FAILURE
);
2163 /* close the write-end of the pipe */
2166 bytes
= lxc_read_nointr(pipefd
[0], &buffer
, PATH_MAX
);
2168 SYSERROR("Failed to read from pipe file descriptor");
2171 buffer
[bytes
- 1] = '\0';
2174 ret
= wait_for_pid(child
);
2176 if (ret
!= 0 || bytes
< 0) {
2177 ERROR("lxc-user-nic failed to configure requested network: %s",
2178 buffer
[0] != '\0' ? buffer
: "(null)");
2181 TRACE("Received output \"%s\" from lxc-user-nic", buffer
);
2184 token
= strtok_r(buffer
, ":", &saveptr
);
2186 ERROR("Failed to parse lxc-user-nic output");
2190 memset(netdev
->name
, 0, IFNAMSIZ
);
2191 memcpy(netdev
->name
, token
, IFNAMSIZ
- 1);
2193 /* netdev->ifindex */
2194 token
= strtok_r(NULL
, ":", &saveptr
);
2196 ERROR("Failed to parse lxc-user-nic output");
2200 ret
= lxc_safe_int(token
, &netdev
->ifindex
);
2203 SYSERROR("Failed to convert string \"%s\" to integer", token
);
2207 /* netdev->priv.veth_attr.veth1 */
2208 token
= strtok_r(NULL
, ":", &saveptr
);
2210 ERROR("Failed to parse lxc-user-nic output");
2214 retlen
= strlcpy(netdev
->priv
.veth_attr
.veth1
, token
, IFNAMSIZ
);
2215 if (retlen
>= IFNAMSIZ
) {
2216 ERROR("Host side veth device name returned by lxc-user-nic is "
2221 /* netdev->priv.veth_attr.ifindex */
2222 token
= strtok_r(NULL
, ":", &saveptr
);
2224 ERROR("Failed to parse lxc-user-nic output");
2228 ret
= lxc_safe_int(token
, &netdev
->priv
.veth_attr
.ifindex
);
2231 SYSERROR("Failed to convert string \"%s\" to integer", token
);
2235 if (netdev
->upscript
) {
2239 netdev
->priv
.veth_attr
.veth1
,
2243 ret
= run_script_argv(lxcname
,
2244 hooks_version
, "net",
2245 netdev
->upscript
, "up", argv
);
2253 static int lxc_delete_network_unpriv_exec(const char *lxcpath
, const char *lxcname
,
2254 struct lxc_netdev
*netdev
,
2255 const char *netns_path
)
2260 char buffer
[PATH_MAX
] = {0};
2262 if (netdev
->type
!= LXC_NET_VETH
) {
2263 ERROR("Network type %d not support for unprivileged use", netdev
->type
);
2269 SYSERROR("Failed to create pipe");
2275 SYSERROR("Failed to create new process");
2287 ret
= dup2(pipefd
[1], STDOUT_FILENO
);
2289 ret
= dup2(pipefd
[1], STDERR_FILENO
);
2292 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2293 _exit(EXIT_FAILURE
);
2296 if (netdev
->priv
.veth_attr
.pair
[0] != '\0')
2297 hostveth
= netdev
->priv
.veth_attr
.pair
;
2299 hostveth
= netdev
->priv
.veth_attr
.veth1
;
2300 if (hostveth
[0] == '\0') {
2301 SYSERROR("Host side veth device name is missing");
2302 _exit(EXIT_FAILURE
);
2305 if (netdev
->link
[0] == '\0') {
2306 SYSERROR("Network link for network device \"%s\" is "
2307 "missing", netdev
->priv
.veth_attr
.veth1
);
2308 _exit(EXIT_FAILURE
);
2311 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath
,
2312 lxcname
, netns_path
, netdev
->link
, hostveth
);
2313 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "delete", lxcpath
,
2314 lxcname
, netns_path
, "veth", netdev
->link
, hostveth
,
2316 SYSERROR("Failed to exec lxc-user-nic.");
2317 _exit(EXIT_FAILURE
);
2322 bytes
= lxc_read_nointr(pipefd
[0], &buffer
, PATH_MAX
);
2324 SYSERROR("Failed to read from pipe file descriptor.");
2327 buffer
[bytes
- 1] = '\0';
2330 ret
= wait_for_pid(child
);
2332 if (ret
!= 0 || bytes
< 0) {
2333 ERROR("lxc-user-nic failed to delete requested network: %s",
2334 buffer
[0] != '\0' ? buffer
: "(null)");
2341 bool lxc_delete_network_unpriv(struct lxc_handler
*handler
)
2344 struct lxc_list
*iterator
;
2345 struct lxc_list
*network
= &handler
->conf
->network
;
2346 /* strlen("/proc/") = 6
2348 * INTTYPE_TO_STRLEN(pid_t)
2350 * strlen("/fd/") = 4
2352 * INTTYPE_TO_STRLEN(int)
2356 char netns_path
[6 + INTTYPE_TO_STRLEN(pid_t
) + 4 + INTTYPE_TO_STRLEN(int) + 1];
2360 if (handler
->nsfd
[LXC_NS_NET
] < 0) {
2361 DEBUG("Cannot not guarantee safe deletion of network devices. "
2362 "Manual cleanup maybe needed");
2366 ret
= snprintf(netns_path
, sizeof(netns_path
), "/proc/%d/fd/%d",
2367 lxc_raw_getpid(), handler
->nsfd
[LXC_NS_NET
]);
2368 if (ret
< 0 || ret
>= sizeof(netns_path
))
2371 lxc_list_for_each(iterator
, network
) {
2372 char *hostveth
= NULL
;
2373 struct lxc_netdev
*netdev
= iterator
->elem
;
2375 /* We can only delete devices whose ifindex we have. If we don't
2376 * have the index it means that we didn't create it.
2378 if (!netdev
->ifindex
)
2381 if (netdev
->type
== LXC_NET_PHYS
) {
2382 ret
= lxc_netdev_rename_by_index(netdev
->ifindex
,
2385 WARN("Failed to rename interface with index %d "
2386 "to its initial name \"%s\"",
2387 netdev
->ifindex
, netdev
->link
);
2389 TRACE("Renamed interface with index %d to its "
2390 "initial name \"%s\"",
2391 netdev
->ifindex
, netdev
->link
);
2392 goto clear_ifindices
;
2395 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
2397 WARN("Failed to deconfigure network device");
2399 if (netdev
->type
!= LXC_NET_VETH
)
2400 goto clear_ifindices
;
2402 if (netdev
->link
[0] == '\0' || !is_ovs_bridge(netdev
->link
))
2403 goto clear_ifindices
;
2405 if (netdev
->priv
.veth_attr
.pair
[0] != '\0')
2406 hostveth
= netdev
->priv
.veth_attr
.pair
;
2408 hostveth
= netdev
->priv
.veth_attr
.veth1
;
2409 if (hostveth
[0] == '\0')
2410 goto clear_ifindices
;
2412 ret
= lxc_delete_network_unpriv_exec(handler
->lxcpath
,
2413 handler
->name
, netdev
,
2416 WARN("Failed to remove port \"%s\" from openvswitch "
2417 "bridge \"%s\"", hostveth
, netdev
->link
);
2418 goto clear_ifindices
;
2420 INFO("Removed interface \"%s\" from \"%s\"", hostveth
,
2424 /* We need to clear any ifindices we recorded so liblxc won't
2425 * have cached stale data which would cause it to fail on reboot,
2426 * where we don't re-read the on-disk config file.
2428 netdev
->ifindex
= 0;
2429 if (netdev
->type
== LXC_NET_PHYS
) {
2430 netdev
->priv
.phys_attr
.ifindex
= 0;
2431 } else if (netdev
->type
== LXC_NET_VETH
) {
2432 netdev
->priv
.veth_attr
.veth1
[0] = '\0';
2433 netdev
->priv
.veth_attr
.ifindex
= 0;
2440 int lxc_create_network_priv(struct lxc_handler
*handler
)
2442 struct lxc_list
*iterator
;
2443 struct lxc_list
*network
= &handler
->conf
->network
;
2445 if (!handler
->am_root
)
2448 lxc_list_for_each(iterator
, network
) {
2449 struct lxc_netdev
*netdev
= iterator
->elem
;
2451 if (netdev
->type
< 0 || netdev
->type
> LXC_NET_MAXCONFTYPE
) {
2452 ERROR("Invalid network configuration type %d", netdev
->type
);
2456 if (netdev_conf
[netdev
->type
](handler
, netdev
)) {
2457 ERROR("Failed to create network device");
2466 int lxc_network_move_created_netdev_priv(const char *lxcpath
, const char *lxcname
,
2467 struct lxc_list
*network
, pid_t pid
)
2470 char ifname
[IFNAMSIZ
];
2471 struct lxc_list
*iterator
;
2473 if (am_guest_unpriv())
2476 lxc_list_for_each(iterator
, network
) {
2477 struct lxc_netdev
*netdev
= iterator
->elem
;
2479 if (!netdev
->ifindex
)
2482 /* retrieve the name of the interface */
2483 if (!if_indextoname(netdev
->ifindex
, ifname
)) {
2484 ERROR("No interface corresponding to ifindex \"%d\"",
2489 ret
= lxc_netdev_move_by_name(ifname
, pid
, NULL
);
2492 SYSERROR("Failed to move network device \"%s\" to "
2493 "network namespace %d", ifname
, pid
);
2497 DEBUG("Moved network device \"%s\"/\"%s\" to network namespace "
2499 ifname
, netdev
->name
[0] != '\0' ? netdev
->name
: "(null)",
2506 int lxc_create_network_unpriv(const char *lxcpath
, const char *lxcname
,
2507 struct lxc_list
*network
, pid_t pid
, unsigned int hooks_version
)
2509 struct lxc_list
*iterator
;
2511 if (!am_guest_unpriv())
2514 lxc_list_for_each(iterator
, network
) {
2515 struct lxc_netdev
*netdev
= iterator
->elem
;
2517 if (netdev
->type
== LXC_NET_EMPTY
)
2520 if (netdev
->type
== LXC_NET_NONE
)
2523 if (netdev
->type
!= LXC_NET_VETH
) {
2524 ERROR("Networks of type %s are not supported by "
2525 "unprivileged containers",
2526 lxc_net_type_to_str(netdev
->type
));
2531 INFO("mtu ignored due to insufficient privilege");
2533 if (lxc_create_network_unpriv_exec(lxcpath
, lxcname
, netdev
, pid
, hooks_version
))
2540 bool lxc_delete_network_priv(struct lxc_handler
*handler
)
2543 struct lxc_list
*iterator
;
2544 struct lxc_list
*network
= &handler
->conf
->network
;
2546 lxc_list_for_each(iterator
, network
) {
2547 char *hostveth
= NULL
;
2548 struct lxc_netdev
*netdev
= iterator
->elem
;
2550 /* We can only delete devices whose ifindex we have. If we don't
2551 * have the index it means that we didn't create it.
2553 if (!netdev
->ifindex
)
2556 if (netdev
->type
== LXC_NET_PHYS
) {
2557 ret
= lxc_netdev_rename_by_index(netdev
->ifindex
, netdev
->link
);
2559 WARN("Failed to rename interface with index %d "
2560 "from \"%s\" to its initial name \"%s\"",
2561 netdev
->ifindex
, netdev
->name
, netdev
->link
);
2563 TRACE("Renamed interface with index %d from "
2564 "\"%s\" to its initial name \"%s\"",
2565 netdev
->ifindex
, netdev
->name
,
2567 goto clear_ifindices
;
2570 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
2572 WARN("Failed to deconfigure network device");
2574 /* Recent kernels remove the virtual interfaces when the network
2575 * namespace is destroyed but in case we did not move the
2576 * interface to the network namespace, we have to destroy it.
2578 ret
= lxc_netdev_delete_by_index(netdev
->ifindex
);
2579 if (-ret
== ENODEV
) {
2580 INFO("Interface \"%s\" with index %d already "
2581 "deleted or existing in different network "
2583 netdev
->name
[0] != '\0' ? netdev
->name
: "(null)",
2585 } else if (ret
< 0) {
2587 SYSWARN("Failed to remove interface \"%s\" with index %d",
2588 netdev
->name
[0] != '\0' ? netdev
->name
: "(null)",
2590 goto clear_ifindices
;
2592 INFO("Removed interface \"%s\" with index %d",
2593 netdev
->name
[0] != '\0' ? netdev
->name
: "(null)",
2596 if (netdev
->type
!= LXC_NET_VETH
)
2597 goto clear_ifindices
;
2599 /* Explicitly delete host veth device to prevent lingering
2600 * devices. We had issues in LXD around this.
2602 if (netdev
->priv
.veth_attr
.pair
[0] != '\0')
2603 hostveth
= netdev
->priv
.veth_attr
.pair
;
2605 hostveth
= netdev
->priv
.veth_attr
.veth1
;
2606 if (hostveth
[0] == '\0')
2607 goto clear_ifindices
;
2609 ret
= lxc_netdev_delete_by_name(hostveth
);
2612 SYSWARN("Failed to remove interface \"%s\" from \"%s\"",
2613 hostveth
, netdev
->link
);
2614 goto clear_ifindices
;
2616 INFO("Removed interface \"%s\" from \"%s\"", hostveth
, netdev
->link
);
2618 if (netdev
->link
[0] == '\0' || !is_ovs_bridge(netdev
->link
)) {
2619 netdev
->priv
.veth_attr
.veth1
[0] = '\0';
2620 netdev
->ifindex
= 0;
2621 netdev
->priv
.veth_attr
.ifindex
= 0;
2622 goto clear_ifindices
;
2625 /* Delete the openvswitch port. */
2626 ret
= lxc_ovs_delete_port(netdev
->link
, hostveth
);
2628 WARN("Failed to remove port \"%s\" from openvswitch "
2629 "bridge \"%s\"", hostveth
, netdev
->link
);
2631 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
2632 hostveth
, netdev
->link
);
2635 /* We need to clear any ifindices we recorded so liblxc won't
2636 * have cached stale data which would cause it to fail on reboot,
2637 * where we don't re-read the on-disk config file.
2639 netdev
->ifindex
= 0;
2640 if (netdev
->type
== LXC_NET_PHYS
) {
2641 netdev
->priv
.phys_attr
.ifindex
= 0;
2642 } else if (netdev
->type
== LXC_NET_VETH
) {
2643 netdev
->priv
.veth_attr
.veth1
[0] = '\0';
2644 netdev
->priv
.veth_attr
.ifindex
= 0;
2651 int lxc_requests_empty_network(struct lxc_handler
*handler
)
2653 struct lxc_list
*network
= &handler
->conf
->network
;
2654 struct lxc_list
*iterator
;
2655 bool found_none
= false, found_nic
= false;
2657 if (lxc_list_empty(network
))
2660 lxc_list_for_each(iterator
, network
) {
2661 struct lxc_netdev
*netdev
= iterator
->elem
;
2663 if (netdev
->type
== LXC_NET_NONE
)
2668 if (found_none
&& !found_nic
)
2673 /* try to move physical nics to the init netns */
2674 int lxc_restore_phys_nics_to_netns(struct lxc_handler
*handler
)
2678 char ifname
[IFNAMSIZ
];
2679 struct lxc_list
*iterator
;
2680 int netnsfd
= handler
->nsfd
[LXC_NS_NET
];
2681 struct lxc_conf
*conf
= handler
->conf
;
2683 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
2684 * the parent network namespace. We won't have this capability if we are
2687 if (!handler
->am_root
)
2690 TRACE("Moving physical network devices back to parent network namespace");
2692 oldfd
= lxc_preserve_ns(lxc_raw_getpid(), "net");
2694 SYSERROR("Failed to preserve network namespace");
2698 ret
= setns(netnsfd
, CLONE_NEWNET
);
2700 SYSERROR("Failed to enter network namespace");
2705 lxc_list_for_each(iterator
, &conf
->network
) {
2706 struct lxc_netdev
*netdev
= iterator
->elem
;
2708 if (netdev
->type
!= LXC_NET_PHYS
)
2711 /* Retrieve the name of the interface in the container's network
2714 if (!if_indextoname(netdev
->ifindex
, ifname
)) {
2715 WARN("No interface corresponding to ifindex %d",
2720 ret
= lxc_netdev_move_by_name(ifname
, 1, netdev
->link
);
2722 WARN("Error moving network device \"%s\" back to "
2723 "network namespace", ifname
);
2725 TRACE("Moved network device \"%s\" back to network "
2726 "namespace", ifname
);
2729 ret
= setns(oldfd
, CLONE_NEWNET
);
2732 SYSERROR("Failed to enter network namespace");
2739 static int setup_hw_addr(char *hwaddr
, const char *ifname
)
2741 struct sockaddr sockaddr
;
2745 ret
= lxc_convert_mac(hwaddr
, &sockaddr
);
2748 SYSERROR("Mac address \"%s\" conversion failed", hwaddr
);
2752 memcpy(ifr
.ifr_name
, ifname
, IFNAMSIZ
);
2753 ifr
.ifr_name
[IFNAMSIZ
-1] = '\0';
2754 memcpy((char *) &ifr
.ifr_hwaddr
, (char *) &sockaddr
, sizeof(sockaddr
));
2756 fd
= socket(AF_INET
, SOCK_DGRAM
, 0);
2760 ret
= ioctl(fd
, SIOCSIFHWADDR
, &ifr
);
2762 SYSERROR("Failed to perform ioctl");
2766 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr
,
2772 static int setup_ipv4_addr(struct lxc_list
*ip
, int ifindex
)
2774 struct lxc_list
*iterator
;
2777 lxc_list_for_each(iterator
, ip
) {
2778 struct lxc_inetdev
*inetdev
= iterator
->elem
;
2780 err
= lxc_ipv4_addr_add(ifindex
, &inetdev
->addr
,
2781 &inetdev
->bcast
, inetdev
->prefix
);
2784 SYSERROR("Failed to setup ipv4 address for network device "
2785 "with eifindex %d", ifindex
);
2793 static int setup_ipv6_addr(struct lxc_list
*ip
, int ifindex
)
2795 struct lxc_list
*iterator
;
2798 lxc_list_for_each(iterator
, ip
) {
2799 struct lxc_inet6dev
*inet6dev
= iterator
->elem
;
2801 err
= lxc_ipv6_addr_add(ifindex
, &inet6dev
->addr
,
2802 &inet6dev
->mcast
, &inet6dev
->acast
,
2806 SYSERROR("Failed to setup ipv6 address for network device "
2807 "with eifindex %d", ifindex
);
2815 static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev
*netdev
)
2817 char ifname
[IFNAMSIZ
];
2819 const char *net_type_name
;
2820 char *current_ifname
= ifname
;
2822 /* empty network namespace */
2823 if (!netdev
->ifindex
) {
2824 if (netdev
->flags
& IFF_UP
) {
2825 err
= lxc_netdev_up("lo");
2828 SYSERROR("Failed to set the loopback network device up");
2833 if (netdev
->type
== LXC_NET_EMPTY
)
2836 if (netdev
->type
== LXC_NET_NONE
)
2839 if (netdev
->type
!= LXC_NET_VETH
) {
2840 net_type_name
= lxc_net_type_to_str(netdev
->type
);
2841 ERROR("%s networks are not supported for containers "
2842 "not setup up by privileged users", net_type_name
);
2846 netdev
->ifindex
= if_nametoindex(netdev
->name
);
2849 /* get the new ifindex in case of physical netdev */
2850 if (netdev
->type
== LXC_NET_PHYS
) {
2851 netdev
->ifindex
= if_nametoindex(netdev
->link
);
2852 if (!netdev
->ifindex
) {
2853 ERROR("Failed to get ifindex for network device \"%s\"",
2859 /* retrieve the name of the interface */
2860 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
2861 ERROR("Failed get name for network device with ifindex %d",
2866 /* Default: let the system to choose one interface name.
2867 * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
2868 * netlink will replace the format specifier with an appropriate index.
2870 if (netdev
->name
[0] == '\0') {
2871 if (netdev
->type
== LXC_NET_PHYS
)
2872 (void)strlcpy(netdev
->name
, netdev
->link
, IFNAMSIZ
);
2874 (void)strlcpy(netdev
->name
, "eth%d", IFNAMSIZ
);
2877 /* rename the interface name */
2878 if (strcmp(ifname
, netdev
->name
) != 0) {
2879 err
= lxc_netdev_rename_by_name(ifname
, netdev
->name
);
2882 SYSERROR("Failed to rename network device \"%s\" to \"%s\"",
2883 ifname
, netdev
->name
);
2888 /* Re-read the name of the interface because its name has changed
2889 * and would be automatically allocated by the system
2891 if (!if_indextoname(netdev
->ifindex
, current_ifname
)) {
2892 ERROR("Failed get name for network device with ifindex %d",
2897 /* Now update the recorded name of the network device to reflect the
2898 * name of the network device in the child's network namespace. We will
2899 * later on send this information back to the parent.
2901 (void)strlcpy(netdev
->name
, current_ifname
, IFNAMSIZ
);
2903 /* set a mac address */
2904 if (netdev
->hwaddr
) {
2905 if (setup_hw_addr(netdev
->hwaddr
, current_ifname
)) {
2906 ERROR("Failed to setup hw address for network device \"%s\"",
2912 /* setup ipv4 addresses on the interface */
2913 if (setup_ipv4_addr(&netdev
->ipv4
, netdev
->ifindex
)) {
2914 ERROR("Failed to setup ip addresses for network device \"%s\"",
2919 /* setup ipv6 addresses on the interface */
2920 if (setup_ipv6_addr(&netdev
->ipv6
, netdev
->ifindex
)) {
2921 ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
2926 /* set the network device up */
2927 if (netdev
->flags
& IFF_UP
) {
2930 err
= lxc_netdev_up(current_ifname
);
2933 SYSERROR("Failed to set network device \"%s\" up",
2938 /* the network is up, make the loopback up too */
2939 err
= lxc_netdev_up("lo");
2942 SYSERROR("Failed to set the loopback network device up");
2947 /* We can only set up the default routes after bringing
2948 * up the interface, since bringing up the interface adds
2949 * the link-local routes and we can't add a default
2950 * route if the gateway is not reachable. */
2952 /* setup ipv4 gateway on the interface */
2953 if (netdev
->ipv4_gateway
) {
2954 if (!(netdev
->flags
& IFF_UP
)) {
2955 ERROR("Cannot add ipv4 gateway for network device "
2956 "\"%s\" when not bringing up the interface", ifname
);
2960 if (lxc_list_empty(&netdev
->ipv4
)) {
2961 ERROR("Cannot add ipv4 gateway for network device "
2962 "\"%s\" when not assigning an address", ifname
);
2966 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
2968 err
= lxc_ipv4_dest_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
2971 SYSERROR("Failed to add ipv4 dest for network device \"%s\"",
2975 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
2978 SYSERROR("Failed to setup ipv4 gateway for network device \"%s\"",
2981 if (netdev
->ipv4_gateway_auto
) {
2982 char buf
[INET_ADDRSTRLEN
];
2983 inet_ntop(AF_INET
, netdev
->ipv4_gateway
, buf
, sizeof(buf
));
2984 ERROR("Fried to set autodetected ipv4 gateway \"%s\"", buf
);
2991 /* setup ipv6 gateway on the interface */
2992 if (netdev
->ipv6_gateway
) {
2993 if (!(netdev
->flags
& IFF_UP
)) {
2994 ERROR("Cannot add ipv6 gateway for network device "
2995 "\"%s\" when not bringing up the interface", ifname
);
2999 if (lxc_list_empty(&netdev
->ipv6
) && !IN6_IS_ADDR_LINKLOCAL(netdev
->ipv6_gateway
)) {
3000 ERROR("Cannot add ipv6 gateway for network device "
3001 "\"%s\" when not assigning an address", ifname
);
3005 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
3007 err
= lxc_ipv6_dest_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
3010 SYSERROR("Failed to add ipv6 dest for network device \"%s\"",
3014 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
3017 SYSERROR("Failed to setup ipv6 gateway for network device \"%s\"",
3020 if (netdev
->ipv6_gateway_auto
) {
3021 char buf
[INET6_ADDRSTRLEN
];
3022 inet_ntop(AF_INET6
, netdev
->ipv6_gateway
, buf
, sizeof(buf
));
3023 ERROR("Tried to set autodetected ipv6 "
3024 "gateway for network device "
3032 DEBUG("Network device \"%s\" has been setup", current_ifname
);
3037 int lxc_setup_network_in_child_namespaces(const struct lxc_conf
*conf
,
3038 struct lxc_list
*network
)
3040 struct lxc_list
*iterator
;
3041 struct lxc_netdev
*netdev
;
3043 lxc_list_for_each(iterator
, network
) {
3044 netdev
= iterator
->elem
;
3046 if (lxc_setup_netdev_in_child_namespaces(netdev
)) {
3047 ERROR("failed to setup netdev");
3052 if (!lxc_list_empty(network
))
3053 INFO("network has been setup");
3058 int lxc_network_send_veth_names_to_child(struct lxc_handler
*handler
)
3060 struct lxc_list
*iterator
;
3061 struct lxc_list
*network
= &handler
->conf
->network
;
3062 int data_sock
= handler
->data_sock
[0];
3064 if (handler
->am_root
)
3067 lxc_list_for_each(iterator
, network
) {
3069 struct lxc_netdev
*netdev
= iterator
->elem
;
3071 if (netdev
->type
!= LXC_NET_VETH
)
3074 ret
= lxc_send_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, MSG_NOSIGNAL
);
3077 TRACE("Sent network device name \"%s\" to child", netdev
->name
);
3083 int lxc_network_recv_veth_names_from_parent(struct lxc_handler
*handler
)
3085 struct lxc_list
*iterator
;
3086 struct lxc_list
*network
= &handler
->conf
->network
;
3087 int data_sock
= handler
->data_sock
[1];
3089 if (handler
->am_root
)
3092 lxc_list_for_each(iterator
, network
) {
3094 struct lxc_netdev
*netdev
= iterator
->elem
;
3096 if (netdev
->type
!= LXC_NET_VETH
)
3099 ret
= lxc_recv_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, 0);
3102 TRACE("Received network device name \"%s\" from parent", netdev
->name
);
3108 int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler
*handler
)
3110 struct lxc_list
*iterator
, *network
;
3111 int data_sock
= handler
->data_sock
[0];
3113 if (!handler
->am_root
)
3116 network
= &handler
->conf
->network
;
3117 lxc_list_for_each(iterator
, network
) {
3119 struct lxc_netdev
*netdev
= iterator
->elem
;
3121 /* Send network device name in the child's namespace to parent. */
3122 ret
= lxc_send_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, MSG_NOSIGNAL
);
3126 /* Send network device ifindex in the child's namespace to
3129 ret
= lxc_send_nointr(data_sock
, &netdev
->ifindex
, sizeof(netdev
->ifindex
), MSG_NOSIGNAL
);
3134 TRACE("Sent network device names and ifindeces to parent");
3138 int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler
*handler
)
3140 struct lxc_list
*iterator
, *network
;
3141 int data_sock
= handler
->data_sock
[1];
3143 if (!handler
->am_root
)
3146 network
= &handler
->conf
->network
;
3147 lxc_list_for_each(iterator
, network
) {
3149 struct lxc_netdev
*netdev
= iterator
->elem
;
3151 /* Receive network device name in the child's namespace to
3154 ret
= lxc_recv_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, 0);
3158 /* Receive network device ifindex in the child's namespace to
3161 ret
= lxc_recv_nointr(data_sock
, &netdev
->ifindex
, sizeof(netdev
->ifindex
), 0);
3169 void lxc_delete_network(struct lxc_handler
*handler
)
3173 if (handler
->am_root
)
3174 bret
= lxc_delete_network_priv(handler
);
3176 bret
= lxc_delete_network_unpriv(handler
);
3178 DEBUG("Failed to delete network devices");
3180 DEBUG("Deleted network devices");
3183 int lxc_netns_set_nsid(int fd
)
3186 char buf
[NLMSG_ALIGN(sizeof(struct nlmsghdr
)) +
3187 NLMSG_ALIGN(sizeof(struct rtgenmsg
)) +
3189 struct nl_handler nlh
;
3190 struct nlmsghdr
*hdr
;
3191 struct rtgenmsg
*msg
;
3193 const __s32 ns_id
= -1;
3194 const __u32 netns_fd
= fd
;
3196 ret
= netlink_open(&nlh
, NETLINK_ROUTE
);
3200 memset(buf
, 0, sizeof(buf
));
3202 #pragma GCC diagnostic push
3203 #pragma GCC diagnostic ignored "-Wcast-align"
3204 hdr
= (struct nlmsghdr
*)buf
;
3205 msg
= (struct rtgenmsg
*)NLMSG_DATA(hdr
);
3206 #pragma GCC diagnostic pop
3208 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(*msg
));
3209 hdr
->nlmsg_type
= RTM_NEWNSID
;
3210 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
3212 hdr
->nlmsg_seq
= RTM_NEWNSID
;
3213 msg
->rtgen_family
= AF_UNSPEC
;
3215 ret
= addattr(hdr
, 1024, __LXC_NETNSA_FD
, &netns_fd
, sizeof(netns_fd
));
3219 ret
= addattr(hdr
, 1024, __LXC_NETNSA_NSID
, &ns_id
, sizeof(ns_id
));
3223 ret
= __netlink_transaction(&nlh
, hdr
, hdr
);
3226 saved_errno
= errno
;
3227 netlink_close(&nlh
);
3228 errno
= saved_errno
;
3233 static int parse_rtattr(struct rtattr
*tb
[], int max
, struct rtattr
*rta
, int len
)
3236 memset(tb
, 0, sizeof(struct rtattr
*) * (max
+ 1));
3238 while (RTA_OK(rta
, len
)) {
3239 unsigned short type
= rta
->rta_type
;
3241 if ((type
<= max
) && (!tb
[type
]))
3244 #pragma GCC diagnostic push
3245 #pragma GCC diagnostic ignored "-Wcast-align"
3246 rta
= RTA_NEXT(rta
, len
);
3247 #pragma GCC diagnostic pop
3253 static inline __s32
rta_getattr_s32(const struct rtattr
*rta
)
3255 return *(__s32
*)RTA_DATA(rta
);
3259 #define NETNS_RTA(r) \
3260 ((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
3263 int lxc_netns_get_nsid(int fd
)
3267 char buf
[NLMSG_ALIGN(sizeof(struct nlmsghdr
)) +
3268 NLMSG_ALIGN(sizeof(struct rtgenmsg
)) +
3270 struct rtattr
*tb
[__LXC_NETNSA_MAX
+ 1];
3271 struct nl_handler nlh
;
3272 struct nlmsghdr
*hdr
;
3273 struct rtgenmsg
*msg
;
3275 __u32 netns_fd
= fd
;
3277 ret
= netlink_open(&nlh
, NETLINK_ROUTE
);
3281 memset(buf
, 0, sizeof(buf
));
3283 #pragma GCC diagnostic push
3284 #pragma GCC diagnostic ignored "-Wcast-align"
3285 hdr
= (struct nlmsghdr
*)buf
;
3286 msg
= (struct rtgenmsg
*)NLMSG_DATA(hdr
);
3287 #pragma GCC diagnostic pop
3289 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(*msg
));
3290 hdr
->nlmsg_type
= RTM_GETNSID
;
3291 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
3293 hdr
->nlmsg_seq
= RTM_GETNSID
;
3294 msg
->rtgen_family
= AF_UNSPEC
;
3296 ret
= addattr(hdr
, 1024, __LXC_NETNSA_FD
, &netns_fd
, sizeof(netns_fd
));
3298 ret
= __netlink_transaction(&nlh
, hdr
, hdr
);
3300 saved_errno
= errno
;
3301 netlink_close(&nlh
);
3302 errno
= saved_errno
;
3307 msg
= NLMSG_DATA(hdr
);
3308 len
= hdr
->nlmsg_len
- NLMSG_SPACE(sizeof(*msg
));
3312 #pragma GCC diagnostic push
3313 #pragma GCC diagnostic ignored "-Wcast-align"
3314 parse_rtattr(tb
, __LXC_NETNSA_MAX
, NETNS_RTA(msg
), len
);
3315 if (tb
[__LXC_NETNSA_NSID
])
3316 return rta_getattr_s32(tb
[__LXC_NETNSA_NSID
]);
3317 #pragma GCC diagnostic pop