1 /* SPDX-License-Identifier: LGPL-2.1+ */
10 #include <linux/netlink.h>
11 #include <linux/rtnetlink.h>
12 #include <linux/sockios.h>
13 #include <net/ethernet.h>
15 #include <net/if_arp.h>
16 #include <netinet/in.h>
20 #include <sys/inotify.h>
21 #include <sys/ioctl.h>
22 #include <sys/param.h>
23 #include <sys/socket.h>
25 #include <sys/types.h>
29 #include "../include/netns_ifaddrs.h"
33 #include "file_utils.h"
36 #include "memory_utils.h"
39 #include "process_utils.h"
40 #include "syscall_wrappers.h"
44 #include "include/strlcpy.h"
/* Log category declaration for this file: category "network", parent "lxc". */
lxc_log_define(network, lxc);

/*
 * Per-network-type callback that receives both the container handler and the
 * network device description; the netdev_conf table holds one per type.
 */
typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);

/*
 * Per-network-type callback that only receives the network device
 * description; the netdev_ns_conf table holds one per type.
 */
typedef int (*instantiate_ns_cb)(struct lxc_netdev *);

/* Name of the loopback interface. */
static const char loop_device[] = "lo";
53 static int lxc_ip_route_dest(__u16 nlmsg_type
, int family
, int ifindex
, void *dest
, unsigned int netmask
)
55 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
56 struct nl_handler nlh
;
57 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
61 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
62 : sizeof(struct in6_addr
);
64 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
68 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
72 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
76 nlmsg
->nlmsghdr
->nlmsg_flags
=
77 NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
78 nlmsg
->nlmsghdr
->nlmsg_type
= nlmsg_type
;
80 rt
= nlmsg_reserve(nlmsg
, sizeof(struct rtmsg
));
84 rt
->rtm_family
= family
;
85 rt
->rtm_table
= RT_TABLE_MAIN
;
86 rt
->rtm_scope
= RT_SCOPE_LINK
;
87 rt
->rtm_protocol
= RTPROT_BOOT
;
88 rt
->rtm_type
= RTN_UNICAST
;
89 rt
->rtm_dst_len
= netmask
;
91 if (nla_put_buffer(nlmsg
, RTA_DST
, dest
, addrlen
))
94 if (nla_put_u32(nlmsg
, RTA_OIF
, ifindex
))
97 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
100 static int lxc_ipv4_dest_add(int ifindex
, struct in_addr
*dest
, unsigned int netmask
)
102 return lxc_ip_route_dest(RTM_NEWROUTE
, AF_INET
, ifindex
, dest
, netmask
);
105 static int lxc_ipv6_dest_add(int ifindex
, struct in6_addr
*dest
, unsigned int netmask
)
107 return lxc_ip_route_dest(RTM_NEWROUTE
, AF_INET6
, ifindex
, dest
, netmask
);
110 static int lxc_ipv4_dest_del(int ifindex
, struct in_addr
*dest
, unsigned int netmask
)
112 return lxc_ip_route_dest(RTM_DELROUTE
, AF_INET
, ifindex
, dest
, netmask
);
115 static int lxc_ipv6_dest_del(int ifindex
, struct in6_addr
*dest
, unsigned int netmask
)
117 return lxc_ip_route_dest(RTM_DELROUTE
, AF_INET6
, ifindex
, dest
, netmask
);
120 static int lxc_setup_ipv4_routes(struct lxc_list
*ip
, int ifindex
)
122 struct lxc_list
*iterator
;
125 lxc_list_for_each(iterator
, ip
) {
126 struct lxc_inetdev
*inetdev
= iterator
->elem
;
128 err
= lxc_ipv4_dest_add(ifindex
, &inetdev
->addr
, inetdev
->prefix
);
130 return log_error_errno(-1, -err
, "Failed to setup ipv4 route for network device with ifindex %d", ifindex
);
136 static int lxc_setup_ipv6_routes(struct lxc_list
*ip
, int ifindex
)
138 struct lxc_list
*iterator
;
141 lxc_list_for_each(iterator
, ip
) {
142 struct lxc_inet6dev
*inet6dev
= iterator
->elem
;
144 err
= lxc_ipv6_dest_add(ifindex
, &inet6dev
->addr
, inet6dev
->prefix
);
146 return log_error_errno(-1, -err
, "Failed to setup ipv6 route for network device with ifindex %d", ifindex
);
152 static int setup_ipv4_addr_routes(struct lxc_list
*ip
, int ifindex
)
154 struct lxc_list
*iterator
;
157 lxc_list_for_each(iterator
, ip
) {
158 struct lxc_inetdev
*inetdev
= iterator
->elem
;
160 err
= lxc_ipv4_dest_add(ifindex
, &inetdev
->addr
, 32);
163 return log_error_errno(-1, err
, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex
);
169 static int setup_ipv6_addr_routes(struct lxc_list
*ip
, int ifindex
)
171 struct lxc_list
*iterator
;
174 lxc_list_for_each(iterator
, ip
) {
175 struct lxc_inet6dev
*inet6dev
= iterator
->elem
;
177 err
= lxc_ipv6_dest_add(ifindex
, &inet6dev
->addr
, 128);
179 return log_error_errno(-1, err
, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex
);
185 static int lxc_ip_neigh_proxy(__u16 nlmsg_type
, int family
, int ifindex
, void *dest
)
187 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
188 struct nl_handler nlh
;
189 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
193 addrlen
= family
== AF_INET
? sizeof(struct in_addr
) : sizeof(struct in6_addr
);
195 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
199 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
203 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
207 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
208 nlmsg
->nlmsghdr
->nlmsg_type
= nlmsg_type
;
210 rt
= nlmsg_reserve(nlmsg
, sizeof(struct ndmsg
));
214 rt
->ndm_ifindex
= ifindex
;
215 rt
->ndm_flags
= NTF_PROXY
;
216 rt
->ndm_type
= NDA_DST
;
217 rt
->ndm_family
= family
;
219 if (nla_put_buffer(nlmsg
, NDA_DST
, dest
, addrlen
))
222 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
225 static int lxc_is_ip_forwarding_enabled(const char *ifname
, int family
)
231 if (family
!= AF_INET
&& family
!= AF_INET6
)
232 return ret_set_errno(-1, EINVAL
);
234 ret
= snprintf(path
, sizeof(path
), "/proc/sys/net/%s/conf/%s/%s",
235 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
237 if (ret
< 0 || (size_t)ret
>= sizeof(path
))
238 return ret_set_errno(-1, E2BIG
);
240 return lxc_read_file_expect(path
, buf
, 1, "1");
243 struct bridge_vlan_info
{
248 static int lxc_bridge_vlan(unsigned int ifindex
, unsigned short operation
, unsigned short vlan_id
, bool tagged
)
250 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
251 struct nl_handler nlh
;
252 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
254 struct ifinfomsg
*ifi
;
256 unsigned short bridge_flags
= 0;
257 struct bridge_vlan_info vlan_info
;
259 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
263 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
265 return ret_errno(ENOMEM
);
267 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
269 return ret_errno(ENOMEM
);
271 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
272 nlmsg
->nlmsghdr
->nlmsg_type
= operation
;
274 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
276 return ret_errno(ENOMEM
);
277 ifi
->ifi_family
= AF_BRIDGE
;
278 ifi
->ifi_index
= ifindex
;
280 nest
= nla_begin_nested(nlmsg
, IFLA_AF_SPEC
);
282 return ret_errno(ENOMEM
);
284 bridge_flags
|= BRIDGE_FLAGS_MASTER
;
285 if (nla_put_u16(nlmsg
, IFLA_BRIDGE_FLAGS
, bridge_flags
))
286 return ret_errno(ENOMEM
);
288 vlan_info
.vid
= vlan_id
;
291 vlan_info
.flags
= BRIDGE_VLAN_INFO_PVID
| BRIDGE_VLAN_INFO_UNTAGGED
;
293 if (nla_put_buffer(nlmsg
, IFLA_BRIDGE_VLAN_INFO
, &vlan_info
, sizeof(struct bridge_vlan_info
)))
294 return ret_errno(ENOMEM
);
296 nla_end_nested(nlmsg
, nest
);
298 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
301 static int lxc_bridge_vlan_add(unsigned int ifindex
, unsigned short vlan_id
, bool tagged
)
303 return lxc_bridge_vlan(ifindex
, RTM_SETLINK
, vlan_id
, tagged
);
306 static int lxc_bridge_vlan_del(unsigned int ifindex
, unsigned short vlan_id
)
308 return lxc_bridge_vlan(ifindex
, RTM_DELLINK
, vlan_id
, false);
311 static int lxc_bridge_vlan_add_tagged(unsigned int ifindex
, struct lxc_list
*vlan_ids
)
313 struct lxc_list
*iterator
;
316 lxc_list_for_each(iterator
, vlan_ids
) {
317 unsigned short vlan_id
= PTR_TO_USHORT(iterator
->elem
);
319 err
= lxc_bridge_vlan_add(ifindex
, vlan_id
, true);
321 return log_error_errno(-1, -err
, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id
, ifindex
);
327 static int validate_veth(struct lxc_netdev
*netdev
)
329 if (netdev
->priv
.veth_attr
.mode
!= VETH_MODE_BRIDGE
|| is_empty_string(netdev
->link
)) {
330 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
331 if (netdev
->priv
.veth_attr
.vlan_id_set
)
332 return log_error_errno(-1, EINVAL
, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
334 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
335 if (lxc_list_len(&netdev
->priv
.veth_attr
.vlan_tagged_ids
) > 0)
336 return log_error_errno(-1, EINVAL
, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
339 if (netdev
->priv
.veth_attr
.vlan_id_set
) {
341 lxc_list_for_each(it
, &netdev
->priv
.veth_attr
.vlan_tagged_ids
) {
342 unsigned short i
= PTR_TO_USHORT(it
->elem
);
343 if (i
== netdev
->priv
.veth_attr
.vlan_id
)
344 return log_error_errno(-1, EINVAL
, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev
->priv
.veth_attr
.vlan_id
);
351 static int setup_veth_native_bridge_vlan(char *veth1
, struct lxc_netdev
*netdev
)
353 int err
, rc
, veth1index
;
354 char path
[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ
+ 1];
355 char buf
[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
357 /* Skip setup if no VLAN options are specified. */
358 if (!netdev
->priv
.veth_attr
.vlan_id_set
&& lxc_list_len(&netdev
->priv
.veth_attr
.vlan_tagged_ids
) <= 0)
361 /* Check vlan filtering is enabled on parent bridge. */
362 rc
= snprintf(path
, sizeof(path
), "/sys/class/net/%s/bridge/vlan_filtering", netdev
->link
);
363 if (rc
< 0 || (size_t)rc
>= sizeof(path
))
366 rc
= lxc_read_from_file(path
, buf
, sizeof(buf
));
368 return log_error_errno(rc
, errno
, "Failed reading from \"%s\"", path
);
372 if (strcmp(buf
, "1") != 0)
373 return log_error_errno(-1, EPERM
, "vlan_filtering is not enabled on \"%s\"", netdev
->link
);
375 /* Get veth1 ifindex for use with netlink. */
376 veth1index
= if_nametoindex(veth1
);
378 return log_error_errno(-1, errno
, "Failed getting ifindex of \"%s\"", netdev
->link
);
380 /* Configure untagged VLAN settings on bridge port if specified. */
381 if (netdev
->priv
.veth_attr
.vlan_id_set
) {
382 unsigned short default_pvid
;
384 /* Get the bridge's default VLAN PVID. */
385 rc
= snprintf(path
, sizeof(path
), "/sys/class/net/%s/bridge/default_pvid", netdev
->link
);
386 if (rc
< 0 || (size_t)rc
>= sizeof(path
))
389 rc
= lxc_read_from_file(path
, buf
, sizeof(buf
));
391 return log_error_errno(rc
, errno
, "Failed reading from \"%s\"", path
);
394 err
= get_u16(&default_pvid
, buf
, 0);
396 return log_error_errno(-1, EINVAL
, "Failed parsing default_pvid of \"%s\"", netdev
->link
);
398 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
399 if (default_pvid
!= netdev
->priv
.veth_attr
.vlan_id
) {
400 err
= lxc_bridge_vlan_del(veth1index
, default_pvid
);
402 return log_error_errno(err
, errno
, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid
, veth1
);
405 if (netdev
->priv
.veth_attr
.vlan_id
> BRIDGE_VLAN_NONE
) {
406 err
= lxc_bridge_vlan_add(veth1index
, netdev
->priv
.veth_attr
.vlan_id
, false);
408 return log_error_errno(err
, errno
, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev
->priv
.veth_attr
.vlan_id
, veth1
);
412 /* Configure tagged VLAN settings on bridge port if specified. */
413 err
= lxc_bridge_vlan_add_tagged(veth1index
, &netdev
->priv
.veth_attr
.vlan_tagged_ids
);
415 return log_error_errno(err
, errno
, "Failed to add tagged vlans on \"%s\"", veth1
);
/*
 * Argument bundle handed to the ovs-vsctl helper that configures the VLAN
 * settings of an openvswitch port.
 */
struct ovs_veth_vlan_args {
	const char *nic;       /* Port name passed to "ovs-vsctl set port". */
	const char *vlan_mode; /* Port VLAN mode. */
	short vlan_id;         /* PVID VLAN ID. */
	char *trunks;          /* Comma delimited list of tagged VLAN IDs. */
};
427 static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args
*args
)
429 free_disarm(args
->trunks
);
432 static int lxc_ovs_setup_bridge_vlan_exec(void *data
)
434 struct ovs_veth_vlan_args
*args
= data
;
435 __do_free
char *vlan_mode
= NULL
, *tag
= NULL
, *trunks
= NULL
;
437 if (!args
->vlan_mode
)
438 return ret_errno(EINVAL
);
440 vlan_mode
= must_concat(NULL
, "vlan_mode=", args
->vlan_mode
, (char *)NULL
);
442 if (args
->vlan_id
> BRIDGE_VLAN_NONE
) {
446 rc
= snprintf(buf
, sizeof(buf
), "%u", args
->vlan_id
);
447 if (rc
< 0 || (size_t)rc
>= sizeof(buf
))
448 return log_error_errno(-1, EINVAL
, "Failed to parse ovs bridge vlan \"%d\"", args
->vlan_id
);
450 tag
= must_concat(NULL
, "tag=", buf
, (char *)NULL
);
454 trunks
= must_concat(NULL
, "trunks=", args
->trunks
, (char *)NULL
);
456 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
458 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args
->nic
, vlan_mode
, tag
, trunks
, (char *)NULL
);
460 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args
->nic
, vlan_mode
, tag
, (char *)NULL
);
462 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args
->nic
, vlan_mode
, trunks
, (char *)NULL
);
469 static int setup_veth_ovs_bridge_vlan(char *veth1
, struct lxc_netdev
*netdev
)
471 int taggedLength
= lxc_list_len(&netdev
->priv
.veth_attr
.vlan_tagged_ids
);
472 struct ovs_veth_vlan_args args
;
474 args
.vlan_mode
= NULL
;
475 args
.vlan_id
= BRIDGE_VLAN_NONE
;
478 /* Skip setup if no VLAN options are specified. */
479 if (!netdev
->priv
.veth_attr
.vlan_id_set
&& taggedLength
<= 0)
482 /* Configure untagged VLAN settings on bridge port if specified. */
483 if (netdev
->priv
.veth_attr
.vlan_id_set
) {
484 if (netdev
->priv
.veth_attr
.vlan_id
== BRIDGE_VLAN_NONE
&& taggedLength
<= 0)
485 return log_error_errno(-1, EINVAL
, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
487 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
488 * Also set the vlan_mode=access, which will drop any tagged frames.
489 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
490 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
492 if (netdev
->priv
.veth_attr
.vlan_id
> BRIDGE_VLAN_NONE
) {
493 args
.vlan_mode
= "access";
494 args
.vlan_id
= netdev
->priv
.veth_attr
.vlan_id
;
498 if (taggedLength
> 0) {
499 args
.vlan_mode
= "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
501 if (netdev
->priv
.veth_attr
.vlan_id
> BRIDGE_VLAN_NONE
) {
502 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
503 args
.vlan_mode
= "native-untagged";
506 struct lxc_list
*iterator
;
507 lxc_list_for_each(iterator
, &netdev
->priv
.veth_attr
.vlan_tagged_ids
) {
508 unsigned short vlan_id
= PTR_TO_USHORT(iterator
->elem
);
509 char buf
[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
512 rc
= snprintf(buf
, sizeof(buf
), "%u", vlan_id
);
513 if (rc
< 0 || (size_t)rc
>= sizeof(buf
)) {
514 free_ovs_veth_vlan_args(&args
);
515 return log_error_errno(-1, EINVAL
, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id
, veth1
);
519 args
.trunks
= must_concat(NULL
, args
.trunks
, buf
, ",", (char *)NULL
);
521 args
.trunks
= must_concat(NULL
, buf
, ",", (char *)NULL
);
525 if (args
.vlan_mode
) {
527 char cmd_output
[PATH_MAX
];
529 ret
= run_command(cmd_output
, sizeof(cmd_output
), lxc_ovs_setup_bridge_vlan_exec
, (void *)&args
);
531 free_ovs_veth_vlan_args(&args
);
532 return log_error_errno(-1, ret
, "Failed to setup openvswitch vlan on port \"%s\": %s", args
.nic
, cmd_output
);
536 free_ovs_veth_vlan_args(&args
);
540 static int instantiate_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
543 unsigned int mtu
= 1500;
545 char veth1buf
[IFNAMSIZ
], veth2buf
[IFNAMSIZ
];
547 err
= validate_veth(netdev
);
551 if (!is_empty_string(netdev
->priv
.veth_attr
.pair
)) {
552 veth1
= netdev
->priv
.veth_attr
.pair
;
553 if (handler
->conf
->reboot
)
554 lxc_netdev_delete_by_name(veth1
);
556 err
= snprintf(veth1buf
, sizeof(veth1buf
), "vethXXXXXX");
557 if (err
< 0 || (size_t)err
>= sizeof(veth1buf
))
560 veth1
= lxc_ifname_alnum_case_sensitive(veth1buf
);
564 /* store away for deconf */
565 memcpy(netdev
->priv
.veth_attr
.veth1
, veth1
, IFNAMSIZ
);
568 err
= snprintf(veth2buf
, sizeof(veth2buf
), "vethXXXXXX");
569 if (err
< 0 || (size_t)err
>= sizeof(veth2buf
))
572 veth2
= lxc_ifname_alnum_case_sensitive(veth2buf
);
576 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
578 if (lxc_safe_uint(netdev
->mtu
, &mtu
))
579 return log_error_errno(-1, errno
, "Failed to parse mtu");
580 } else if (!is_empty_string(netdev
->link
)) {
583 ifindex_mtu
= if_nametoindex(netdev
->link
);
585 mtu
= netdev_get_mtu(ifindex_mtu
);
586 INFO("Retrieved mtu %d from %s", mtu
, netdev
->link
);
590 err
= lxc_veth_create(veth1
, veth2
, handler
->pid
, mtu
);
592 return log_error_errno(-1, -err
, "Failed to create veth pair \"%s\" and \"%s\"", veth1
, veth2
);
594 strlcpy(netdev
->created_name
, veth2
, IFNAMSIZ
);
596 /* changing the high byte of the mac address to 0xfe, the bridge interface
597 * will always keep the host's mac address and not take the mac address
599 err
= setup_private_host_hw_addr(veth1
);
602 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1
);
606 /* Retrieve ifindex of the host's veth device. */
607 netdev
->priv
.veth_attr
.ifindex
= if_nametoindex(veth1
);
608 if (!netdev
->priv
.veth_attr
.ifindex
) {
609 ERROR("Failed to retrieve ifindex for \"%s\"", veth1
);
614 err
= lxc_netdev_set_mtu(veth1
, mtu
);
617 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu
, veth1
);
622 if (!is_empty_string(netdev
->link
) && netdev
->priv
.veth_attr
.mode
== VETH_MODE_BRIDGE
) {
623 if (!lxc_nic_exists(netdev
->link
)) {
624 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1
, netdev
->link
);
628 err
= lxc_bridge_attach(netdev
->link
, veth1
);
631 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1
, netdev
->link
);
634 INFO("Attached \"%s\" to bridge \"%s\"", veth1
, netdev
->link
);
636 if (is_ovs_bridge(netdev
->link
)) {
637 err
= setup_veth_ovs_bridge_vlan(veth1
, netdev
);
639 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1
);
640 lxc_ovs_delete_port(netdev
->link
, veth1
);
644 err
= setup_veth_native_bridge_vlan(veth1
, netdev
);
646 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1
);
652 err
= lxc_netdev_up(veth1
);
655 SYSERROR("Failed to set \"%s\" up", veth1
);
659 /* setup ipv4 routes on the host interface */
660 if (lxc_setup_ipv4_routes(&netdev
->priv
.veth_attr
.ipv4_routes
, netdev
->priv
.veth_attr
.ifindex
)) {
661 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1
);
665 /* setup ipv6 routes on the host interface */
666 if (lxc_setup_ipv6_routes(&netdev
->priv
.veth_attr
.ipv6_routes
, netdev
->priv
.veth_attr
.ifindex
)) {
667 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1
);
671 if (netdev
->priv
.veth_attr
.mode
== VETH_MODE_ROUTER
) {
672 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
673 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
674 appear to be added successfully but then do not appear in the proxy list. The length of time
675 slept doesn't appear to be important, only that the process sleeps for a short period of time.
677 nanosleep((const struct timespec
[]){{0, 1000}}, NULL
);
679 if (netdev
->ipv4_gateway
) {
680 char bufinet4
[INET_ADDRSTRLEN
];
681 if (!inet_ntop(AF_INET
, netdev
->ipv4_gateway
, bufinet4
, sizeof(bufinet4
))) {
682 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1
);
686 err
= lxc_ip_forwarding_on(veth1
, AF_INET
);
688 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1
);
692 err
= lxc_ip_neigh_proxy(RTM_NEWNEIGH
, AF_INET
, netdev
->priv
.veth_attr
.ifindex
, netdev
->ipv4_gateway
);
694 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1
);
699 if (netdev
->ipv6_gateway
) {
700 char bufinet6
[INET6_ADDRSTRLEN
];
702 if (!inet_ntop(AF_INET6
, netdev
->ipv6_gateway
, bufinet6
, sizeof(bufinet6
))) {
703 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1
);
707 /* Check for sysctl net.ipv6.conf.all.forwarding=1
708 Kernel requires this to route any packets for IPv6.
710 err
= lxc_is_ip_forwarding_enabled("all", AF_INET6
);
712 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
716 err
= lxc_ip_forwarding_on(veth1
, AF_INET6
);
718 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1
);
722 err
= lxc_neigh_proxy_on(veth1
, AF_INET6
);
724 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1
);
728 err
= lxc_ip_neigh_proxy(RTM_NEWNEIGH
, AF_INET6
, netdev
->priv
.veth_attr
.ifindex
, netdev
->ipv6_gateway
);
730 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1
);
735 /* setup ipv4 address routes on the host interface */
736 err
= setup_ipv4_addr_routes(&netdev
->ipv4
, netdev
->priv
.veth_attr
.ifindex
);
738 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1
);
742 /* setup ipv6 address routes on the host interface */
743 err
= setup_ipv6_addr_routes(&netdev
->ipv6
, netdev
->priv
.veth_attr
.ifindex
);
745 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1
);
750 if (netdev
->upscript
) {
758 err
= run_script_argv(handler
->name
,
759 handler
->conf
->hooks_version
, "net",
760 netdev
->upscript
, "up", argv
);
765 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1
, veth2
);
770 lxc_netdev_delete_by_name(veth1
);
774 static int instantiate_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
779 if (is_empty_string(netdev
->link
)) {
780 ERROR("No link for macvlan network device specified");
784 err
= snprintf(peer
, sizeof(peer
), "mcXXXXXX");
785 if (err
< 0 || (size_t)err
>= sizeof(peer
))
788 if (!lxc_ifname_alnum_case_sensitive(peer
))
791 err
= lxc_macvlan_create(netdev
->link
, peer
,
792 netdev
->priv
.macvlan_attr
.mode
);
795 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
800 strlcpy(netdev
->created_name
, peer
, IFNAMSIZ
);
802 netdev
->ifindex
= if_nametoindex(peer
);
803 if (!netdev
->ifindex
) {
804 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
811 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
814 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
818 err
= lxc_netdev_set_mtu(peer
, mtu
);
821 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
826 if (netdev
->upscript
) {
833 err
= run_script_argv(handler
->name
,
834 handler
->conf
->hooks_version
, "net",
835 netdev
->upscript
, "up", argv
);
840 DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
841 peer
, netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
846 lxc_netdev_delete_by_name(peer
);
850 static int lxc_ipvlan_create(const char *parent
, const char *name
, int mode
, int isolation
)
852 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
853 struct nl_handler nlh
;
854 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
856 struct ifinfomsg
*ifi
;
857 struct rtattr
*nest
, *nest2
;
859 len
= strlen(parent
);
860 if (len
== 1 || len
>= IFNAMSIZ
)
861 return ret_errno(EINVAL
);
864 if (len
== 1 || len
>= IFNAMSIZ
)
865 return ret_errno(EINVAL
);
867 index
= if_nametoindex(parent
);
869 return ret_errno(EINVAL
);
871 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
875 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
877 return ret_errno(ENOMEM
);
879 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
881 return ret_errno(ENOMEM
);
883 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
884 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
886 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
888 return ret_errno(ENOMEM
);
889 ifi
->ifi_family
= AF_UNSPEC
;
891 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
893 return ret_errno(EPROTO
);
895 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "ipvlan"))
896 return ret_errno(EPROTO
);
898 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
900 return ret_errno(EPROTO
);
902 if (nla_put_u16(nlmsg
, IFLA_IPVLAN_MODE
, mode
))
903 return ret_errno(EPROTO
);
905 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
906 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
907 * according to ipvlan docs.
909 if (isolation
> 0 && nla_put_u16(nlmsg
, IFLA_IPVLAN_ISOLATION
, isolation
))
910 return ret_errno(EPROTO
);
912 nla_end_nested(nlmsg
, nest2
);
913 nla_end_nested(nlmsg
, nest
);
915 if (nla_put_u32(nlmsg
, IFLA_LINK
, index
))
916 return ret_errno(EPROTO
);
918 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
919 return ret_errno(EPROTO
);
921 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
924 static int instantiate_ipvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
929 if (is_empty_string(netdev
->link
)) {
930 ERROR("No link for ipvlan network device specified");
934 err
= snprintf(peer
, sizeof(peer
), "ipXXXXXX");
935 if (err
< 0 || (size_t)err
>= sizeof(peer
))
938 if (!lxc_ifname_alnum_case_sensitive(peer
))
941 err
= lxc_ipvlan_create(netdev
->link
, peer
, netdev
->priv
.ipvlan_attr
.mode
,
942 netdev
->priv
.ipvlan_attr
.isolation
);
944 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
949 strlcpy(netdev
->created_name
, peer
, IFNAMSIZ
);
951 netdev
->ifindex
= if_nametoindex(peer
);
952 if (!netdev
->ifindex
) {
953 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
960 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
963 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
967 err
= lxc_netdev_set_mtu(peer
, mtu
);
970 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
975 if (netdev
->upscript
) {
982 err
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
983 "net", netdev
->upscript
, "up", argv
);
988 DEBUG("Instantiated ipvlan \"%s\" with ifindex is %d and mode %d", peer
,
989 netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
994 lxc_netdev_delete_by_name(peer
);
998 static int instantiate_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1000 char peer
[IFNAMSIZ
];
1002 static uint16_t vlan_cntr
= 0;
1004 if (is_empty_string(netdev
->link
)) {
1005 ERROR("No link for vlan network device specified");
1009 err
= snprintf(peer
, sizeof(peer
), "vlan%d-%d",
1010 netdev
->priv
.vlan_attr
.vid
, vlan_cntr
++);
1011 if (err
< 0 || (size_t)err
>= sizeof(peer
))
1014 err
= lxc_vlan_create(netdev
->link
, peer
, netdev
->priv
.vlan_attr
.vid
);
1017 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1018 peer
, netdev
->link
);
1022 strlcpy(netdev
->created_name
, peer
, IFNAMSIZ
);
1024 netdev
->ifindex
= if_nametoindex(peer
);
1025 if (!netdev
->ifindex
) {
1026 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
1033 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
1036 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
1040 err
= lxc_netdev_set_mtu(peer
, mtu
);
1043 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
1048 if (netdev
->upscript
) {
1055 err
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1056 "net", netdev
->upscript
, "up", argv
);
1062 DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\"", peer
,
1068 lxc_netdev_delete_by_name(peer
);
1072 static int instantiate_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1074 int err
, mtu_orig
= 0;
1076 if (is_empty_string(netdev
->link
))
1077 return log_error_errno(-1, errno
, "No link for physical interface specified");
1080 * Note that we're retrieving the container's ifindex in the host's
1081 * network namespace because we need it to move the device from the
1082 * host's network namespace to the container's network namespace later
1084 * Note that netdev->link will contain the name of the physical network
1085 * device in the host's namespace.
1087 netdev
->ifindex
= if_nametoindex(netdev
->link
);
1088 if (!netdev
->ifindex
)
1089 return log_error_errno(-1, errno
, "Failed to retrieve ifindex for \"%s\"", netdev
->link
);
1091 strlcpy(netdev
->created_name
, netdev
->link
, IFNAMSIZ
);
1092 if (is_empty_string(netdev
->name
))
1093 (void)strlcpy(netdev
->name
, netdev
->link
, IFNAMSIZ
);
1096 * Store the ifindex of the host's network device in the host's
1099 netdev
->priv
.phys_attr
.ifindex
= netdev
->ifindex
;
1102 * Get original device MTU setting and store for restoration after
1103 * container shutdown.
1105 mtu_orig
= netdev_get_mtu(netdev
->ifindex
);
1107 return log_error_errno(-1, -mtu_orig
, "Failed to get original mtu for interface \"%s\"", netdev
->link
);
1109 netdev
->priv
.phys_attr
.mtu
= mtu_orig
;
1114 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
1116 return log_error_errno(-1, -err
, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, netdev
->link
);
1118 err
= lxc_netdev_set_mtu(netdev
->link
, mtu
);
1120 return log_error_errno(-1, -err
, "Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, netdev
->link
);
1123 if (netdev
->upscript
) {
1130 err
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1131 "net", netdev
->upscript
, "up", argv
);
1136 DEBUG("Instantiated phys \"%s\" with ifindex is \"%d\"", netdev
->link
,
1142 static int instantiate_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1150 netdev
->ifindex
= 0;
1151 if (!netdev
->upscript
)
1154 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1155 "net", netdev
->upscript
, "up", argv
);
1162 static int instantiate_none(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1164 netdev
->ifindex
= 0;
1168 static instantiate_cb netdev_conf
[LXC_NET_MAXCONFTYPE
+ 1] = {
1169 [LXC_NET_VETH
] = instantiate_veth
,
1170 [LXC_NET_MACVLAN
] = instantiate_macvlan
,
1171 [LXC_NET_IPVLAN
] = instantiate_ipvlan
,
1172 [LXC_NET_VLAN
] = instantiate_vlan
,
1173 [LXC_NET_PHYS
] = instantiate_phys
,
1174 [LXC_NET_EMPTY
] = instantiate_empty
,
1175 [LXC_NET_NONE
] = instantiate_none
,
1178 static int __instantiate_ns_common(struct lxc_netdev
*netdev
)
1180 char current_ifname
[IFNAMSIZ
];
1182 netdev
->ifindex
= if_nametoindex(netdev
->created_name
);
1183 if (!netdev
->ifindex
)
1184 return log_error_errno(-1,
1185 errno
, "Failed to retrieve ifindex for network device with name %s",
1186 netdev
->created_name
);
1188 if (is_empty_string(netdev
->name
))
1189 (void)strlcpy(netdev
->name
, "eth%d", IFNAMSIZ
);
1191 if (strcmp(netdev
->created_name
, netdev
->name
) != 0) {
1194 ret
= lxc_netdev_rename_by_name(netdev
->created_name
, netdev
->name
);
1196 return log_error_errno(-1, -ret
, "Failed to rename network device \"%s\" to \"%s\"",
1197 netdev
->created_name
,
1200 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev
->created_name
, netdev
->name
);
1204 * Re-read the name of the interface because its name has changed and
1205 * would be automatically allocated by the system
1207 if (!if_indextoname(netdev
->ifindex
, current_ifname
))
1208 return log_error_errno(-1, errno
, "Failed get name for network device with ifindex %d", netdev
->ifindex
);
1211 * Now update the recorded name of the network device to reflect the
1212 * name of the network device in the child's network namespace. We will
1213 * later on send this information back to the parent.
1215 (void)strlcpy(netdev
->name
, current_ifname
, IFNAMSIZ
);
/* instantiate_ns_cb for LXC_NET_VETH: delegates to __instantiate_ns_common(). */
static int instantiate_ns_veth(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
/* instantiate_ns_cb for LXC_NET_MACVLAN: delegates to __instantiate_ns_common(). */
static int instantiate_ns_macvlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
/* instantiate_ns_cb for LXC_NET_IPVLAN: delegates to __instantiate_ns_common(). */
static int instantiate_ns_ipvlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
/* instantiate_ns_cb for LXC_NET_VLAN: delegates to __instantiate_ns_common(). */
static int instantiate_ns_vlan(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
/* instantiate_ns_cb for LXC_NET_PHYS: delegates to __instantiate_ns_common(). */
static int instantiate_ns_phys(struct lxc_netdev *netdev)
{
	return __instantiate_ns_common(netdev);
}
/* "empty" networks have no device to set up; always succeeds. */
static int instantiate_ns_empty(struct lxc_netdev *netdev)
{
	return 0;
}
/* "none" networks have no device to set up; always succeeds. */
static int instantiate_ns_none(struct lxc_netdev *netdev)
{
	return 0;
}
1256 static instantiate_ns_cb netdev_ns_conf
[LXC_NET_MAXCONFTYPE
+ 1] = {
1257 [LXC_NET_VETH
] = instantiate_ns_veth
,
1258 [LXC_NET_MACVLAN
] = instantiate_ns_macvlan
,
1259 [LXC_NET_IPVLAN
] = instantiate_ns_ipvlan
,
1260 [LXC_NET_VLAN
] = instantiate_ns_vlan
,
1261 [LXC_NET_PHYS
] = instantiate_ns_phys
,
1262 [LXC_NET_EMPTY
] = instantiate_ns_empty
,
1263 [LXC_NET_NONE
] = instantiate_ns_none
,
1266 static int shutdown_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1276 if (!netdev
->downscript
)
1279 if (!is_empty_string(netdev
->priv
.veth_attr
.pair
))
1280 argv
[2] = netdev
->priv
.veth_attr
.pair
;
1282 argv
[2] = netdev
->priv
.veth_attr
.veth1
;
1284 ret
= run_script_argv(handler
->name
,
1285 handler
->conf
->hooks_version
, "net",
1286 netdev
->downscript
, "down", argv
);
1293 static int shutdown_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1302 if (!netdev
->downscript
)
1305 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1306 "net", netdev
->downscript
, "down", argv
);
1313 static int shutdown_ipvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1322 if (!netdev
->downscript
)
1325 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1326 "net", netdev
->downscript
, "down", argv
);
1333 static int shutdown_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1342 if (!netdev
->downscript
)
1345 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1346 "net", netdev
->downscript
, "down", argv
);
1353 static int shutdown_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1362 if (!netdev
->downscript
)
1365 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1366 "net", netdev
->downscript
, "down", argv
);
1373 static int shutdown_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1381 if (!netdev
->downscript
)
1384 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1385 "net", netdev
->downscript
, "down", argv
);
/* "none" networks have nothing to tear down; always succeeds. */
static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
1397 static instantiate_cb netdev_deconf
[LXC_NET_MAXCONFTYPE
+ 1] = {
1398 [LXC_NET_VETH
] = shutdown_veth
,
1399 [LXC_NET_MACVLAN
] = shutdown_macvlan
,
1400 [LXC_NET_IPVLAN
] = shutdown_ipvlan
,
1401 [LXC_NET_VLAN
] = shutdown_vlan
,
1402 [LXC_NET_PHYS
] = shutdown_phys
,
1403 [LXC_NET_EMPTY
] = shutdown_empty
,
1404 [LXC_NET_NONE
] = shutdown_none
,
1407 static int lxc_netdev_move_by_index_fd(int ifindex
, int fd
, const char *ifname
)
1409 call_cleaner(nlmsg_free
) struct nlmsg
*nlmsg
= NULL
;
1410 struct nl_handler nlh
;
1411 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1413 struct ifinfomsg
*ifi
;
1415 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1419 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1421 return ret_errno(ENOMEM
);
1423 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
1424 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1426 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1428 return ret_errno(ENOMEM
);
1430 ifi
->ifi_family
= AF_UNSPEC
;
1431 ifi
->ifi_index
= ifindex
;
1433 if (nla_put_u32(nlmsg
, IFLA_NET_NS_FD
, fd
))
1434 return ret_errno(ENOMEM
);
1436 if (!is_empty_string(ifname
) && nla_put_string(nlmsg
, IFLA_IFNAME
, ifname
))
1437 return ret_errno(ENOMEM
);
1439 return netlink_transaction(nlh_ptr
, nlmsg
, nlmsg
);
1442 int lxc_netdev_move_by_index(int ifindex
, pid_t pid
, const char *ifname
)
1444 call_cleaner(nlmsg_free
) struct nlmsg
*nlmsg
= NULL
;
1445 struct nl_handler nlh
;
1446 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1448 struct ifinfomsg
*ifi
;
1450 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1454 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1456 return ret_errno(ENOMEM
);
1458 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
1459 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1461 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1463 return ret_errno(ENOMEM
);
1465 ifi
->ifi_family
= AF_UNSPEC
;
1466 ifi
->ifi_index
= ifindex
;
1468 if (nla_put_u32(nlmsg
, IFLA_NET_NS_PID
, pid
))
1469 return ret_errno(ENOMEM
);
1471 if (!is_empty_string(ifname
) && nla_put_string(nlmsg
, IFLA_IFNAME
, ifname
))
1472 return ret_errno(ENOMEM
);
1474 return netlink_transaction(nlh_ptr
, nlmsg
, nlmsg
);
1477 /* If we are asked to move a wireless interface, then we must actually move its
1478 * phyN device. Detect that condition and return the physname here. The physname
1479 * will be passed to lxc_netdev_move_wlan() which will free it when done.
1481 #define PHYSNAME "/sys/class/net/%s/phy80211/name"
1482 char *is_wlan(const char *ifname
)
1484 __do_fclose
FILE *f
= NULL
;
1485 __do_free
char *path
= NULL
, *physname
= NULL
;
1490 len
= strlen(ifname
) + strlen(PHYSNAME
) - 1;
1491 path
= must_realloc(NULL
, len
+ 1);
1492 ret
= snprintf(path
, len
, PHYSNAME
, ifname
);
1493 if (ret
< 0 || (size_t)ret
>= len
)
1496 f
= fopen(path
, "re");
1500 /* Feh - sb.st_size is always 4096. */
1501 fseek(f
, 0, SEEK_END
);
1503 fseek(f
, 0, SEEK_SET
);
1507 physname
= malloc(physlen
+ 1);
1511 memset(physname
, 0, physlen
+ 1);
1512 ret
= fread(physname
, 1, physlen
, f
);
1516 for (i
= 0; i
< physlen
; i
++) {
1517 if (physname
[i
] == '\n')
1520 if (physname
[i
] == '\0')
1524 return move_ptr(physname
);
/*
 * Rename network device @old to @new from within the network namespace of
 * process @pid. The rename runs in a forked child that first switches to
 * that namespace.
 *
 * Returns -1 if fork() fails, otherwise the result of wait_for_pid() on the
 * child.
 */
static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
					      const char *new)
{
	pid_t fpid;

	fpid = fork();
	if (fpid < 0)
		return -1;

	if (fpid != 0)
		return wait_for_pid(fpid);

	/*
	 * Child from here on: it must terminate via _exit() on every path so
	 * that it never continues executing the caller's code.
	 */
	if (!switch_to_ns(pid, "net"))
		_exit(EXIT_FAILURE);

	_exit(lxc_netdev_rename_by_name(old, new) ? EXIT_FAILURE : EXIT_SUCCESS);
}
1545 int lxc_netdev_move_wlan(char *physname
, const char *ifname
, pid_t pid
,
1546 const char *newname
)
1548 __do_free
char *cmd
= NULL
;
1551 /* Move phyN into the container. TODO - do this using netlink.
1552 * However, IIUC this involves a bit more complicated work to talk to
1553 * the 80211 module, so for now just call out to iw.
1555 cmd
= on_path("iw", NULL
);
1565 sprintf(pidstr
, "%d", pid
);
1566 execlp("iw", "iw", "phy", physname
, "set", "netns", pidstr
, (char *)NULL
);
1567 _exit(EXIT_FAILURE
);
1570 if (wait_for_pid(fpid
))
1574 return lxc_netdev_rename_by_name_in_netns(pid
, ifname
, newname
);
1579 int lxc_netdev_move_by_name(const char *ifname
, pid_t pid
, const char* newname
)
1581 __do_free
char *physname
= NULL
;
1587 index
= if_nametoindex(ifname
);
1591 physname
= is_wlan(ifname
);
1593 return lxc_netdev_move_wlan(physname
, ifname
, pid
, newname
);
1595 return lxc_netdev_move_by_index(index
, pid
, newname
);
1598 int lxc_netdev_delete_by_index(int ifindex
)
1600 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1601 struct nl_handler nlh
;
1602 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1604 struct ifinfomsg
*ifi
;
1606 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1610 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1612 return ret_errno(ENOMEM
);
1614 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1616 return ret_errno(ENOMEM
);
1618 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
;
1619 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_DELLINK
;
1621 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1623 return ret_errno(ENOMEM
);
1625 ifi
->ifi_family
= AF_UNSPEC
;
1626 ifi
->ifi_index
= ifindex
;
1628 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
/*
 * Delete the network device called @name.
 *
 * Returns -EINVAL if @name cannot be resolved to an interface index,
 * otherwise the result of lxc_netdev_delete_by_index().
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int ifindex = if_nametoindex(name);

	if (ifindex == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(ifindex);
}
1642 int lxc_netdev_rename_by_index(int ifindex
, const char *newname
)
1644 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1645 struct nl_handler nlh
;
1646 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1648 struct ifinfomsg
*ifi
;
1650 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1654 len
= strlen(newname
);
1655 if (len
== 1 || len
>= IFNAMSIZ
)
1656 return ret_errno(EINVAL
);
1658 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1660 return ret_errno(ENOMEM
);
1662 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1664 return ret_errno(ENOMEM
);
1666 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
;
1667 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1669 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1671 return ret_errno(ENOMEM
);
1673 ifi
->ifi_family
= AF_UNSPEC
;
1674 ifi
->ifi_index
= ifindex
;
1676 if (nla_put_string(nlmsg
, IFLA_IFNAME
, newname
))
1677 return ret_errno(ENOMEM
);
1679 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
1682 int lxc_netdev_rename_by_name(const char *oldname
, const char *newname
)
1686 len
= strlen(oldname
);
1687 if (len
== 1 || len
>= IFNAMSIZ
)
1690 index
= if_nametoindex(oldname
);
1694 return lxc_netdev_rename_by_index(index
, newname
);
1697 int netdev_set_flag(const char *name
, int flag
)
1699 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1700 struct nl_handler nlh
;
1701 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1702 int err
, index
, len
;
1703 struct ifinfomsg
*ifi
;
1705 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1710 if (len
== 1 || len
>= IFNAMSIZ
)
1711 return ret_errno(EINVAL
);
1713 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1715 return ret_errno(ENOMEM
);
1717 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1719 return ret_errno(ENOMEM
);
1721 index
= if_nametoindex(name
);
1723 return ret_errno(EINVAL
);
1725 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
1726 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1728 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1730 return ret_errno(ENOMEM
);
1732 ifi
->ifi_family
= AF_UNSPEC
;
1733 ifi
->ifi_index
= index
;
1734 ifi
->ifi_change
|= IFF_UP
;
1735 ifi
->ifi_flags
|= flag
;
1737 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
1740 static int netdev_get_flag(const char *name
, int *flag
)
1742 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1743 struct nl_handler nlh
;
1744 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1745 int err
, index
, len
;
1746 struct ifinfomsg
*ifi
;
1749 return ret_errno(EINVAL
);
1751 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1756 if (len
== 1 || len
>= IFNAMSIZ
)
1757 return ret_errno(EINVAL
);
1759 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1761 return ret_errno(ENOMEM
);
1763 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1765 return ret_errno(ENOMEM
);
1767 index
= if_nametoindex(name
);
1769 return ret_errno(EINVAL
);
1771 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
;
1772 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETLINK
;
1774 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1776 return ret_errno(ENOMEM
);
1778 ifi
->ifi_family
= AF_UNSPEC
;
1779 ifi
->ifi_index
= index
;
1781 err
= netlink_transaction(nlh_ptr
, nlmsg
, answer
);
1783 return ret_set_errno(-1, errno
);
1785 ifi
= NLMSG_DATA(answer
->nlmsghdr
);
1787 *flag
= ifi
->ifi_flags
;
1792 * \brief Check a interface is up or not.
1794 * \param name: name for the interface.
1797 * 0 means interface is down.
1798 * 1 means interface is up.
1799 * Others means error happened, and ret-value is the error number.
1801 int lxc_netdev_isup(const char *name
)
1805 err
= netdev_get_flag(name
, &flag
);
1815 int netdev_get_mtu(int ifindex
)
1817 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1818 struct nl_handler nlh
;
1819 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1820 int readmore
= 0, recv_len
= 0;
1821 int answer_len
, err
, res
;
1822 struct ifinfomsg
*ifi
;
1823 struct nlmsghdr
*msg
;
1825 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1829 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1831 return ret_errno(ENOMEM
);
1833 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1835 return ret_errno(ENOMEM
);
1837 /* Save the answer buffer length, since it will be overwritten
1838 * on the first receive (and we might need to receive more than
1841 answer_len
= answer
->nlmsghdr
->nlmsg_len
;
1843 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
1844 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETLINK
;
1846 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1848 return ret_errno(ENOMEM
);
1850 ifi
->ifi_family
= AF_UNSPEC
;
1852 /* Send the request for addresses, which returns all addresses
1853 * on all interfaces. */
1854 err
= netlink_send(nlh_ptr
, nlmsg
);
1856 return ret_set_errno(-1, errno
);
1858 #pragma GCC diagnostic push
1859 #pragma GCC diagnostic ignored "-Wcast-align"
1862 /* Restore the answer buffer length, it might have been
1863 * overwritten by a previous receive.
1865 answer
->nlmsghdr
->nlmsg_len
= answer_len
;
1867 /* Get the (next) batch of reply messages */
1868 err
= netlink_rcv(nlh_ptr
, answer
);
1870 return ret_set_errno(-1, errno
);
1874 /* Satisfy the typing for the netlink macros */
1875 msg
= answer
->nlmsghdr
;
1877 while (NLMSG_OK(msg
, recv_len
)) {
1878 /* Stop reading if we see an error message */
1879 if (msg
->nlmsg_type
== NLMSG_ERROR
) {
1880 struct nlmsgerr
*errmsg
= (struct nlmsgerr
*)NLMSG_DATA(msg
);
1881 return ret_set_errno(errmsg
->error
, errno
);
1884 /* Stop reading if we see a NLMSG_DONE message */
1885 if (msg
->nlmsg_type
== NLMSG_DONE
) {
1890 ifi
= NLMSG_DATA(msg
);
1891 if (ifi
->ifi_index
== ifindex
) {
1892 struct rtattr
*rta
= IFLA_RTA(ifi
);
1893 int attr_len
= msg
->nlmsg_len
- NLMSG_LENGTH(sizeof(*ifi
));
1896 while (RTA_OK(rta
, attr_len
)) {
1898 * Found a local address for the
1899 * requested interface, return it.
1901 if (rta
->rta_type
== IFLA_MTU
) {
1902 memcpy(&res
, RTA_DATA(rta
), sizeof(int));
1906 rta
= RTA_NEXT(rta
, attr_len
);
1910 /* Keep reading more data from the socket if the last
1911 * message had the NLF_F_MULTI flag set.
1913 readmore
= (msg
->nlmsg_flags
& NLM_F_MULTI
);
1915 /* Look at the next message received in this buffer. */
1916 msg
= NLMSG_NEXT(msg
, recv_len
);
1920 #pragma GCC diagnostic pop
1922 /* If we end up here, we didn't find any result, so signal an error. */
1926 int lxc_netdev_set_mtu(const char *name
, int mtu
)
1928 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1929 struct nl_handler nlh
;
1930 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1932 struct ifinfomsg
*ifi
;
1934 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1939 if (len
== 1 || len
>= IFNAMSIZ
)
1940 return ret_errno(EINVAL
);
1942 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1944 return ret_errno(ENOMEM
);
1946 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1948 return ret_errno(ENOMEM
);
1950 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
1951 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1953 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1955 return ret_errno(ENOMEM
);
1957 ifi
->ifi_family
= AF_UNSPEC
;
1959 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
1960 return ret_errno(ENOMEM
);
1962 if (nla_put_u32(nlmsg
, IFLA_MTU
, mtu
))
1963 return ret_errno(ENOMEM
);
1965 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
1968 int lxc_netdev_up(const char *name
)
1970 return netdev_set_flag(name
, IFF_UP
);
/* Take the device called @name down by requesting an empty flag set. */
int lxc_netdev_down(const char *name)
{
	return netdev_set_flag(name, 0);
}
1978 int lxc_veth_create(const char *name1
, const char *name2
, pid_t pid
, unsigned int mtu
)
1980 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1981 struct nl_handler nlh
;
1982 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1984 struct ifinfomsg
*ifi
;
1985 struct rtattr
*nest1
, *nest2
, *nest3
;
1987 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1991 len
= strlen(name1
);
1992 if (len
== 1 || len
>= IFNAMSIZ
)
1993 return ret_errno(EINVAL
);
1995 len
= strlen(name2
);
1996 if (len
== 1 || len
>= IFNAMSIZ
)
1997 return ret_errno(EINVAL
);
1999 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2001 return ret_errno(ENOMEM
);
2003 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2005 return ret_errno(ENOMEM
);
2007 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
2008 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
2010 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
2012 return ret_errno(ENOMEM
);
2014 ifi
->ifi_family
= AF_UNSPEC
;
2016 nest1
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
2018 return ret_errno(EINVAL
);
2020 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "veth"))
2021 return ret_errno(ENOMEM
);
2023 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
2025 return ret_errno(ENOMEM
);
2027 nest3
= nla_begin_nested(nlmsg
, VETH_INFO_PEER
);
2029 return ret_errno(ENOMEM
);
2031 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
2033 return ret_errno(ENOMEM
);
2035 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name2
))
2036 return ret_errno(ENOMEM
);
2038 if (mtu
> 0 && nla_put_u32(nlmsg
, IFLA_MTU
, mtu
))
2039 return ret_errno(ENOMEM
);
2041 if (pid
> 0 && nla_put_u32(nlmsg
, IFLA_NET_NS_PID
, pid
))
2042 return ret_errno(ENOMEM
);
2044 nla_end_nested(nlmsg
, nest3
);
2045 nla_end_nested(nlmsg
, nest2
);
2046 nla_end_nested(nlmsg
, nest1
);
2048 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name1
))
2049 return ret_errno(ENOMEM
);
2051 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
2054 /* TODO: merge with lxc_macvlan_create */
2055 int lxc_vlan_create(const char *parent
, const char *name
, unsigned short vlanid
)
2057 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2058 struct nl_handler nlh
;
2059 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2060 int err
, len
, lindex
;
2061 struct ifinfomsg
*ifi
;
2062 struct rtattr
*nest
, *nest2
;
2064 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2068 len
= strlen(parent
);
2069 if (len
== 1 || len
>= IFNAMSIZ
)
2070 return ret_errno(EINVAL
);
2073 if (len
== 1 || len
>= IFNAMSIZ
)
2074 return ret_errno(EINVAL
);
2076 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2078 return ret_errno(ENOMEM
);
2080 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2082 return ret_errno(ENOMEM
);
2084 lindex
= if_nametoindex(parent
);
2086 return ret_errno(EINVAL
);
2088 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
2089 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
2091 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
2093 return ret_errno(ENOMEM
);
2095 ifi
->ifi_family
= AF_UNSPEC
;
2097 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
2099 return ret_errno(ENOMEM
);
2101 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "vlan"))
2102 return ret_errno(ENOMEM
);
2104 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
2106 return ret_errno(ENOMEM
);
2108 if (nla_put_u16(nlmsg
, IFLA_VLAN_ID
, vlanid
))
2109 return ret_errno(ENOMEM
);
2111 nla_end_nested(nlmsg
, nest2
);
2112 nla_end_nested(nlmsg
, nest
);
2114 if (nla_put_u32(nlmsg
, IFLA_LINK
, lindex
))
2115 return ret_errno(ENOMEM
);
2117 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
2118 return ret_errno(ENOMEM
);
2120 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
2123 int lxc_macvlan_create(const char *parent
, const char *name
, int mode
)
2125 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2126 struct nl_handler nlh
;
2127 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2128 int err
, index
, len
;
2129 struct ifinfomsg
*ifi
;
2130 struct rtattr
*nest
, *nest2
;
2132 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2136 len
= strlen(parent
);
2137 if (len
== 1 || len
>= IFNAMSIZ
)
2138 return ret_errno(EINVAL
);
2141 if (len
== 1 || len
>= IFNAMSIZ
)
2142 return ret_errno(EINVAL
);
2144 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2146 return ret_errno(ENOMEM
);
2148 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2150 return ret_errno(ENOMEM
);
2152 index
= if_nametoindex(parent
);
2154 return ret_errno(EINVAL
);
2156 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
2157 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
2159 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
2161 return ret_errno(ENOMEM
);
2163 ifi
->ifi_family
= AF_UNSPEC
;
2165 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
2167 return ret_errno(ENOMEM
);
2169 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "macvlan"))
2170 return ret_errno(ENOMEM
);
2173 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
2175 return ret_errno(ENOMEM
);
2177 if (nla_put_u32(nlmsg
, IFLA_MACVLAN_MODE
, mode
))
2178 return ret_errno(ENOMEM
);
2180 nla_end_nested(nlmsg
, nest2
);
2183 nla_end_nested(nlmsg
, nest
);
2185 if (nla_put_u32(nlmsg
, IFLA_LINK
, index
))
2186 return ret_errno(ENOMEM
);
2188 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
2189 return ret_errno(ENOMEM
);
2191 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
2194 static int proc_sys_net_write(const char *path
, const char *value
)
2199 fd
= open(path
, O_WRONLY
);
2203 if (lxc_write_nointr(fd
, value
, strlen(value
)) < 0)
2210 static int ip_forwarding_set(const char *ifname
, int family
, int flag
)
2213 char path
[PATH_MAX
];
2215 if (family
!= AF_INET
&& family
!= AF_INET6
)
2218 ret
= snprintf(path
, sizeof(path
), "/proc/sys/net/%s/conf/%s/%s",
2219 family
== AF_INET
? "ipv4" : "ipv6", ifname
, "forwarding");
2220 if (ret
< 0 || (size_t)ret
>= sizeof(path
))
2223 return proc_sys_net_write(path
, flag
? "1" : "0");
/* Enable forwarding for @family on the interface called @name. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	return ip_forwarding_set(name, family, 1);
}
/* Disable forwarding for @family on the interface called @name. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	return ip_forwarding_set(name, family, 0);
}
2236 static int neigh_proxy_set(const char *ifname
, int family
, int flag
)
2239 char path
[PATH_MAX
];
2241 if (family
!= AF_INET
&& family
!= AF_INET6
)
2244 ret
= snprintf(path
, sizeof(path
), "/proc/sys/net/%s/conf/%s/%s",
2245 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
2246 family
== AF_INET
? "proxy_arp" : "proxy_ndp");
2247 if (ret
< 0 || (size_t)ret
>= sizeof(path
))
2250 return proc_sys_net_write(path
, flag
? "1" : "0");
2253 static int lxc_is_ip_neigh_proxy_enabled(const char *ifname
, int family
)
2256 char path
[PATH_MAX
];
2259 if (family
!= AF_INET
&& family
!= AF_INET6
)
2260 return ret_set_errno(-1, EINVAL
);
2262 ret
= snprintf(path
, sizeof(path
), "/proc/sys/net/%s/conf/%s/%s",
2263 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
2264 family
== AF_INET
? "proxy_arp" : "proxy_ndp");
2265 if (ret
< 0 || (size_t)ret
>= sizeof(path
))
2266 return ret_set_errno(-1, E2BIG
);
2268 return lxc_read_file_expect(path
, buf
, 1, "1");
/* Enable the neighbour proxy for @family on the interface called @name. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	return neigh_proxy_set(name, family, 1);
}
/* Disable the neighbour proxy for @family on the interface called @name. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	return neigh_proxy_set(name, family, 0);
}
/* Value of one hexadecimal digit, or -1 if @c is not a hex digit. */
static int mac_hex_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';

	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}

/*
 * Parse the textual MAC address @macaddr into @sockaddr.
 *
 * sa_family is set to ARPHRD_ETHER and the first ETH_ALEN bytes of sa_data
 * receive the octets. Each octet consists of one or two hex digits; octets
 * may be separated by a single ':'.
 *
 * Returns 0 on success. If @macaddr does not describe exactly ETH_ALEN
 * octets (invalid character, too few octets, or trailing input), errno is
 * set to EINVAL and -EINVAL is returned; truncated addresses are rejected so
 * that callers never use partially written sa_data.
 */
int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
{
	unsigned char *data;
	int i;

	sockaddr->sa_family = ARPHRD_ETHER;
	data = (unsigned char *)sockaddr->sa_data;

	for (i = 0; i < ETH_ALEN; i++) {
		unsigned int val;
		int digit;

		/* First digit of the octet is mandatory. */
		digit = mac_hex_value(*macaddr);
		if (digit < 0)
			goto invalid;

		val = (unsigned int)digit;
		macaddr++;

		/* Second digit is optional ("a:b:..." is accepted). */
		digit = mac_hex_value(*macaddr);
		if (digit >= 0) {
			val = (val << 4) | (unsigned int)digit;
			macaddr++;
		} else if (*macaddr != ':' && *macaddr != '\0') {
			goto invalid;
		}

		data[i] = (unsigned char)(val & 0377);

		if (*macaddr == ':')
			macaddr++;
	}

	/* Anything left over means more than ETH_ALEN octets or garbage. */
	if (*macaddr != '\0')
		goto invalid;

	return 0;

invalid:
	errno = EINVAL;
	return -EINVAL;
}
2326 static int ip_addr_add(int family
, int ifindex
, void *addr
, void *bcast
,
2327 void *acast
, int prefix
)
2329 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2330 struct nl_handler nlh
;
2331 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2333 struct ifaddrmsg
*ifa
;
2335 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
2336 : sizeof(struct in6_addr
);
2338 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2342 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2344 return ret_errno(ENOMEM
);
2346 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2348 return ret_errno(ENOMEM
);
2350 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
2351 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWADDR
;
2353 ifa
= nlmsg_reserve(nlmsg
, sizeof(struct ifaddrmsg
));
2355 return ret_errno(ENOMEM
);
2357 ifa
->ifa_prefixlen
= prefix
;
2358 ifa
->ifa_index
= ifindex
;
2359 ifa
->ifa_family
= family
;
2362 if (nla_put_buffer(nlmsg
, IFA_LOCAL
, addr
, addrlen
))
2363 return ret_errno(EINVAL
);
2365 if (nla_put_buffer(nlmsg
, IFA_ADDRESS
, addr
, addrlen
))
2366 return ret_errno(EINVAL
);
2368 if (nla_put_buffer(nlmsg
, IFA_BROADCAST
, bcast
, addrlen
))
2369 return ret_errno(EINVAL
);
2371 /* TODO: multicast, anycast with ipv6 */
2372 if (family
== AF_INET6
&&
2373 (memcmp(bcast
, &in6addr_any
, sizeof(in6addr_any
)) ||
2374 memcmp(acast
, &in6addr_any
, sizeof(in6addr_any
))))
2375 return ret_errno(EPROTONOSUPPORT
);
2377 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
/* Add the IPv6 address @addr/@prefix to the device with index @ifindex. */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	return ip_addr_add(AF_INET6, ifindex, addr, mcast, acast, prefix);
}
/*
 * Add the IPv4 address @addr/@prefix with broadcast address @bcast to the
 * device with index @ifindex.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	return ip_addr_add(AF_INET, ifindex, addr, bcast, NULL, prefix);
}
/* Find an IFA_LOCAL (or IFA_ADDRESS if not IFA_LOCAL is present) address from
 * the given RTM_NEWADDR message. Allocates memory for the address and stores
 * that pointer in *res (so res should be an in_addr** or in6_addr**).
 *
 * Returns 0 when the message was processed (including the case where it
 * belongs to another address family and is ignored) and -1 on a payload
 * size mismatch or allocation failure.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	int alen;
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	struct rtattr *attr = IFA_RTA(ifa);
	int remaining = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		alen = sizeof(struct in_addr);
	else
		alen = sizeof(struct in6_addr);

	/* Walk all attributes of this message. */
	for (; RTA_OK(attr, remaining); attr = RTA_NEXT(attr, remaining)) {
		if (attr->rta_type != IFA_LOCAL &&
		    attr->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should guarantee the length, but
		 * verify it before copying.
		 */
		if (RTA_PAYLOAD(attr) != alen)
			return -1;

		/* An earlier IFA_ADDRESS may already have allocated the
		 * buffer; it is then overwritten in place.
		 */
		if (!*res) {
			*res = malloc(alen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(attr), alen);

		/* IFA_LOCAL wins; no need to look any further. */
		if (attr->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2447 static int ip_addr_get(int family
, int ifindex
, void **res
)
2449 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2450 struct nl_handler nlh
;
2451 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2452 int answer_len
, err
;
2453 struct ifaddrmsg
*ifa
;
2454 struct nlmsghdr
*msg
;
2455 int readmore
= 0, recv_len
= 0;
2457 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2461 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2463 return ret_errno(ENOMEM
);
2465 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2467 return ret_errno(ENOMEM
);
2469 /* Save the answer buffer length, since it will be overwritten on the
2470 * first receive (and we might need to receive more than once).
2472 answer_len
= answer
->nlmsghdr
->nlmsg_len
;
2474 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ROOT
;
2475 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETADDR
;
2477 ifa
= nlmsg_reserve(nlmsg
, sizeof(struct ifaddrmsg
));
2479 return ret_errno(ENOMEM
);
2481 ifa
->ifa_family
= family
;
2483 /* Send the request for addresses, which returns all addresses on all
2486 err
= netlink_send(nlh_ptr
, nlmsg
);
2488 return ret_set_errno(err
, errno
);
2490 #pragma GCC diagnostic push
2491 #pragma GCC diagnostic ignored "-Wcast-align"
2494 /* Restore the answer buffer length, it might have been
2495 * overwritten by a previous receive.
2497 answer
->nlmsghdr
->nlmsg_len
= answer_len
;
2499 /* Get the (next) batch of reply messages. */
2500 err
= netlink_rcv(nlh_ptr
, answer
);
2502 return ret_set_errno(err
, errno
);
2507 /* Satisfy the typing for the netlink macros. */
2508 msg
= answer
->nlmsghdr
;
2510 while (NLMSG_OK(msg
, recv_len
)) {
2511 /* Stop reading if we see an error message. */
2512 if (msg
->nlmsg_type
== NLMSG_ERROR
) {
2513 struct nlmsgerr
*errmsg
= (struct nlmsgerr
*)NLMSG_DATA(msg
);
2514 return ret_set_errno(errmsg
->error
, errno
);
2517 /* Stop reading if we see a NLMSG_DONE message. */
2518 if (msg
->nlmsg_type
== NLMSG_DONE
) {
2523 if (msg
->nlmsg_type
!= RTM_NEWADDR
)
2524 return ret_errno(EINVAL
);
2526 ifa
= (struct ifaddrmsg
*)NLMSG_DATA(msg
);
2527 if (ifa
->ifa_index
== ifindex
) {
2528 if (ifa_get_local_ip(family
, msg
, res
) < 0)
2529 return ret_errno(EINVAL
);
2531 /* Found a result, stop searching. */
2536 /* Keep reading more data from the socket if the last
2537 * message had the NLF_F_MULTI flag set.
2539 readmore
= (msg
->nlmsg_flags
& NLM_F_MULTI
);
2541 /* Look at the next message received in this buffer. */
2542 msg
= NLMSG_NEXT(msg
, recv_len
);
2546 #pragma GCC diagnostic pop
2548 /* If we end up here, we didn't find any result, so signal an
/* Fetch an IPv6 address of the device with index @ifindex into *@res. */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	return ip_addr_get(AF_INET6, ifindex, (void **)res);
}
/* Fetch an IPv4 address of the device with index @ifindex into *@res. */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	return ip_addr_get(AF_INET, ifindex, (void **)res);
}
2564 static int ip_gateway_add(int family
, int ifindex
, void *gw
)
2566 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2567 struct nl_handler nlh
;
2568 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2572 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
2573 : sizeof(struct in6_addr
);
2575 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2579 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2581 return ret_errno(ENOMEM
);
2583 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2585 return ret_errno(ENOMEM
);
2587 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
2588 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWROUTE
;
2590 rt
= nlmsg_reserve(nlmsg
, sizeof(struct rtmsg
));
2592 return ret_errno(ENOMEM
);
2594 rt
->rtm_family
= family
;
2595 rt
->rtm_table
= RT_TABLE_MAIN
;
2596 rt
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2597 rt
->rtm_protocol
= RTPROT_BOOT
;
2598 rt
->rtm_type
= RTN_UNICAST
;
2599 /* "default" destination */
2600 rt
->rtm_dst_len
= 0;
2602 /* If gateway address not supplied, then a device route will be created instead */
2603 if (gw
&& nla_put_buffer(nlmsg
, RTA_GATEWAY
, gw
, addrlen
))
2604 return ret_errno(ENOMEM
);
2606 /* Adding the interface index enables the use of link-local
2607 * addresses for the gateway.
2609 if (nla_put_u32(nlmsg
, RTA_OIF
, ifindex
))
2610 return ret_errno(EINVAL
);
2612 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
/* Add an IPv4 default route via @gw on the device with index @ifindex. */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	return ip_gateway_add(AF_INET, ifindex, gw);
}
/* Add an IPv6 default route via @gw on the device with index @ifindex. */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	return ip_gateway_add(AF_INET6, ifindex, gw);
}
/*
 * Guess whether @bridge is an openvswitch bridge.
 *
 * Returns true only if stat() on /sys/class/net/<bridge>/bridge fails with
 * ENOENT, i.e. the device does not expose a "bridge" directory. Returns
 * false in every other case, including a name too long for the path buffer.
 */
bool is_ovs_bridge(const char *bridge)
{
	/* 22 == strlen("/sys/class/net/") + strlen("/bridge") */
	char sysfs_path[22 + IFNAMSIZ + 1] = {0};
	struct stat st;
	int len;

	len = snprintf(sysfs_path, sizeof(sysfs_path), "/sys/class/net/%s/bridge",
		       bridge);
	if (len < 0 || (size_t)len >= sizeof(sysfs_path))
		return false;

	if (stat(sysfs_path, &st) < 0 && errno == ENOENT)
		return true;

	return false;
}
/* Arguments handed to the ovs-vsctl exec callbacks. */
struct ovs_veth_args {
	const char *bridge; /* bridge name passed to ovs-vsctl */
	const char *nic;    /* port (interface) name passed to ovs-vsctl */
};
2647 /* Called from a background thread - when nic goes away, remove it from the
2650 static int lxc_ovs_delete_port_exec(void *data
)
2652 struct ovs_veth_args
*args
= data
;
2654 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args
->bridge
, args
->nic
, (char *)NULL
);
2658 int lxc_ovs_delete_port(const char *bridge
, const char *nic
)
2661 char cmd_output
[PATH_MAX
];
2662 struct ovs_veth_args args
;
2664 args
.bridge
= bridge
;
2666 ret
= run_command(cmd_output
, sizeof(cmd_output
),
2667 lxc_ovs_delete_port_exec
, (void *)&args
);
2669 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic
, bridge
, cmd_output
);
2674 static int lxc_ovs_attach_bridge_exec(void *data
)
2676 struct ovs_veth_args
*args
= data
;
2678 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args
->bridge
, args
->nic
, (char *)NULL
);
2682 static int lxc_ovs_attach_bridge(const char *bridge
, const char *nic
)
2685 char cmd_output
[PATH_MAX
];
2686 struct ovs_veth_args args
;
2688 args
.bridge
= bridge
;
2690 ret
= run_command(cmd_output
, sizeof(cmd_output
),
2691 lxc_ovs_attach_bridge_exec
, (void *)&args
);
2693 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic
, bridge
, cmd_output
);
2698 int lxc_bridge_attach(const char *bridge
, const char *ifname
)
2704 if (strlen(ifname
) >= IFNAMSIZ
)
2707 index
= if_nametoindex(ifname
);
2711 if (is_ovs_bridge(bridge
))
2712 return lxc_ovs_attach_bridge(bridge
, ifname
);
2714 fd
= socket(AF_INET
, SOCK_STREAM
| SOCK_CLOEXEC
, 0);
2718 retlen
= strlcpy(ifr
.ifr_name
, bridge
, IFNAMSIZ
);
2719 if (retlen
>= IFNAMSIZ
) {
2724 ifr
.ifr_name
[IFNAMSIZ
- 1] = '\0';
2725 ifr
.ifr_ifindex
= index
;
2726 err
= ioctl(fd
, SIOCBRADDIF
, &ifr
);
2734 static const char *const lxc_network_types
[LXC_NET_MAXCONFTYPE
+ 1] = {
2735 [LXC_NET_EMPTY
] = "empty",
2736 [LXC_NET_VETH
] = "veth",
2737 [LXC_NET_MACVLAN
] = "macvlan",
2738 [LXC_NET_IPVLAN
] = "ipvlan",
2739 [LXC_NET_PHYS
] = "phys",
2740 [LXC_NET_VLAN
] = "vlan",
2741 [LXC_NET_NONE
] = "none",
2744 const char *lxc_net_type_to_str(int type
)
2746 if (type
< 0 || type
> LXC_NET_MAXCONFTYPE
)
2749 return lxc_network_types
[type
];
2752 static const char padchar
[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
2754 char *lxc_ifname_alnum_case_sensitive(char *template)
2756 char name
[IFNAMSIZ
];
2761 seed
= randseed(false);
2764 (void)randseed(true);
2767 if (strlen(template) >= IFNAMSIZ
)
2770 /* Generate random names until we find one that doesn't exist. */
2773 (void)strlcpy(name
, template, IFNAMSIZ
);
2775 for (i
= 0; i
< strlen(name
); i
++) {
2776 if (name
[i
] == 'X') {
2778 name
[i
] = padchar
[rand_r(&seed
) % strlen(padchar
)];
2780 name
[i
] = padchar
[rand() % strlen(padchar
)];
2785 if (if_nametoindex(name
) == 0)
2789 (void)strlcpy(template, name
, strlen(template) + 1);
2794 int setup_private_host_hw_addr(char *veth1
)
2799 sockfd
= socket(AF_INET
, SOCK_DGRAM
| SOCK_CLOEXEC
, 0);
2803 err
= snprintf((char *)ifr
.ifr_name
, IFNAMSIZ
, "%s", veth1
);
2804 if (err
< 0 || (size_t)err
>= IFNAMSIZ
) {
2809 err
= ioctl(sockfd
, SIOCGIFHWADDR
, &ifr
);
2815 ifr
.ifr_hwaddr
.sa_data
[0] = 0xfe;
2816 err
= ioctl(sockfd
, SIOCSIFHWADDR
, &ifr
);
2824 int lxc_find_gateway_addresses(struct lxc_handler
*handler
)
2826 struct lxc_list
*network
= &handler
->conf
->network
;
2827 struct lxc_list
*iterator
;
2828 struct lxc_netdev
*netdev
;
2831 lxc_list_for_each(iterator
, network
) {
2832 netdev
= iterator
->elem
;
2834 if (!netdev
->ipv4_gateway_auto
&& !netdev
->ipv6_gateway_auto
)
2837 if (netdev
->type
!= LXC_NET_VETH
&& netdev
->type
!= LXC_NET_MACVLAN
)
2838 return log_error_errno(-1, EINVAL
, "Automatic gateway detection is only supported for veth and macvlan");
2840 if (is_empty_string(netdev
->link
)) {
2841 return log_error_errno(-1, errno
, "Automatic gateway detection needs a link interface");
2844 link_index
= if_nametoindex(netdev
->link
);
2848 if (netdev
->ipv4_gateway_auto
) {
2849 if (lxc_ipv4_addr_get(link_index
, &netdev
->ipv4_gateway
))
2850 return log_error_errno(-1, errno
, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev
->link
);
2853 if (netdev
->ipv6_gateway_auto
) {
2854 if (lxc_ipv6_addr_get(link_index
, &netdev
->ipv6_gateway
))
2855 return log_error_errno(-1, errno
, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev
->link
);
2862 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2863 static int lxc_create_network_unpriv_exec(const char *lxcpath
, const char *lxcname
,
2864 struct lxc_netdev
*netdev
, pid_t pid
, unsigned int hooks_version
)
2868 int bytes
, pipefd
[2];
2869 char *token
, *saveptr
= NULL
;
2870 char netdev_link
[IFNAMSIZ
];
2871 char buffer
[PATH_MAX
] = {0};
2874 if (netdev
->type
!= LXC_NET_VETH
)
2875 return log_error_errno(-1, errno
, "Network type %d not support for unprivileged use", netdev
->type
);
2879 return log_error_errno(-1, errno
, "Failed to create pipe");
2885 return log_error_errno(-1, errno
, "Failed to create new process");
2889 char pidstr
[INTTYPE_TO_STRLEN(pid_t
)];
2893 ret
= dup2(pipefd
[1], STDOUT_FILENO
);
2895 ret
= dup2(pipefd
[1], STDERR_FILENO
);
2898 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2899 _exit(EXIT_FAILURE
);
2902 if (!is_empty_string(netdev
->link
))
2903 retlen
= strlcpy(netdev_link
, netdev
->link
, IFNAMSIZ
);
2905 retlen
= strlcpy(netdev_link
, "none", IFNAMSIZ
);
2906 if (retlen
>= IFNAMSIZ
) {
2907 SYSERROR("Invalid network device name");
2908 _exit(EXIT_FAILURE
);
2911 ret
= snprintf(pidstr
, sizeof(pidstr
), "%d", pid
);
2912 if (ret
< 0 || ret
>= sizeof(pidstr
))
2913 _exit(EXIT_FAILURE
);
2914 pidstr
[sizeof(pidstr
) - 1] = '\0';
2916 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath
,
2917 lxcname
, pidstr
, netdev_link
,
2918 !is_empty_string(netdev
->name
) ? netdev
->name
: "(null)");
2919 if (!is_empty_string(netdev
->name
))
2920 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "create",
2921 lxcpath
, lxcname
, pidstr
, "veth", netdev_link
,
2922 netdev
->name
, (char *)NULL
);
2924 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "create",
2925 lxcpath
, lxcname
, pidstr
, "veth", netdev_link
,
2927 SYSERROR("Failed to execute lxc-user-nic");
2928 _exit(EXIT_FAILURE
);
2931 /* close the write-end of the pipe */
2934 bytes
= lxc_read_nointr(pipefd
[0], &buffer
, sizeof(buffer
));
2936 SYSERROR("Failed to read from pipe file descriptor");
2939 buffer
[bytes
- 1] = '\0';
2942 ret
= wait_for_pid(child
);
2944 if (ret
!= 0 || bytes
< 0)
2945 return log_error(-1, "lxc-user-nic failed to configure requested network: %s", buffer
[0] != '\0' ? buffer
: "(null)");
2946 TRACE("Received output \"%s\" from lxc-user-nic", buffer
);
2949 token
= strtok_r(buffer
, ":", &saveptr
);
2951 return log_error(-1, "Failed to parse lxc-user-nic output");
2954 * lxc-user-nic will take care of proper network device naming. So
2955 * netdev->name and netdev->created_name need to be identical to not
2956 * trigger another rename later on.
2958 retlen
= strlcpy(netdev
->name
, token
, IFNAMSIZ
);
2959 if (retlen
< IFNAMSIZ
)
2960 retlen
= strlcpy(netdev
->created_name
, token
, IFNAMSIZ
);
2961 if (retlen
>= IFNAMSIZ
)
2962 return log_error_errno(-1, E2BIG
, "Container side veth device name returned by lxc-user-nic is too long");
2964 /* netdev->ifindex */
2965 token
= strtok_r(NULL
, ":", &saveptr
);
2967 return log_error(-1, "Failed to parse lxc-user-nic output");
2969 ret
= lxc_safe_int(token
, &netdev
->ifindex
);
2971 return log_error_errno(-1, -ret
, "Failed to convert string \"%s\" to integer", token
);
2973 /* netdev->priv.veth_attr.veth1 */
2974 token
= strtok_r(NULL
, ":", &saveptr
);
2976 return log_error(-1, "Failed to parse lxc-user-nic output");
2978 retlen
= strlcpy(netdev
->priv
.veth_attr
.veth1
, token
, IFNAMSIZ
);
2979 if (retlen
>= IFNAMSIZ
)
2980 return log_error_errno(-1, E2BIG
, "Host side veth device name returned by lxc-user-nic is too long");
2982 /* netdev->priv.veth_attr.ifindex */
2983 token
= strtok_r(NULL
, ":", &saveptr
);
2985 return log_error(-1, "Failed to parse lxc-user-nic output");
2987 ret
= lxc_safe_int(token
, &netdev
->priv
.veth_attr
.ifindex
);
2989 return log_error_errno(-1, -ret
, "Failed to convert string \"%s\" to integer", token
);
2991 if (netdev
->upscript
) {
2995 netdev
->priv
.veth_attr
.veth1
,
2999 ret
= run_script_argv(lxcname
, hooks_version
, "net",
3000 netdev
->upscript
, "up", argv
);
3008 static int lxc_delete_network_unpriv_exec(const char *lxcpath
, const char *lxcname
,
3009 struct lxc_netdev
*netdev
,
3010 const char *netns_path
)
3015 char buffer
[PATH_MAX
] = {};
3017 if (netdev
->type
!= LXC_NET_VETH
)
3018 return log_error_errno(-1, EINVAL
, "Network type %d not support for unprivileged use", netdev
->type
);
3022 return log_error_errno(-1, errno
, "Failed to create pipe");
3028 return log_error_errno(-1, errno
, "Failed to create new process");
3036 ret
= dup2(pipefd
[1], STDOUT_FILENO
);
3038 ret
= dup2(pipefd
[1], STDERR_FILENO
);
3041 SYSERROR("Failed to duplicate std{err,out} file descriptor");
3042 _exit(EXIT_FAILURE
);
3045 if (!is_empty_string(netdev
->priv
.veth_attr
.pair
))
3046 hostveth
= netdev
->priv
.veth_attr
.pair
;
3048 hostveth
= netdev
->priv
.veth_attr
.veth1
;
3049 if (is_empty_string(hostveth
)) {
3050 SYSERROR("Host side veth device name is missing");
3051 _exit(EXIT_FAILURE
);
3054 if (is_empty_string(netdev
->link
)) {
3055 SYSERROR("Network link for network device \"%s\" is missing", netdev
->priv
.veth_attr
.veth1
);
3056 _exit(EXIT_FAILURE
);
3059 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath
,
3060 lxcname
, netns_path
, netdev
->link
, hostveth
);
3061 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "delete", lxcpath
,
3062 lxcname
, netns_path
, "veth", netdev
->link
, hostveth
,
3064 SYSERROR("Failed to exec lxc-user-nic.");
3065 _exit(EXIT_FAILURE
);
3070 bytes
= lxc_read_nointr(pipefd
[0], &buffer
, sizeof(buffer
));
3072 SYSERROR("Failed to read from pipe file descriptor.");
3075 buffer
[bytes
- 1] = '\0';
3078 ret
= wait_for_pid(child
);
3079 close_prot_errno_disarm(pipefd
[0]);
3080 if (ret
!= 0 || bytes
< 0)
3081 return log_error_errno(-1, errno
, "lxc-user-nic failed to delete requested network: %s",
3082 !is_empty_string(buffer
) ? buffer
: "(null)");
3087 static bool lxc_delete_network_unpriv(struct lxc_handler
*handler
)
3090 struct lxc_list
*iterator
;
3091 struct lxc_list
*network
= &handler
->conf
->network
;
3092 /* strlen("/proc/") = 6
3094 * INTTYPE_TO_STRLEN(pid_t)
3096 * strlen("/fd/") = 4
3098 * INTTYPE_TO_STRLEN(int)
3102 char netns_path
[6 + INTTYPE_TO_STRLEN(pid_t
) + 4 + INTTYPE_TO_STRLEN(int) + 1];
3106 if (handler
->nsfd
[LXC_NS_NET
] < 0)
3107 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
3109 ret
= snprintf(netns_path
, sizeof(netns_path
), "/proc/%d/fd/%d",
3110 lxc_raw_getpid(), handler
->nsfd
[LXC_NS_NET
]);
3111 if (ret
< 0 || ret
>= sizeof(netns_path
))
3114 lxc_list_for_each(iterator
, network
) {
3115 char *hostveth
= NULL
;
3116 struct lxc_netdev
*netdev
= iterator
->elem
;
3118 /* We can only delete devices whose ifindex we have. If we don't
3119 * have the index it means that we didn't create it.
3121 if (!netdev
->ifindex
)
3124 if (netdev
->type
== LXC_NET_PHYS
) {
3125 ret
= lxc_netdev_rename_by_index(netdev
->ifindex
,
3128 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
3129 netdev
->ifindex
, netdev
->link
);
3131 TRACE("Renamed interface with index %d to its initial name \"%s\"",
3132 netdev
->ifindex
, netdev
->link
);
3134 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
3136 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3137 netdev
->ifindex
, netdev
->link
);
3138 goto clear_ifindices
;
3141 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
3143 WARN("Failed to deconfigure network device");
3145 if (netdev
->type
!= LXC_NET_VETH
)
3146 goto clear_ifindices
;
3148 if (is_empty_string(netdev
->link
) || !is_ovs_bridge(netdev
->link
))
3149 goto clear_ifindices
;
3151 if (!is_empty_string(netdev
->priv
.veth_attr
.pair
))
3152 hostveth
= netdev
->priv
.veth_attr
.pair
;
3154 hostveth
= netdev
->priv
.veth_attr
.veth1
;
3155 if (is_empty_string(hostveth
))
3156 goto clear_ifindices
;
3158 ret
= lxc_delete_network_unpriv_exec(handler
->lxcpath
,
3159 handler
->name
, netdev
,
3162 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth
, netdev
->link
);
3163 goto clear_ifindices
;
3165 INFO("Removed interface \"%s\" from \"%s\"", hostveth
, netdev
->link
);
3169 * We need to clear any ifindices we recorded so liblxc won't
3170 * have cached stale data which would cause it to fail on
3171 * reboot where we don't re-read the on-disk config file.
3173 netdev
->ifindex
= 0;
3174 if (netdev
->type
== LXC_NET_PHYS
) {
3175 netdev
->priv
.phys_attr
.ifindex
= 0;
3176 } else if (netdev
->type
== LXC_NET_VETH
) {
3177 netdev
->priv
.veth_attr
.veth1
[0] = '\0';
3178 netdev
->priv
.veth_attr
.ifindex
= 0;
3185 static int lxc_setup_l2proxy(struct lxc_netdev
*netdev
) {
3186 struct lxc_list
*cur
, *next
;
3187 struct lxc_inetdev
*inet4dev
;
3188 struct lxc_inet6dev
*inet6dev
;
3189 char bufinet4
[INET_ADDRSTRLEN
], bufinet6
[INET6_ADDRSTRLEN
];
3191 unsigned int lo_ifindex
= 0, link_ifindex
= 0;
3193 link_ifindex
= if_nametoindex(netdev
->link
);
3194 if (link_ifindex
== 0)
3195 return log_error_errno(-1, errno
, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev
->link
);
3198 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3199 if (!lxc_list_empty(&netdev
->ipv4
)) {
3200 /* Check for net.ipv4.conf.[link].forwarding=1 */
3201 if (lxc_is_ip_forwarding_enabled(netdev
->link
, AF_INET
) < 0)
3202 return log_error_errno(-1, EINVAL
, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev
->link
);
3205 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3206 if (!lxc_list_empty(&netdev
->ipv6
)) {
3207 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
3208 if (lxc_is_ip_neigh_proxy_enabled(netdev
->link
, AF_INET6
) < 0)
3209 return log_error_errno(-1, EINVAL
, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev
->link
);
3211 /* Check for net.ipv6.conf.[link].forwarding=1 */
3212 if (lxc_is_ip_forwarding_enabled(netdev
->link
, AF_INET6
) < 0)
3213 return log_error_errno(-1, EINVAL
, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev
->link
);
3216 /* Perform IPVLAN specific checks. */
3217 if (netdev
->type
== LXC_NET_IPVLAN
) {
3218 /* Check mode is l3s as other modes do not work with l2proxy. */
3219 if (netdev
->priv
.ipvlan_attr
.mode
!= IPVLAN_MODE_L3S
)
3220 return log_error_errno(-1, EINVAL
, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev
->link
);
3222 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3223 lo_ifindex
= if_nametoindex(loop_device
);
3224 if (lo_ifindex
== 0)
3225 return log_error_errno(-1, EINVAL
, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device
);
3228 lxc_list_for_each_safe(cur
, &netdev
->ipv4
, next
) {
3229 inet4dev
= cur
->elem
;
3230 if (!inet_ntop(AF_INET
, &inet4dev
->addr
, bufinet4
, sizeof(bufinet4
)))
3231 return ret_set_errno(-1, -errno
);
3233 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH
, AF_INET
, link_ifindex
, &inet4dev
->addr
) < 0)
3234 return ret_set_errno(-1, EINVAL
);
3236 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3237 if (netdev
->type
== LXC_NET_IPVLAN
) {
3238 err
= lxc_ipv4_dest_add(lo_ifindex
, &inet4dev
->addr
, 32);
3240 return log_error_errno(-1, -err
, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4
, loop_device
);
3244 lxc_list_for_each_safe(cur
, &netdev
->ipv6
, next
) {
3245 inet6dev
= cur
->elem
;
3246 if (!inet_ntop(AF_INET6
, &inet6dev
->addr
, bufinet6
, sizeof(bufinet6
)))
3247 return ret_set_errno(-1, -errno
);
3249 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH
, AF_INET6
, link_ifindex
, &inet6dev
->addr
) < 0)
3250 return ret_set_errno(-1, EINVAL
);
3252 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3253 if (netdev
->type
== LXC_NET_IPVLAN
) {
3254 err
= lxc_ipv6_dest_add(lo_ifindex
, &inet6dev
->addr
, 128);
3256 return log_error_errno(-1, -err
, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6
, loop_device
);
/*
 * Undo the l2proxy setup for a single IPv4 address.
 *
 * @ip:         address to clean up
 * @link:       link interface holding the neighbour proxy entry; skipped
 *              when empty
 * @lo_ifindex: loopback ifindex; when non-zero the /32 route to it is
 *              removed as well
 *
 * Both removals are attempted even if the first one fails.
 * Returns 0 on success, -1 with errno set to EINVAL otherwise.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char bufinet4[INET_ADDRSTRLEN];
	unsigned int link_ifindex = 0;
	bool had_error = false;

	if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* Drop the static route to the loopback device, if one was requested. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		had_error = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
	}

	/* Drop the neighbour proxy entry on the link device, if there is one. */
	if (!is_empty_string(link)) {
		link_ifindex = if_nametoindex(link);
		if (link_ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, link_ifindex, ip) < 0)
			had_error = true;
	}

	return had_error ? ret_set_errno(-1, EINVAL) : 0;
}
/*
 * Undo the l2proxy setup for a single IPv6 address.
 *
 * @ip:         address to clean up
 * @link:       link interface holding the neighbour proxy entry; skipped
 *              when empty
 * @lo_ifindex: loopback ifindex; when non-zero the /128 route to it is
 *              removed as well
 *
 * Both removals are attempted even if the first one fails.
 * Returns 0 on success, -1 with errno set to EINVAL otherwise.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char bufinet6[INET6_ADDRSTRLEN];
	unsigned int link_ifindex = 0;
	bool had_error = false;

	if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* Drop the static route to the loopback device, if one was requested. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		had_error = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
	}

	/* Drop the neighbour proxy entry on the link device, if there is one. */
	if (!is_empty_string(link)) {
		link_ifindex = if_nametoindex(link);
		if (link_ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, link_ifindex, ip) < 0)
			had_error = true;
	}

	return had_error ? ret_set_errno(-1, EINVAL) : 0;
}
3331 static int lxc_delete_l2proxy(struct lxc_netdev
*netdev
) {
3332 unsigned int lo_ifindex
= 0;
3333 unsigned int errCount
= 0;
3334 struct lxc_list
*cur
, *next
;
3335 struct lxc_inetdev
*inet4dev
;
3336 struct lxc_inet6dev
*inet6dev
;
3338 /* Perform IPVLAN specific checks. */
3339 if (netdev
->type
== LXC_NET_IPVLAN
) {
3340 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3341 lo_ifindex
= if_nametoindex(loop_device
);
3342 if (lo_ifindex
== 0) {
3344 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device
);
3348 lxc_list_for_each_safe(cur
, &netdev
->ipv4
, next
) {
3349 inet4dev
= cur
->elem
;
3350 if (lxc_delete_ipv4_l2proxy(&inet4dev
->addr
, netdev
->link
, lo_ifindex
) < 0)
3354 lxc_list_for_each_safe(cur
, &netdev
->ipv6
, next
) {
3355 inet6dev
= cur
->elem
;
3356 if (lxc_delete_ipv6_l2proxy(&inet6dev
->addr
, netdev
->link
, lo_ifindex
) < 0)
3361 return ret_set_errno(-1, EINVAL
);
3366 static int lxc_create_network_priv(struct lxc_handler
*handler
)
3368 struct lxc_list
*iterator
;
3369 struct lxc_list
*network
= &handler
->conf
->network
;
3371 lxc_list_for_each(iterator
, network
) {
3372 struct lxc_netdev
*netdev
= iterator
->elem
;
3374 if (netdev
->type
< 0 || netdev
->type
> LXC_NET_MAXCONFTYPE
)
3375 return log_error_errno(-1, EINVAL
, "Invalid network configuration type %d", netdev
->type
);
3377 /* Setup l2proxy entries if enabled and used with a link property */
3378 if (netdev
->l2proxy
&& !is_empty_string(netdev
->link
)) {
3379 if (lxc_setup_l2proxy(netdev
))
3380 return log_error_errno(-1, errno
, "Failed to setup l2proxy");
3383 if (netdev_conf
[netdev
->type
](handler
, netdev
))
3384 return log_error_errno(-1, errno
, "Failed to create network device");
3390 int lxc_network_move_created_netdev_priv(struct lxc_handler
*handler
)
3392 pid_t pid
= handler
->pid
;
3393 struct lxc_list
*network
= &handler
->conf
->network
;
3394 struct lxc_list
*iterator
;
3396 if (am_guest_unpriv())
3399 lxc_list_for_each(iterator
, network
) {
3400 __do_free
char *physname
= NULL
;
3402 struct lxc_netdev
*netdev
= iterator
->elem
;
3404 if (!netdev
->ifindex
)
3407 if (netdev
->type
== LXC_NET_PHYS
)
3408 physname
= is_wlan(netdev
->link
);
3411 ret
= lxc_netdev_move_wlan(physname
, netdev
->link
, pid
, NULL
);
3413 ret
= lxc_netdev_move_by_index(netdev
->ifindex
, pid
, NULL
);
3415 return log_error_errno(-1, -ret
, "Failed to move network device \"%s\" with ifindex %d to network namespace %d",
3416 netdev
->created_name
,
3417 netdev
->ifindex
, pid
);
3419 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d",
3420 netdev
->created_name
, netdev
->ifindex
, pid
);
3426 static int network_requires_advanced_setup(int type
)
3428 if (type
== LXC_NET_EMPTY
)
3431 if (type
== LXC_NET_NONE
)
3437 static int lxc_create_network_unpriv(struct lxc_handler
*handler
)
3439 int hooks_version
= handler
->conf
->hooks_version
;
3440 const char *lxcname
= handler
->name
;
3441 const char *lxcpath
= handler
->lxcpath
;
3442 struct lxc_list
*network
= &handler
->conf
->network
;
3443 pid_t pid
= handler
->pid
;
3444 struct lxc_list
*iterator
;
3446 lxc_list_for_each(iterator
, network
) {
3447 struct lxc_netdev
*netdev
= iterator
->elem
;
3449 if (!network_requires_advanced_setup(netdev
->type
))
3452 if (netdev
->type
!= LXC_NET_VETH
)
3453 return log_error_errno(-1, EINVAL
, "Networks of type %s are not supported by unprivileged containers",
3454 lxc_net_type_to_str(netdev
->type
));
3457 INFO("mtu ignored due to insufficient privilege");
3459 if (lxc_create_network_unpriv_exec(lxcpath
, lxcname
, netdev
,
3460 pid
, hooks_version
))
3467 static bool lxc_delete_network_priv(struct lxc_handler
*handler
)
3470 struct lxc_list
*iterator
;
3471 struct lxc_list
*network
= &handler
->conf
->network
;
3473 lxc_list_for_each(iterator
, network
) {
3474 char *hostveth
= NULL
;
3475 struct lxc_netdev
*netdev
= iterator
->elem
;
3477 /* We can only delete devices whose ifindex we have. If we don't
3478 * have the index it means that we didn't create it.
3480 if (!netdev
->ifindex
)
3484 * If the network device has been moved back from the
3485 * containers network namespace, update the ifindex.
3487 netdev
->ifindex
= if_nametoindex(netdev
->name
);
3489 /* Delete l2proxy entries if enabled and used with a link property */
3490 if (netdev
->l2proxy
&& !is_empty_string(netdev
->link
)) {
3491 if (lxc_delete_l2proxy(netdev
))
3492 WARN("Failed to delete all l2proxy config");
3493 /* Don't return, let the network be cleaned up as normal. */
3496 if (netdev
->type
== LXC_NET_PHYS
) {
3497 ret
= lxc_netdev_rename_by_index(netdev
->ifindex
, netdev
->link
);
3499 WARN("Failed to rename interface with index %d "
3500 "from \"%s\" to its initial name \"%s\"",
3501 netdev
->ifindex
, netdev
->name
, netdev
->link
);
3503 TRACE("Renamed interface with index %d from "
3504 "\"%s\" to its initial name \"%s\"",
3505 netdev
->ifindex
, netdev
->name
,
3508 /* Restore original MTU */
3509 ret
= lxc_netdev_set_mtu(netdev
->link
, netdev
->priv
.phys_attr
.mtu
);
3511 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3512 netdev
->link
, netdev
->priv
.phys_attr
.mtu
);
3514 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3515 netdev
->link
, netdev
->priv
.phys_attr
.mtu
);
3519 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
3521 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3522 netdev
->ifindex
, netdev
->link
);
3523 goto clear_ifindices
;
3526 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
3528 WARN("Failed to deconfigure network device");
3530 if (netdev
->type
!= LXC_NET_VETH
)
3531 goto clear_ifindices
;
3533 /* Explicitly delete host veth device to prevent lingering
3534 * devices. We had issues in LXD around this.
3536 if (!is_empty_string(netdev
->priv
.veth_attr
.pair
))
3537 hostveth
= netdev
->priv
.veth_attr
.pair
;
3539 hostveth
= netdev
->priv
.veth_attr
.veth1
;
3540 if (is_empty_string(hostveth
))
3541 goto clear_ifindices
;
3543 if (is_empty_string(netdev
->link
) || !is_ovs_bridge(netdev
->link
)) {
3544 ret
= lxc_netdev_delete_by_name(hostveth
);
3546 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth
, netdev
->link
);
3548 INFO("Removed interface \"%s\" from \"%s\"", hostveth
, netdev
->link
);
3549 } else if (!is_empty_string(netdev
->link
)) {
3550 ret
= lxc_ovs_delete_port(netdev
->link
, hostveth
);
3552 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth
, netdev
->link
);
3554 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth
, netdev
->link
);
3558 /* We need to clear any ifindices we recorded so liblxc won't
3559 * have cached stale data which would cause it to fail on reboot
3560 * we're we don't re-read the on-disk config file.
3562 netdev
->ifindex
= 0;
3563 if (netdev
->type
== LXC_NET_PHYS
) {
3564 netdev
->priv
.phys_attr
.ifindex
= 0;
3565 } else if (netdev
->type
== LXC_NET_VETH
) {
3566 netdev
->priv
.veth_attr
.veth1
[0] = '\0';
3567 netdev
->priv
.veth_attr
.ifindex
= 0;
3574 int lxc_requests_empty_network(struct lxc_handler
*handler
)
3576 struct lxc_list
*network
= &handler
->conf
->network
;
3577 struct lxc_list
*iterator
;
3578 bool found_none
= false, found_nic
= false;
3580 if (lxc_list_empty(network
))
3583 lxc_list_for_each (iterator
, network
) {
3584 struct lxc_netdev
*netdev
= iterator
->elem
;
3586 if (netdev
->type
== LXC_NET_NONE
)
3592 if (found_none
&& !found_nic
)
3598 /* try to move physical nics to the init netns */
3599 int lxc_restore_phys_nics_to_netns(struct lxc_handler
*handler
)
3601 __do_close
int oldfd
= -EBADF
;
3602 int netnsfd
= handler
->nsfd
[LXC_NS_NET
];
3603 struct lxc_conf
*conf
= handler
->conf
;
3605 char ifname
[IFNAMSIZ
];
3606 struct lxc_list
*iterator
;
3609 * If we weren't asked to clone a new network namespace, there's
3610 * nothing to restore.
3612 if (!(handler
->ns_clone_flags
& CLONE_NEWNET
))
3615 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3616 * the parent network namespace. We won't have this capability if we are
3619 if (!handler
->am_root
)
3622 TRACE("Moving physical network devices back to parent network namespace");
3624 oldfd
= lxc_preserve_ns(handler
->monitor_pid
, "net");
3626 return log_error_errno(-1, errno
, "Failed to preserve network namespace");
3628 ret
= setns(netnsfd
, CLONE_NEWNET
);
3630 return log_error_errno(-1, errno
, "Failed to enter network namespace");
3632 lxc_list_for_each(iterator
, &conf
->network
) {
3633 struct lxc_netdev
*netdev
= iterator
->elem
;
3635 if (netdev
->type
!= LXC_NET_PHYS
)
3638 /* Retrieve the name of the interface in the container's network
3641 if (!if_indextoname(netdev
->ifindex
, ifname
)) {
3642 WARN("No interface corresponding to ifindex %d", netdev
->ifindex
);
3646 ret
= lxc_netdev_move_by_index_fd(netdev
->ifindex
, oldfd
, netdev
->link
);
3648 WARN("Error moving network device \"%s\" back to network namespace", ifname
);
3650 TRACE("Moved network device \"%s\" back to network namespace", ifname
);
3653 ret
= setns(oldfd
, CLONE_NEWNET
);
3655 return log_error_errno(-1, errno
, "Failed to enter network namespace");
3660 static int setup_hw_addr(char *hwaddr
, const char *ifname
)
3662 __do_close
int fd
= -EBADF
;
3663 struct sockaddr sockaddr
;
3667 ret
= lxc_convert_mac(hwaddr
, &sockaddr
);
3669 return log_error_errno(-1, -ret
, "Mac address \"%s\" conversion failed", hwaddr
);
3671 memcpy(ifr
.ifr_name
, ifname
, IFNAMSIZ
);
3672 ifr
.ifr_name
[IFNAMSIZ
-1] = '\0';
3673 memcpy((char *) &ifr
.ifr_hwaddr
, (char *) &sockaddr
, sizeof(sockaddr
));
3675 fd
= socket(AF_INET
, SOCK_DGRAM
| SOCK_CLOEXEC
, 0);
3679 ret
= ioctl(fd
, SIOCSIFHWADDR
, &ifr
);
3681 SYSERROR("Failed to perform ioctl");
3683 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr
, ifr
.ifr_name
);
3688 static int setup_ipv4_addr(struct lxc_list
*ip
, int ifindex
)
3690 struct lxc_list
*iterator
;
3693 lxc_list_for_each(iterator
, ip
) {
3694 struct lxc_inetdev
*inetdev
= iterator
->elem
;
3696 err
= lxc_ipv4_addr_add(ifindex
, &inetdev
->addr
,
3697 &inetdev
->bcast
, inetdev
->prefix
);
3699 return log_error_errno(-1, -err
, "Failed to setup ipv4 address for network device with ifindex %d", ifindex
);
3705 static int setup_ipv6_addr(struct lxc_list
*ip
, int ifindex
)
3707 struct lxc_list
*iterator
;
3710 lxc_list_for_each(iterator
, ip
) {
3711 struct lxc_inet6dev
*inet6dev
= iterator
->elem
;
3713 err
= lxc_ipv6_addr_add(ifindex
, &inet6dev
->addr
,
3714 &inet6dev
->mcast
, &inet6dev
->acast
,
3717 return log_error_errno(-1, -err
, "Failed to setup ipv6 address for network device with ifindex %d", ifindex
);
3723 static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev
*netdev
)
3726 char bufinet4
[INET_ADDRSTRLEN
], bufinet6
[INET6_ADDRSTRLEN
];
3728 /* empty network namespace */
3729 if (!netdev
->ifindex
&& netdev
->flags
& IFF_UP
) {
3730 err
= lxc_netdev_up("lo");
3732 return log_error_errno(-1, -err
, "Failed to set the loopback network device up");
3735 /* set a mac address */
3736 if (netdev
->hwaddr
&& setup_hw_addr(netdev
->hwaddr
, netdev
->name
))
3737 return log_error_errno(-1, errno
, "Failed to setup hw address for network device \"%s\"", netdev
->name
);
3739 /* setup ipv4 addresses on the interface */
3740 if (setup_ipv4_addr(&netdev
->ipv4
, netdev
->ifindex
))
3741 return log_error_errno(-1, errno
, "Failed to setup ip addresses for network device \"%s\"", netdev
->name
);
3743 /* setup ipv6 addresses on the interface */
3744 if (setup_ipv6_addr(&netdev
->ipv6
, netdev
->ifindex
))
3745 return log_error_errno(-1, errno
, "Failed to setup ipv6 addresses for network device \"%s\"", netdev
->name
);
3747 /* set the network device up */
3748 if (netdev
->flags
& IFF_UP
) {
3749 err
= lxc_netdev_up(netdev
->name
);
3751 return log_error_errno(-1, -err
, "Failed to set network device \"%s\" up", netdev
->name
);
3753 /* the network is up, make the loopback up too */
3754 err
= lxc_netdev_up("lo");
3756 return log_error_errno(-1, -err
, "Failed to set the loopback network device up");
3759 /* setup ipv4 gateway on the interface */
3760 if (netdev
->ipv4_gateway
|| netdev
->ipv4_gateway_dev
) {
3761 if (!(netdev
->flags
& IFF_UP
))
3762 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev
->name
);
3764 if (lxc_list_empty(&netdev
->ipv4
))
3765 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev
->name
);
3767 /* Setup device route if ipv4_gateway_dev is enabled */
3768 if (netdev
->ipv4_gateway_dev
) {
3769 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, NULL
);
3771 return log_error_errno(-1, -err
, "Failed to setup ipv4 gateway to network device \"%s\"", netdev
->name
);
3773 /* Check the gateway address is valid */
3774 if (!inet_ntop(AF_INET
, netdev
->ipv4_gateway
, bufinet4
, sizeof(bufinet4
)))
3775 return ret_set_errno(-1, errno
);
3777 /* Try adding a default route to the gateway address */
3778 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
3780 /* If adding the default route fails, this could be because the
3781 * gateway address is in a different subnet to the container's address.
3782 * To work around this, we try adding a static device route to the
3783 * gateway address first, and then try again.
3785 err
= lxc_ipv4_dest_add(netdev
->ifindex
, netdev
->ipv4_gateway
, 32);
3787 return log_error_errno(-1, -err
, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4
, netdev
->name
);
3789 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
3791 return log_error_errno(-1, -err
, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4
, netdev
->name
);
3796 /* setup ipv6 gateway on the interface */
3797 if (netdev
->ipv6_gateway
|| netdev
->ipv6_gateway_dev
) {
3798 if (!(netdev
->flags
& IFF_UP
))
3799 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev
->name
);
3801 if (lxc_list_empty(&netdev
->ipv6
) && !IN6_IS_ADDR_LINKLOCAL(netdev
->ipv6_gateway
))
3802 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev
->name
);
3804 /* Setup device route if ipv6_gateway_dev is enabled */
3805 if (netdev
->ipv6_gateway_dev
) {
3806 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, NULL
);
3808 return log_error_errno(-1, -err
, "Failed to setup ipv6 gateway to network device \"%s\"", netdev
->name
);
3810 /* Check the gateway address is valid */
3811 if (!inet_ntop(AF_INET6
, netdev
->ipv6_gateway
, bufinet6
, sizeof(bufinet6
)))
3812 return ret_set_errno(-1, errno
);
3814 /* Try adding a default route to the gateway address */
3815 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
3817 /* If adding the default route fails, this could be because the
3818 * gateway address is in a different subnet to the container's address.
3819 * To work around this, we try adding a static device route to the
3820 * gateway address first, and then try again.
3822 err
= lxc_ipv6_dest_add(netdev
->ifindex
, netdev
->ipv6_gateway
, 128);
3824 return log_error_errno(-1, errno
, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6
, netdev
->name
);
3826 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
3828 return log_error_errno(-1, -err
, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6
, netdev
->name
);
3833 DEBUG("Network device \"%s\" has been setup", netdev
->name
);
3838 int lxc_setup_network_in_child_namespaces(const struct lxc_conf
*conf
,
3839 struct lxc_list
*network
)
3841 struct lxc_list
*iterator
;
3843 lxc_list_for_each (iterator
, network
) {
3844 struct lxc_netdev
*netdev
= iterator
->elem
;
3847 ret
= netdev_ns_conf
[netdev
->type
](netdev
);
3849 ret
= lxc_network_setup_in_child_namespaces_common(netdev
);
3851 return log_error_errno(-1, errno
, "Failed to setup netdev");
3854 if (!lxc_list_empty(network
))
3855 INFO("Network has been setup");
3860 int lxc_network_send_to_child(struct lxc_handler
*handler
)
3862 struct lxc_list
*iterator
;
3863 struct lxc_list
*network
= &handler
->conf
->network
;
3864 int data_sock
= handler
->data_sock
[0];
3866 lxc_list_for_each(iterator
, network
) {
3868 struct lxc_netdev
*netdev
= iterator
->elem
;
3870 if (!network_requires_advanced_setup(netdev
->type
))
3873 ret
= lxc_send_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, MSG_NOSIGNAL
);
3877 ret
= lxc_send_nointr(data_sock
, netdev
->created_name
, IFNAMSIZ
, MSG_NOSIGNAL
);
3881 TRACE("Sent network device name \"%s\" to child", netdev
->created_name
);
3887 int lxc_network_recv_from_parent(struct lxc_handler
*handler
)
3889 struct lxc_list
*iterator
;
3890 struct lxc_list
*network
= &handler
->conf
->network
;
3891 int data_sock
= handler
->data_sock
[1];
3893 lxc_list_for_each(iterator
, network
) {
3895 struct lxc_netdev
*netdev
= iterator
->elem
;
3897 if (!network_requires_advanced_setup(netdev
->type
))
3900 ret
= lxc_recv_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, 0);
3904 ret
= lxc_recv_nointr(data_sock
, netdev
->created_name
, IFNAMSIZ
, 0);
3908 TRACE("Received network device name \"%s\" from parent", netdev
->created_name
);
3914 int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler
*handler
)
3916 struct lxc_list
*iterator
, *network
;
3917 int data_sock
= handler
->data_sock
[0];
3919 if (!handler
->am_root
)
3922 network
= &handler
->conf
->network
;
3923 lxc_list_for_each(iterator
, network
) {
3925 struct lxc_netdev
*netdev
= iterator
->elem
;
3927 /* Send network device name in the child's namespace to parent. */
3928 ret
= lxc_send_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, MSG_NOSIGNAL
);
3932 /* Send network device ifindex in the child's namespace to
3935 ret
= lxc_send_nointr(data_sock
, &netdev
->ifindex
, sizeof(netdev
->ifindex
), MSG_NOSIGNAL
);
3940 if (!lxc_list_empty(network
))
3941 TRACE("Sent network device names and ifindices to parent");
3946 int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler
*handler
)
3948 struct lxc_list
*iterator
, *network
;
3949 int data_sock
= handler
->data_sock
[1];
3951 if (!handler
->am_root
)
3954 network
= &handler
->conf
->network
;
3955 lxc_list_for_each(iterator
, network
) {
3957 struct lxc_netdev
*netdev
= iterator
->elem
;
3959 /* Receive network device name in the child's namespace to
3962 ret
= lxc_recv_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, 0);
3966 /* Receive network device ifindex in the child's namespace to
3969 ret
= lxc_recv_nointr(data_sock
, &netdev
->ifindex
, sizeof(netdev
->ifindex
), 0);
3977 void lxc_delete_network(struct lxc_handler
*handler
)
3981 if (handler
->am_root
)
3982 bret
= lxc_delete_network_priv(handler
);
3984 bret
= lxc_delete_network_unpriv(handler
);
3986 DEBUG("Failed to delete network devices");
3988 DEBUG("Deleted network devices");
3991 int lxc_netns_set_nsid(int fd
)
3994 char buf
[NLMSG_ALIGN(sizeof(struct nlmsghdr
)) +
3995 NLMSG_ALIGN(sizeof(struct rtgenmsg
)) +
3997 struct nl_handler nlh
;
3998 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
3999 struct nlmsghdr
*hdr
;
4000 struct rtgenmsg
*msg
;
4001 const __s32 ns_id
= -1;
4002 const __u32 netns_fd
= fd
;
4004 ret
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
4008 memset(buf
, 0, sizeof(buf
));
4010 #pragma GCC diagnostic push
4011 #pragma GCC diagnostic ignored "-Wcast-align"
4012 hdr
= (struct nlmsghdr
*)buf
;
4013 msg
= (struct rtgenmsg
*)NLMSG_DATA(hdr
);
4014 #pragma GCC diagnostic pop
4016 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(*msg
));
4017 hdr
->nlmsg_type
= RTM_NEWNSID
;
4018 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
4020 hdr
->nlmsg_seq
= RTM_NEWNSID
;
4021 msg
->rtgen_family
= AF_UNSPEC
;
4023 ret
= addattr(hdr
, 1024, __LXC_NETNSA_FD
, &netns_fd
, sizeof(netns_fd
));
4025 return ret_errno(ENOMEM
);
4027 ret
= addattr(hdr
, 1024, __LXC_NETNSA_NSID
, &ns_id
, sizeof(ns_id
));
4029 return ret_errno(ENOMEM
);
4031 return __netlink_transaction(nlh_ptr
, hdr
, hdr
);
/*
 * Index a run of rtnetlink attributes by type.
 *
 * @tb:  output table with room for @max + 1 entries; cleared first
 * @max: highest attribute type to record
 * @rta: first attribute
 * @len: number of bytes available starting at @rta
 *
 * Only the first attribute of each type is recorded; attributes whose type is
 * greater than @max are skipped. Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

	while (RTA_OK(rta, len)) {
		const unsigned short attr_type = rta->rta_type;

		if (attr_type <= max && tb[attr_type] == NULL)
			tb[attr_type] = rta;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
		rta = RTA_NEXT(rta, len);
#pragma GCC diagnostic pop
	}

	return 0;
}
/*
 * Return the payload of @rta interpreted as a signed 32-bit integer.
 *
 * The value is copied out with memcpy() rather than read through a cast
 * pointer, which avoids discarding the const qualifier and does not depend on
 * the payload's alignment or effective type.
 */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	__s32 value;

	memcpy(&value, (const char *)rta + RTA_LENGTH(0), sizeof(value));

	return value;
}
/* First rtattr following the (aligned) struct rtgenmsg that starts at r. */
#define NETNS_RTA(r) \
	((struct rtattr *)((char *)(r) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
4064 int lxc_netns_get_nsid(int fd
)
4066 struct nl_handler nlh
;
4067 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
4070 char buf
[NLMSG_ALIGN(sizeof(struct nlmsghdr
)) +
4071 NLMSG_ALIGN(sizeof(struct rtgenmsg
)) +
4073 struct rtattr
*tb
[__LXC_NETNSA_MAX
+ 1];
4074 struct nlmsghdr
*hdr
;
4075 struct rtgenmsg
*msg
;
4076 __u32 netns_fd
= fd
;
4078 ret
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
4082 memset(buf
, 0, sizeof(buf
));
4084 #pragma GCC diagnostic push
4085 #pragma GCC diagnostic ignored "-Wcast-align"
4086 hdr
= (struct nlmsghdr
*)buf
;
4087 msg
= (struct rtgenmsg
*)NLMSG_DATA(hdr
);
4088 #pragma GCC diagnostic pop
4090 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(*msg
));
4091 hdr
->nlmsg_type
= RTM_GETNSID
;
4092 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
4094 hdr
->nlmsg_seq
= RTM_GETNSID
;
4095 msg
->rtgen_family
= AF_UNSPEC
;
4097 ret
= addattr(hdr
, 1024, __LXC_NETNSA_FD
, &netns_fd
, sizeof(netns_fd
));
4099 return ret_errno(ENOMEM
);
4101 ret
= __netlink_transaction(nlh_ptr
, hdr
, hdr
);
4105 msg
= NLMSG_DATA(hdr
);
4106 len
= hdr
->nlmsg_len
- NLMSG_SPACE(sizeof(*msg
));
4108 return ret_errno(EINVAL
);
4110 #pragma GCC diagnostic push
4111 #pragma GCC diagnostic ignored "-Wcast-align"
4112 parse_rtattr(tb
, __LXC_NETNSA_MAX
, NETNS_RTA(msg
), len
);
4113 if (tb
[__LXC_NETNSA_NSID
])
4114 return rta_getattr_s32(tb
[__LXC_NETNSA_NSID
]);
4115 #pragma GCC diagnostic pop
4120 int lxc_create_network(struct lxc_handler
*handler
)
4124 if (handler
->am_root
) {
4125 ret
= lxc_create_network_priv(handler
);
4129 return lxc_network_move_created_netdev_priv(handler
);
4132 return lxc_create_network_unpriv(handler
);