1 /* SPDX-License-Identifier: LGPL-2.1+ */
9 #include <linux/netlink.h>
10 #include <linux/rtnetlink.h>
11 #include <linux/sockios.h>
12 #include <net/ethernet.h>
14 #include <net/if_arp.h>
15 #include <netinet/in.h>
19 #include <sys/inotify.h>
20 #include <sys/ioctl.h>
21 #include <sys/param.h>
22 #include <sys/socket.h>
24 #include <sys/types.h>
28 #include "netns_ifaddrs.h"
31 #include "file_utils.h"
34 #include "memory_utils.h"
37 #include "process_utils.h"
38 #include "string_utils.h"
39 #include "syscall_wrappers.h"
46 lxc_log_define(network
, lxc
);
48 typedef int (*netdev_configure_server_cb
)(struct lxc_handler
*, struct lxc_netdev
*);
49 typedef int (*netdev_configure_container_cb
)(struct lxc_netdev
*);
50 typedef int (*netdev_shutdown_server_cb
)(struct lxc_handler
*, struct lxc_netdev
*);
52 const struct lxc_network_info
{
54 const char template[IFNAMSIZ
];
56 } lxc_network_info
[LXC_NET_MAXCONFTYPE
+ 1] = {
57 [LXC_NET_EMPTY
] = { "empty", "emptXXXXXX", STRLITERALLEN("emptXXXXXX") },
58 [LXC_NET_VETH
] = { "veth", "vethXXXXXX", STRLITERALLEN("vethXXXXXX") },
59 [LXC_NET_MACVLAN
] = { "macvlan", "macvXXXXXX", STRLITERALLEN("macvXXXXXX") },
60 [LXC_NET_IPVLAN
] = { "ipvlan", "ipvlXXXXXX", STRLITERALLEN("ipvlXXXXXX") },
61 [LXC_NET_PHYS
] = { "phys", "physXXXXXX", STRLITERALLEN("physXXXXXX") },
62 [LXC_NET_VLAN
] = { "vlan", "vlanXXXXXX", STRLITERALLEN("vlanXXXXXX") },
63 [LXC_NET_NONE
] = { "none", "noneXXXXXX", STRLITERALLEN("noneXXXXXX") },
64 [LXC_NET_MAXCONFTYPE
] = { NULL
, "", 0 }
67 const char *lxc_net_type_to_str(int type
)
69 if (type
< 0 || type
> LXC_NET_MAXCONFTYPE
)
72 return lxc_network_info
[type
].name
;
75 static const char padchar
[] = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
77 char *lxc_ifname_alnum_case_sensitive(char *template)
84 seed
= randseed(false);
90 if (strlen(template) >= IFNAMSIZ
)
93 /* Generate random names until we find one that doesn't exist. */
96 (void)strlcpy(name
, template, IFNAMSIZ
);
98 for (i
= 0; i
< strlen(name
); i
++) {
101 name
[i
] = padchar
[rand_r(&seed
) % strlen(padchar
)];
103 name
[i
] = padchar
[rand() % strlen(padchar
)];
108 if (if_nametoindex(name
) == 0)
112 (void)strlcpy(template, name
, strlen(template) + 1);
116 static const char loop_device
[] = "lo";
118 static int lxc_ip_route_dest(__u16 nlmsg_type
, int family
, int ifindex
, void *dest
, unsigned int netmask
)
120 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
121 struct nl_handler nlh
= NL_HANDLER_INIT
;
122 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
126 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
127 : sizeof(struct in6_addr
);
129 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
133 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
137 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
141 nlmsg
->nlmsghdr
->nlmsg_flags
=
142 NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
143 nlmsg
->nlmsghdr
->nlmsg_type
= nlmsg_type
;
145 rt
= nlmsg_reserve(nlmsg
, sizeof(struct rtmsg
));
149 rt
->rtm_family
= family
;
150 rt
->rtm_table
= RT_TABLE_MAIN
;
151 rt
->rtm_scope
= RT_SCOPE_LINK
;
152 rt
->rtm_protocol
= RTPROT_BOOT
;
153 rt
->rtm_type
= RTN_UNICAST
;
154 rt
->rtm_dst_len
= netmask
;
156 if (nla_put_buffer(nlmsg
, RTA_DST
, dest
, addrlen
))
159 if (nla_put_u32(nlmsg
, RTA_OIF
, ifindex
))
162 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
/*
 * Add a route towards the IPv4 destination @dest with prefix length @netmask
 * on the device identified by @ifindex.
 *
 * Thin front-end for lxc_ip_route_dest() using RTM_NEWROUTE and AF_INET; the
 * helper's return value is handed back unchanged.
 */
static int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET, ifindex, dest, netmask);

	return ret;
}
/*
 * Add a route towards the IPv6 destination @dest with prefix length @netmask
 * on the device identified by @ifindex.
 *
 * Thin front-end for lxc_ip_route_dest() using RTM_NEWROUTE and AF_INET6; the
 * helper's return value is handed back unchanged.
 */
static int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int ret = lxc_ip_route_dest(RTM_NEWROUTE, AF_INET6, ifindex, dest, netmask);

	return ret;
}
/*
 * Delete the route towards the IPv4 destination @dest with prefix length
 * @netmask on the device identified by @ifindex.
 *
 * Thin front-end for lxc_ip_route_dest() using RTM_DELROUTE and AF_INET; the
 * helper's return value is handed back unchanged.
 */
static int lxc_ipv4_dest_del(int ifindex, struct in_addr *dest, unsigned int netmask)
{
	const int ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET, ifindex, dest, netmask);

	return ret;
}
/*
 * Delete the route towards the IPv6 destination @dest with prefix length
 * @netmask on the device identified by @ifindex.
 *
 * Thin front-end for lxc_ip_route_dest() using RTM_DELROUTE and AF_INET6; the
 * helper's return value is handed back unchanged.
 */
static int lxc_ipv6_dest_del(int ifindex, struct in6_addr *dest, unsigned int netmask)
{
	const int ret = lxc_ip_route_dest(RTM_DELROUTE, AF_INET6, ifindex, dest, netmask);

	return ret;
}
185 static int setup_ipv4_routes(struct lxc_netdev
*netdev
)
187 int ifindex
= netdev
->priv
.veth_attr
.ifindex
;
188 struct lxc_inetdev
*inetdev
;
191 list_for_each_entry(inetdev
, &netdev
->priv
.veth_attr
.ipv4_routes
, head
) {
192 err
= lxc_ipv4_dest_add(ifindex
, &inetdev
->addr
, inetdev
->prefix
);
194 return log_error_errno(-1, -err
, "Failed to setup ipv4 route for network device with ifindex %d", ifindex
);
200 static int setup_ipv6_routes(struct lxc_netdev
*netdev
)
203 struct lxc_inet6dev
*inet6dev
;
204 int ifindex
= netdev
->priv
.veth_attr
.ifindex
;
206 list_for_each_entry(inet6dev
, &netdev
->priv
.veth_attr
.ipv6_routes
, head
) {
207 err
= lxc_ipv6_dest_add(ifindex
, &inet6dev
->addr
, inet6dev
->prefix
);
209 return log_error_errno(-1, -err
, "Failed to setup ipv6 route for network device with ifindex %d", ifindex
);
215 static int setup_ipv4_addr_routes(struct lxc_netdev
*netdev
)
218 struct lxc_inetdev
*inetdev
;
221 if (netdev
->type
!= LXC_NET_VETH
)
222 return ret_errno(EINVAL
);
224 ifindex
= netdev
->priv
.veth_attr
.ifindex
;
225 list_for_each_entry(inetdev
, &netdev
->ipv4_addresses
, head
) {
226 err
= lxc_ipv4_dest_add(ifindex
, &inetdev
->addr
, 32);
228 return log_error_errno(-1, err
, "Failed to setup ipv4 address route for network device with eifindex %d", ifindex
);
234 static int setup_ipv6_addr_routes(struct lxc_netdev
*netdev
)
237 struct lxc_inet6dev
*inet6dev
;
240 if (netdev
->type
!= LXC_NET_VETH
)
241 return ret_errno(EINVAL
);
243 ifindex
= netdev
->priv
.veth_attr
.ifindex
;
244 list_for_each_entry(inet6dev
, &netdev
->ipv6_addresses
, head
) {
246 err
= lxc_ipv6_dest_add(ifindex
, &inet6dev
->addr
, 128);
248 return log_error_errno(-1, err
, "Failed to setup ipv6 address route for network device with eifindex %d", ifindex
);
254 static int lxc_ip_neigh_proxy(__u16 nlmsg_type
, int family
, int ifindex
, void *dest
)
256 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
257 struct nl_handler nlh
= NL_HANDLER_INIT
;
258 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
262 addrlen
= family
== AF_INET
? sizeof(struct in_addr
) : sizeof(struct in6_addr
);
264 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
268 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
272 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
276 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
277 nlmsg
->nlmsghdr
->nlmsg_type
= nlmsg_type
;
279 rt
= nlmsg_reserve(nlmsg
, sizeof(struct ndmsg
));
283 rt
->ndm_ifindex
= ifindex
;
284 rt
->ndm_flags
= NTF_PROXY
;
285 rt
->ndm_type
= NDA_DST
;
286 rt
->ndm_family
= family
;
288 if (nla_put_buffer(nlmsg
, NDA_DST
, dest
, addrlen
))
291 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
294 static int lxc_is_ip_forwarding_enabled(const char *ifname
, int family
)
300 if (family
!= AF_INET
&& family
!= AF_INET6
)
301 return ret_set_errno(-1, EINVAL
);
303 ret
= strnprintf(path
, sizeof(path
), "/proc/sys/net/%s/conf/%s/%s",
304 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
307 return ret_set_errno(-1, E2BIG
);
309 return lxc_read_file_expect(path
, buf
, 1, "1");
312 struct bridge_vlan_info
{
317 static int lxc_bridge_vlan(unsigned int ifindex
, unsigned short operation
, unsigned short vlan_id
, bool tagged
)
319 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
320 struct nl_handler nlh
= NL_HANDLER_INIT
;
321 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
323 struct ifinfomsg
*ifi
;
325 unsigned short bridge_flags
= 0;
326 struct bridge_vlan_info vlan_info
;
328 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
332 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
334 return ret_errno(ENOMEM
);
336 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
338 return ret_errno(ENOMEM
);
340 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
341 nlmsg
->nlmsghdr
->nlmsg_type
= operation
;
343 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
345 return ret_errno(ENOMEM
);
346 ifi
->ifi_family
= AF_BRIDGE
;
347 ifi
->ifi_index
= ifindex
;
349 nest
= nla_begin_nested(nlmsg
, IFLA_AF_SPEC
);
351 return ret_errno(ENOMEM
);
353 bridge_flags
|= BRIDGE_FLAGS_MASTER
;
354 if (nla_put_u16(nlmsg
, IFLA_BRIDGE_FLAGS
, bridge_flags
))
355 return ret_errno(ENOMEM
);
357 vlan_info
.vid
= vlan_id
;
360 vlan_info
.flags
= BRIDGE_VLAN_INFO_PVID
| BRIDGE_VLAN_INFO_UNTAGGED
;
362 if (nla_put_buffer(nlmsg
, IFLA_BRIDGE_VLAN_INFO
, &vlan_info
, sizeof(struct bridge_vlan_info
)))
363 return ret_errno(ENOMEM
);
365 nla_end_nested(nlmsg
, nest
);
367 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
/*
 * Add VLAN @vlan_id to the bridge port identified by @ifindex.
 *
 * Forwards to lxc_bridge_vlan() with the RTM_SETLINK operation, passing
 * @tagged through untouched, and returns that helper's result.
 */
static int lxc_bridge_vlan_add(unsigned int ifindex, unsigned short vlan_id, bool tagged)
{
	const int ret = lxc_bridge_vlan(ifindex, RTM_SETLINK, vlan_id, tagged);

	return ret;
}
/*
 * Remove VLAN @vlan_id from the bridge port identified by @ifindex.
 *
 * Forwards to lxc_bridge_vlan() with the RTM_DELLINK operation and a false
 * "tagged" argument, and returns that helper's result.
 */
static int lxc_bridge_vlan_del(unsigned int ifindex, unsigned short vlan_id)
{
	const int ret = lxc_bridge_vlan(ifindex, RTM_DELLINK, vlan_id, false);

	return ret;
}
380 static int lxc_bridge_vlan_add_tagged(unsigned int ifindex
, struct lxc_list
*vlan_ids
)
382 struct lxc_list
*iterator
;
385 lxc_list_for_each(iterator
, vlan_ids
) {
386 unsigned short vlan_id
= PTR_TO_USHORT(iterator
->elem
);
388 err
= lxc_bridge_vlan_add(ifindex
, vlan_id
, true);
390 return log_error_errno(-1, -err
, "Failed to add tagged vlan \"%u\" to ifindex \"%d\"", vlan_id
, ifindex
);
396 static int validate_veth(struct lxc_netdev
*netdev
)
398 if (netdev
->priv
.veth_attr
.mode
!= VETH_MODE_BRIDGE
|| is_empty_string(netdev
->link
)) {
399 /* Check that veth.vlan.id isn't being used in non bridge veth.mode. */
400 if (netdev
->priv
.veth_attr
.vlan_id_set
)
401 return log_error_errno(-1, EINVAL
, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
403 /* Check that veth.vlan.tagged.id isn't being used in non bridge veth.mode. */
404 if (lxc_list_len(&netdev
->priv
.veth_attr
.vlan_tagged_ids
) > 0)
405 return log_error_errno(-1, EINVAL
, "Cannot use veth vlan.id when not in bridge mode or no bridge link specified");
408 if (netdev
->priv
.veth_attr
.vlan_id_set
) {
410 lxc_list_for_each(it
, &netdev
->priv
.veth_attr
.vlan_tagged_ids
) {
411 unsigned short i
= PTR_TO_USHORT(it
->elem
);
412 if (i
== netdev
->priv
.veth_attr
.vlan_id
)
413 return log_error_errno(-1, EINVAL
, "Cannot use same veth vlan.id \"%u\" in vlan.tagged.id", netdev
->priv
.veth_attr
.vlan_id
);
420 static int setup_veth_native_bridge_vlan(char *veth1
, struct lxc_netdev
*netdev
)
422 int err
, rc
, veth1index
;
423 char path
[STRLITERALLEN("/sys/class/net//bridge/vlan_filtering") + IFNAMSIZ
+ 1];
424 char buf
[5]; /* Sufficient size to fit max VLAN ID (4094) and null char. */
426 /* Skip setup if no VLAN options are specified. */
427 if (!netdev
->priv
.veth_attr
.vlan_id_set
&& lxc_list_len(&netdev
->priv
.veth_attr
.vlan_tagged_ids
) <= 0)
430 /* Check vlan filtering is enabled on parent bridge. */
431 rc
= strnprintf(path
, sizeof(path
), "/sys/class/net/%s/bridge/vlan_filtering", netdev
->link
);
435 rc
= lxc_read_from_file(path
, buf
, sizeof(buf
));
437 return log_error_errno(rc
, errno
, "Failed reading from \"%s\"", path
);
441 if (!strequal(buf
, "1"))
442 return log_error_errno(-1, EPERM
, "vlan_filtering is not enabled on \"%s\"", netdev
->link
);
444 /* Get veth1 ifindex for use with netlink. */
445 veth1index
= if_nametoindex(veth1
);
447 return log_error_errno(-1, errno
, "Failed getting ifindex of \"%s\"", netdev
->link
);
449 /* Configure untagged VLAN settings on bridge port if specified. */
450 if (netdev
->priv
.veth_attr
.vlan_id_set
) {
451 unsigned short default_pvid
;
453 /* Get the bridge's default VLAN PVID. */
454 rc
= strnprintf(path
, sizeof(path
), "/sys/class/net/%s/bridge/default_pvid", netdev
->link
);
458 rc
= lxc_read_from_file(path
, buf
, sizeof(buf
));
460 return log_error_errno(rc
, errno
, "Failed reading from \"%s\"", path
);
463 err
= get_u16(&default_pvid
, buf
, 0);
465 return log_error_errno(-1, EINVAL
, "Failed parsing default_pvid of \"%s\"", netdev
->link
);
467 /* If the default PVID on the port is not the specified untagged VLAN, then delete it. */
468 if (default_pvid
!= netdev
->priv
.veth_attr
.vlan_id
) {
469 err
= lxc_bridge_vlan_del(veth1index
, default_pvid
);
471 return log_error_errno(err
, errno
, "Failed to delete default untagged vlan \"%u\" on \"%s\"", default_pvid
, veth1
);
474 if (netdev
->priv
.veth_attr
.vlan_id
> BRIDGE_VLAN_NONE
) {
475 err
= lxc_bridge_vlan_add(veth1index
, netdev
->priv
.veth_attr
.vlan_id
, false);
477 return log_error_errno(err
, errno
, "Failed to add untagged vlan \"%u\" on \"%s\"", netdev
->priv
.veth_attr
.vlan_id
, veth1
);
481 /* Configure tagged VLAN settings on bridge port if specified. */
482 err
= lxc_bridge_vlan_add_tagged(veth1index
, &netdev
->priv
.veth_attr
.vlan_tagged_ids
);
484 return log_error_errno(err
, errno
, "Failed to add tagged vlans on \"%s\"", veth1
);
489 struct ovs_veth_vlan_args
{
491 const char *vlan_mode
; /* Port VLAN mode. */
492 short vlan_id
; /* PVID VLAN ID. */
493 char *trunks
; /* Comma delimited list of tagged VLAN IDs. */
496 static inline void free_ovs_veth_vlan_args(struct ovs_veth_vlan_args
*args
)
498 free_disarm(args
->trunks
);
501 static int lxc_ovs_setup_bridge_vlan_exec(void *data
)
503 struct ovs_veth_vlan_args
*args
= data
;
504 __do_free
char *vlan_mode
= NULL
, *tag
= NULL
, *trunks
= NULL
;
506 if (!args
->vlan_mode
)
507 return ret_errno(EINVAL
);
509 vlan_mode
= must_concat(NULL
, "vlan_mode=", args
->vlan_mode
, (char *)NULL
);
511 if (args
->vlan_id
> BRIDGE_VLAN_NONE
) {
515 rc
= strnprintf(buf
, sizeof(buf
), "%u", args
->vlan_id
);
517 return log_error_errno(-1, EINVAL
, "Failed to parse ovs bridge vlan \"%d\"", args
->vlan_id
);
519 tag
= must_concat(NULL
, "tag=", buf
, (char *)NULL
);
523 trunks
= must_concat(NULL
, "trunks=", args
->trunks
, (char *)NULL
);
525 /* Detect the combination of vlan_id and trunks specified and convert to ovs-vsctl command. */
527 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args
->nic
, vlan_mode
, tag
, trunks
, (char *)NULL
);
529 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args
->nic
, vlan_mode
, tag
, (char *)NULL
);
531 execlp("ovs-vsctl", "ovs-vsctl", "set", "port", args
->nic
, vlan_mode
, trunks
, (char *)NULL
);
538 static int setup_veth_ovs_bridge_vlan(char *veth1
, struct lxc_netdev
*netdev
)
540 int taggedLength
= lxc_list_len(&netdev
->priv
.veth_attr
.vlan_tagged_ids
);
541 struct ovs_veth_vlan_args args
;
543 args
.vlan_mode
= NULL
;
544 args
.vlan_id
= BRIDGE_VLAN_NONE
;
547 /* Skip setup if no VLAN options are specified. */
548 if (!netdev
->priv
.veth_attr
.vlan_id_set
&& taggedLength
<= 0)
551 /* Configure untagged VLAN settings on bridge port if specified. */
552 if (netdev
->priv
.veth_attr
.vlan_id_set
) {
553 if (netdev
->priv
.veth_attr
.vlan_id
== BRIDGE_VLAN_NONE
&& taggedLength
<= 0)
554 return log_error_errno(-1, EINVAL
, "Cannot use vlan.id=none with openvswitch bridges when not using vlan.tagged.id");
556 /* Configure the untagged 'native' membership settings of the port if VLAN ID specified.
557 * Also set the vlan_mode=access, which will drop any tagged frames.
558 * Order is important here, as vlan_mode is set to "access", assuming that vlan.tagged.id is not
559 * used. If vlan.tagged.id is specified, then we expect it to also change the vlan_mode as needed.
561 if (netdev
->priv
.veth_attr
.vlan_id
> BRIDGE_VLAN_NONE
) {
562 args
.vlan_mode
= "access";
563 args
.vlan_id
= netdev
->priv
.veth_attr
.vlan_id
;
567 if (taggedLength
> 0) {
568 args
.vlan_mode
= "trunk"; /* Default to only allowing tagged frames (drop untagged frames). */
570 if (netdev
->priv
.veth_attr
.vlan_id
> BRIDGE_VLAN_NONE
) {
571 /* If untagged vlan mode isn't "none" then allow untagged frames for port's 'native' VLAN. */
572 args
.vlan_mode
= "native-untagged";
575 struct lxc_list
*iterator
;
576 lxc_list_for_each(iterator
, &netdev
->priv
.veth_attr
.vlan_tagged_ids
) {
577 unsigned short vlan_id
= PTR_TO_USHORT(iterator
->elem
);
578 char buf
[5]; /* Sufficient size to fit max VLAN ID (4094) null char. */
581 rc
= strnprintf(buf
, sizeof(buf
), "%u", vlan_id
);
583 free_ovs_veth_vlan_args(&args
);
584 return log_error_errno(-1, EINVAL
, "Failed to parse tagged vlan \"%u\" for interface \"%s\"", vlan_id
, veth1
);
588 args
.trunks
= must_concat(NULL
, args
.trunks
, buf
, ",", (char *)NULL
);
590 args
.trunks
= must_concat(NULL
, buf
, ",", (char *)NULL
);
594 if (args
.vlan_mode
) {
596 char cmd_output
[PATH_MAX
];
598 ret
= run_command(cmd_output
, sizeof(cmd_output
), lxc_ovs_setup_bridge_vlan_exec
, (void *)&args
);
600 free_ovs_veth_vlan_args(&args
);
601 return log_error_errno(-1, ret
, "Failed to setup openvswitch vlan on port \"%s\": %s", args
.nic
, cmd_output
);
605 free_ovs_veth_vlan_args(&args
);
609 static int netdev_configure_server_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
612 unsigned int mtu
= 1500;
614 char veth1buf
[IFNAMSIZ
], veth2buf
[IFNAMSIZ
];
616 err
= validate_veth(netdev
);
620 if (!is_empty_string(netdev
->priv
.veth_attr
.pair
)) {
621 veth1
= netdev
->priv
.veth_attr
.pair
;
622 if (handler
->conf
->reboot
)
623 lxc_netdev_delete_by_name(veth1
);
625 err
= strnprintf(veth1buf
, sizeof(veth1buf
), "vethXXXXXX");
629 veth1
= lxc_ifname_alnum_case_sensitive(veth1buf
);
633 /* store away for deconf */
634 memcpy(netdev
->priv
.veth_attr
.veth1
, veth1
, IFNAMSIZ
);
637 err
= strnprintf(veth2buf
, sizeof(veth2buf
), "vethXXXXXX");
641 veth2
= lxc_ifname_alnum_case_sensitive(veth2buf
);
645 /* if mtu is specified in config then use that, otherwise inherit from link device if provided. */
647 if (lxc_safe_uint(netdev
->mtu
, &mtu
))
648 return log_error_errno(-1, errno
, "Failed to parse mtu");
649 } else if (!is_empty_string(netdev
->link
)) {
652 ifindex_mtu
= if_nametoindex(netdev
->link
);
654 mtu
= netdev_get_mtu(ifindex_mtu
);
655 INFO("Retrieved mtu %d from %s", mtu
, netdev
->link
);
659 err
= lxc_veth_create(veth1
, veth2
, handler
->pid
, mtu
,
660 netdev
->priv
.veth_attr
.n_rxqueues
, netdev
->priv
.veth_attr
.n_txqueues
);
662 return log_error_errno(-1, -err
, "Failed to create veth pair \"%s\" and \"%s\"", veth1
, veth2
);
665 * Veth devices are directly created in the container's network
666 * namespace so the device doesn't need to be moved into the
667 * container's network namespace. Make this explicit by setting the
668 * devices ifindex to 0.
672 strlcpy(netdev
->created_name
, veth2
, IFNAMSIZ
);
675 * Since the device won't be moved transient name generation won't
676 * happen. But the transient name is needed for the container to
677 * retrieve the ifindex for the device.
679 strlcpy(netdev
->transient_name
, veth2
, IFNAMSIZ
);
682 * Changing the high byte of the mac address to 0xfe, the bridge interface
683 * will always keep the host's mac address and not take the mac address
686 err
= setup_private_host_hw_addr(veth1
);
689 SYSERROR("Failed to change mac address of host interface \"%s\"", veth1
);
693 /* Retrieve ifindex of the host's veth device. */
694 netdev
->priv
.veth_attr
.ifindex
= if_nametoindex(veth1
);
695 if (!netdev
->priv
.veth_attr
.ifindex
) {
696 ERROR("Failed to retrieve ifindex for \"%s\"", veth1
);
701 err
= lxc_netdev_set_mtu(veth1
, mtu
);
704 SYSERROR("Failed to set mtu \"%d\" for veth pair \"%s\" ", mtu
, veth1
);
709 if (!is_empty_string(netdev
->link
) && netdev
->priv
.veth_attr
.mode
== VETH_MODE_BRIDGE
) {
710 if (!lxc_nic_exists(netdev
->link
)) {
711 SYSERROR("Failed to attach \"%s\" to bridge \"%s\", bridge interface doesn't exist", veth1
, netdev
->link
);
715 err
= lxc_bridge_attach(netdev
->link
, veth1
);
718 SYSERROR("Failed to attach \"%s\" to bridge \"%s\"", veth1
, netdev
->link
);
721 INFO("Attached \"%s\" to bridge \"%s\"", veth1
, netdev
->link
);
723 if (is_ovs_bridge(netdev
->link
)) {
724 err
= setup_veth_ovs_bridge_vlan(veth1
, netdev
);
726 SYSERROR("Failed to setup openvswitch bridge vlan on \"%s\"", veth1
);
727 lxc_ovs_delete_port(netdev
->link
, veth1
);
731 err
= setup_veth_native_bridge_vlan(veth1
, netdev
);
733 SYSERROR("Failed to setup native bridge vlan on \"%s\"", veth1
);
739 err
= lxc_netdev_up(veth1
);
742 SYSERROR("Failed to set \"%s\" up", veth1
);
746 /* setup ipv4 routes on the host interface */
747 if (setup_ipv4_routes(netdev
)) {
748 ERROR("Failed to setup ipv4 routes for network device \"%s\"", veth1
);
752 /* setup ipv6 routes on the host interface */
753 if (setup_ipv6_routes(netdev
)) {
754 ERROR("Failed to setup ipv6 routes for network device \"%s\"", veth1
);
758 if (netdev
->priv
.veth_attr
.mode
== VETH_MODE_ROUTER
) {
759 /* sleep for a short period of time to work around a bug that intermittently prevents IP neighbour
760 proxy entries from being added using lxc_ip_neigh_proxy below. When the issue occurs the entries
761 appear to be added successfully but then do not appear in the proxy list. The length of time
762 slept doesn't appear to be important, only that the process sleeps for a short period of time.
764 nanosleep((const struct timespec
[]){{0, 1000}}, NULL
);
766 if (netdev
->ipv4_gateway
) {
767 char bufinet4
[INET_ADDRSTRLEN
];
768 if (!inet_ntop(AF_INET
, netdev
->ipv4_gateway
, bufinet4
, sizeof(bufinet4
))) {
769 SYSERROR("Failed to convert gateway ipv4 address on \"%s\"", veth1
);
773 err
= lxc_ip_forwarding_on(veth1
, AF_INET
);
775 SYSERROR("Failed to activate ipv4 forwarding on \"%s\"", veth1
);
779 err
= lxc_ip_neigh_proxy(RTM_NEWNEIGH
, AF_INET
, netdev
->priv
.veth_attr
.ifindex
, netdev
->ipv4_gateway
);
781 SYSERROR("Failed to add gateway ipv4 proxy on \"%s\"", veth1
);
786 if (netdev
->ipv6_gateway
) {
787 char bufinet6
[INET6_ADDRSTRLEN
];
789 if (!inet_ntop(AF_INET6
, netdev
->ipv6_gateway
, bufinet6
, sizeof(bufinet6
))) {
790 SYSERROR("Failed to convert gateway ipv6 address on \"%s\"", veth1
);
794 /* Check for sysctl net.ipv6.conf.all.forwarding=1
795 Kernel requires this to route any packets for IPv6.
797 err
= lxc_is_ip_forwarding_enabled("all", AF_INET6
);
799 SYSERROR("Requires sysctl net.ipv6.conf.all.forwarding=1");
803 err
= lxc_ip_forwarding_on(veth1
, AF_INET6
);
805 SYSERROR("Failed to activate ipv6 forwarding on \"%s\"", veth1
);
809 err
= lxc_neigh_proxy_on(veth1
, AF_INET6
);
811 SYSERROR("Failed to activate proxy ndp on \"%s\"", veth1
);
815 err
= lxc_ip_neigh_proxy(RTM_NEWNEIGH
, AF_INET6
, netdev
->priv
.veth_attr
.ifindex
, netdev
->ipv6_gateway
);
817 SYSERROR("Failed to add gateway ipv6 proxy on \"%s\"", veth1
);
822 /* setup ipv4 address routes on the host interface */
823 err
= setup_ipv4_addr_routes(netdev
);
825 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1
);
829 /* setup ipv6 address routes on the host interface */
830 err
= setup_ipv6_addr_routes(netdev
);
832 SYSERROR("Failed to setup ip address routes for network device \"%s\"", veth1
);
837 if (netdev
->upscript
) {
845 err
= run_script_argv(handler
->name
,
846 handler
->conf
->hooks_version
, "net",
847 netdev
->upscript
, "up", argv
);
852 DEBUG("Instantiated veth tunnel \"%s <--> %s\"", veth1
, veth2
);
857 lxc_netdev_delete_by_name(veth1
);
861 static int netdev_configure_server_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
866 if (is_empty_string(netdev
->link
)) {
867 ERROR("No link for macvlan network device specified");
871 err
= strnprintf(peer
, sizeof(peer
), "mcXXXXXX");
875 if (!lxc_ifname_alnum_case_sensitive(peer
))
878 err
= lxc_macvlan_create(netdev
->link
, peer
,
879 netdev
->priv
.macvlan_attr
.mode
);
882 SYSERROR("Failed to create macvlan interface \"%s\" on \"%s\"",
887 strlcpy(netdev
->created_name
, peer
, IFNAMSIZ
);
889 netdev
->ifindex
= if_nametoindex(peer
);
890 if (!netdev
->ifindex
) {
891 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
898 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
901 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
905 err
= lxc_netdev_set_mtu(peer
, mtu
);
908 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
913 if (netdev
->upscript
) {
920 err
= run_script_argv(handler
->name
,
921 handler
->conf
->hooks_version
, "net",
922 netdev
->upscript
, "up", argv
);
927 DEBUG("Instantiated macvlan \"%s\" with ifindex %d and mode %d",
928 peer
, netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
933 lxc_netdev_delete_by_name(peer
);
937 static int lxc_ipvlan_create(const char *parent
, const char *name
, int mode
, int isolation
)
939 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
940 struct nl_handler nlh
= NL_HANDLER_INIT
;
941 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
943 struct ifinfomsg
*ifi
;
944 struct rtattr
*nest
, *nest2
;
946 len
= strlen(parent
);
947 if (len
== 1 || len
>= IFNAMSIZ
)
948 return ret_errno(EINVAL
);
951 if (len
== 1 || len
>= IFNAMSIZ
)
952 return ret_errno(EINVAL
);
954 index
= if_nametoindex(parent
);
956 return ret_errno(EINVAL
);
958 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
962 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
964 return ret_errno(ENOMEM
);
966 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
968 return ret_errno(ENOMEM
);
970 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
971 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
973 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
975 return ret_errno(ENOMEM
);
976 ifi
->ifi_family
= AF_UNSPEC
;
978 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
980 return ret_errno(EPROTO
);
982 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "ipvlan"))
983 return ret_errno(EPROTO
);
985 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
987 return ret_errno(EPROTO
);
989 if (nla_put_u16(nlmsg
, IFLA_IPVLAN_MODE
, mode
))
990 return ret_errno(EPROTO
);
992 /* if_link.h does not define the isolation flag value for bridge mode (unlike IPVLAN_F_PRIVATE and
993 * IPVLAN_F_VEPA) so we define it as 0 and only send mode if mode >0 as default mode is bridge anyway
994 * according to ipvlan docs.
996 if (isolation
> 0 && nla_put_u16(nlmsg
, IFLA_IPVLAN_ISOLATION
, isolation
))
997 return ret_errno(EPROTO
);
999 nla_end_nested(nlmsg
, nest2
);
1000 nla_end_nested(nlmsg
, nest
);
1002 if (nla_put_u32(nlmsg
, IFLA_LINK
, index
))
1003 return ret_errno(EPROTO
);
1005 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
1006 return ret_errno(EPROTO
);
1008 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
1011 static int netdev_configure_server_ipvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1013 char peer
[IFNAMSIZ
];
1016 if (is_empty_string(netdev
->link
)) {
1017 ERROR("No link for ipvlan network device specified");
1021 err
= strnprintf(peer
, sizeof(peer
), "ipXXXXXX");
1025 if (!lxc_ifname_alnum_case_sensitive(peer
))
1028 err
= lxc_ipvlan_create(netdev
->link
, peer
, netdev
->priv
.ipvlan_attr
.mode
,
1029 netdev
->priv
.ipvlan_attr
.isolation
);
1031 SYSERROR("Failed to create ipvlan interface \"%s\" on \"%s\"",
1032 peer
, netdev
->link
);
1036 strlcpy(netdev
->created_name
, peer
, IFNAMSIZ
);
1038 netdev
->ifindex
= if_nametoindex(peer
);
1039 if (!netdev
->ifindex
) {
1040 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
1047 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
1050 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
1054 err
= lxc_netdev_set_mtu(peer
, mtu
);
1057 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
1062 if (netdev
->upscript
) {
1069 err
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1070 "net", netdev
->upscript
, "up", argv
);
1075 DEBUG("Instantiated ipvlan \"%s\" with ifindex %d and mode %d", peer
,
1076 netdev
->ifindex
, netdev
->priv
.macvlan_attr
.mode
);
1081 lxc_netdev_delete_by_name(peer
);
1085 static int netdev_configure_server_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1087 char peer
[IFNAMSIZ
];
1089 static uint16_t vlan_cntr
= 0;
1091 if (is_empty_string(netdev
->link
)) {
1092 ERROR("No link for vlan network device specified");
1096 err
= strnprintf(peer
, sizeof(peer
), "vlan%d-%d",
1097 netdev
->priv
.vlan_attr
.vid
, vlan_cntr
++);
1101 err
= lxc_vlan_create(netdev
->link
, peer
, netdev
->priv
.vlan_attr
.vid
);
1104 SYSERROR("Failed to create vlan interface \"%s\" on \"%s\"",
1105 peer
, netdev
->link
);
1109 strlcpy(netdev
->created_name
, peer
, IFNAMSIZ
);
1111 netdev
->ifindex
= if_nametoindex(peer
);
1112 if (!netdev
->ifindex
) {
1113 ERROR("Failed to retrieve ifindex for \"%s\"", peer
);
1120 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
1123 SYSERROR("Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
1127 err
= lxc_netdev_set_mtu(peer
, mtu
);
1130 SYSERROR("Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, peer
);
1135 if (netdev
->upscript
) {
1142 err
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1143 "net", netdev
->upscript
, "up", argv
);
1149 DEBUG("Instantiated vlan \"%s\" with ifindex \"%d\"", peer
,
1155 lxc_netdev_delete_by_name(peer
);
1159 static int netdev_configure_server_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1161 int err
, mtu_orig
= 0;
1163 if (is_empty_string(netdev
->link
))
1164 return log_error_errno(-1, errno
, "No link for physical interface specified");
1167 * Note that we're retrieving the container's ifindex in the host's
1168 * network namespace because we need it to move the device from the
1169 * host's network namespace to the container's network namespace later
1171 * Note that netdev->link will contain the name of the physical network
1172 * device in the host's namespace.
1174 netdev
->ifindex
= if_nametoindex(netdev
->link
);
1175 if (!netdev
->ifindex
)
1176 return log_error_errno(-1, errno
, "Failed to retrieve ifindex for \"%s\"", netdev
->link
);
1178 strlcpy(netdev
->created_name
, netdev
->link
, IFNAMSIZ
);
1179 if (is_empty_string(netdev
->name
))
1180 (void)strlcpy(netdev
->name
, netdev
->link
, IFNAMSIZ
);
1183 * Store the ifindex of the host's network device in the host's
1186 netdev
->priv
.phys_attr
.ifindex
= netdev
->ifindex
;
1189 * Get original device MTU setting and store for restoration after
1190 * container shutdown.
1192 mtu_orig
= netdev_get_mtu(netdev
->ifindex
);
1194 return log_error_errno(-1, -mtu_orig
, "Failed to get original mtu for interface \"%s\"", netdev
->link
);
1196 netdev
->priv
.phys_attr
.mtu
= mtu_orig
;
1201 err
= lxc_safe_uint(netdev
->mtu
, &mtu
);
1203 return log_error_errno(-1, -err
, "Failed to parse mtu \"%s\" for interface \"%s\"", netdev
->mtu
, netdev
->link
);
1205 err
= lxc_netdev_set_mtu(netdev
->link
, mtu
);
1207 return log_error_errno(-1, -err
, "Failed to set mtu \"%s\" for interface \"%s\"", netdev
->mtu
, netdev
->link
);
1210 if (netdev
->upscript
) {
1217 err
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1218 "net", netdev
->upscript
, "up", argv
);
1223 DEBUG("Instantiated phys \"%s\" with ifindex \"%d\"", netdev
->link
,
1229 static int netdev_configure_server_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1237 /* The loopback device always has index 1. */
1238 netdev
->ifindex
= 1;
1240 if (!strequal(netdev
->name
, "lo"))
1241 return syserror_set(-EINVAL
, "Custom loopback device names not supported");
1243 if (!netdev
->upscript
)
1246 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1247 "net", netdev
->upscript
, "up", argv
);
1254 static int netdev_configure_server_none(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1256 netdev
->ifindex
= 0;
1260 static netdev_configure_server_cb netdev_configure_server
[LXC_NET_MAXCONFTYPE
+ 1] = {
1261 [LXC_NET_VETH
] = netdev_configure_server_veth
,
1262 [LXC_NET_MACVLAN
] = netdev_configure_server_macvlan
,
1263 [LXC_NET_IPVLAN
] = netdev_configure_server_ipvlan
,
1264 [LXC_NET_VLAN
] = netdev_configure_server_vlan
,
1265 [LXC_NET_PHYS
] = netdev_configure_server_phys
,
1266 [LXC_NET_EMPTY
] = netdev_configure_server_empty
,
1267 [LXC_NET_NONE
] = netdev_configure_server_none
,
1270 static int __netdev_configure_container_common(struct lxc_netdev
*netdev
)
1272 char current_ifname
[IFNAMSIZ
];
1274 netdev
->ifindex
= if_nametoindex(netdev
->transient_name
);
1275 if (!netdev
->ifindex
)
1276 return log_error_errno(-1,
1277 errno
, "Failed to retrieve ifindex for network device with name %s",
1278 netdev
->transient_name
);
1280 if (is_empty_string(netdev
->name
))
1281 (void)strlcpy(netdev
->name
, "eth%d", IFNAMSIZ
);
1283 if (!strequal(netdev
->transient_name
, netdev
->name
)) {
1286 ret
= lxc_netdev_rename_by_name(netdev
->transient_name
, netdev
->name
);
1288 return log_error_errno(-1, -ret
, "Failed to rename network device \"%s\" to \"%s\"",
1289 netdev
->transient_name
, netdev
->name
);
1291 TRACE("Renamed network device from \"%s\" to \"%s\"", netdev
->transient_name
, netdev
->name
);
1295 * Re-read the name of the interface because its name has changed and
1296 * would be automatically allocated by the system
1298 if (!if_indextoname(netdev
->ifindex
, current_ifname
))
1299 return log_error_errno(-1, errno
, "Failed get name for network device with ifindex %d", netdev
->ifindex
);
1302 * Now update the recorded name of the network device to reflect the
1303 * name of the network device in the child's network namespace. We will
1304 * later on send this information back to the parent.
1306 (void)strlcpy(netdev
->name
, current_ifname
, IFNAMSIZ
);
1307 netdev
->transient_name
[0] = '\0';
/* Container-side veth setup: delegates to the common rename/lookup helper. */
static int netdev_configure_container_veth(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
/* Container-side macvlan setup: delegates to the common rename/lookup helper. */
static int netdev_configure_container_macvlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
/* Container-side ipvlan setup: delegates to the common rename/lookup helper. */
static int netdev_configure_container_ipvlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
/* Container-side vlan setup: delegates to the common rename/lookup helper. */
static int netdev_configure_container_vlan(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
/* Container-side phys setup: delegates to the common rename/lookup helper. */
static int netdev_configure_container_phys(struct lxc_netdev *netdev)
{
	return __netdev_configure_container_common(netdev);
}
/* Container-side setup for the "empty" type: nothing to do. */
static int netdev_configure_container_empty(struct lxc_netdev *netdev)
{
	return 0;
}
/* Container-side setup for the "none" type: nothing to do. */
static int netdev_configure_container_none(struct lxc_netdev *netdev)
{
	return 0;
}
1348 static netdev_configure_container_cb netdev_configure_container
[LXC_NET_MAXCONFTYPE
+ 1] = {
1349 [LXC_NET_VETH
] = netdev_configure_container_veth
,
1350 [LXC_NET_MACVLAN
] = netdev_configure_container_macvlan
,
1351 [LXC_NET_IPVLAN
] = netdev_configure_container_ipvlan
,
1352 [LXC_NET_VLAN
] = netdev_configure_container_vlan
,
1353 [LXC_NET_PHYS
] = netdev_configure_container_phys
,
1354 [LXC_NET_EMPTY
] = netdev_configure_container_empty
,
1355 [LXC_NET_NONE
] = netdev_configure_container_none
,
1358 static int netdev_shutdown_server_veth(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1368 if (!netdev
->downscript
)
1371 if (!is_empty_string(netdev
->priv
.veth_attr
.pair
))
1372 argv
[2] = netdev
->priv
.veth_attr
.pair
;
1374 argv
[2] = netdev
->priv
.veth_attr
.veth1
;
1376 ret
= run_script_argv(handler
->name
,
1377 handler
->conf
->hooks_version
, "net",
1378 netdev
->downscript
, "down", argv
);
1385 static int netdev_shutdown_server_macvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1394 if (!netdev
->downscript
)
1397 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1398 "net", netdev
->downscript
, "down", argv
);
1405 static int netdev_shutdown_server_ipvlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1414 if (!netdev
->downscript
)
1417 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1418 "net", netdev
->downscript
, "down", argv
);
1425 static int netdev_shutdown_server_vlan(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1434 if (!netdev
->downscript
)
1437 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1438 "net", netdev
->downscript
, "down", argv
);
1445 static int netdev_shutdown_server_phys(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1454 if (!netdev
->downscript
)
1457 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1458 "net", netdev
->downscript
, "down", argv
);
1465 static int netdev_shutdown_server_empty(struct lxc_handler
*handler
, struct lxc_netdev
*netdev
)
1473 if (!netdev
->downscript
)
1476 ret
= run_script_argv(handler
->name
, handler
->conf
->hooks_version
,
1477 "net", netdev
->downscript
, "down", argv
);
/* Host-side teardown for the "none" type: nothing to do. */
static int netdev_shutdown_server_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
{
	return 0;
}
1489 static netdev_shutdown_server_cb netdev_deconf
[LXC_NET_MAXCONFTYPE
+ 1] = {
1490 [LXC_NET_VETH
] = netdev_shutdown_server_veth
,
1491 [LXC_NET_MACVLAN
] = netdev_shutdown_server_macvlan
,
1492 [LXC_NET_IPVLAN
] = netdev_shutdown_server_ipvlan
,
1493 [LXC_NET_VLAN
] = netdev_shutdown_server_vlan
,
1494 [LXC_NET_PHYS
] = netdev_shutdown_server_phys
,
1495 [LXC_NET_EMPTY
] = netdev_shutdown_server_empty
,
1496 [LXC_NET_NONE
] = netdev_shutdown_server_none
,
1499 static int lxc_netdev_move_by_index_fd(int ifindex
, int fd
, const char *ifname
)
1501 call_cleaner(nlmsg_free
) struct nlmsg
*nlmsg
= NULL
;
1502 struct nl_handler nlh
= NL_HANDLER_INIT
;
1503 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1505 struct ifinfomsg
*ifi
;
1507 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1511 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1513 return ret_errno(ENOMEM
);
1515 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
1516 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1518 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1520 return ret_errno(ENOMEM
);
1522 ifi
->ifi_family
= AF_UNSPEC
;
1523 ifi
->ifi_index
= ifindex
;
1525 if (nla_put_u32(nlmsg
, IFLA_NET_NS_FD
, fd
))
1526 return ret_errno(ENOMEM
);
1528 if (!is_empty_string(ifname
) && nla_put_string(nlmsg
, IFLA_IFNAME
, ifname
))
1529 return ret_errno(ENOMEM
);
1531 return netlink_transaction(nlh_ptr
, nlmsg
, nlmsg
);
1534 int lxc_netdev_move_by_index(int ifindex
, pid_t pid
, const char *ifname
)
1536 call_cleaner(nlmsg_free
) struct nlmsg
*nlmsg
= NULL
;
1537 struct nl_handler nlh
= NL_HANDLER_INIT
;
1538 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1540 struct ifinfomsg
*ifi
;
1542 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1546 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1548 return ret_errno(ENOMEM
);
1550 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
1551 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1553 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1555 return ret_errno(ENOMEM
);
1557 ifi
->ifi_family
= AF_UNSPEC
;
1558 ifi
->ifi_index
= ifindex
;
1560 if (nla_put_u32(nlmsg
, IFLA_NET_NS_PID
, pid
))
1561 return ret_errno(ENOMEM
);
1563 if (!is_empty_string(ifname
) && nla_put_string(nlmsg
, IFLA_IFNAME
, ifname
))
1564 return ret_errno(ENOMEM
);
1566 return netlink_transaction(nlh_ptr
, nlmsg
, nlmsg
);
1569 /* If we are asked to move a wireless interface, then we must actually move its
1570 * phyN device. Detect that condition and return the physname here. The physname
1571 * will be passed to lxc_netdev_move_wlan() which will free it when done.
1573 #define PHYSNAME "/sys/class/net/%s/phy80211/name"
1574 char *is_wlan(const char *ifname
)
1576 __do_fclose
FILE *f
= NULL
;
1577 __do_free
char *path
= NULL
, *physname
= NULL
;
1582 len
= strlen(ifname
) + strlen(PHYSNAME
) - 1;
1583 path
= must_realloc(NULL
, len
+ 1);
1584 ret
= strnprintf(path
, len
, PHYSNAME
, ifname
);
1588 f
= fopen(path
, "re");
1592 /* Feh - sb.st_size is always 4096. */
1593 fseek(f
, 0, SEEK_END
);
1595 fseek(f
, 0, SEEK_SET
);
1599 physname
= malloc(physlen
+ 1);
1603 memset(physname
, 0, physlen
+ 1);
1604 ret
= fread(physname
, 1, physlen
, f
);
1608 for (i
= 0; i
< physlen
; i
++) {
1609 if (physname
[i
] == '\n')
1612 if (physname
[i
] == '\0')
1616 return move_ptr(physname
);
/*
 * Rename network device "old" to "new" from inside the network namespace of
 * process "pid".
 *
 * One visible path returns wait_for_pid(fpid). The other switches to the
 * target's "net" namespace with switch_to_ns() and terminates through _exit()
 * with the result of lxc_netdev_rename_by_name().
 * NOTE(review): how fpid is obtained and what happens when switch_to_ns()
 * fails is not visible in this excerpt - confirm against the full source.
 */
1619 static int lxc_netdev_rename_by_name_in_netns(pid_t pid
, const char *old
,
1629 return wait_for_pid(fpid
);
1631 if (!switch_to_ns(pid
, "net"))
1634 _exit(lxc_netdev_rename_by_name(old
, new));
1637 int lxc_netdev_move_wlan(char *physname
, const char *ifname
, pid_t pid
,
1638 const char *newname
)
1640 __do_free
char *cmd
= NULL
;
1643 /* Move phyN into the container. TODO - do this using netlink.
1644 * However, IIUC this involves a bit more complicated work to talk to
1645 * the 80211 module, so for now just call out to iw.
1647 cmd
= on_path("iw", NULL
);
1649 ERROR("Couldn't find the application iw in PATH");
1659 sprintf(pidstr
, "%d", pid
);
1660 execlp("iw", "iw", "phy", physname
, "set", "netns", pidstr
, (char *)NULL
);
1661 _exit(EXIT_FAILURE
);
1664 if (wait_for_pid(fpid
))
1668 return lxc_netdev_rename_by_name_in_netns(pid
, ifname
, newname
);
1673 int lxc_netdev_move_by_name(const char *ifname
, pid_t pid
, const char* newname
)
1675 __do_free
char *physname
= NULL
;
1681 index
= if_nametoindex(ifname
);
1685 physname
= is_wlan(ifname
);
1687 return lxc_netdev_move_wlan(physname
, ifname
, pid
, newname
);
1689 return lxc_netdev_move_by_index(index
, pid
, newname
);
1692 int lxc_netdev_delete_by_index(int ifindex
)
1694 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1695 struct nl_handler nlh
= NL_HANDLER_INIT
;
1696 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1698 struct ifinfomsg
*ifi
;
1700 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1704 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1706 return ret_errno(ENOMEM
);
1708 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1710 return ret_errno(ENOMEM
);
1712 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
;
1713 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_DELLINK
;
1715 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1717 return ret_errno(ENOMEM
);
1719 ifi
->ifi_family
= AF_UNSPEC
;
1720 ifi
->ifi_index
= ifindex
;
1722 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
/*
 * Delete the network device called @name: resolves the name to an ifindex and
 * defers to lxc_netdev_delete_by_index().
 *
 * NOTE(review): the failure value for an unknown name is reconstructed as
 * -EINVAL - confirm.
 */
int lxc_netdev_delete_by_name(const char *name)
{
	int index = if_nametoindex(name);

	if (index == 0)
		return -EINVAL;

	return lxc_netdev_delete_by_index(index);
}
1736 int lxc_netdev_rename_by_index(int ifindex
, const char *newname
)
1738 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1739 struct nl_handler nlh
= NL_HANDLER_INIT
;
1740 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1742 struct ifinfomsg
*ifi
;
1744 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1748 len
= strlen(newname
);
1749 if (len
== 1 || len
>= IFNAMSIZ
)
1750 return ret_errno(EINVAL
);
1752 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1754 return ret_errno(ENOMEM
);
1756 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1758 return ret_errno(ENOMEM
);
1760 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
;
1761 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1763 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1765 return ret_errno(ENOMEM
);
1767 ifi
->ifi_family
= AF_UNSPEC
;
1768 ifi
->ifi_index
= ifindex
;
1770 if (nla_put_string(nlmsg
, IFLA_IFNAME
, newname
))
1771 return ret_errno(ENOMEM
);
1773 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
1776 int lxc_netdev_rename_by_name(const char *oldname
, const char *newname
)
1780 len
= strlen(oldname
);
1781 if (len
== 1 || len
>= IFNAMSIZ
)
1784 index
= if_nametoindex(oldname
);
1788 return lxc_netdev_rename_by_index(index
, newname
);
1791 int netdev_set_flag(const char *name
, int flag
)
1793 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1794 struct nl_handler nlh
= NL_HANDLER_INIT
;
1795 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1796 int err
, index
, len
;
1797 struct ifinfomsg
*ifi
;
1799 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1804 if (len
== 1 || len
>= IFNAMSIZ
)
1805 return ret_errno(EINVAL
);
1807 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1809 return ret_errno(ENOMEM
);
1811 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1813 return ret_errno(ENOMEM
);
1815 index
= if_nametoindex(name
);
1817 return ret_errno(EINVAL
);
1819 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
1820 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
1822 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1824 return ret_errno(ENOMEM
);
1826 ifi
->ifi_family
= AF_UNSPEC
;
1827 ifi
->ifi_index
= index
;
1828 ifi
->ifi_change
|= IFF_UP
;
1829 ifi
->ifi_flags
|= flag
;
1831 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
1834 static int netdev_get_flag(const char *name
, int *flag
)
1836 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1837 struct nl_handler nlh
= NL_HANDLER_INIT
;
1838 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1839 int err
, index
, len
;
1840 struct ifinfomsg
*ifi
;
1843 return ret_errno(EINVAL
);
1845 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1850 if (len
== 1 || len
>= IFNAMSIZ
)
1851 return ret_errno(EINVAL
);
1853 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1855 return ret_errno(ENOMEM
);
1857 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1859 return ret_errno(ENOMEM
);
1861 index
= if_nametoindex(name
);
1863 return ret_errno(EINVAL
);
1865 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
;
1866 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETLINK
;
1868 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1870 return ret_errno(ENOMEM
);
1872 ifi
->ifi_family
= AF_UNSPEC
;
1873 ifi
->ifi_index
= index
;
1875 err
= netlink_transaction(nlh_ptr
, nlmsg
, answer
);
1877 return ret_set_errno(-1, errno
);
1879 ifi
= NLMSG_DATA(answer
->nlmsghdr
);
1881 *flag
= ifi
->ifi_flags
;
1886 * \brief Check a interface is up or not.
1888 * \param name: name for the interface.
1891 * 0 means interface is down.
1892 * 1 means interface is up.
1893 * Others means error happened, and ret-value is the error number.
1895 int lxc_netdev_isup(const char *name
)
1900 err
= netdev_get_flag(name
, &flag
);
1910 int netdev_get_mtu(int ifindex
)
1912 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
1913 struct nl_handler nlh
= NL_HANDLER_INIT
;
1914 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
1917 int answer_len
, err
, res
;
1918 struct ifinfomsg
*ifi
;
1919 struct nlmsghdr
*msg
;
1921 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
1925 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
1927 return ret_errno(ENOMEM
);
1929 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
1931 return ret_errno(ENOMEM
);
1933 /* Save the answer buffer length, since it will be overwritten
1934 * on the first receive (and we might need to receive more than
1937 answer_len
= answer
->nlmsghdr
->nlmsg_len
;
1939 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_DUMP
;
1940 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETLINK
;
1942 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
1944 return ret_errno(ENOMEM
);
1946 ifi
->ifi_family
= AF_UNSPEC
;
1948 /* Send the request for addresses, which returns all addresses
1949 * on all interfaces. */
1950 err
= netlink_send(nlh_ptr
, nlmsg
);
1952 return ret_set_errno(-1, errno
);
1954 #pragma GCC diagnostic push
1955 #pragma GCC diagnostic ignored "-Wcast-align"
1958 /* Restore the answer buffer length, it might have been
1959 * overwritten by a previous receive.
1961 answer
->nlmsghdr
->nlmsg_len
= answer_len
;
1963 /* Get the (next) batch of reply messages */
1964 err
= netlink_rcv(nlh_ptr
, answer
);
1966 return ret_set_errno(-1, errno
);
1970 /* Satisfy the typing for the netlink macros */
1971 msg
= answer
->nlmsghdr
;
1973 while (NLMSG_OK(msg
, recv_len
)) {
1974 /* Stop reading if we see an error message */
1975 if (msg
->nlmsg_type
== NLMSG_ERROR
) {
1976 struct nlmsgerr
*errmsg
= (struct nlmsgerr
*)NLMSG_DATA(msg
);
1977 return ret_set_errno(errmsg
->error
, errno
);
1980 /* Stop reading if we see a NLMSG_DONE message */
1981 if (msg
->nlmsg_type
== NLMSG_DONE
) {
1986 ifi
= NLMSG_DATA(msg
);
1987 if (ifi
->ifi_index
== ifindex
) {
1988 struct rtattr
*rta
= IFLA_RTA(ifi
);
1989 int attr_len
= msg
->nlmsg_len
- NLMSG_LENGTH(sizeof(*ifi
));
1992 while (RTA_OK(rta
, attr_len
)) {
1994 * Found a local address for the
1995 * requested interface, return it.
1997 if (rta
->rta_type
== IFLA_MTU
) {
1998 memcpy(&res
, RTA_DATA(rta
), sizeof(int));
2002 rta
= RTA_NEXT(rta
, attr_len
);
2006 /* Keep reading more data from the socket if the last
2007 * message had the NLF_F_MULTI flag set.
2009 readmore
= (msg
->nlmsg_flags
& NLM_F_MULTI
);
2011 /* Look at the next message received in this buffer. */
2012 msg
= NLMSG_NEXT(msg
, recv_len
);
2016 #pragma GCC diagnostic pop
2018 /* If we end up here, we didn't find any result, so signal an error. */
2022 int lxc_netdev_set_mtu(const char *name
, int mtu
)
2024 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2025 struct nl_handler nlh
= NL_HANDLER_INIT
;
2026 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2028 struct ifinfomsg
*ifi
;
2030 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2035 if (len
== 1 || len
>= IFNAMSIZ
)
2036 return ret_errno(EINVAL
);
2038 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2040 return ret_errno(ENOMEM
);
2042 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2044 return ret_errno(ENOMEM
);
2046 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
2047 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
2049 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
2051 return ret_errno(ENOMEM
);
2053 ifi
->ifi_family
= AF_UNSPEC
;
2055 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
2056 return ret_errno(ENOMEM
);
2058 if (nla_put_u32(nlmsg
, IFLA_MTU
, mtu
))
2059 return ret_errno(ENOMEM
);
2061 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
2064 int lxc_netdev_up(const char *name
)
2066 return netdev_set_flag(name
, IFF_UP
);
/* Take interface @name down by passing an empty flag set to netdev_set_flag(). */
int lxc_netdev_down(const char *name)
{
	return netdev_set_flag(name, 0);
}
2074 int lxc_veth_create(const char *name1
, const char *name2
, pid_t pid
, unsigned int mtu
,
2075 int n_rxqueues
, int n_txqueues
)
2077 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2078 struct nl_handler nlh
= NL_HANDLER_INIT
;
2079 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2081 struct ifinfomsg
*ifi
;
2082 struct rtattr
*nest1
, *nest2
, *nest3
;
2084 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2088 len
= strlen(name1
);
2089 if (len
== 1 || len
>= IFNAMSIZ
)
2090 return ret_errno(EINVAL
);
2092 len
= strlen(name2
);
2093 if (len
== 1 || len
>= IFNAMSIZ
)
2094 return ret_errno(EINVAL
);
2096 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2098 return ret_errno(ENOMEM
);
2100 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2102 return ret_errno(ENOMEM
);
2104 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
2105 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
2107 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
2109 return ret_errno(ENOMEM
);
2111 ifi
->ifi_family
= AF_UNSPEC
;
2113 nest1
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
2115 return ret_errno(EINVAL
);
2117 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "veth"))
2118 return ret_errno(ENOMEM
);
2120 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
2122 return ret_errno(ENOMEM
);
2124 nest3
= nla_begin_nested(nlmsg
, VETH_INFO_PEER
);
2126 return ret_errno(ENOMEM
);
2128 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
2130 return ret_errno(ENOMEM
);
2132 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name2
))
2133 return ret_errno(ENOMEM
);
2135 if (n_rxqueues
> 0 && nla_put_u32(nlmsg
, IFLA_NUM_RX_QUEUES
, (unsigned int)n_rxqueues
))
2136 return ret_errno(ENOMEM
);
2138 if (n_txqueues
> 0 && nla_put_u32(nlmsg
, IFLA_NUM_TX_QUEUES
, (unsigned int)n_txqueues
))
2139 return ret_errno(ENOMEM
);
2141 if (mtu
> 0 && nla_put_u32(nlmsg
, IFLA_MTU
, mtu
))
2142 return ret_errno(ENOMEM
);
2144 if (pid
> 0 && nla_put_u32(nlmsg
, IFLA_NET_NS_PID
, pid
))
2145 return ret_errno(ENOMEM
);
2147 nla_end_nested(nlmsg
, nest3
);
2148 nla_end_nested(nlmsg
, nest2
);
2149 nla_end_nested(nlmsg
, nest1
);
2151 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name1
))
2152 return ret_errno(ENOMEM
);
2154 if (n_txqueues
> 0 && nla_put_u32(nlmsg
, IFLA_NUM_RX_QUEUES
, (unsigned int)n_txqueues
))
2155 return ret_errno(ENOMEM
);
2157 if (n_rxqueues
> 0 && nla_put_u32(nlmsg
, IFLA_NUM_TX_QUEUES
, (unsigned int)n_rxqueues
))
2158 return ret_errno(ENOMEM
);
2160 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
2163 /* TODO: merge with lxc_macvlan_create */
2164 int lxc_vlan_create(const char *parent
, const char *name
, unsigned short vlanid
)
2166 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2167 struct nl_handler nlh
= NL_HANDLER_INIT
;
2168 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2169 int err
, len
, lindex
;
2170 struct ifinfomsg
*ifi
;
2171 struct rtattr
*nest
, *nest2
;
2173 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2177 len
= strlen(parent
);
2178 if (len
== 1 || len
>= IFNAMSIZ
)
2179 return ret_errno(EINVAL
);
2182 if (len
== 1 || len
>= IFNAMSIZ
)
2183 return ret_errno(EINVAL
);
2185 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2187 return ret_errno(ENOMEM
);
2189 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2191 return ret_errno(ENOMEM
);
2193 lindex
= if_nametoindex(parent
);
2195 return ret_errno(EINVAL
);
2197 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
2198 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
2200 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
2202 return ret_errno(ENOMEM
);
2204 ifi
->ifi_family
= AF_UNSPEC
;
2206 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
2208 return ret_errno(ENOMEM
);
2210 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "vlan"))
2211 return ret_errno(ENOMEM
);
2213 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
2215 return ret_errno(ENOMEM
);
2217 if (nla_put_u16(nlmsg
, IFLA_VLAN_ID
, vlanid
))
2218 return ret_errno(ENOMEM
);
2220 nla_end_nested(nlmsg
, nest2
);
2221 nla_end_nested(nlmsg
, nest
);
2223 if (nla_put_u32(nlmsg
, IFLA_LINK
, lindex
))
2224 return ret_errno(ENOMEM
);
2226 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
2227 return ret_errno(ENOMEM
);
2229 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
2232 int lxc_macvlan_create(const char *parent
, const char *name
, int mode
)
2234 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2235 struct nl_handler nlh
= NL_HANDLER_INIT
;
2236 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2237 int err
, index
, len
;
2238 struct ifinfomsg
*ifi
;
2239 struct rtattr
*nest
, *nest2
;
2241 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2245 len
= strlen(parent
);
2246 if (len
== 1 || len
>= IFNAMSIZ
)
2247 return ret_errno(EINVAL
);
2250 if (len
== 1 || len
>= IFNAMSIZ
)
2251 return ret_errno(EINVAL
);
2253 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2255 return ret_errno(ENOMEM
);
2257 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2259 return ret_errno(ENOMEM
);
2261 index
= if_nametoindex(parent
);
2263 return ret_errno(EINVAL
);
2265 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
| NLM_F_ACK
;
2266 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWLINK
;
2268 ifi
= nlmsg_reserve(nlmsg
, sizeof(struct ifinfomsg
));
2270 return ret_errno(ENOMEM
);
2272 ifi
->ifi_family
= AF_UNSPEC
;
2274 nest
= nla_begin_nested(nlmsg
, IFLA_LINKINFO
);
2276 return ret_errno(ENOMEM
);
2278 if (nla_put_string(nlmsg
, IFLA_INFO_KIND
, "macvlan"))
2279 return ret_errno(ENOMEM
);
2282 nest2
= nla_begin_nested(nlmsg
, IFLA_INFO_DATA
);
2284 return ret_errno(ENOMEM
);
2286 if (nla_put_u32(nlmsg
, IFLA_MACVLAN_MODE
, mode
))
2287 return ret_errno(ENOMEM
);
2289 nla_end_nested(nlmsg
, nest2
);
2292 nla_end_nested(nlmsg
, nest
);
2294 if (nla_put_u32(nlmsg
, IFLA_LINK
, index
))
2295 return ret_errno(ENOMEM
);
2297 if (nla_put_string(nlmsg
, IFLA_IFNAME
, name
))
2298 return ret_errno(ENOMEM
);
2300 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
2303 static int proc_sys_net_write(const char *path
, const char *value
)
2308 fd
= open(path
, O_WRONLY
);
2312 if (lxc_write_nointr(fd
, value
, strlen(value
)) < 0)
2319 static int ip_forwarding_set(const char *ifname
, int family
, int flag
)
2322 char path
[PATH_MAX
];
2324 if (family
!= AF_INET
&& family
!= AF_INET6
)
2327 ret
= strnprintf(path
, sizeof(path
), "/proc/sys/net/%s/conf/%s/%s",
2328 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
2333 return proc_sys_net_write(path
, flag
? "1" : "0");
/* Turn IP forwarding on for interface @name and address family @family. */
int lxc_ip_forwarding_on(const char *name, int family)
{
	return ip_forwarding_set(name, family, 1);
}
/* Turn IP forwarding off for interface @name and address family @family. */
int lxc_ip_forwarding_off(const char *name, int family)
{
	return ip_forwarding_set(name, family, 0);
}
2346 static int neigh_proxy_set(const char *ifname
, int family
, int flag
)
2349 char path
[PATH_MAX
];
2351 if (family
!= AF_INET
&& family
!= AF_INET6
)
2354 ret
= strnprintf(path
, sizeof(path
), "/proc/sys/net/%s/conf/%s/%s",
2355 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
2356 family
== AF_INET
? "proxy_arp" : "proxy_ndp");
2360 return proc_sys_net_write(path
, flag
? "1" : "0");
2363 static int lxc_is_ip_neigh_proxy_enabled(const char *ifname
, int family
)
2366 char path
[PATH_MAX
];
2369 if (family
!= AF_INET
&& family
!= AF_INET6
)
2370 return ret_set_errno(-1, EINVAL
);
2372 ret
= strnprintf(path
, sizeof(path
), "/proc/sys/net/%s/conf/%s/%s",
2373 family
== AF_INET
? "ipv4" : "ipv6", ifname
,
2374 family
== AF_INET
? "proxy_arp" : "proxy_ndp");
2376 return ret_set_errno(-1, E2BIG
);
2378 return lxc_read_file_expect(path
, buf
, 1, "1");
/* Turn neighbour proxying on for interface @name and address family @family. */
int lxc_neigh_proxy_on(const char *name, int family)
{
	return neigh_proxy_set(name, family, 1);
}
/* Turn neighbour proxying off for interface @name and address family @family. */
int lxc_neigh_proxy_off(const char *name, int family)
{
	return neigh_proxy_set(name, family, 0);
}
/*
 * Parse the textual MAC address in macaddr into sockaddr->sa_data.
 *
 * sa_family is set to ARPHRD_ETHER and bytes are written through "data",
 * which points at sa_data. The loop runs until the string ends or ETH_ALEN
 * iterations have been counted in "i". Hex letters 'a'-'f' and 'A'-'F' are
 * mapped to 10-15; the second character of a byte is OR-ed into "val" and may
 * also be ':' or NUL. Each byte is stored masked with 0377 (0xff), and a ':'
 * following a byte is checked for afterwards.
 * NOTE(review): the decimal-digit branches and the error returns are not
 * visible in this excerpt - confirm against the full source.
 */
2391 int lxc_convert_mac(char *macaddr
, struct sockaddr
*sockaddr
)
2396 unsigned char *data
;
2398 sockaddr
->sa_family
= ARPHRD_ETHER
;
2399 data
= (unsigned char *)sockaddr
->sa_data
;
2401 while ((*macaddr
!= '\0') && (i
< ETH_ALEN
)) {
2405 else if (c
>= 'a' && c
<= 'f')
2407 else if (c
>= 'A' && c
<= 'F')
2416 else if (c
>= 'a' && c
<= 'f')
2417 val
|= c
- 'a' + 10;
2418 else if (c
>= 'A' && c
<= 'F')
2419 val
|= c
- 'A' + 10;
2420 else if (c
== ':' || c
== 0)
2426 *data
++ = (unsigned char)(val
& 0377);
2429 if (*macaddr
== ':')
2436 static int ip_addr_add(int family
, int ifindex
, void *addr
, void *bcast
,
2437 void *acast
, int prefix
)
2439 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2440 struct nl_handler nlh
= NL_HANDLER_INIT
;
2441 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2443 struct ifaddrmsg
*ifa
;
2445 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
2446 : sizeof(struct in6_addr
);
2448 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2452 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2454 return ret_errno(ENOMEM
);
2456 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2458 return ret_errno(ENOMEM
);
2460 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
2461 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWADDR
;
2463 ifa
= nlmsg_reserve(nlmsg
, sizeof(struct ifaddrmsg
));
2465 return ret_errno(ENOMEM
);
2467 ifa
->ifa_prefixlen
= prefix
;
2468 ifa
->ifa_index
= ifindex
;
2469 ifa
->ifa_family
= family
;
2472 if (nla_put_buffer(nlmsg
, IFA_LOCAL
, addr
, addrlen
))
2473 return ret_errno(EINVAL
);
2475 if (nla_put_buffer(nlmsg
, IFA_ADDRESS
, addr
, addrlen
))
2476 return ret_errno(EINVAL
);
2478 if (nla_put_buffer(nlmsg
, IFA_BROADCAST
, bcast
, addrlen
))
2479 return ret_errno(EINVAL
);
2481 /* TODO: multicast, anycast with ipv6 */
2482 if (family
== AF_INET6
&&
2483 (memcmp(bcast
, &in6addr_any
, sizeof(in6addr_any
)) ||
2484 memcmp(acast
, &in6addr_any
, sizeof(in6addr_any
))))
2485 return ret_errno(EPROTONOSUPPORT
);
2487 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
/*
 * Add IPv6 address @addr/@prefix to the device with index @ifindex.
 * @mcast is handed to ip_addr_add() in the broadcast slot.
 */
int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
		      struct in6_addr *mcast, struct in6_addr *acast,
		      int prefix)
{
	return ip_addr_add(AF_INET6, ifindex, addr, mcast, acast, prefix);
}
/*
 * Add IPv4 address @addr/@prefix with broadcast address @bcast to the device
 * with index @ifindex. No anycast address is passed on.
 */
int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
		      int prefix)
{
	return ip_addr_add(AF_INET, ifindex, addr, bcast, NULL, prefix);
}
/* Find an IFA_LOCAL (or IFA_ADDRESS if no IFA_LOCAL is present) address from
 * the given RTM_NEWADDR message. Allocates memory for the address and stores
 * that pointer in *res (so res should be an in_addr** or in6_addr**).
 *
 * Messages of a different address family are ignored.
 * NOTE(review): return values (0 on success/ignored, -1 on a malformed
 * attribute or allocation failure) are reconstructed - confirm.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"

static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
{
	struct ifaddrmsg *ifa = NLMSG_DATA(msg);
	struct rtattr *rta = IFA_RTA(ifa);
	int attr_len = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
	int addrlen;

	if (ifa->ifa_family != family)
		return 0;

	if (family == AF_INET)
		addrlen = sizeof(struct in_addr);
	else
		addrlen = sizeof(struct in6_addr);

	/* Walk the attributes attached to this message. */
	for (; RTA_OK(rta, attr_len); rta = RTA_NEXT(rta, attr_len)) {
		if (rta->rta_type != IFA_LOCAL && rta->rta_type != IFA_ADDRESS)
			continue;

		/* The family check above should already guarantee the
		 * payload size, but verify it before copying.
		 */
		if (RTA_PAYLOAD(rta) != (unsigned int)addrlen)
			return -1;

		/* An IFA_ADDRESS stored earlier is overwritten in place by a
		 * later IFA_LOCAL, so only allocate once.
		 */
		if (!*res) {
			*res = malloc(addrlen);
			if (!*res)
				return -1;
		}

		memcpy(*res, RTA_DATA(rta), addrlen);

		/* IFA_LOCAL is the preferred answer: stop looking. */
		if (rta->rta_type == IFA_LOCAL)
			break;
	}

	return 0;
}

#pragma GCC diagnostic pop
2557 static int ip_addr_get(int family
, int ifindex
, void **res
)
2559 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2560 struct nl_handler nlh
= NL_HANDLER_INIT
;
2561 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2562 int answer_len
, err
;
2563 struct ifaddrmsg
*ifa
;
2564 struct nlmsghdr
*msg
;
2568 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2572 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2574 return ret_errno(ENOMEM
);
2576 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2578 return ret_errno(ENOMEM
);
2580 /* Save the answer buffer length, since it will be overwritten on the
2581 * first receive (and we might need to receive more than once).
2583 answer_len
= answer
->nlmsghdr
->nlmsg_len
;
2585 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ROOT
;
2586 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_GETADDR
;
2588 ifa
= nlmsg_reserve(nlmsg
, sizeof(struct ifaddrmsg
));
2590 return ret_errno(ENOMEM
);
2592 ifa
->ifa_family
= family
;
2594 /* Send the request for addresses, which returns all addresses on all
2597 err
= netlink_send(nlh_ptr
, nlmsg
);
2599 return ret_set_errno(err
, errno
);
2601 #pragma GCC diagnostic push
2602 #pragma GCC diagnostic ignored "-Wcast-align"
2605 /* Restore the answer buffer length, it might have been
2606 * overwritten by a previous receive.
2608 answer
->nlmsghdr
->nlmsg_len
= answer_len
;
2610 /* Get the (next) batch of reply messages. */
2611 err
= netlink_rcv(nlh_ptr
, answer
);
2613 return ret_set_errno(err
, errno
);
2618 /* Satisfy the typing for the netlink macros. */
2619 msg
= answer
->nlmsghdr
;
2621 while (NLMSG_OK(msg
, recv_len
)) {
2622 /* Stop reading if we see an error message. */
2623 if (msg
->nlmsg_type
== NLMSG_ERROR
) {
2624 struct nlmsgerr
*errmsg
= (struct nlmsgerr
*)NLMSG_DATA(msg
);
2625 return ret_set_errno(errmsg
->error
, errno
);
2628 /* Stop reading if we see a NLMSG_DONE message. */
2629 if (msg
->nlmsg_type
== NLMSG_DONE
) {
2634 if (msg
->nlmsg_type
!= RTM_NEWADDR
)
2635 return ret_errno(EINVAL
);
2637 ifa
= (struct ifaddrmsg
*)NLMSG_DATA(msg
);
2638 if (ifa
->ifa_index
== (__u32
)ifindex
) {
2639 if (ifa_get_local_ip(family
, msg
, res
) < 0)
2640 return ret_errno(EINVAL
);
2642 /* Found a result, stop searching. */
2647 /* Keep reading more data from the socket if the last
2648 * message had the NLM_F_MULTI flag set.
2650 readmore
= (msg
->nlmsg_flags
& NLM_F_MULTI
);
2652 /* Look at the next message received in this buffer. */
2653 msg
= NLMSG_NEXT(msg
, recv_len
);
2657 #pragma GCC diagnostic pop
2659 /* If we end up here, we didn't find any result, so signal an
/*
 * Look up an IPv6 address on the interface with index @ifindex.
 *
 * Thin wrapper: forwards to ip_addr_get() with the family fixed to AF_INET6.
 * @res is passed through as the output location and ip_addr_get()'s return
 * value is handed back unchanged.
 */
int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
{
	void **addr = (void **)res;

	return ip_addr_get(AF_INET6, ifindex, addr);
}
/*
 * Look up an IPv4 address on the interface with index @ifindex.
 *
 * Thin wrapper: forwards to ip_addr_get() with the family fixed to AF_INET.
 * @res is passed through as the output location and ip_addr_get()'s return
 * value is handed back unchanged.
 */
int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
{
	void **addr = (void **)res;

	return ip_addr_get(AF_INET, ifindex, addr);
}
2675 static int ip_gateway_add(int family
, int ifindex
, void *gw
)
2677 call_cleaner(nlmsg_free
) struct nlmsg
*answer
= NULL
, *nlmsg
= NULL
;
2678 struct nl_handler nlh
= NL_HANDLER_INIT
;
2679 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
2683 addrlen
= family
== AF_INET
? sizeof(struct in_addr
)
2684 : sizeof(struct in6_addr
);
2686 err
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
2690 nlmsg
= nlmsg_alloc(NLMSG_GOOD_SIZE
);
2692 return ret_errno(ENOMEM
);
2694 answer
= nlmsg_alloc_reserve(NLMSG_GOOD_SIZE
);
2696 return ret_errno(ENOMEM
);
2698 nlmsg
->nlmsghdr
->nlmsg_flags
= NLM_F_ACK
| NLM_F_REQUEST
| NLM_F_CREATE
| NLM_F_EXCL
;
2699 nlmsg
->nlmsghdr
->nlmsg_type
= RTM_NEWROUTE
;
2701 rt
= nlmsg_reserve(nlmsg
, sizeof(struct rtmsg
));
2703 return ret_errno(ENOMEM
);
2705 rt
->rtm_family
= family
;
2706 rt
->rtm_table
= RT_TABLE_MAIN
;
2707 rt
->rtm_scope
= RT_SCOPE_UNIVERSE
;
2708 rt
->rtm_protocol
= RTPROT_BOOT
;
2709 rt
->rtm_type
= RTN_UNICAST
;
2710 /* "default" destination */
2711 rt
->rtm_dst_len
= 0;
2713 /* If gateway address not supplied, then a device route will be created instead */
2714 if (gw
&& nla_put_buffer(nlmsg
, RTA_GATEWAY
, gw
, addrlen
))
2715 return ret_errno(ENOMEM
);
2717 /* Adding the interface index enables the use of link-local
2718 * addresses for the gateway.
2720 if (nla_put_u32(nlmsg
, RTA_OIF
, ifindex
))
2721 return ret_errno(EINVAL
);
2723 return netlink_transaction(nlh_ptr
, nlmsg
, answer
);
/*
 * Add an IPv4 default route via @gw on the interface with index @ifindex.
 *
 * Forwards to ip_gateway_add() with the family fixed to AF_INET and returns
 * its result unchanged.
 */
int lxc_ipv4_gateway_add(int ifindex, struct in_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET, ifindex, gateway);
}
/*
 * Add an IPv6 default route via @gw on the interface with index @ifindex.
 *
 * Forwards to ip_gateway_add() with the family fixed to AF_INET6 and returns
 * its result unchanged.
 */
int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
{
	void *gateway = gw;

	return ip_gateway_add(AF_INET6, ifindex, gateway);
}
2735 bool is_ovs_bridge(const char *bridge
)
2739 char brdirname
[22 + IFNAMSIZ
+ 1] = {0};
2741 ret
= strnprintf(brdirname
, 22 + IFNAMSIZ
+ 1,
2742 "/sys/class/net/%s/bridge", bridge
);
2746 ret
= stat(brdirname
, &sb
);
2747 if (ret
< 0 && errno
== ENOENT
)
/*
 * Argument bundle handed through run_command()'s opaque data pointer to the
 * ovs-vsctl exec callbacks, which read the bridge and nic names from it.
 */
2753 struct ovs_veth_args
{
2758 /* Called from a background thread - when nic goes away, remove it from the
2761 static int lxc_ovs_delete_port_exec(void *data
)
2763 struct ovs_veth_args
*args
= data
;
2765 execlp("ovs-vsctl", "ovs-vsctl", "del-port", args
->bridge
, args
->nic
, (char *)NULL
);
2769 int lxc_ovs_delete_port(const char *bridge
, const char *nic
)
2772 char cmd_output
[PATH_MAX
];
2773 struct ovs_veth_args args
;
2775 args
.bridge
= bridge
;
2777 ret
= run_command(cmd_output
, sizeof(cmd_output
),
2778 lxc_ovs_delete_port_exec
, (void *)&args
);
2780 return log_error(-1, "Failed to delete \"%s\" from openvswitch bridge \"%s\": %s", nic
, bridge
, cmd_output
);
2785 static int lxc_ovs_attach_bridge_exec(void *data
)
2787 struct ovs_veth_args
*args
= data
;
2789 execlp("ovs-vsctl", "ovs-vsctl", "add-port", args
->bridge
, args
->nic
, (char *)NULL
);
2793 static int lxc_ovs_attach_bridge(const char *bridge
, const char *nic
)
2796 char cmd_output
[PATH_MAX
];
2797 struct ovs_veth_args args
;
2799 args
.bridge
= bridge
;
2801 ret
= run_command(cmd_output
, sizeof(cmd_output
),
2802 lxc_ovs_attach_bridge_exec
, (void *)&args
);
2804 return log_error(-1, "Failed to attach \"%s\" to openvswitch bridge \"%s\": %s", nic
, bridge
, cmd_output
);
2809 int lxc_bridge_attach(const char *bridge
, const char *ifname
)
2815 if (strlen(ifname
) >= IFNAMSIZ
)
2818 index
= if_nametoindex(ifname
);
2822 if (is_ovs_bridge(bridge
))
2823 return lxc_ovs_attach_bridge(bridge
, ifname
);
2825 fd
= socket(AF_INET
, SOCK_STREAM
| SOCK_CLOEXEC
, 0);
2829 retlen
= strlcpy(ifr
.ifr_name
, bridge
, IFNAMSIZ
);
2830 if (retlen
>= IFNAMSIZ
) {
2835 ifr
.ifr_name
[IFNAMSIZ
- 1] = '\0';
2836 ifr
.ifr_ifindex
= index
;
2837 err
= ioctl(fd
, SIOCBRADDIF
, &ifr
);
2845 int setup_private_host_hw_addr(char *veth1
)
2847 __do_close
int sockfd
= -EBADF
;
2851 sockfd
= socket(AF_INET
, SOCK_DGRAM
| SOCK_CLOEXEC
, 0);
2855 err
= strnprintf((char *)ifr
.ifr_name
, IFNAMSIZ
, "%s", veth1
);
2859 err
= ioctl(sockfd
, SIOCGIFHWADDR
, &ifr
);
2863 ifr
.ifr_hwaddr
.sa_data
[0] = 0xfe;
2864 err
= ioctl(sockfd
, SIOCSIFHWADDR
, &ifr
);
2871 int lxc_find_gateway_addresses(struct lxc_handler
*handler
)
2873 struct lxc_netdev
*netdev
;
2876 list_for_each_entry(netdev
, &handler
->conf
->netdevs
, head
) {
2877 if (!netdev
->ipv4_gateway_auto
&& !netdev
->ipv6_gateway_auto
)
2880 if (netdev
->type
!= LXC_NET_VETH
&& netdev
->type
!= LXC_NET_MACVLAN
)
2881 return log_error_errno(-1, EINVAL
, "Automatic gateway detection is only supported for veth and macvlan");
2883 if (is_empty_string(netdev
->link
))
2884 return log_error_errno(-1, errno
, "Automatic gateway detection needs a link interface");
2886 link_index
= if_nametoindex(netdev
->link
);
2890 if (netdev
->ipv4_gateway_auto
) {
2891 if (lxc_ipv4_addr_get(link_index
, &netdev
->ipv4_gateway
))
2892 return log_error_errno(-1, errno
, "Failed to automatically find ipv4 gateway address from link interface \"%s\"", netdev
->link
);
2895 if (netdev
->ipv6_gateway_auto
) {
2896 if (lxc_ipv6_addr_get(link_index
, &netdev
->ipv6_gateway
))
2897 return log_error_errno(-1, errno
, "Failed to automatically find ipv6 gateway address from link interface \"%s\"", netdev
->link
);
2904 #define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
2905 static int lxc_create_network_unpriv_exec(const char *lxcpath
,
2906 const char *lxcname
,
2907 struct lxc_netdev
*netdev
, pid_t pid
,
2908 unsigned int hooks_version
)
2912 int bytes
, pipefd
[2];
2913 char *token
, *saveptr
= NULL
;
2914 char netdev_link
[IFNAMSIZ
];
2915 char buffer
[PATH_MAX
] = {0};
2918 if (netdev
->type
!= LXC_NET_VETH
)
2919 return log_error_errno(-1, errno
,
2920 "Network type %d not support for unprivileged use",
2925 return log_error_errno(-1, errno
, "Failed to create pipe");
2931 return log_error_errno(-1, errno
, "Failed to create new process");
2935 char pidstr
[INTTYPE_TO_STRLEN(pid_t
)];
2939 ret
= dup2(pipefd
[1], STDOUT_FILENO
);
2941 ret
= dup2(pipefd
[1], STDERR_FILENO
);
2944 SYSERROR("Failed to duplicate std{err,out} file descriptor");
2945 _exit(EXIT_FAILURE
);
2948 if (!is_empty_string(netdev
->link
))
2949 retlen
= strlcpy(netdev_link
, netdev
->link
, IFNAMSIZ
);
2951 retlen
= strlcpy(netdev_link
, "none", IFNAMSIZ
);
2952 if (retlen
>= IFNAMSIZ
) {
2953 SYSERROR("Invalid network device name");
2954 _exit(EXIT_FAILURE
);
2957 ret
= strnprintf(pidstr
, sizeof(pidstr
), "%d", pid
);
2959 _exit(EXIT_FAILURE
);
2960 pidstr
[sizeof(pidstr
) - 1] = '\0';
2962 INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath
,
2963 lxcname
, pidstr
, netdev_link
, !is_empty_string(netdev
->name
) ? netdev
->name
: "(null)");
2964 if (!is_empty_string(netdev
->name
))
2965 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "create",
2966 lxcpath
, lxcname
, pidstr
, "veth", netdev_link
,
2967 netdev
->name
, (char *)NULL
);
2969 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "create",
2970 lxcpath
, lxcname
, pidstr
, "veth", netdev_link
,
2972 SYSERROR("Failed to execute lxc-user-nic");
2973 _exit(EXIT_FAILURE
);
2976 /* close the write-end of the pipe */
2979 bytes
= lxc_read_nointr(pipefd
[0], &buffer
, sizeof(buffer
));
2981 SYSERROR("Failed to read from pipe file descriptor");
2984 buffer
[bytes
- 1] = '\0';
2987 ret
= wait_for_pid(child
);
2989 if (ret
!= 0 || bytes
< 0)
2990 return log_error(-1, "lxc-user-nic failed to configure requested network: %s",
2991 buffer
[0] != '\0' ? buffer
: "(null)");
2992 TRACE("Received output \"%s\" from lxc-user-nic", buffer
);
2995 token
= strtok_r(buffer
, ":", &saveptr
);
2997 return log_error(-1, "Failed to parse lxc-user-nic output");
3000 * lxc-user-nic will take care of proper network device naming. So
3001 * netdev->name and netdev->transient_name need to be identical to not
3002 * trigger another rename later on.
3004 retlen
= strlcpy(netdev
->name
, token
, IFNAMSIZ
);
3005 if (retlen
< IFNAMSIZ
) {
3006 retlen
= strlcpy(netdev
->transient_name
, token
, IFNAMSIZ
);
3007 if (retlen
< IFNAMSIZ
)
3008 retlen
= strlcpy(netdev
->created_name
, token
, IFNAMSIZ
);
3010 if (retlen
>= IFNAMSIZ
)
3011 return log_error_errno(-1, E2BIG
,
3012 "Container side veth device name returned by lxc-user-nic is too long");
3014 /* netdev->ifindex */
3015 token
= strtok_r(NULL
, ":", &saveptr
);
3017 return log_error(-1, "Failed to parse lxc-user-nic output");
3019 ret
= lxc_safe_int(token
, &netdev
->ifindex
);
3021 return log_error_errno(-1, -ret
,
3022 "Failed to convert string \"%s\" to integer", token
);
3024 /* netdev->priv.veth_attr.veth1 */
3025 token
= strtok_r(NULL
, ":", &saveptr
);
3027 return log_error(-1, "Failed to parse lxc-user-nic output");
3029 retlen
= strlcpy(netdev
->priv
.veth_attr
.veth1
, token
, IFNAMSIZ
);
3030 if (retlen
>= IFNAMSIZ
)
3031 return log_error_errno(-1, E2BIG
,
3032 "Host side veth device name returned by lxc-user-nic is too long");
3034 /* netdev->priv.veth_attr.ifindex */
3035 token
= strtok_r(NULL
, ":", &saveptr
);
3037 return log_error(-1, "Failed to parse lxc-user-nic output");
3039 ret
= lxc_safe_int(token
, &netdev
->priv
.veth_attr
.ifindex
);
3041 return log_error_errno(-1, -ret
,
3042 "Failed to convert string \"%s\" to integer", token
);
3044 if (netdev
->upscript
) {
3048 netdev
->priv
.veth_attr
.veth1
,
3052 ret
= run_script_argv(lxcname
, hooks_version
, "net",
3053 netdev
->upscript
, "up", argv
);
3061 static int lxc_delete_network_unpriv_exec(const char *lxcpath
, const char *lxcname
,
3062 struct lxc_netdev
*netdev
,
3063 const char *netns_path
)
3068 char buffer
[PATH_MAX
] = {};
3070 if (netdev
->type
!= LXC_NET_VETH
)
3071 return log_error_errno(-1, EINVAL
, "Network type %d not support for unprivileged use", netdev
->type
);
3075 return log_error_errno(-1, errno
, "Failed to create pipe");
3081 return log_error_errno(-1, errno
, "Failed to create new process");
3089 ret
= dup2(pipefd
[1], STDOUT_FILENO
);
3091 ret
= dup2(pipefd
[1], STDERR_FILENO
);
3094 SYSERROR("Failed to duplicate std{err,out} file descriptor");
3095 _exit(EXIT_FAILURE
);
3098 if (!is_empty_string(netdev
->priv
.veth_attr
.pair
))
3099 hostveth
= netdev
->priv
.veth_attr
.pair
;
3101 hostveth
= netdev
->priv
.veth_attr
.veth1
;
3102 if (is_empty_string(hostveth
)) {
3103 SYSERROR("Host side veth device name is missing");
3104 _exit(EXIT_FAILURE
);
3107 if (is_empty_string(netdev
->link
)) {
3108 SYSERROR("Network link for network device \"%s\" is missing", netdev
->priv
.veth_attr
.veth1
);
3109 _exit(EXIT_FAILURE
);
3112 INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath
,
3113 lxcname
, netns_path
, netdev
->link
, hostveth
);
3114 execlp(LXC_USERNIC_PATH
, LXC_USERNIC_PATH
, "delete", lxcpath
,
3115 lxcname
, netns_path
, "veth", netdev
->link
, hostveth
,
3117 SYSERROR("Failed to exec lxc-user-nic.");
3118 _exit(EXIT_FAILURE
);
3123 bytes
= lxc_read_nointr(pipefd
[0], &buffer
, sizeof(buffer
));
3125 SYSERROR("Failed to read from pipe file descriptor.");
3128 buffer
[bytes
- 1] = '\0';
3131 ret
= wait_for_pid(child
);
3132 close_prot_errno_disarm(pipefd
[0]);
3133 if (ret
!= 0 || bytes
< 0)
3134 return log_error_errno(-1, errno
, "lxc-user-nic failed to delete requested network: %s",
3135 !is_empty_string(buffer
) ? buffer
: "(null)");
3140 static bool lxc_delete_network_unpriv(struct lxc_handler
*handler
)
3143 struct lxc_netdev
*netdev
;
3144 /* strlen("/proc/") = 6
3146 * INTTYPE_TO_STRLEN(pid_t)
3148 * strlen("/fd/") = 4
3150 * INTTYPE_TO_STRLEN(int)
3154 char netns_path
[6 + INTTYPE_TO_STRLEN(pid_t
) + 4 + INTTYPE_TO_STRLEN(int) + 1];
3158 if (handler
->nsfd
[LXC_NS_NET
] < 0)
3159 return log_debug(false, "Cannot not guarantee safe deletion of network devices. Manual cleanup maybe needed");
3161 ret
= strnprintf(netns_path
, sizeof(netns_path
), "/proc/%d/fd/%d",
3162 lxc_raw_getpid(), handler
->nsfd
[LXC_NS_NET
]);
3166 list_for_each_entry(netdev
, &handler
->conf
->netdevs
, head
) {
3167 char *hostveth
= NULL
;
3169 /* We can only delete devices whose ifindex we have. If we don't
3170 * have the index it means that we didn't create it.
3172 if (!netdev
->ifindex
)
3175 if (netdev
->type
== LXC_NET_PHYS
) {
3176 ret
= lxc_netdev_rename_by_index(netdev
->ifindex
,
3179 WARN("Failed to rename interface with index %d to its initial name \"%s\"",
3180 netdev
->ifindex
, netdev
->link
);
3182 TRACE("Renamed interface with index %d to its initial name \"%s\"",
3183 netdev
->ifindex
, netdev
->link
);
3185 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
3187 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3188 netdev
->ifindex
, netdev
->link
);
3189 goto clear_ifindices
;
3192 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
3194 WARN("Failed to deconfigure network device");
3196 if (netdev
->type
!= LXC_NET_VETH
)
3197 goto clear_ifindices
;
3199 if (is_empty_string(netdev
->link
) || !is_ovs_bridge(netdev
->link
))
3200 goto clear_ifindices
;
3202 if (!is_empty_string(netdev
->priv
.veth_attr
.pair
))
3203 hostveth
= netdev
->priv
.veth_attr
.pair
;
3205 hostveth
= netdev
->priv
.veth_attr
.veth1
;
3206 if (is_empty_string(hostveth
))
3207 goto clear_ifindices
;
3209 ret
= lxc_delete_network_unpriv_exec(handler
->lxcpath
,
3210 handler
->name
, netdev
,
3213 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth
, netdev
->link
);
3214 goto clear_ifindices
;
3216 INFO("Removed interface \"%s\" from \"%s\"", hostveth
, netdev
->link
);
3220 * We need to clear any ifindices we recorded so liblxc won't
3221 * have cached stale data which would cause it to fail on
3222 * reboot where we don't re-read the on-disk config file.
3224 netdev
->ifindex
= 0;
3225 if (netdev
->type
== LXC_NET_PHYS
) {
3226 netdev
->priv
.phys_attr
.ifindex
= 0;
3227 } else if (netdev
->type
== LXC_NET_VETH
) {
3228 netdev
->priv
.veth_attr
.veth1
[0] = '\0';
3229 netdev
->priv
.veth_attr
.ifindex
= 0;
3236 static int lxc_setup_l2proxy(struct lxc_netdev
*netdev
) {
3237 struct lxc_inetdev
*inet4dev
;
3238 struct lxc_inet6dev
*inet6dev
;
3239 char bufinet4
[INET_ADDRSTRLEN
], bufinet6
[INET6_ADDRSTRLEN
];
3241 unsigned int lo_ifindex
= 0, link_ifindex
= 0;
3243 link_ifindex
= if_nametoindex(netdev
->link
);
3244 if (link_ifindex
== 0)
3245 return log_error_errno(-1, errno
, "Failed to retrieve ifindex for \"%s\" l2proxy setup", netdev
->link
);
3248 /* If IPv4 addresses are specified, then check that sysctl is configured correctly. */
3249 if (!list_empty(&netdev
->ipv4_addresses
)) {
3250 /* Check for net.ipv4.conf.[link].forwarding=1 */
3251 if (lxc_is_ip_forwarding_enabled(netdev
->link
, AF_INET
) < 0)
3252 return log_error_errno(-1, EINVAL
, "Requires sysctl net.ipv4.conf.%s.forwarding=1", netdev
->link
);
3255 /* If IPv6 addresses are specified, then check that sysctl is configured correctly. */
3256 if (!list_empty(&netdev
->ipv6_addresses
)) {
3257 /* Check for net.ipv6.conf.[link].proxy_ndp=1 */
3258 if (lxc_is_ip_neigh_proxy_enabled(netdev
->link
, AF_INET6
) < 0)
3259 return log_error_errno(-1, EINVAL
, "Requires sysctl net.ipv6.conf.%s.proxy_ndp=1", netdev
->link
);
3261 /* Check for net.ipv6.conf.[link].forwarding=1 */
3262 if (lxc_is_ip_forwarding_enabled(netdev
->link
, AF_INET6
) < 0)
3263 return log_error_errno(-1, EINVAL
, "Requires sysctl net.ipv6.conf.%s.forwarding=1", netdev
->link
);
3266 /* Perform IPVLAN specific checks. */
3267 if (netdev
->type
== LXC_NET_IPVLAN
) {
3268 /* Check mode is l3s as other modes do not work with l2proxy. */
3269 if (netdev
->priv
.ipvlan_attr
.mode
!= IPVLAN_MODE_L3S
)
3270 return log_error_errno(-1, EINVAL
, "Requires ipvlan mode on dev \"%s\" be l3s when used with l2proxy", netdev
->link
);
3272 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3273 lo_ifindex
= if_nametoindex(loop_device
);
3274 if (lo_ifindex
== 0)
3275 return log_error_errno(-1, EINVAL
, "Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device
);
3278 list_for_each_entry(inet4dev
, &netdev
->ipv4_addresses
, head
) {
3279 if (!inet_ntop(AF_INET
, &inet4dev
->addr
, bufinet4
, sizeof(bufinet4
)))
3280 return ret_set_errno(-1, -errno
);
3282 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH
, AF_INET
, link_ifindex
, &inet4dev
->addr
) < 0)
3283 return ret_set_errno(-1, EINVAL
);
3285 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3286 if (netdev
->type
== LXC_NET_IPVLAN
) {
3287 err
= lxc_ipv4_dest_add(lo_ifindex
, &inet4dev
->addr
, 32);
3289 return log_error_errno(-1, -err
, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4
, loop_device
);
3293 list_for_each_entry(inet6dev
, &netdev
->ipv6_addresses
, head
) {
3294 if (!inet_ntop(AF_INET6
, &inet6dev
->addr
, bufinet6
, sizeof(bufinet6
)))
3295 return ret_set_errno(-1, -errno
);
3297 if (lxc_ip_neigh_proxy(RTM_NEWNEIGH
, AF_INET6
, link_ifindex
, &inet6dev
->addr
) < 0)
3298 return ret_set_errno(-1, EINVAL
);
3300 /* IPVLAN requires a route to local-loopback to trigger l2proxy. */
3301 if (netdev
->type
== LXC_NET_IPVLAN
) {
3302 err
= lxc_ipv6_dest_add(lo_ifindex
, &inet6dev
->addr
, 128);
3304 return log_error_errno(-1, -err
, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6
, loop_device
);
/*
 * Remove the l2proxy state for one IPv4 address.
 *
 * @ip:         address to clean up.
 * @link:       device holding the neighbour proxy entry; skipped if empty.
 * @lo_ifindex: loopback ifindex; if non-zero the /32 route to it is deleted.
 *
 * Both removals are attempted even if the first one fails. Returns 0 if
 * everything succeeded, -1 with errno EINVAL otherwise. An address that
 * cannot be formatted or a link without an ifindex aborts immediately.
 */
static int lxc_delete_ipv4_l2proxy(struct in_addr *ip, char *link, unsigned int lo_ifindex)
{
	char bufinet4[INET_ADDRSTRLEN];
	unsigned int link_ifindex = 0;
	bool had_error = false;

	if (!inet_ntop(AF_INET, ip, bufinet4, sizeof(bufinet4)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv4 removal on dev \"%s\"", link);

	/* A loopback ifindex means a static route to lo has to go as well. */
	if (lo_ifindex > 0 && lxc_ipv4_dest_del(lo_ifindex, ip, 32) < 0) {
		had_error = true;
		ERROR("Failed to delete ipv4 dest \"%s\" for network ifindex \"%u\"", bufinet4, lo_ifindex);
	}

	/* With a link given, drop the neighbour proxy entry for this IP on it. */
	if (!is_empty_string(link)) {
		link_ifindex = if_nametoindex(link);
		if (link_ifindex == 0)
			return log_error_errno(-1, EINVAL, "Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET, link_ifindex, ip) < 0)
			had_error = true;
	}

	if (!had_error)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
/*
 * Remove the l2proxy state for one IPv6 address.
 *
 * @ip:         address to clean up.
 * @link:       device holding the neighbour proxy entry; skipped if empty.
 * @lo_ifindex: loopback ifindex; if non-zero the /128 route to it is deleted.
 *
 * Both removals are attempted even if the first one fails. Returns 0 if
 * everything succeeded, -1 with errno EINVAL otherwise. An address that
 * cannot be formatted or a link without an ifindex aborts immediately.
 */
static int lxc_delete_ipv6_l2proxy(struct in6_addr *ip, char *link, unsigned int lo_ifindex)
{
	char bufinet6[INET6_ADDRSTRLEN];
	unsigned int link_ifindex = 0;
	bool had_error = false;

	if (!inet_ntop(AF_INET6, ip, bufinet6, sizeof(bufinet6)))
		return log_error_errno(-1, EINVAL, "Failed to convert IP for l2proxy ipv6 removal on dev \"%s\"", link);

	/* A loopback ifindex means a static route to lo has to go as well. */
	if (lo_ifindex > 0 && lxc_ipv6_dest_del(lo_ifindex, ip, 128) < 0) {
		had_error = true;
		ERROR("Failed to delete ipv6 dest \"%s\" for network ifindex \"%u\"", bufinet6, lo_ifindex);
	}

	/* With a link given, drop the neighbour proxy entry for this IP on it. */
	if (!is_empty_string(link)) {
		link_ifindex = if_nametoindex(link);
		if (link_ifindex == 0) {
			ERROR("Failed to retrieve ifindex for \"%s\" l2proxy cleanup", link);
			return ret_set_errno(-1, EINVAL);
		}

		if (lxc_ip_neigh_proxy(RTM_DELNEIGH, AF_INET6, link_ifindex, ip) < 0)
			had_error = true;
	}

	if (!had_error)
		return 0;

	return ret_set_errno(-1, EINVAL);
}
3379 static int lxc_delete_l2proxy(struct lxc_netdev
*netdev
)
3381 unsigned int lo_ifindex
= 0;
3382 unsigned int err
= 0;
3383 struct lxc_inetdev
*inet4dev
;
3384 struct lxc_inet6dev
*inet6dev
;
3386 /* Perform IPVLAN specific checks. */
3387 if (netdev
->type
== LXC_NET_IPVLAN
) {
3388 /* Retrieve local-loopback interface index for use with IPVLAN static routes. */
3389 lo_ifindex
= if_nametoindex(loop_device
);
3390 if (lo_ifindex
== 0) {
3392 ERROR("Failed to retrieve ifindex for \"%s\" routing cleanup", loop_device
);
3396 list_for_each_entry(inet4dev
, &netdev
->ipv4_addresses
, head
) {
3397 if (lxc_delete_ipv4_l2proxy(&inet4dev
->addr
, netdev
->link
, lo_ifindex
) < 0)
3401 list_for_each_entry(inet6dev
, &netdev
->ipv6_addresses
, head
) {
3402 if (lxc_delete_ipv6_l2proxy(&inet6dev
->addr
, netdev
->link
, lo_ifindex
) < 0)
3407 return ret_errno(EINVAL
);
3412 static int lxc_create_network_priv(struct lxc_handler
*handler
)
3414 struct lxc_netdev
*netdev
;
3416 list_for_each_entry(netdev
, &handler
->conf
->netdevs
, head
) {
3417 if (netdev
->type
< 0 || netdev
->type
> LXC_NET_MAXCONFTYPE
)
3418 return log_error_errno(-1, EINVAL
, "Invalid network configuration type %d", netdev
->type
);
3420 /* Setup l2proxy entries if enabled and used with a link property */
3421 if (netdev
->l2proxy
&& !is_empty_string(netdev
->link
)) {
3422 if (lxc_setup_l2proxy(netdev
))
3423 return log_error_errno(-1, errno
, "Failed to setup l2proxy");
3426 if (netdev_configure_server
[netdev
->type
](handler
, netdev
))
3427 return log_error_errno(-1, errno
, "Failed to create network device");
3434 * LXC moves network devices into the target namespace based on their created
3435 * name. The created name can either be randomly generated for e.g. veth
3436 * devices or it can be the name of the existing device in the server's
3437 * namespaces. This is e.g. the case when moving physical devices. However this
3438 * can lead to weird clashes. Consider we have a network namespace that has the
3439 * following devices:
3441 * 4: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3442 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3444 * 5: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3445 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3448 * and the user generates the following network config for their container:
3450 * lxc.net.0.type = phys
3451 * lxc.net.0.name = eth1
3452 * lxc.net.0.link = eth2
3454 * lxc.net.1.type = phys
3455 * lxc.net.1.name = eth2
3456 * lxc.net.1.link = eth1
3458 * This would cause LXC to move the devices eth1 and eth2 from the server's
3459 * network namespace into the container's network namespace:
3461 * 24: eth1: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3462 * link/ether 00:16:3e:91:d3:ae brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:e7:5d:10
3464 * 25: eth2: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN group default qlen 1000
3465 * link/ether 00:16:3e:e7:5d:10 brd ff:ff:ff:ff:ff:ff permaddr 00:16:3e:91:d3:ae
3468 * According to the network config above we now need to rename the network
3469 * devices in the container's network namespace. Let's say we start with
3470 * renaming eth2 to eth1. This would immediately lead to a clash since the
3471 * container's network namespace already contains a network device with that
3472 * name. Renaming the other device would have the same problem.
3474 * There are multiple ways to fix this but I'm concerned with keeping the logic
3475 * somewhat reasonable which is why we simply start creating transient device
3476 * names that are unique which we'll use to move and rename the network device
3477 * in the container's network namespace at the same time. And then we rename
3478 * based on those random device names to the target name.
3480 * Note that the transient name is based on the type of network device as
3481 * specified in the LXC config. However, that doesn't mean it's correct. LXD
3482 * passes veth devices and a range of other network devices (e.g. Infiniband
3483 * VFs etc.) via LXC_NET_PHYS even though they're not really "physical" in the
3484 * sense we like to think about it so you might see a veth device being
3485 * assigned a "physXXXXXX" transient name. That's not a problem.
3487 static int create_transient_name(struct lxc_netdev
*netdev
)
3489 const struct lxc_network_info
*info
;
3491 if (!is_empty_string(netdev
->transient_name
))
3492 return syserror_set(-EINVAL
, "Network device already had a transient name %s",
3493 netdev
->transient_name
);
3495 info
= &lxc_network_info
[netdev
->type
];
3496 strlcpy(netdev
->transient_name
, info
->template, info
->template_len
+ 1);
3498 if (!lxc_ifname_alnum_case_sensitive(netdev
->transient_name
))
3499 return syserror_set(-EINVAL
, "Failed to create transient name for network device %s", netdev
->created_name
);
3501 TRACE("Created transient name %s for network device", netdev
->transient_name
);
3505 static int netdev_requires_move(const struct lxc_netdev
*netdev
)
3507 if (netdev
->type
== LXC_NET_EMPTY
|| netdev
->type
== LXC_NET_NONE
)
3511 * Veth devices are directly created in the container's network
3512 * namespace so the device doesn't need to be moved into the
3513 * container's network namespace. The transient name will
3514 * already have been set above when we created the veth tunnel.
3516 if (!netdev
->ifindex
)
3522 int lxc_network_move_created_netdev_priv(struct lxc_handler
*handler
)
3524 pid_t pid
= handler
->pid
;
3525 struct lxc_netdev
*netdev
;
3527 if (am_guest_unpriv())
3530 list_for_each_entry(netdev
, &handler
->conf
->netdevs
, head
) {
3531 __do_free
char *physname
= NULL
;
3534 if (!netdev_requires_move(netdev
))
3537 ret
= create_transient_name(netdev
);
3541 if (netdev
->type
== LXC_NET_PHYS
)
3542 physname
= is_wlan(netdev
->link
);
3545 ret
= lxc_netdev_move_wlan(physname
, netdev
->link
, pid
, netdev
->transient_name
);
3547 ret
= lxc_netdev_move_by_index(netdev
->ifindex
, pid
, netdev
->transient_name
);
3549 return log_error_errno(-1, -ret
, "Failed to move network device \"%s\" with ifindex %d to network namespace %d and rename to %s",
3550 netdev
->created_name
, netdev
->ifindex
, pid
, netdev
->transient_name
);
3552 DEBUG("Moved network device \"%s\" with ifindex %d to network namespace of %d and renamed to %s",
3553 maybe_empty(netdev
->created_name
), netdev
->ifindex
, pid
, netdev
->transient_name
);
3559 static int network_requires_advanced_setup(int type
)
3561 if (type
== LXC_NET_EMPTY
)
3564 if (type
== LXC_NET_NONE
)
3570 static int lxc_create_network_unpriv(struct lxc_handler
*handler
)
3572 int hooks_version
= handler
->conf
->hooks_version
;
3573 const char *lxcname
= handler
->name
;
3574 const char *lxcpath
= handler
->lxcpath
;
3575 pid_t pid
= handler
->pid
;
3576 struct lxc_netdev
*netdev
;
3578 list_for_each_entry(netdev
, &handler
->conf
->netdevs
, head
) {
3579 if (!network_requires_advanced_setup(netdev
->type
))
3582 if (netdev
->type
!= LXC_NET_VETH
)
3583 return log_error_errno(-1, EINVAL
, "Networks of type %s are not supported by unprivileged containers",
3584 lxc_net_type_to_str(netdev
->type
));
3587 INFO("mtu ignored due to insufficient privilege");
3589 if (lxc_create_network_unpriv_exec(lxcpath
, lxcname
, netdev
,
3590 pid
, hooks_version
))
3597 static bool lxc_delete_network_priv(struct lxc_handler
*handler
)
3600 struct lxc_netdev
*netdev
;
3602 list_for_each_entry(netdev
, &handler
->conf
->netdevs
, head
) {
3603 char *hostveth
= NULL
;
3605 /* We can only delete devices whose ifindex we have. If we don't
3606 * have the index it means that we didn't create it.
3608 if (!netdev
->ifindex
)
3612 * If the network device has been moved back from the
3613 * containers network namespace, update the ifindex.
3615 netdev
->ifindex
= if_nametoindex(netdev
->name
);
3617 /* Delete l2proxy entries if enabled and used with a link property */
3618 if (netdev
->l2proxy
&& !is_empty_string(netdev
->link
)) {
3619 if (lxc_delete_l2proxy(netdev
))
3620 WARN("Failed to delete all l2proxy config");
3621 /* Don't return, let the network be cleaned up as normal. */
3624 if (netdev
->type
== LXC_NET_PHYS
) {
3625 /* Physical interfaces are initially returned to the parent namespace
3626 * with their transient name to avoid collisions
3628 netdev
->ifindex
= if_nametoindex(netdev
->transient_name
);
3629 ret
= lxc_netdev_rename_by_index(netdev
->ifindex
, netdev
->link
);
3631 WARN("Failed to rename interface with index %d "
3632 "from \"%s\" to its initial name \"%s\"",
3633 netdev
->ifindex
, netdev
->name
, netdev
->link
);
3635 TRACE("Renamed interface with index %d from "
3636 "\"%s\" to its initial name \"%s\"",
3637 netdev
->ifindex
, netdev
->name
,
3640 /* Restore original MTU */
3641 ret
= lxc_netdev_set_mtu(netdev
->link
, netdev
->priv
.phys_attr
.mtu
);
3643 WARN("Failed to set interface \"%s\" to its initial mtu \"%d\"",
3644 netdev
->link
, netdev
->priv
.phys_attr
.mtu
);
3646 TRACE("Restored interface \"%s\" to its initial mtu \"%d\"",
3647 netdev
->link
, netdev
->priv
.phys_attr
.mtu
);
3651 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
3653 WARN("Failed to deconfigure interface with index %d and initial name \"%s\"",
3654 netdev
->ifindex
, netdev
->link
);
3655 goto clear_ifindices
;
3658 ret
= netdev_deconf
[netdev
->type
](handler
, netdev
);
3660 WARN("Failed to deconfigure network device");
3662 if (netdev
->type
!= LXC_NET_VETH
)
3663 goto clear_ifindices
;
3665 /* Explicitly delete host veth device to prevent lingering
3666 * devices. We had issues in LXD around this.
3668 if (!is_empty_string(netdev
->priv
.veth_attr
.pair
))
3669 hostveth
= netdev
->priv
.veth_attr
.pair
;
3671 hostveth
= netdev
->priv
.veth_attr
.veth1
;
3672 if (is_empty_string(hostveth
))
3673 goto clear_ifindices
;
3675 if (is_empty_string(netdev
->link
) || !is_ovs_bridge(netdev
->link
)) {
3676 ret
= lxc_netdev_delete_by_name(hostveth
);
3678 WARN("Failed to remove interface \"%s\" from \"%s\"", hostveth
, netdev
->link
);
3680 INFO("Removed interface \"%s\" from \"%s\"", hostveth
, netdev
->link
);
3681 } else if (!is_empty_string(netdev
->link
)) {
3682 ret
= lxc_ovs_delete_port(netdev
->link
, hostveth
);
3684 WARN("Failed to remove port \"%s\" from openvswitch bridge \"%s\"", hostveth
, netdev
->link
);
3686 INFO("Removed port \"%s\" from openvswitch bridge \"%s\"", hostveth
, netdev
->link
);
3690 /* We need to clear any ifindices we recorded so liblxc won't
3691 * have cached stale data which would cause it to fail on reboot
3692 * we're we don't re-read the on-disk config file.
3694 netdev
->ifindex
= 0;
3695 if (netdev
->type
== LXC_NET_PHYS
) {
3696 netdev
->priv
.phys_attr
.ifindex
= 0;
3697 } else if (netdev
->type
== LXC_NET_VETH
) {
3698 netdev
->priv
.veth_attr
.veth1
[0] = '\0';
3699 netdev
->priv
.veth_attr
.ifindex
= 0;
3702 /* Clear transient name */
3703 if (!is_empty_string (netdev
->transient_name
))
3705 netdev
->transient_name
[0] = '\0';
3712 int lxc_requests_empty_network(struct lxc_handler
*handler
)
3714 struct list_head
*netdevs
= &handler
->conf
->netdevs
;
3715 bool found_none
= false, found_nic
= false;
3716 struct lxc_netdev
*netdev
;
3718 if (list_empty(netdevs
))
3721 list_for_each_entry(netdev
, netdevs
, head
) {
3723 if (netdev
->type
== LXC_NET_NONE
)
3729 if (found_none
&& !found_nic
)
3735 /* try to move physical nics to the init netns */
3736 int lxc_restore_phys_nics_to_netns(struct lxc_handler
*handler
)
3738 __do_close
int oldfd
= -EBADF
;
3739 int netnsfd
= handler
->nsfd
[LXC_NS_NET
];
3740 struct lxc_conf
*conf
= handler
->conf
;
3742 char ifname
[IFNAMSIZ
];
3743 struct lxc_netdev
*netdev
;
3746 * If we weren't asked to clone a new network namespace, there's
3747 * nothing to restore.
3749 if (!(handler
->ns_clone_flags
& CLONE_NEWNET
))
3752 /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
3753 * the parent network namespace. We won't have this capability if we are
3756 if (!handler
->am_root
)
3759 TRACE("Moving physical network devices back to parent network namespace");
3761 oldfd
= lxc_preserve_ns(handler
->monitor_pid
, "net");
3763 return log_error_errno(-1, errno
, "Failed to preserve network namespace");
3765 ret
= setns(netnsfd
, CLONE_NEWNET
);
3767 return log_error_errno(-1, errno
, "Failed to enter network namespace");
3769 list_for_each_entry(netdev
, &conf
->netdevs
, head
) {
3770 if (netdev
->type
!= LXC_NET_PHYS
)
3773 /* Retrieve the name of the interface in the container's network
3776 if (!if_indextoname(netdev
->ifindex
, ifname
)) {
3777 WARN("No interface corresponding to ifindex %d", netdev
->ifindex
);
3781 /* Restore physical interfaces to host's network namespace with its transient name
3782 * to avoid collisions with the host's other interfaces.
3784 ret
= lxc_netdev_move_by_index_fd(netdev
->ifindex
, oldfd
, netdev
->transient_name
);
3786 WARN("Error moving network device \"%s\" back to network namespace", ifname
);
3788 TRACE("Moved network device \"%s\" back to network namespace", ifname
);
3791 ret
= setns(oldfd
, CLONE_NEWNET
);
3793 return log_error_errno(-1, errno
, "Failed to enter network namespace");
3798 static int setup_hw_addr(char *hwaddr
, const char *ifname
)
3800 __do_close
int fd
= -EBADF
;
3801 struct sockaddr sockaddr
;
3805 ret
= lxc_convert_mac(hwaddr
, &sockaddr
);
3807 return log_error_errno(-1, -ret
, "Mac address \"%s\" conversion failed", hwaddr
);
3809 memcpy(ifr
.ifr_name
, ifname
, IFNAMSIZ
);
3810 ifr
.ifr_name
[IFNAMSIZ
-1] = '\0';
3811 memcpy((char *) &ifr
.ifr_hwaddr
, (char *) &sockaddr
, sizeof(sockaddr
));
3813 fd
= socket(AF_INET
, SOCK_DGRAM
| SOCK_CLOEXEC
, 0);
3817 ret
= ioctl(fd
, SIOCSIFHWADDR
, &ifr
);
3819 SYSERROR("Failed to perform ioctl");
3821 DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr
, ifr
.ifr_name
);
3826 static int setup_ipv4_addr(struct lxc_netdev
*netdev
)
3828 int ifindex
= netdev
->ifindex
;
3830 struct lxc_inetdev
*inet4dev
;
3832 list_for_each_entry(inet4dev
, &netdev
->ipv4_addresses
, head
) {
3833 err
= lxc_ipv4_addr_add(ifindex
, &inet4dev
->addr
,
3834 &inet4dev
->bcast
, inet4dev
->prefix
);
3836 return log_error_errno(-1, -err
, "Failed to setup ipv4 address for network device with ifindex %d", ifindex
);
3842 static int setup_ipv6_addr(struct lxc_netdev
*netdev
)
3845 struct lxc_inet6dev
*inet6dev
;
3846 int ifindex
= netdev
->ifindex
;
3848 list_for_each_entry(inet6dev
, &netdev
->ipv6_addresses
, head
) {
3849 err
= lxc_ipv6_addr_add(ifindex
, &inet6dev
->addr
,
3850 &inet6dev
->mcast
, &inet6dev
->acast
,
3853 return log_error_errno(-1, -err
, "Failed to setup ipv6 address for network device with ifindex %d", ifindex
);
3859 static int lxc_network_setup_in_child_namespaces_common(struct lxc_netdev
*netdev
)
3862 char bufinet4
[INET_ADDRSTRLEN
], bufinet6
[INET6_ADDRSTRLEN
];
3864 /* set a mac address */
3865 if (netdev
->hwaddr
&& setup_hw_addr(netdev
->hwaddr
, netdev
->name
))
3866 return log_error_errno(-1, errno
, "Failed to setup hw address for network device \"%s\"", netdev
->name
);
3868 /* setup ipv4 addresses on the interface */
3869 if (setup_ipv4_addr(netdev
))
3870 return log_error_errno(-1, errno
, "Failed to setup ip addresses for network device \"%s\"", netdev
->name
);
3872 /* setup ipv6 addresses on the interface */
3873 if (setup_ipv6_addr(netdev
))
3874 return log_error_errno(-1, errno
, "Failed to setup ipv6 addresses for network device \"%s\"", netdev
->name
);
3876 /* set the network device up */
3877 if (netdev
->flags
& IFF_UP
) {
3878 err
= lxc_netdev_up(netdev
->name
);
3880 return log_error_errno(-1, -err
, "Failed to set network device \"%s\" up", netdev
->name
);
3882 /* the network is up, make the loopback up too */
3883 err
= lxc_netdev_up("lo");
3885 return log_error_errno(-1, -err
, "Failed to set the loopback network device up");
3888 /* setup ipv4 gateway on the interface */
3889 if (netdev
->ipv4_gateway
|| netdev
->ipv4_gateway_dev
) {
3890 if (!(netdev
->flags
& IFF_UP
))
3891 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not bringing up the interface", netdev
->name
);
3893 if (list_empty(&netdev
->ipv4_addresses
))
3894 return log_error(-1, "Cannot add ipv4 gateway for network device \"%s\" when not assigning an address", netdev
->name
);
3896 /* Setup device route if ipv4_gateway_dev is enabled */
3897 if (netdev
->ipv4_gateway_dev
) {
3898 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, NULL
);
3900 return log_error_errno(-1, -err
, "Failed to setup ipv4 gateway to network device \"%s\"", netdev
->name
);
3902 /* Check the gateway address is valid */
3903 if (!inet_ntop(AF_INET
, netdev
->ipv4_gateway
, bufinet4
, sizeof(bufinet4
)))
3904 return ret_set_errno(-1, errno
);
3906 /* Try adding a default route to the gateway address */
3907 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
3909 /* If adding the default route fails, this could be because the
3910 * gateway address is in a different subnet to the container's address.
3911 * To work around this, we try adding a static device route to the
3912 * gateway address first, and then try again.
3914 err
= lxc_ipv4_dest_add(netdev
->ifindex
, netdev
->ipv4_gateway
, 32);
3916 return log_error_errno(-1, -err
, "Failed to add ipv4 dest \"%s\" for network device \"%s\"", bufinet4
, netdev
->name
);
3918 err
= lxc_ipv4_gateway_add(netdev
->ifindex
, netdev
->ipv4_gateway
);
3920 return log_error_errno(-1, -err
, "Failed to setup ipv4 gateway \"%s\" for network device \"%s\"", bufinet4
, netdev
->name
);
3925 /* setup ipv6 gateway on the interface */
3926 if (netdev
->ipv6_gateway
|| netdev
->ipv6_gateway_dev
) {
3927 if (!(netdev
->flags
& IFF_UP
))
3928 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not bringing up the interface", netdev
->name
);
3930 if (list_empty(&netdev
->ipv6_addresses
) && !IN6_IS_ADDR_LINKLOCAL(netdev
->ipv6_gateway
))
3931 return log_error(-1, "Cannot add ipv6 gateway for network device \"%s\" when not assigning an address", netdev
->name
);
3933 /* Setup device route if ipv6_gateway_dev is enabled */
3934 if (netdev
->ipv6_gateway_dev
) {
3935 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, NULL
);
3937 return log_error_errno(-1, -err
, "Failed to setup ipv6 gateway to network device \"%s\"", netdev
->name
);
3939 /* Check the gateway address is valid */
3940 if (!inet_ntop(AF_INET6
, netdev
->ipv6_gateway
, bufinet6
, sizeof(bufinet6
)))
3941 return ret_set_errno(-1, errno
);
3943 /* Try adding a default route to the gateway address */
3944 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
3946 /* If adding the default route fails, this could be because the
3947 * gateway address is in a different subnet to the container's address.
3948 * To work around this, we try adding a static device route to the
3949 * gateway address first, and then try again.
3951 err
= lxc_ipv6_dest_add(netdev
->ifindex
, netdev
->ipv6_gateway
, 128);
3953 return log_error_errno(-1, errno
, "Failed to add ipv6 dest \"%s\" for network device \"%s\"", bufinet6
, netdev
->name
);
3955 err
= lxc_ipv6_gateway_add(netdev
->ifindex
, netdev
->ipv6_gateway
);
3957 return log_error_errno(-1, -err
, "Failed to setup ipv6 gateway \"%s\" for network device \"%s\"", bufinet6
, netdev
->name
);
3962 DEBUG("Network device \"%s\" has been setup", netdev
->name
);
3968 * Consider the following network layout:
3970 * lxc.net.0.type = phys
3971 * lxc.net.0.link = eth2
3972 * lxc.net.0.name = eth%d
3974 * lxc.net.1.type = phys
3975 * lxc.net.1.link = eth1
3976 * lxc.net.1.name = eth0
3978 * If we simply follow this order and create the first network first the kernel
3979 * will allocate eth0 for the first network but the second network requests
3980 * that eth1 be renamed to eth0 in the container's network namespace which
3981 * would lead to a clash.
3983 * Note, we don't handle cases like:
3985 * lxc.net.0.type = phys
3986 * lxc.net.0.link = eth2
3987 * lxc.net.0.name = eth0
3989 * lxc.net.1.type = phys
3990 * lxc.net.1.link = eth1
3991 * lxc.net.1.name = eth0
3993 * That'll brutally fail of course but there's nothing we can do about it.
3995 int lxc_setup_network_in_child_namespaces(const struct lxc_conf
*conf
)
3997 bool needs_second_pass
= false;
3998 struct lxc_netdev
*netdev
;
3999 const struct list_head
*netdevs
= &conf
->netdevs
;
4001 if (list_empty(netdevs
))
4004 /* Configure all devices that have a specific target name. */
4005 list_for_each_entry(netdev
, netdevs
, head
) {
4008 if (is_empty_string(netdev
->name
) || strequal(netdev
->name
, "eth%d")) {
4009 needs_second_pass
= true;
4013 ret
= netdev_configure_container
[netdev
->type
](netdev
);
4015 ret
= lxc_network_setup_in_child_namespaces_common(netdev
);
4017 return log_error_errno(-1, errno
, "Failed to setup netdev");
4019 INFO("Finished setting up network devices with caller assigned names");
4021 if (needs_second_pass
) {
4022 /* Configure all devices that have a kernel assigned name. */
4023 list_for_each_entry(netdev
, netdevs
, head
) {
4026 if (!is_empty_string(netdev
->name
) && !strequal(netdev
->name
, "eth%d"))
4029 ret
= netdev_configure_container
[netdev
->type
](netdev
);
4031 ret
= lxc_network_setup_in_child_namespaces_common(netdev
);
4033 return log_error_errno(-1, errno
, "Failed to setup netdev");
4035 INFO("Finished setting up network devices with kernel assigned names");
4041 int lxc_network_send_to_child(struct lxc_handler
*handler
)
4043 int data_sock
= handler
->data_sock
[0];
4044 struct lxc_netdev
*netdev
;
4046 list_for_each_entry(netdev
, &handler
->conf
->netdevs
, head
) {
4049 if (!network_requires_advanced_setup(netdev
->type
))
4052 ret
= lxc_send_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, MSG_NOSIGNAL
);
4056 ret
= lxc_send_nointr(data_sock
, netdev
->transient_name
, IFNAMSIZ
, MSG_NOSIGNAL
);
4060 TRACE("Sent network device name \"%s\" to child", netdev
->transient_name
);
4066 int lxc_network_recv_from_parent(struct lxc_handler
*handler
)
4068 int data_sock
= handler
->data_sock
[1];
4069 struct lxc_netdev
*netdev
;
4071 list_for_each_entry(netdev
, &handler
->conf
->netdevs
, head
) {
4074 if (!network_requires_advanced_setup(netdev
->type
))
4077 ret
= lxc_recv_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, 0);
4081 ret
= lxc_recv_nointr(data_sock
, netdev
->transient_name
, IFNAMSIZ
, 0);
4085 TRACE("Received network device name \"%s\" from parent", netdev
->transient_name
);
4091 int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler
*handler
)
4093 int data_sock
= handler
->data_sock
[0];
4094 struct lxc_netdev
*netdev
;
4095 struct list_head
*netdevs
= &handler
->conf
->netdevs
;
4097 if (!handler
->am_root
)
4100 list_for_each_entry(netdev
, netdevs
, head
) {
4103 /* Send network device name in the child's namespace to parent. */
4104 ret
= lxc_send_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, MSG_NOSIGNAL
);
4108 /* Send network device ifindex in the child's namespace to
4111 ret
= lxc_send_nointr(data_sock
, &netdev
->ifindex
, sizeof(netdev
->ifindex
), MSG_NOSIGNAL
);
4115 TRACE("Sent network device %s with ifindex %d to parent", maybe_empty(netdev
->name
), netdev
->ifindex
);
4118 if (!list_empty(netdevs
))
4119 TRACE("Sent network device names and ifindices to parent");
4124 int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler
*handler
)
4126 int data_sock
= handler
->data_sock
[1];
4127 struct lxc_netdev
*netdev
;
4129 if (!handler
->am_root
)
4132 list_for_each_entry(netdev
, &handler
->conf
->netdevs
, head
) {
4135 /* Receive network device name in the child's namespace to
4138 ret
= lxc_recv_nointr(data_sock
, netdev
->name
, IFNAMSIZ
, 0);
4142 /* Receive network device ifindex in the child's namespace to
4145 ret
= lxc_recv_nointr(data_sock
, &netdev
->ifindex
, sizeof(netdev
->ifindex
), 0);
4149 TRACE("Received network device %s with ifindex %d from child", maybe_empty(netdev
->name
), netdev
->ifindex
);
4155 void lxc_delete_network(struct lxc_handler
*handler
)
4160 * Always expose namespace fd paths to network down hooks via
4161 * environment variables. No need to complicate things by passing them
4162 * as additional hook arguments.
4164 lxc_expose_namespace_environment(handler
);
4166 if (handler
->am_root
)
4167 bret
= lxc_delete_network_priv(handler
);
4169 bret
= lxc_delete_network_unpriv(handler
);
4171 DEBUG("Failed to delete network devices");
4173 DEBUG("Deleted network devices");
4176 int lxc_netns_set_nsid(int fd
)
4179 char buf
[NLMSG_ALIGN(sizeof(struct nlmsghdr
)) +
4180 NLMSG_ALIGN(sizeof(struct rtgenmsg
)) +
4182 struct nl_handler nlh
= NL_HANDLER_INIT
;
4183 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
4184 struct nlmsghdr
*hdr
;
4185 struct rtgenmsg
*msg
;
4186 const __s32 ns_id
= -1;
4187 const __u32 netns_fd
= fd
;
4189 ret
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
4193 memset(buf
, 0, sizeof(buf
));
4195 #pragma GCC diagnostic push
4196 #pragma GCC diagnostic ignored "-Wcast-align"
4197 hdr
= (struct nlmsghdr
*)buf
;
4198 msg
= (struct rtgenmsg
*)NLMSG_DATA(hdr
);
4199 #pragma GCC diagnostic pop
4201 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(*msg
));
4202 hdr
->nlmsg_type
= RTM_NEWNSID
;
4203 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
4205 hdr
->nlmsg_seq
= RTM_NEWNSID
;
4206 msg
->rtgen_family
= AF_UNSPEC
;
4208 ret
= addattr(hdr
, 1024, __LXC_NETNSA_FD
, &netns_fd
, sizeof(netns_fd
));
4210 return ret_errno(ENOMEM
);
4212 ret
= addattr(hdr
, 1024, __LXC_NETNSA_NSID
, &ns_id
, sizeof(ns_id
));
4214 return ret_errno(ENOMEM
);
4216 return __netlink_transaction(nlh_ptr
, hdr
, hdr
);
/*
 * Index the attributes of an rtattr stream by type.
 *
 * @tb   table with room for @max + 1 pointers; it is cleared first
 * @max  highest attribute type that is recorded
 * @rta  first attribute of the stream
 * @len  number of bytes in the stream
 *
 * For each type in the range 0..@max the first attribute of that type is
 * stored in @tb; later duplicates and types above @max are ignored.
 *
 * Always returns 0.
 */
static int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	memset(tb, 0, sizeof(struct rtattr *) * (max + 1));

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wcast-align"
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta, len)) {
		unsigned short attr_type = rta->rta_type;

		if (attr_type > max || tb[attr_type])
			continue;

		tb[attr_type] = rta;
	}
#pragma GCC diagnostic pop

	return 0;
}
/* Read the payload of @rta as a signed 32-bit integer. */
static inline __s32 rta_getattr_s32(const struct rtattr *rta)
{
	const __s32 *payload = RTA_DATA(rta);

	return *payload;
}
/*
 * Yield a struct rtattr pointer to the address that lies
 * NLMSG_ALIGN(sizeof(struct rtgenmsg)) bytes past @r.
 */
#define NETNS_RTA(r) \
	((struct rtattr *)(((char *)(r)) + NLMSG_ALIGN(sizeof(struct rtgenmsg))))
4249 int lxc_netns_get_nsid(int fd
)
4251 struct nl_handler nlh
= NL_HANDLER_INIT
;
4252 call_cleaner(netlink_close
) struct nl_handler
*nlh_ptr
= &nlh
;
4255 char buf
[NLMSG_ALIGN(sizeof(struct nlmsghdr
)) +
4256 NLMSG_ALIGN(sizeof(struct rtgenmsg
)) +
4258 struct rtattr
*tb
[__LXC_NETNSA_MAX
+ 1];
4259 struct nlmsghdr
*hdr
;
4260 struct rtgenmsg
*msg
;
4261 __u32 netns_fd
= fd
;
4263 ret
= netlink_open(nlh_ptr
, NETLINK_ROUTE
);
4267 memset(buf
, 0, sizeof(buf
));
4269 #pragma GCC diagnostic push
4270 #pragma GCC diagnostic ignored "-Wcast-align"
4271 hdr
= (struct nlmsghdr
*)buf
;
4272 msg
= (struct rtgenmsg
*)NLMSG_DATA(hdr
);
4273 #pragma GCC diagnostic pop
4275 hdr
->nlmsg_len
= NLMSG_LENGTH(sizeof(*msg
));
4276 hdr
->nlmsg_type
= RTM_GETNSID
;
4277 hdr
->nlmsg_flags
= NLM_F_REQUEST
| NLM_F_ACK
;
4279 hdr
->nlmsg_seq
= RTM_GETNSID
;
4280 msg
->rtgen_family
= AF_UNSPEC
;
4282 ret
= addattr(hdr
, 1024, __LXC_NETNSA_FD
, &netns_fd
, sizeof(netns_fd
));
4284 return ret_errno(ENOMEM
);
4286 ret
= __netlink_transaction(nlh_ptr
, hdr
, hdr
);
4290 msg
= NLMSG_DATA(hdr
);
4291 len
= hdr
->nlmsg_len
- NLMSG_SPACE(sizeof(*msg
));
4293 return ret_errno(EINVAL
);
4295 #pragma GCC diagnostic push
4296 #pragma GCC diagnostic ignored "-Wcast-align"
4297 parse_rtattr(tb
, __LXC_NETNSA_MAX
, NETNS_RTA(msg
), len
);
4298 if (tb
[__LXC_NETNSA_NSID
])
4299 return rta_getattr_s32(tb
[__LXC_NETNSA_NSID
]);
4300 #pragma GCC diagnostic pop
4305 int lxc_create_network(struct lxc_handler
*handler
)
4309 if (handler
->am_root
) {
4310 ret
= lxc_create_network_priv(handler
);
4314 return lxc_network_move_created_netdev_priv(handler
);
4317 return lxc_create_network_unpriv(handler
);