]> git.proxmox.com Git - mirror_lxc.git/blobdiff - src/lxc/network.c
start: don't let data_sock users close the fd
[mirror_lxc.git] / src / lxc / network.c
index 9a55a4868a94ac2a3f512cc2ceb88334a03b6fb1..a7f054e795f95d7c050266e52caa86343c4c86fe 100644 (file)
@@ -4,7 +4,7 @@
  * (C) Copyright IBM Corp. 2007, 2008
  *
  * Authors:
- * Daniel Lezcano <dlezcano at fr.ibm.com>
+ * Daniel Lezcano <daniel.lezcano at free.fr>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
+
 #define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
 #include <stdio.h>
-#undef _GNU_SOURCe
 #include <stdlib.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
 #include <string.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/socket.h>
-#include <sys/param.h>
-#include <sys/ioctl.h>
+#include <time.h>
+#include <unistd.h>
 #include <arpa/inet.h>
-#include <net/if.h>
-#include <net/if_arp.h>
-#include <net/ethernet.h>
-#include <netinet/in.h>
 #include <linux/netlink.h>
 #include <linux/rtnetlink.h>
 #include <linux/sockios.h>
-#include <linux/if_bridge.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <net/if_arp.h>
+#include <netinet/in.h>
+#include <sys/inotify.h>
+#include <sys/ioctl.h>
+#include <sys/param.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
 
-#include "nl.h"
-#include "network.h"
+#include "af_unix.h"
 #include "conf.h"
+#include "config.h"
+#include "log.h"
+#include "network.h"
+#include "nl.h"
+#include "utils.h"
+
+#if HAVE_IFADDRS_H
+#include <ifaddrs.h>
+#else
+#include <../include/ifaddrs.h>
+#endif
 
 #ifndef IFLA_LINKMODE
-#  define IFLA_LINKMODE 17
+#define IFLA_LINKMODE 17
 #endif
 
 #ifndef IFLA_LINKINFO
-#  define IFLA_LINKINFO 18
+#define IFLA_LINKINFO 18
 #endif
 
 #ifndef IFLA_NET_NS_PID
-#  define IFLA_NET_NS_PID 19
+#define IFLA_NET_NS_PID 19
 #endif
 
 #ifndef IFLA_INFO_KIND
-# define IFLA_INFO_KIND 1
+#define IFLA_INFO_KIND 1
 #endif
 
 #ifndef IFLA_VLAN_ID
-# define IFLA_VLAN_ID 1
+#define IFLA_VLAN_ID 1
 #endif
 
 #ifndef IFLA_INFO_DATA
-#  define IFLA_INFO_DATA 2
+#define IFLA_INFO_DATA 2
 #endif
 
 #ifndef VETH_INFO_PEER
-# define VETH_INFO_PEER 1
+#define VETH_INFO_PEER 1
 #endif
 
 #ifndef IFLA_MACVLAN_MODE
-# define IFLA_MACVLAN_MODE 1
+#define IFLA_MACVLAN_MODE 1
 #endif
 
-struct link_req {
-       struct nlmsg nlmsg;
-       struct ifinfomsg ifinfomsg;
-};
+lxc_log_define(lxc_network, lxc);
+
+typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
+
+static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       int bridge_index, err;
+       char *veth1, *veth2;
+       char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
+       unsigned int mtu = 0;
+
+       if (netdev->priv.veth_attr.pair[0] != '\0') {
+               veth1 = netdev->priv.veth_attr.pair;
+               if (handler->conf->reboot)
+                       lxc_netdev_delete_by_name(veth1);
+       } else {
+               err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
+               if (err < 0 || (size_t)err >= sizeof(veth1buf))
+                       return -1;
+
+               veth1 = lxc_mkifname(veth1buf);
+               if (!veth1)
+                       return -1;
+
+               /* store away for deconf */
+               memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
+       }
+
+       snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
+       veth2 = lxc_mkifname(veth2buf);
+       if (!veth2)
+               goto out_delete;
+
+       err = lxc_veth_create(veth1, veth2);
+       if (err) {
+               ERROR("Failed to create veth pair \"%s\" and \"%s\": %s", veth1,
+                     veth2, strerror(-err));
+               goto out_delete;
+       }
+
+       /* changing the high byte of the mac address to 0xfe, the bridge interface
+        * will always keep the host's mac address and not take the mac address
+        * of a container */
+       err = setup_private_host_hw_addr(veth1);
+       if (err) {
+               ERROR("Failed to change mac address of host interface \"%s\": %s",
+                     veth1, strerror(-err));
+               goto out_delete;
+       }
+
+       /* Retrieve ifindex of the host's veth device. */
+       netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
+       if (!netdev->priv.veth_attr.ifindex) {
+               ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
+               goto out_delete;
+       }
+
+       /* Note that we're retrieving the container's ifindex in the host's
+        * network namespace because we need it to move the device from the
+        * host's network namespace to the container's network namespace later
+        * on.
+        */
+       netdev->ifindex = if_nametoindex(veth2);
+       if (!netdev->ifindex) {
+               ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
+               goto out_delete;
+       }
+
+       if (netdev->mtu) {
+               if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
+                       WARN("Failed to parse mtu");
+               else
+                       INFO("Retrieved mtu %d", mtu);
+       } else if (netdev->link[0] != '\0') {
+               bridge_index = if_nametoindex(netdev->link);
+               if (bridge_index) {
+                       mtu = netdev_get_mtu(bridge_index);
+                       INFO("Retrieved mtu %d from %s", mtu, netdev->link);
+               } else {
+                       mtu = netdev_get_mtu(netdev->ifindex);
+                       INFO("Retrieved mtu %d from %s", mtu, veth2);
+               }
+       }
+
+       if (mtu) {
+               err = lxc_netdev_set_mtu(veth1, mtu);
+               if (!err)
+                       err = lxc_netdev_set_mtu(veth2, mtu);
+               if (err) {
+                       ERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
+                             "and \"%s\": %s",
+                             mtu, veth1, veth2, strerror(-err));
+                       goto out_delete;
+               }
+       }
+
+       if (netdev->link[0] != '\0') {
+               err = lxc_bridge_attach(netdev->link, veth1);
+               if (err) {
+                       ERROR("Failed to attach \"%s\" to bridge \"%s\": %s",
+                             veth1, netdev->link, strerror(-err));
+                       goto out_delete;
+               }
+               INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
+       }
+
+       err = lxc_netdev_up(veth1);
+       if (err) {
+               ERROR("Failed to set \"%s\" up: %s", veth1, strerror(-err));
+               goto out_delete;
+       }
+
+       if (netdev->upscript) {
+               err = run_script(handler->name, "net", netdev->upscript, "up",
+                                "veth", veth1, (char*) NULL);
+               if (err)
+                       goto out_delete;
+       }
+
+       DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
+             netdev->ifindex);
+
+       return 0;
 
-struct ip_req {
-       struct nlmsg nlmsg;
-       struct ifaddrmsg ifa;
+out_delete:
+       if (netdev->ifindex != 0)
+               lxc_netdev_delete_by_name(veth1);
+       return -1;
+}
+
+static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       char peerbuf[IFNAMSIZ], *peer;
+       int err;
+
+       if (netdev->link[0] == '\0') {
+               ERROR("No link for macvlan network device specified");
+               return -1;
+       }
+
+       err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
+       if (err < 0 || (size_t)err >= sizeof(peerbuf))
+               return -1;
+
+       peer = lxc_mkifname(peerbuf);
+       if (!peer)
+               return -1;
+
+       err = lxc_macvlan_create(netdev->link, peer,
+                                netdev->priv.macvlan_attr.mode);
+       if (err) {
+               ERROR("Failed to create macvlan interface \"%s\" on \"%s\": %s",
+                     peer, netdev->link, strerror(-err));
+               goto on_error;
+       }
+
+       netdev->ifindex = if_nametoindex(peer);
+       if (!netdev->ifindex) {
+               ERROR("Failed to retrieve ifindex for \"%s\"", peer);
+               goto on_error;
+       }
+
+       if (netdev->upscript) {
+               err = run_script(handler->name, "net", netdev->upscript, "up",
+                                "macvlan", netdev->link, (char*) NULL);
+               if (err)
+                       goto on_error;
+       }
+
+       DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
+             peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
+
+       return 0;
+
+on_error:
+       lxc_netdev_delete_by_name(peer);
+       return -1;
+}
+
+static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       char peer[IFNAMSIZ];
+       int err;
+       static uint16_t vlan_cntr = 0;
+       unsigned int mtu = 0;
+
+       if (netdev->link[0] == '\0') {
+               ERROR("No link for vlan network device specified");
+               return -1;
+       }
+
+       err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
+       if (err < 0 || (size_t)err >= sizeof(peer))
+               return -1;
+
+       err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
+       if (err) {
+               ERROR("Failed to create vlan interface \"%s\" on \"%s\": %s",
+                     peer, netdev->link, strerror(-err));
+               return -1;
+       }
+
+       netdev->ifindex = if_nametoindex(peer);
+       if (!netdev->ifindex) {
+               ERROR("Failed to retrieve ifindex for \"%s\"", peer);
+               lxc_netdev_delete_by_name(peer);
+               return -1;
+       }
+
+       DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\" (vlan1000)",
+             peer, netdev->ifindex);
+       if (netdev->mtu) {
+               if (lxc_safe_uint(netdev->mtu, &mtu) < 0) {
+                       ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".",
+                             netdev->ifindex,
+                             netdev->name[0] != '\0' ? netdev->name : "(null)");
+                       return -1;
+               }
+               err = lxc_netdev_set_mtu(peer, mtu);
+               if (err) {
+                       ERROR("Failed to set mtu \"%s\" for \"%s\": %s",
+                             netdev->mtu, peer, strerror(-err));
+                       lxc_netdev_delete_by_name(peer);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       if (netdev->link[0] == '\0') {
+               ERROR("No link for physical interface specified");
+               return -1;
+       }
+
+       /* Note that we're retrieving the container's ifindex in the host's
+        * network namespace because we need it to move the device from the
+        * host's network namespace to the container's network namespace later
+        * on.
+        * Note that netdev->link will contain the name of the physical network
+        * device in the host's namespace.
+        */
+       netdev->ifindex = if_nametoindex(netdev->link);
+       if (!netdev->ifindex) {
+               ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
+               return -1;
+       }
+
+       /* Store the ifindex of the host's network device in the host's
+        * namespace.
+        */
+       netdev->priv.phys_attr.ifindex = netdev->ifindex;
+
+       if (netdev->upscript) {
+               int err;
+               err = run_script(handler->name, "net", netdev->upscript,
+                                "up", "phys", netdev->link, (char*) NULL);
+               if (err)
+                       return -1;
+       }
+
+       return 0;
+}
+
+static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       netdev->ifindex = 0;
+       if (netdev->upscript) {
+               int err;
+               err = run_script(handler->name, "net", netdev->upscript,
+                                "up", "empty", (char*) NULL);
+               if (err)
+                       return -1;
+       }
+       return 0;
+}
+
+static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       netdev->ifindex = 0;
+       return 0;
+}
+
+static  instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
+       [LXC_NET_VETH]    = instantiate_veth,
+       [LXC_NET_MACVLAN] = instantiate_macvlan,
+       [LXC_NET_VLAN]    = instantiate_vlan,
+       [LXC_NET_PHYS]    = instantiate_phys,
+       [LXC_NET_EMPTY]   = instantiate_empty,
+       [LXC_NET_NONE]    = instantiate_none,
 };
 
-struct rt_req {
-       struct nlmsg nlmsg;
-       struct rtmsg rt;
+static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       char *veth1;
+       int err;
+
+       if (netdev->priv.veth_attr.pair[0] != '\0')
+               veth1 = netdev->priv.veth_attr.pair;
+       else
+               veth1 = netdev->priv.veth_attr.veth1;
+
+       if (netdev->downscript) {
+               err = run_script(handler->name, "net", netdev->downscript,
+                                "down", "veth", veth1, (char*) NULL);
+               if (err)
+                       return -1;
+       }
+       return 0;
+}
+
+static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       int err;
+
+       if (netdev->downscript) {
+               err = run_script(handler->name, "net", netdev->downscript,
+                                "down", "macvlan", netdev->link,
+                                (char*) NULL);
+               if (err)
+                       return -1;
+       }
+       return 0;
+}
+
+static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       return 0;
+}
+
+static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       int err;
+
+       if (netdev->downscript) {
+               err = run_script(handler->name, "net", netdev->downscript,
+                                "down", "phys", netdev->link, (char*) NULL);
+               if (err)
+                       return -1;
+       }
+       return 0;
+}
+
+static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       int err;
+
+       if (netdev->downscript) {
+               err = run_script(handler->name, "net", netdev->downscript,
+                                "down", "empty", (char*) NULL);
+               if (err)
+                       return -1;
+       }
+       return 0;
+}
+
+static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+       return 0;
+}
+
+static  instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
+       [LXC_NET_VETH]    = shutdown_veth,
+       [LXC_NET_MACVLAN] = shutdown_macvlan,
+       [LXC_NET_VLAN]    = shutdown_vlan,
+       [LXC_NET_PHYS]    = shutdown_phys,
+       [LXC_NET_EMPTY]   = shutdown_empty,
+       [LXC_NET_NONE]    = shutdown_none,
 };
 
-int lxc_netdev_move_by_index(int ifindex, pid_t pid)
+int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
 {
+       int err;
        struct nl_handler nlh;
+       struct ifinfomsg *ifi;
        struct nlmsg *nlmsg = NULL;
-       struct link_req *link_req;
-       int err;
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
@@ -112,16 +479,23 @@ int lxc_netdev_move_by_index(int ifindex, pid_t pid)
        if (!nlmsg)
                goto out;
 
-       link_req = (struct link_req *)nlmsg;
-       link_req->ifinfomsg.ifi_family = AF_UNSPEC;
-       link_req->ifinfomsg.ifi_index = ifindex;
-       nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-       nlmsg->nlmsghdr.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
-       nlmsg->nlmsghdr.nlmsg_type = RTM_NEWLINK;
+       nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
+
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi)
+               goto out;
+       ifi->ifi_family = AF_UNSPEC;
+       ifi->ifi_index = ifindex;
 
        if (nla_put_u32(nlmsg, IFLA_NET_NS_PID, pid))
                goto out;
 
+       if (ifname != NULL) {
+               if (nla_put_string(nlmsg, IFLA_IFNAME, ifname))
+                       goto out;
+       }
+
        err = netlink_transaction(&nlh, nlmsg, nlmsg);
 out:
        netlink_close(&nlh);
@@ -129,12 +503,150 @@ out:
        return err;
 }
 
+/* If we are asked to move a wireless interface, then we must actually move its
+ * phyN device. Detect that condition and return the physname here. The physname
+ * will be passed to lxc_netdev_move_wlan() which will free it when done.
+ */
+#define PHYSNAME "/sys/class/net/%s/phy80211/name"
+static char *is_wlan(const char *ifname)
+{
+       int i, ret;
+       long physlen;
+       size_t len;
+       char *path;
+       FILE *f;
+       struct stat sb;
+       char *physname = NULL;
+
+       len = strlen(ifname) + strlen(PHYSNAME) - 1;
+       path = alloca(len + 1);
+       ret = snprintf(path, len, PHYSNAME, ifname);
+       if (ret < 0 || (size_t)ret >= len)
+               goto bad;
+
+       ret = stat(path, &sb);
+       if (ret)
+               goto bad;
+
+       f = fopen(path, "r");
+       if (!f)
+               goto bad;
+
+       /* Feh - sb.st_size is always 4096. */
+       fseek(f, 0, SEEK_END);
+       physlen = ftell(f);
+       fseek(f, 0, SEEK_SET);
+
+       physname = malloc(physlen + 1);
+       if (!physname) {
+               fclose(f);
+               goto bad;
+       }
+
+       memset(physname, 0, physlen + 1);
+       ret = fread(physname, 1, physlen, f);
+       fclose(f);
+       if (ret < 0)
+               goto bad;
+
+       for (i = 0; i < physlen; i++) {
+               if (physname[i] == '\n')
+                       physname[i] = '\0';
+
+               if (physname[i] == '\0')
+                       break;
+       }
+
+       return physname;
+
+bad:
+       free(physname);
+       return NULL;
+}
+
+static int lxc_netdev_rename_by_name_in_netns(pid_t pid, const char *old,
+                                             const char *new)
+{
+       pid_t fpid;
+
+       fpid = fork();
+       if (fpid < 0)
+               return -1;
+
+       if (fpid != 0)
+               return wait_for_pid(fpid);
+
+       if (!switch_to_ns(pid, "net"))
+               return -1;
+
+       exit(lxc_netdev_rename_by_name(old, new));
+}
+
+static int lxc_netdev_move_wlan(char *physname, const char *ifname, pid_t pid,
+                               const char *newname)
+{
+       char *cmd;
+       pid_t fpid;
+       int err = -1;
+
+       /* Move phyN into the container.  TODO - do this using netlink.
+        * However, IIUC this involves a bit more complicated work to talk to
+        * the 80211 module, so for now just call out to iw.
+        */
+       cmd = on_path("iw", NULL);
+       if (!cmd)
+               goto out1;
+       free(cmd);
+
+       fpid = fork();
+       if (fpid < 0)
+               goto out1;
+
+       if (fpid == 0) {
+               char pidstr[30];
+               sprintf(pidstr, "%d", pid);
+               execlp("iw", "iw", "phy", physname, "set", "netns", pidstr,
+                      (char *)NULL);
+               exit(EXIT_FAILURE);
+       }
+
+       if (wait_for_pid(fpid))
+               goto out1;
+
+       err = 0;
+       if (newname)
+               err = lxc_netdev_rename_by_name_in_netns(pid, ifname, newname);
+
+out1:
+       free(physname);
+       return err;
+}
+
+int lxc_netdev_move_by_name(const char *ifname, pid_t pid, const char* newname)
+{
+       int index;
+       char *physname;
+
+       if (!ifname)
+               return -EINVAL;
+
+       index = if_nametoindex(ifname);
+       if (!index)
+               return -EINVAL;
+
+       physname = is_wlan(ifname);
+       if (physname)
+               return lxc_netdev_move_wlan(physname, ifname, pid, newname);
+
+       return lxc_netdev_move_by_index(index, pid, newname);
+}
+
 int lxc_netdev_delete_by_index(int ifindex)
 {
-       struct nl_handler nlh;
-       struct nlmsg *nlmsg = NULL, *answer = NULL;
-       struct link_req *link_req;
        int err;
+       struct ifinfomsg *ifi;
+       struct nl_handler nlh;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
@@ -145,16 +657,18 @@ int lxc_netdev_delete_by_index(int ifindex)
        if (!nlmsg)
                goto out;
 
-       answer = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
        if (!answer)
                goto out;
 
-       link_req = (struct link_req *)nlmsg;
-       link_req->ifinfomsg.ifi_family = AF_UNSPEC;
-       link_req->ifinfomsg.ifi_index = ifindex;
-       nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-       nlmsg->nlmsghdr.nlmsg_flags = NLM_F_ACK|NLM_F_REQUEST;
-       nlmsg->nlmsghdr.nlmsg_type = RTM_DELLINK;
+       nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_DELLINK;
+
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi)
+               goto out;
+       ifi->ifi_family = AF_UNSPEC;
+       ifi->ifi_index = ifindex;
 
        err = netlink_transaction(&nlh, nlmsg, answer);
 out:
@@ -177,10 +691,10 @@ int lxc_netdev_delete_by_name(const char *name)
 
 int lxc_netdev_rename_by_index(int ifindex, const char *newname)
 {
+       int err, len;
+       struct ifinfomsg *ifi;
        struct nl_handler nlh;
-       struct nlmsg *nlmsg = NULL, *answer = NULL;
-       struct link_req *link_req;
-       int len, err;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
@@ -195,16 +709,18 @@ int lxc_netdev_rename_by_index(int ifindex, const char *newname)
        if (!nlmsg)
                goto out;
 
-       answer = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
        if (!answer)
                goto out;
 
-       link_req = (struct link_req *)nlmsg;
-       link_req->ifinfomsg.ifi_family = AF_UNSPEC;
-       link_req->ifinfomsg.ifi_index = ifindex;
-       nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-       nlmsg->nlmsghdr.nlmsg_flags = NLM_F_ACK|NLM_F_REQUEST;
-       nlmsg->nlmsghdr.nlmsg_type = RTM_NEWLINK;
+       nlmsg->nlmsghdr->nlmsg_flags = NLM_F_ACK | NLM_F_REQUEST;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
+
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi)
+               goto out;
+       ifi->ifi_family = AF_UNSPEC;
+       ifi->ifi_index = ifindex;
 
        if (nla_put_string(nlmsg, IFLA_IFNAME, newname))
                goto out;
@@ -232,12 +748,12 @@ int lxc_netdev_rename_by_name(const char *oldname, const char *newname)
        return lxc_netdev_rename_by_index(index, newname);
 }
 
-static int netdev_set_flag(const char *name, int flag)
+int netdev_set_flag(const char *name, int flag)
 {
+       int err, index, len;
+       struct ifinfomsg *ifi;
        struct nl_handler nlh;
-       struct nlmsg *nlmsg = NULL, *answer = NULL;
-       struct link_req *link_req;
-       int index, len, err;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
@@ -253,7 +769,7 @@ static int netdev_set_flag(const char *name, int flag)
        if (!nlmsg)
                goto out;
 
-       answer = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
        if (!answer)
                goto out;
 
@@ -262,14 +778,18 @@ static int netdev_set_flag(const char *name, int flag)
        if (!index)
                goto out;
 
-       link_req = (struct link_req *)nlmsg;
-       link_req->ifinfomsg.ifi_family = AF_UNSPEC;
-       link_req->ifinfomsg.ifi_index = index;
-       link_req->ifinfomsg.ifi_change |= IFF_UP;
-       link_req->ifinfomsg.ifi_flags |= flag;
-       nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-       nlmsg->nlmsghdr.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
-       nlmsg->nlmsghdr.nlmsg_type = RTM_NEWLINK;
+       nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
+
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi) {
+               err = -ENOMEM;
+               goto out;
+       }
+       ifi->ifi_family = AF_UNSPEC;
+       ifi->ifi_index = index;
+       ifi->ifi_change |= IFF_UP;
+       ifi->ifi_flags |= flag;
 
        err = netlink_transaction(&nlh, nlmsg, answer);
 out:
@@ -279,12 +799,15 @@ out:
        return err;
 }
 
-int lxc_netdev_set_mtu(const char *name, int mtu)
+int netdev_get_flag(const char *name, int *flag)
 {
+       int err, index, len;
+       struct ifinfomsg *ifi;
        struct nl_handler nlh;
-       struct nlmsg *nlmsg = NULL, *answer = NULL;
-       struct link_req *link_req;
-       int index, len, err;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
+
+       if (!name)
+               return -EINVAL;
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
@@ -300,7 +823,7 @@ int lxc_netdev_set_mtu(const char *name, int mtu)
        if (!nlmsg)
                goto out;
 
-       answer = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
        if (!answer)
                goto out;
 
@@ -309,17 +832,24 @@ int lxc_netdev_set_mtu(const char *name, int mtu)
        if (!index)
                goto out;
 
-       link_req = (struct link_req *)nlmsg;
-       link_req->ifinfomsg.ifi_family = AF_UNSPEC;
-       link_req->ifinfomsg.ifi_index = index;
-       nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-       nlmsg->nlmsghdr.nlmsg_flags = NLM_F_REQUEST|NLM_F_ACK;
-       nlmsg->nlmsghdr.nlmsg_type = RTM_NEWLINK;
+       nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
 
-       if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi) {
+               err = -ENOMEM;
                goto out;
+       }
+       ifi->ifi_family = AF_UNSPEC;
+       ifi->ifi_index = index;
 
        err = netlink_transaction(&nlh, nlmsg, answer);
+       if (err)
+               goto out;
+
+       ifi = NLMSG_DATA(answer->nlmsghdr);
+
+       *flag = ifi->ifi_flags;
 out:
        netlink_close(&nlh);
        nlmsg_free(nlmsg);
@@ -327,62 +857,255 @@ out:
        return err;
 }
 
-int lxc_netdev_up(const char *name)
+/*
+ * \brief Check a interface is up or not.
+ *
+ * \param name: name for the interface.
+ *
+ * \return int.
+ * 0 means interface is down.
+ * 1 means interface is up.
+ * Others means error happened, and ret-value is the error number.
+ */
+int lxc_netdev_isup(const char *name)
 {
-       return netdev_set_flag(name, IFF_UP);
-}
+       int err, flag;
 
-int lxc_netdev_down(const char *name)
-{
-       return netdev_set_flag(name, 0);
+       err = netdev_get_flag(name, &flag);
+       if (err)
+               return err;
+
+       if (flag & IFF_UP)
+               return 1;
+
+       return 0;
 }
 
-int lxc_veth_create(const char *name1, const char *name2)
+int netdev_get_mtu(int ifindex)
 {
+       int answer_len, err, res;
        struct nl_handler nlh;
-       struct nlmsg *nlmsg = NULL, *answer = NULL;
-       struct link_req *link_req;
-       struct rtattr *nest1, *nest2, *nest3;
-       int len, err;
+       struct ifinfomsg *ifi;
+       struct nlmsghdr *msg;
+       int readmore = 0, recv_len = 0;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
                return err;
 
-       err = -EINVAL;
-       len = strlen(name1);
-       if (len == 1 || len >= IFNAMSIZ)
-               goto out;
-
-       len = strlen(name2);
-       if (len == 1 || len >= IFNAMSIZ)
-               goto out;
-
        err = -ENOMEM;
        nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
        if (!nlmsg)
                goto out;
 
-       answer = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
        if (!answer)
                goto out;
 
-       link_req = (struct link_req *)nlmsg;
-       link_req->ifinfomsg.ifi_family = AF_UNSPEC;
-       nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-       nlmsg->nlmsghdr.nlmsg_flags =
-               NLM_F_REQUEST|NLM_F_CREATE|NLM_F_EXCL|NLM_F_ACK;
-       nlmsg->nlmsghdr.nlmsg_type = RTM_NEWLINK;
+       /* Save the answer buffer length, since it will be overwritten
+        * on the first receive (and we might need to receive more than
+        * once.
+        */
+       answer_len = answer->nlmsghdr->nlmsg_len;
 
-       err = -EINVAL;
-       nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
-       if (!nest1)
-               goto out;
+       nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_GETLINK;
 
-       if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi)
                goto out;
+       ifi->ifi_family = AF_UNSPEC;
 
-       nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
+       /* Send the request for addresses, which returns all addresses
+        * on all interfaces. */
+       err = netlink_send(&nlh, nlmsg);
+       if (err < 0)
+               goto out;
+
+       do {
+               /* Restore the answer buffer length, it might have been
+                * overwritten by a previous receive.
+                */
+               answer->nlmsghdr->nlmsg_len = answer_len;
+
+               /* Get the (next) batch of reply messages */
+               err = netlink_rcv(&nlh, answer);
+               if (err < 0)
+                       goto out;
+
+               recv_len = err;
+               err = 0;
+
+               /* Satisfy the typing for the netlink macros */
+               msg = answer->nlmsghdr;
+
+               while (NLMSG_OK(msg, recv_len)) {
+
+                       /* Stop reading if we see an error message */
+                       if (msg->nlmsg_type == NLMSG_ERROR) {
+                               struct nlmsgerr *errmsg =
+                                   (struct nlmsgerr *)NLMSG_DATA(msg);
+                               err = errmsg->error;
+                               goto out;
+                       }
+
+                       /* Stop reading if we see a NLMSG_DONE message */
+                       if (msg->nlmsg_type == NLMSG_DONE) {
+                               readmore = 0;
+                               break;
+                       }
+
+                       ifi = NLMSG_DATA(msg);
+                       if (ifi->ifi_index == ifindex) {
+                               struct rtattr *rta = IFLA_RTA(ifi);
+                               int attr_len =
+                                   msg->nlmsg_len - NLMSG_LENGTH(sizeof(*ifi));
+                               res = 0;
+                               while (RTA_OK(rta, attr_len)) {
+                                       /* Found a local address for the
+                                        * requested interface, return it.
+                                        */
+                                       if (rta->rta_type == IFLA_MTU) {
+                                               memcpy(&res, RTA_DATA(rta),
+                                                      sizeof(int));
+                                               err = res;
+                                               goto out;
+                                       }
+                                       rta = RTA_NEXT(rta, attr_len);
+                               }
+                       }
+
+                       /* Keep reading more data from the socket if the last
+                        * message had the NLF_F_MULTI flag set.
+                        */
+                       readmore = (msg->nlmsg_flags & NLM_F_MULTI);
+
+                       /* Look at the next message received in this buffer. */
+                       msg = NLMSG_NEXT(msg, recv_len);
+               }
+       } while (readmore);
+
+       /* If we end up here, we didn't find any result, so signal an error. */
+       err = -1;
+
+out:
+       netlink_close(&nlh);
+       nlmsg_free(answer);
+       nlmsg_free(nlmsg);
+       return err;
+}
+
+int lxc_netdev_set_mtu(const char *name, int mtu)
+{
+       int err, index, len;
+       struct ifinfomsg *ifi;
+       struct nl_handler nlh;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
+
+       err = netlink_open(&nlh, NETLINK_ROUTE);
+       if (err)
+               return err;
+
+       err = -EINVAL;
+       len = strlen(name);
+       if (len == 1 || len >= IFNAMSIZ)
+               goto out;
+
+       err = -ENOMEM;
+       nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       if (!nlmsg)
+               goto out;
+
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
+       if (!answer)
+               goto out;
+
+       err = -EINVAL;
+       index = if_nametoindex(name);
+       if (!index)
+               goto out;
+
+       nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
+
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi) {
+               err = -ENOMEM;
+               goto out;
+       }
+       ifi->ifi_family = AF_UNSPEC;
+       ifi->ifi_index = index;
+
+       if (nla_put_u32(nlmsg, IFLA_MTU, mtu))
+               goto out;
+
+       err = netlink_transaction(&nlh, nlmsg, answer);
+out:
+       netlink_close(&nlh);
+       nlmsg_free(nlmsg);
+       nlmsg_free(answer);
+       return err;
+}
+
+int lxc_netdev_up(const char *name)
+{
+       return netdev_set_flag(name, IFF_UP);
+}
+
+int lxc_netdev_down(const char *name)
+{
+       return netdev_set_flag(name, 0);
+}
+
+int lxc_veth_create(const char *name1, const char *name2)
+{
+       int err, len;
+       struct ifinfomsg *ifi;
+       struct nl_handler nlh;
+       struct rtattr *nest1, *nest2, *nest3;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
+
+       err = netlink_open(&nlh, NETLINK_ROUTE);
+       if (err)
+               return err;
+
+       err = -EINVAL;
+       len = strlen(name1);
+       if (len == 1 || len >= IFNAMSIZ)
+               goto out;
+
+       len = strlen(name2);
+       if (len == 1 || len >= IFNAMSIZ)
+               goto out;
+
+       err = -ENOMEM;
+       nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       if (!nlmsg)
+               goto out;
+
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
+       if (!answer)
+               goto out;
+
+       nlmsg->nlmsghdr->nlmsg_flags =
+           NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
+
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi)
+               goto out;
+       ifi->ifi_family = AF_UNSPEC;
+
+       err = -EINVAL;
+       nest1 = nla_begin_nested(nlmsg, IFLA_LINKINFO);
+       if (!nest1)
+               goto out;
+
+       if (nla_put_string(nlmsg, IFLA_INFO_KIND, "veth"))
+               goto out;
+
+       nest2 = nla_begin_nested(nlmsg, IFLA_INFO_DATA);
        if (!nest2)
                goto out;
 
@@ -390,15 +1113,17 @@ int lxc_veth_create(const char *name1, const char *name2)
        if (!nest3)
                goto out;
 
-       nlmsg->nlmsghdr.nlmsg_len += sizeof(struct ifinfomsg);
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi) {
+               err = -ENOMEM;
+               goto out;
+       }
 
        if (nla_put_string(nlmsg, IFLA_IFNAME, name2))
                goto out;
 
        nla_end_nested(nlmsg, nest3);
-
        nla_end_nested(nlmsg, nest2);
-
        nla_end_nested(nlmsg, nest1);
 
        if (nla_put_string(nlmsg, IFLA_IFNAME, name1))
@@ -412,14 +1137,14 @@ out:
        return err;
 }
 
-/* XXX: merge with lxc_macvlan_create */
+/* TODO: merge with lxc_macvlan_create */
 int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
 {
+       int err, len, lindex;
+       struct ifinfomsg *ifi;
        struct nl_handler nlh;
-       struct nlmsg *nlmsg = NULL, *answer = NULL;
-       struct link_req *link_req;
        struct rtattr *nest, *nest2;
-       int lindex, len, err;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
@@ -439,7 +1164,7 @@ int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
        if (!nlmsg)
                goto err3;
 
-       answer = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
        if (!answer)
                goto err2;
 
@@ -448,12 +1173,16 @@ int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
        if (!lindex)
                goto err1;
 
-       link_req = (struct link_req *)nlmsg;
-       link_req->ifinfomsg.ifi_family = AF_UNSPEC;
-       nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-       nlmsg->nlmsghdr.nlmsg_flags =
-               NLM_F_REQUEST|NLM_F_CREATE|NLM_F_EXCL|NLM_F_ACK;
-       nlmsg->nlmsghdr.nlmsg_type = RTM_NEWLINK;
+       nlmsg->nlmsghdr->nlmsg_flags =
+           NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
+
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi) {
+               err = -ENOMEM;
+               goto err1;
+       }
+       ifi->ifi_family = AF_UNSPEC;
 
        nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
        if (!nest)
@@ -470,7 +1199,6 @@ int lxc_vlan_create(const char *master, const char *name, unsigned short vlanid)
                goto err1;
 
        nla_end_nested(nlmsg, nest2);
-
        nla_end_nested(nlmsg, nest);
 
        if (nla_put_u32(nlmsg, IFLA_LINK, lindex))
@@ -491,11 +1219,11 @@ err3:
 
 int lxc_macvlan_create(const char *master, const char *name, int mode)
 {
+       int err, index, len;
+       struct ifinfomsg *ifi;
        struct nl_handler nlh;
-       struct nlmsg *nlmsg = NULL, *answer = NULL;
-       struct link_req *link_req;
        struct rtattr *nest, *nest2;
-       int index, len, err;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
@@ -515,7 +1243,7 @@ int lxc_macvlan_create(const char *master, const char *name, int mode)
        if (!nlmsg)
                goto out;
 
-       answer = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
        if (!answer)
                goto out;
 
@@ -524,12 +1252,16 @@ int lxc_macvlan_create(const char *master, const char *name, int mode)
        if (!index)
                goto out;
 
-       link_req = (struct link_req *)nlmsg;
-       link_req->ifinfomsg.ifi_family = AF_UNSPEC;
-       nlmsg->nlmsghdr.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-       nlmsg->nlmsghdr.nlmsg_flags =
-               NLM_F_REQUEST|NLM_F_CREATE|NLM_F_EXCL|NLM_F_ACK;
-       nlmsg->nlmsghdr.nlmsg_type = RTM_NEWLINK;
+       nlmsg->nlmsghdr->nlmsg_flags =
+           NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL | NLM_F_ACK;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_NEWLINK;
+
+       ifi = nlmsg_reserve(nlmsg, sizeof(struct ifinfomsg));
+       if (!ifi) {
+               err = -ENOMEM;
+               goto out;
+       }
+       ifi->ifi_family = AF_UNSPEC;
 
        nest = nla_begin_nested(nlmsg, IFLA_LINKINFO);
        if (!nest)
@@ -567,7 +1299,8 @@ out:
 
 static int proc_sys_net_write(const char *path, const char *value)
 {
-       int fd, err = 0;
+       int fd;
+       int err = 0;
 
        fd = open(path, O_WRONLY);
        if (fd < 0)
@@ -582,18 +1315,18 @@ static int proc_sys_net_write(const char *path, const char *value)
 
 static int ip_forward_set(const char *ifname, int family, int flag)
 {
-       char path[MAXPATHLEN];
        int rc;
+       char path[MAXPATHLEN];
 
        if (family != AF_INET && family != AF_INET6)
                return -EINVAL;
 
        rc = snprintf(path, MAXPATHLEN, "/proc/sys/net/%s/conf/%s/forwarding",
-                family == AF_INET?"ipv4":"ipv6" , ifname);
-       if (rc >= MAXPATHLEN)
+                     family == AF_INET ? "ipv4" : "ipv6", ifname);
+       if (rc < 0 || (size_t)rc >= MAXPATHLEN)
                return -E2BIG;
 
-       return proc_sys_net_write(path, flag?"1":"0");
+       return proc_sys_net_write(path, flag ? "1" : "0");
 }
 
 int lxc_ip_forward_on(const char *ifname, int family)
@@ -608,19 +1341,19 @@ int lxc_ip_forward_off(const char *ifname, int family)
 
 static int neigh_proxy_set(const char *ifname, int family, int flag)
 {
-       char path[MAXPATHLEN];
        int ret;
+       char path[MAXPATHLEN];
 
        if (family != AF_INET && family != AF_INET6)
                return -EINVAL;
 
        ret = snprintf(path, MAXPATHLEN, "/proc/sys/net/%s/conf/%s/%s",
-               family == AF_INET?"ipv4":"ipv6" , ifname,
-               family == AF_INET?"proxy_arp":"proxy_ndp");
-       if (ret < 0 || ret >= MAXPATHLEN)
+                      family == AF_INET ? "ipv4" : "ipv6", ifname,
+                      family == AF_INET ? "proxy_arp" : "proxy_ndp");
+       if (ret < 0 || (size_t)ret >= MAXPATHLEN)
                return -E2BIG;
 
-       return proc_sys_net_write(path, flag?"1":"0");
+       return proc_sys_net_write(path, flag ? "1" : "0");
 }
 
 int lxc_neigh_proxy_on(const char *name, int family)
@@ -635,62 +1368,60 @@ int lxc_neigh_proxy_off(const char *name, int family)
 
 int lxc_convert_mac(char *macaddr, struct sockaddr *sockaddr)
 {
-       unsigned char *data;
-       char c;
        int i = 0;
        unsigned val;
+       char c;
+       unsigned char *data;
 
        sockaddr->sa_family = ARPHRD_ETHER;
        data = (unsigned char *)sockaddr->sa_data;
 
        while ((*macaddr != '\0') && (i < ETH_ALEN)) {
-           val = 0;
-           c = *macaddr++;
-           if (isdigit(c))
-                   val = c - '0';
-           else if (c >= 'a' && c <= 'f')
-                   val = c - 'a' + 10;
-           else if (c >= 'A' && c <= 'F')
-                   val = c - 'A' + 10;
-           else {
-                   return -EINVAL;
-           }
-           val <<= 4;
-           c = *macaddr;
-           if (isdigit(c))
-                   val |= c - '0';
-           else if (c >= 'a' && c <= 'f')
-                   val |= c - 'a' + 10;
-           else if (c >= 'A' && c <= 'F')
-                   val |= c - 'A' + 10;
-           else if (c == ':' || c == 0)
-                   val >>= 4;
-           else {
-                   return -EINVAL;
-           }
-           if (c != 0)
-                   macaddr++;
-           *data++ = (unsigned char) (val & 0377);
-           i++;
-
-           if (*macaddr == ':')
-                   macaddr++;
-       }
-
-       return 0;
-}
-
-static int ip_addr_add(int family, int ifindex,
-                      void *addr, void *bcast, void *acast, int prefix)
+               val = 0;
+               c = *macaddr++;
+               if (isdigit(c))
+                       val = c - '0';
+               else if (c >= 'a' && c <= 'f')
+                       val = c - 'a' + 10;
+               else if (c >= 'A' && c <= 'F')
+                       val = c - 'A' + 10;
+               else
+                       return -EINVAL;
+
+               val <<= 4;
+               c = *macaddr;
+               if (isdigit(c))
+                       val |= c - '0';
+               else if (c >= 'a' && c <= 'f')
+                       val |= c - 'a' + 10;
+               else if (c >= 'A' && c <= 'F')
+                       val |= c - 'A' + 10;
+               else if (c == ':' || c == 0)
+                       val >>= 4;
+               else
+                       return -EINVAL;
+               if (c != 0)
+                       macaddr++;
+               *data++ = (unsigned char)(val & 0377);
+               i++;
+
+               if (*macaddr == ':')
+                       macaddr++;
+       }
+
+       return 0;
+}
+
+static int ip_addr_add(int family, int ifindex, void *addr, void *bcast,
+                      void *acast, int prefix)
 {
+       int addrlen, err;
+       struct ifaddrmsg *ifa;
        struct nl_handler nlh;
-       struct nlmsg *nlmsg = NULL, *answer = NULL;
-       struct ip_req *ip_req;
-       int addrlen;
-       int err;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
 
-       addrlen = family == AF_INET ? sizeof(struct in_addr) :
-               sizeof(struct in6_addr);
+       addrlen = family == AF_INET ? sizeof(struct in_addr)
+                                   : sizeof(struct in6_addr);
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
@@ -701,21 +1432,22 @@ static int ip_addr_add(int family, int ifindex,
        if (!nlmsg)
                goto out;
 
-       answer = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
        if (!answer)
                goto out;
 
-       ip_req = (struct ip_req *)nlmsg;
-        ip_req->nlmsg.nlmsghdr.nlmsg_len =
-               NLMSG_LENGTH(sizeof(struct ifaddrmsg));
-        ip_req->nlmsg.nlmsghdr.nlmsg_flags =
-               NLM_F_ACK|NLM_F_REQUEST|NLM_F_CREATE|NLM_F_EXCL;
-        ip_req->nlmsg.nlmsghdr.nlmsg_type = RTM_NEWADDR;
-       ip_req->ifa.ifa_prefixlen = prefix;
-        ip_req->ifa.ifa_index = ifindex;
-        ip_req->ifa.ifa_family = family;
-       ip_req->ifa.ifa_scope = 0;
-       
+       nlmsg->nlmsghdr->nlmsg_flags =
+           NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_NEWADDR;
+
+       ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
+       if (!ifa)
+               goto out;
+       ifa->ifa_prefixlen = prefix;
+       ifa->ifa_index = ifindex;
+       ifa->ifa_family = family;
+       ifa->ifa_scope = 0;
+
        err = -EINVAL;
        if (nla_put_buffer(nlmsg, IFA_LOCAL, addr, addrlen))
                goto out;
@@ -726,7 +1458,7 @@ static int ip_addr_add(int family, int ifindex,
        if (nla_put_buffer(nlmsg, IFA_BROADCAST, bcast, addrlen))
                goto out;
 
-       /* TODO : multicast, anycast with ipv6 */
+       /* TODO: multicast, anycast with ipv6 */
        err = -EPROTONOSUPPORT;
        if (family == AF_INET6 &&
            (memcmp(bcast, &in6addr_any, sizeof(in6addr_any)) ||
@@ -742,52 +1474,59 @@ out:
 }
 
 int lxc_ipv6_addr_add(int ifindex, struct in6_addr *addr,
-                     struct in6_addr *mcast,
-                     struct in6_addr *acast, int prefix)
+                     struct in6_addr *mcast, struct in6_addr *acast,
+                     int prefix)
 {
        return ip_addr_add(AF_INET6, ifindex, addr, mcast, acast, prefix);
 }
 
-int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr,
-                     struct in_addr *bcast, int prefix)
+int lxc_ipv4_addr_add(int ifindex, struct in_addr *addr, struct in_addr *bcast,
+                     int prefix)
 {
        return ip_addr_add(AF_INET, ifindex, addr, bcast, NULL, prefix);
 }
 
-/* Find an IFA_LOCAL (or IFA_ADDRESS if not IFA_LOCAL is present)
- * address from the given RTM_NEWADDR message.  Allocates memory for the
- * address and stores that pointer in *res (so res should be an
- * in_addr** or in6_addr**).
+/* Find an IFA_LOCAL (or IFA_ADDRESS if not IFA_LOCAL is present) address from
+ * the given RTM_NEWADDR message. Allocates memory for the address and stores
+ * that pointer in *res (so res should be an in_addr** or in6_addr**).
  */
-static int ifa_get_local_ip(int family, struct ip_req *ip_info, void** res) {
-       struct rtattr *rta = IFA_RTA(&ip_info->ifa);
-       int attr_len = IFA_PAYLOAD(&ip_info->nlmsg.nlmsghdr);
+static int ifa_get_local_ip(int family, struct nlmsghdr *msg, void **res)
+{
        int addrlen;
+       struct ifaddrmsg *ifa = NLMSG_DATA(msg);
+       struct rtattr *rta = IFA_RTA(ifa);
+       int attr_len = NLMSG_PAYLOAD(msg, sizeof(struct ifaddrmsg));
 
-       if (ip_info->ifa.ifa_family != family)
+       if (ifa->ifa_family != family)
                return 0;
 
-       addrlen = family == AF_INET ? sizeof(struct in_addr) :
-               sizeof(struct in6_addr);
+       addrlen = family == AF_INET ? sizeof(struct in_addr)
+                                   : sizeof(struct in6_addr);
 
        /* Loop over the rtattr's in this message */
-       while(RTA_OK(rta, attr_len)) {
+       while (RTA_OK(rta, attr_len)) {
                /* Found a local address for the requested interface,
-                * return it. */
-               if (rta->rta_type == IFA_LOCAL || rta->rta_type == IFA_ADDRESS) {
-                       /* Sanity check. The family check above should
-                        * make sure the address length is correct, but
-                        * check here just in case */
+                * return it.
+                */
+               if (rta->rta_type == IFA_LOCAL ||
+                   rta->rta_type == IFA_ADDRESS) {
+                       /* Sanity check. The family check above should make sure
+                        * the address length is correct, but check here just in
+                        * case.
+                        */
                        if (RTA_PAYLOAD(rta) != addrlen)
                                return -1;
 
-                       /* We might have found an IFA_ADDRESS before,
-                        * which we now overwrite with an IFA_LOCAL. */
-                       if (!*res)
+                       /* We might have found an IFA_ADDRESS before, which we
+                        * now overwrite with an IFA_LOCAL.
+                        */
+                       if (!*res) {
                                *res = malloc(addrlen);
+                               if (!*res)
+                                       return -1;
+                       }
 
                        memcpy(*res, RTA_DATA(rta), addrlen);
-
                        if (rta->rta_type == IFA_LOCAL)
                                break;
                }
@@ -798,13 +1537,12 @@ static int ifa_get_local_ip(int family, struct ip_req *ip_info, void** res) {
 
 static int ip_addr_get(int family, int ifindex, void **res)
 {
+       int answer_len, err;
+       struct ifaddrmsg *ifa;
        struct nl_handler nlh;
-       struct nlmsg *nlmsg = NULL, *answer = NULL;
-       struct ip_req *ip_req, *ip_info;
        struct nlmsghdr *msg;
-       int err;
-       int recv_len = 0, answer_len;
-       int readmore = 0;
+       int readmore = 0, recv_len = 0;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
@@ -815,35 +1553,37 @@ static int ip_addr_get(int family, int ifindex, void **res)
        if (!nlmsg)
                goto out;
 
-       answer = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
        if (!answer)
                goto out;
 
-       /* Save the answer buffer length, since it will be overwritten
-        * on the first receive (and we might need to receive more than
-        * once. */
-       answer_len = answer->nlmsghdr.nlmsg_len;
+       /* Save the answer buffer length, since it will be overwritten on the
+        * first receive (and we might need to receive more than once).
+        */
+       answer_len = answer->nlmsghdr->nlmsg_len;
 
-       ip_req = (struct ip_req *)nlmsg;
-       ip_req->nlmsg.nlmsghdr.nlmsg_len =
-               NLMSG_LENGTH(sizeof(struct ifaddrmsg));
-       ip_req->nlmsg.nlmsghdr.nlmsg_flags = NLM_F_REQUEST|NLM_F_ROOT;
-       ip_req->nlmsg.nlmsghdr.nlmsg_type = RTM_GETADDR;
-       ip_req->ifa.ifa_family = family;
+       nlmsg->nlmsghdr->nlmsg_flags = NLM_F_REQUEST | NLM_F_ROOT;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_GETADDR;
 
-       /* Send the request for addresses, which returns all addresses
-        * on all interfaces. */
+       ifa = nlmsg_reserve(nlmsg, sizeof(struct ifaddrmsg));
+       if (!ifa)
+               goto out;
+       ifa->ifa_family = family;
+
+       /* Send the request for addresses, which returns all addresses on all
+        * interfaces.
+        */
        err = netlink_send(&nlh, nlmsg);
        if (err < 0)
                goto out;
-       err = 0;
 
        do {
                /* Restore the answer buffer length, it might have been
-                * overwritten by a previous receive. */
-               answer->nlmsghdr.nlmsg_len = answer_len;
+                * overwritten by a previous receive.
+                */
+               answer->nlmsghdr->nlmsg_len = answer_len;
 
-               /* Get the (next) batch of reply messages */
+               /* Get the (next) batch of reply messages. */
                err = netlink_rcv(&nlh, answer);
                if (err < 0)
                        goto out;
@@ -851,18 +1591,19 @@ static int ip_addr_get(int family, int ifindex, void **res)
                recv_len = err;
                err = 0;
 
-               /* Satisfy the typing for the netlink macros */
-               msg = &answer->nlmsghdr;
+               /* Satisfy the typing for the netlink macros. */
+               msg = answer->nlmsghdr;
 
                while (NLMSG_OK(msg, recv_len)) {
-                       /* Stop reading if we see an error message */
+                       /* Stop reading if we see an error message. */
                        if (msg->nlmsg_type == NLMSG_ERROR) {
-                               struct nlmsgerr *errmsg = (struct nlmsgerr*)NLMSG_DATA(msg);
+                               struct nlmsgerr *errmsg =
+                                   (struct nlmsgerr *)NLMSG_DATA(msg);
                                err = errmsg->error;
                                goto out;
                        }
 
-                       /* Stop reading if we see a NLMSG_DONE message */
+                       /* Stop reading if we see a NLMSG_DONE message. */
                        if (msg->nlmsg_type == NLMSG_DONE) {
                                readmore = 0;
                                break;
@@ -873,25 +1614,31 @@ static int ip_addr_get(int family, int ifindex, void **res)
                                goto out;
                        }
 
-                       ip_info = (struct ip_req *)msg;
-                       if (ip_info->ifa.ifa_index == ifindex) {
-                               ifa_get_local_ip(family, ip_info, res);
-                               /* Found a result, stop searching */
+                       ifa = (struct ifaddrmsg *)NLMSG_DATA(msg);
+                       if (ifa->ifa_index == ifindex) {
+                               if (ifa_get_local_ip(family, msg, res) < 0) {
+                                       err = -1;
+                                       goto out;
+                               }
+
+                               /* Found a result, stop searching. */
                                if (*res)
                                        goto out;
                        }
 
-                       /* Keep reading more data from the socket if the
-                        * last message had the NLF_F_MULTI flag set */
+                       /* Keep reading more data from the socket if the last
+                        * message had the NLF_F_MULTI flag set.
+                        */
                        readmore = (msg->nlmsg_flags & NLM_F_MULTI);
 
-                       /* Look at the next message received in this buffer */
+                       /* Look at the next message received in this buffer. */
                        msg = NLMSG_NEXT(msg, recv_len);
                }
        } while (readmore);
 
        /* If we end up here, we didn't find any result, so signal an
-        * error */
+        * error.
+        */
        err = -1;
 
 out:
@@ -903,24 +1650,23 @@ out:
 
 int lxc_ipv6_addr_get(int ifindex, struct in6_addr **res)
 {
-       return ip_addr_get(AF_INET6, ifindex, (void**)res);
+       return ip_addr_get(AF_INET6, ifindex, (void **)res);
 }
 
-int lxc_ipv4_addr_get(int ifindex, struct in_addr** res)
+int lxc_ipv4_addr_get(int ifindex, struct in_addr **res)
 {
-       return ip_addr_get(AF_INET, ifindex, (void**)res);
+       return ip_addr_get(AF_INET, ifindex, (void **)res);
 }
 
 static int ip_gateway_add(int family, int ifindex, void *gw)
 {
+       int addrlen, err;
        struct nl_handler nlh;
-       struct nlmsg *nlmsg = NULL, *answer = NULL;
-       struct rt_req *rt_req;
-       int addrlen;
-       int err;
+       struct rtmsg *rt;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
 
-       addrlen = family == AF_INET ? sizeof(struct in_addr) :
-               sizeof(struct in6_addr);
+       addrlen = family == AF_INET ? sizeof(struct in_addr)
+                                   : sizeof(struct in6_addr);
 
        err = netlink_open(&nlh, NETLINK_ROUTE);
        if (err)
@@ -931,30 +1677,32 @@ static int ip_gateway_add(int family, int ifindex, void *gw)
        if (!nlmsg)
                goto out;
 
-       answer = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
        if (!answer)
                goto out;
 
-       rt_req = (struct rt_req *)nlmsg;
-       rt_req->nlmsg.nlmsghdr.nlmsg_len =
-               NLMSG_LENGTH(sizeof(struct rtmsg));
-       rt_req->nlmsg.nlmsghdr.nlmsg_flags =
-               NLM_F_ACK|NLM_F_REQUEST|NLM_F_CREATE|NLM_F_EXCL;
-       rt_req->nlmsg.nlmsghdr.nlmsg_type = RTM_NEWROUTE;
-       rt_req->rt.rtm_family = family;
-       rt_req->rt.rtm_table = RT_TABLE_MAIN;
-       rt_req->rt.rtm_scope = RT_SCOPE_UNIVERSE;
-       rt_req->rt.rtm_protocol = RTPROT_BOOT;
-       rt_req->rt.rtm_type = RTN_UNICAST;
+       nlmsg->nlmsghdr->nlmsg_flags =
+           NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
+
+       rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
+       if (!rt)
+               goto out;
+       rt->rtm_family = family;
+       rt->rtm_table = RT_TABLE_MAIN;
+       rt->rtm_scope = RT_SCOPE_UNIVERSE;
+       rt->rtm_protocol = RTPROT_BOOT;
+       rt->rtm_type = RTN_UNICAST;
        /* "default" destination */
-       rt_req->rt.rtm_dst_len = 0;
+       rt->rtm_dst_len = 0;
 
        err = -EINVAL;
        if (nla_put_buffer(nlmsg, RTA_GATEWAY, gw, addrlen))
                goto out;
 
        /* Adding the interface index enables the use of link-local
-        * addresses for the gateway */
+        * addresses for the gateway.
+        */
        if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
                goto out;
 
@@ -976,47 +1724,1375 @@ int lxc_ipv6_gateway_add(int ifindex, struct in6_addr *gw)
        return ip_gateway_add(AF_INET6, ifindex, gw);
 }
 
-/*
- * There is a lxc_bridge_attach, but no need of a bridge detach
- * as automatically done by kernel when a netdev is deleted.
- */
-int lxc_bridge_attach(const char *bridge, const char *ifname)
+static int ip_route_dest_add(int family, int ifindex, void *dest)
 {
-       int fd, index, err;
-       struct ifreq ifr;
+       int addrlen, err;
+       struct nl_handler nlh;
+       struct rtmsg *rt;
+       struct nlmsg *answer = NULL, *nlmsg = NULL;
 
-       if (strlen(ifname) >= IFNAMSIZ)
-               return -EINVAL;
+       addrlen = family == AF_INET ? sizeof(struct in_addr)
+                                   : sizeof(struct in6_addr);
 
-       index = if_nametoindex(ifname);
-       if (!index)
-               return -EINVAL;
+       err = netlink_open(&nlh, NETLINK_ROUTE);
+       if (err)
+               return err;
 
-       fd = socket(AF_INET, SOCK_STREAM, 0);
-       if (fd < 0)
-               return -errno;
+       err = -ENOMEM;
+       nlmsg = nlmsg_alloc(NLMSG_GOOD_SIZE);
+       if (!nlmsg)
+               goto out;
 
-       strncpy(ifr.ifr_name, bridge, IFNAMSIZ);
-       ifr.ifr_ifindex = index;
-       err = ioctl(fd, SIOCBRADDIF, &ifr);
-       close(fd);
-       if (err)
-               err = -errno;
+       answer = nlmsg_alloc_reserve(NLMSG_GOOD_SIZE);
+       if (!answer)
+               goto out;
 
+       nlmsg->nlmsghdr->nlmsg_flags =
+           NLM_F_ACK | NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+       nlmsg->nlmsghdr->nlmsg_type = RTM_NEWROUTE;
+
+       rt = nlmsg_reserve(nlmsg, sizeof(struct rtmsg));
+       if (!rt)
+               goto out;
+       rt->rtm_family = family;
+       rt->rtm_table = RT_TABLE_MAIN;
+       rt->rtm_scope = RT_SCOPE_LINK;
+       rt->rtm_protocol = RTPROT_BOOT;
+       rt->rtm_type = RTN_UNICAST;
+       rt->rtm_dst_len = addrlen * 8;
+
+       err = -EINVAL;
+       if (nla_put_buffer(nlmsg, RTA_DST, dest, addrlen))
+               goto out;
+       if (nla_put_u32(nlmsg, RTA_OIF, ifindex))
+               goto out;
+       err = netlink_transaction(&nlh, nlmsg, answer);
+out:
+       netlink_close(&nlh);
+       nlmsg_free(answer);
+       nlmsg_free(nlmsg);
        return err;
 }
 
-static char* lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
-       [LXC_NET_VETH]    = "veth",
-       [LXC_NET_MACVLAN] = "macvlan",
-       [LXC_NET_VLAN]    = "vlan",
-       [LXC_NET_PHYS]    = "phys",
-       [LXC_NET_EMPTY]   = "empty",
+int lxc_ipv4_dest_add(int ifindex, struct in_addr *dest)
+{
+       return ip_route_dest_add(AF_INET, ifindex, dest);
+}
+
+int lxc_ipv6_dest_add(int ifindex, struct in6_addr *dest)
+{
+       return ip_route_dest_add(AF_INET6, ifindex, dest);
+}
+
+bool is_ovs_bridge(const char *bridge)
+{
+       int ret;
+       struct stat sb;
+       char brdirname[22 + IFNAMSIZ + 1] = {0};
+
+       ret = snprintf(brdirname, 22 + IFNAMSIZ + 1, "/sys/class/net/%s/bridge",
+                      bridge);
+       if (ret < 0 || (size_t)ret >= 22 + IFNAMSIZ + 1)
+               return false;
+
+       ret = stat(brdirname, &sb);
+       if (ret < 0 && errno == ENOENT)
+               return true;
+
+       return false;
+}
+
+struct ovs_veth_args {
+       const char *bridge;
+       const char *nic;
 };
 
-const char *lxc_net_type_to_str(int type)
+/* Called from a background thread - when nic goes away, remove it from the
+ * bridge.
+ */
+static int lxc_ovs_delete_port_exec(void *data)
 {
-       if (type < 0 || type > LXC_NET_MAXCONFTYPE)
-               return NULL;
-       return lxc_network_types[type];
+       struct ovs_veth_args *args = data;
+
+       execlp("ovs-vsctl", "ovs-vsctl", "del-port", args->bridge, args->nic,
+              (char *)NULL);
+       return -1;
+}
+
+int lxc_ovs_delete_port(const char *bridge, const char *nic)
+{
+       int ret;
+       char cmd_output[MAXPATHLEN];
+       struct ovs_veth_args args;
+
+       args.bridge = bridge;
+       args.nic = nic;
+       ret = run_command(cmd_output, sizeof(cmd_output),
+                         lxc_ovs_delete_port_exec, (void *)&args);
+       if (ret < 0) {
+               ERROR("Failed to delete \"%s\" from openvswitch bridge \"%s\": "
+                     "%s", bridge, nic, cmd_output);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int lxc_ovs_attach_bridge_exec(void *data)
+{
+       struct ovs_veth_args *args = data;
+
+       execlp("ovs-vsctl", "ovs-vsctl", "add-port", args->bridge, args->nic,
+              (char *)NULL);
+       return -1;
+}
+
+static int lxc_ovs_attach_bridge(const char *bridge, const char *nic)
+{
+       int ret;
+       char cmd_output[MAXPATHLEN];
+       struct ovs_veth_args args;
+
+       args.bridge = bridge;
+       args.nic = nic;
+       ret = run_command(cmd_output, sizeof(cmd_output),
+                         lxc_ovs_attach_bridge_exec, (void *)&args);
+       if (ret < 0) {
+               ERROR("Failed to attach \"%s\" to openvswitch bridge \"%s\": %s",
+                     bridge, nic, cmd_output);
+               return -1;
+       }
+
+       return 0;
+}
+
+int lxc_bridge_attach(const char *bridge, const char *ifname)
+{
+       int err, fd, index;
+       struct ifreq ifr;
+
+       if (strlen(ifname) >= IFNAMSIZ)
+               return -EINVAL;
+
+       index = if_nametoindex(ifname);
+       if (!index)
+               return -EINVAL;
+
+       if (is_ovs_bridge(bridge))
+               return lxc_ovs_attach_bridge(bridge, ifname);
+
+       fd = socket(AF_INET, SOCK_STREAM, 0);
+       if (fd < 0)
+               return -errno;
+
+       strncpy(ifr.ifr_name, bridge, IFNAMSIZ - 1);
+       ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+       ifr.ifr_ifindex = index;
+       err = ioctl(fd, SIOCBRADDIF, &ifr);
+       close(fd);
+       if (err)
+               err = -errno;
+
+       return err;
+}
+
+static const char *const lxc_network_types[LXC_NET_MAXCONFTYPE + 1] = {
+       [LXC_NET_EMPTY]   = "empty",
+       [LXC_NET_VETH]    = "veth",
+       [LXC_NET_MACVLAN] = "macvlan",
+       [LXC_NET_PHYS]    = "phys",
+       [LXC_NET_VLAN]    = "vlan",
+       [LXC_NET_NONE]    = "none",
+};
+
+const char *lxc_net_type_to_str(int type)
+{
+       if (type < 0 || type > LXC_NET_MAXCONFTYPE)
+               return NULL;
+
+       return lxc_network_types[type];
+}
+
+static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+
+char *lxc_mkifname(char *template)
+{
+       unsigned int seed;
+       FILE *urandom;
+       struct ifaddrs *ifa, *ifaddr;
+       char name[IFNAMSIZ];
+       bool exists = false;
+       size_t i = 0;
+
+       if (strlen(template) >= IFNAMSIZ)
+               return NULL;
+
+       /* Get all the network interfaces. */
+       getifaddrs(&ifaddr);
+
+       /* Initialize the random number generator. */
+       urandom = fopen("/dev/urandom", "r");
+       if (urandom != NULL) {
+               if (fread(&seed, sizeof(seed), 1, urandom) <= 0)
+                       seed = time(0);
+               fclose(urandom);
+       } else {
+               seed = time(0);
+       }
+
+#ifndef HAVE_RAND_R
+       srand(seed);
+#endif
+
+       /* Generate random names until we find one that doesn't exist. */
+       while (true) {
+               name[0] = '\0';
+               strcpy(name, template);
+
+               exists = false;
+               for (i = 0; i < strlen(name); i++) {
+                       if (name[i] == 'X') {
+#ifdef HAVE_RAND_R
+                               name[i] = padchar[rand_r(&seed) % (strlen(padchar) - 1)];
+#else
+                               name[i] = padchar[rand() % (strlen(padchar) - 1)];
+#endif
+                       }
+               }
+
+               for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
+                       if (!strcmp(ifa->ifa_name, name)) {
+                               exists = true;
+                               break;
+                       }
+               }
+
+               if (!exists)
+                       break;
+       }
+
+       freeifaddrs(ifaddr);
+       return strcpy(template, name);
+}
+
+int setup_private_host_hw_addr(char *veth1)
+{
+       int err, sockfd;
+       struct ifreq ifr;
+
+       sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (sockfd < 0)
+               return -errno;
+
+       err = snprintf((char *)ifr.ifr_name, IFNAMSIZ, "%s", veth1);
+       if (err < 0 || (size_t)err >= IFNAMSIZ)
+               return -E2BIG;
+
+       err = ioctl(sockfd, SIOCGIFHWADDR, &ifr);
+       if (err < 0) {
+               close(sockfd);
+               return -errno;
+       }
+
+       ifr.ifr_hwaddr.sa_data[0] = 0xfe;
+       err = ioctl(sockfd, SIOCSIFHWADDR, &ifr);
+       close(sockfd);
+       if (err < 0)
+               return -errno;
+
+       return 0;
+}
+
+int lxc_find_gateway_addresses(struct lxc_handler *handler)
+{
+       struct lxc_list *network = &handler->conf->network;
+       struct lxc_list *iterator;
+       struct lxc_netdev *netdev;
+       int link_index;
+
+       lxc_list_for_each(iterator, network) {
+               netdev = iterator->elem;
+
+               if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
+                       continue;
+
+               if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
+                       ERROR("Automatic gateway detection is only supported "
+                             "for veth and macvlan");
+                       return -1;
+               }
+
+               if (netdev->link[0] == '\0') {
+                       ERROR("Automatic gateway detection needs a link interface");
+                       return -1;
+               }
+
+               link_index = if_nametoindex(netdev->link);
+               if (!link_index)
+                       return -EINVAL;
+
+               if (netdev->ipv4_gateway_auto) {
+                       if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
+                               ERROR("Failed to automatically find ipv4 gateway "
+                                     "address from link interface \"%s\"", netdev->link);
+                               return -1;
+                       }
+               }
+
+               if (netdev->ipv6_gateway_auto) {
+                       if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
+                               ERROR("Failed to automatically find ipv6 gateway "
+                                     "address from link interface \"%s\"", netdev->link);
+                               return -1;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
+static int lxc_create_network_unpriv_exec(const char *lxcpath, char *lxcname,
+                                         struct lxc_netdev *netdev, pid_t pid)
+{
+       int ret;
+       pid_t child;
+       int bytes, pipefd[2];
+       char *token, *saveptr = NULL;
+       char netdev_link[IFNAMSIZ + 1];
+       char buffer[MAXPATHLEN] = {0};
+
+       if (netdev->type != LXC_NET_VETH) {
+               ERROR("Network type %d not support for unprivileged use", netdev->type);
+               return -1;
+       }
+
+       ret = pipe(pipefd);
+       if (ret < 0) {
+               SYSERROR("Failed to create pipe");
+               return -1;
+       }
+
+       child = fork();
+       if (child < 0) {
+               SYSERROR("Failed to create new process");
+               close(pipefd[0]);
+               close(pipefd[1]);
+               return -1;
+       }
+
+       if (child == 0) {
+               int ret;
+               char pidstr[LXC_NUMSTRLEN64];
+
+               close(pipefd[0]);
+
+               ret = dup2(pipefd[1], STDOUT_FILENO);
+               if (ret >= 0)
+                       ret = dup2(pipefd[1], STDERR_FILENO);
+               close(pipefd[1]);
+               if (ret < 0) {
+                       SYSERROR("Failed to duplicate std{err,out} file descriptor");
+                       exit(EXIT_FAILURE);
+               }
+
+               if (netdev->link[0] != '\0')
+                       strncpy(netdev_link, netdev->link, IFNAMSIZ);
+               else
+                       strncpy(netdev_link, "none", IFNAMSIZ);
+
+               ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid);
+               if (ret < 0 || ret >= LXC_NUMSTRLEN64)
+                       exit(EXIT_FAILURE);
+               pidstr[LXC_NUMSTRLEN64 - 1] = '\0';
+
+               INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
+                    lxcname, pidstr, netdev_link,
+                    netdev->name[0] != '\0' ? netdev->name : "(null)");
+               if (netdev->name[0] != '\0')
+                       execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
+                              lxcpath, lxcname, pidstr, "veth", netdev_link,
+                              netdev->name, (char *)NULL);
+               else
+                       execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
+                              lxcpath, lxcname, pidstr, "veth", netdev_link,
+                              (char *)NULL);
+               SYSERROR("Failed to execute lxc-user-nic");
+               exit(EXIT_FAILURE);
+       }
+
+       /* close the write-end of the pipe */
+       close(pipefd[1]);
+
+       bytes = read(pipefd[0], &buffer, MAXPATHLEN);
+       if (bytes < 0) {
+               SYSERROR("Failed to read from pipe file descriptor");
+               close(pipefd[0]);
+               return -1;
+       }
+       buffer[bytes - 1] = '\0';
+
+       ret = wait_for_pid(child);
+       close(pipefd[0]);
+       if (ret != 0) {
+               ERROR("lxc-user-nic failed to configure requested network: %s",
+                     buffer[0] != '\0' ? buffer : "(null)");
+               return -1;
+       }
+       TRACE("Received output \"%s\" from lxc-user-nic", buffer);
+
+       /* netdev->name */
+       token = strtok_r(buffer, ":", &saveptr);
+       if (!token) {
+               ERROR("Failed to parse lxc-user-nic output");
+               return -1;
+       }
+
+       memset(netdev->name, 0, IFNAMSIZ + 1);
+       strncpy(netdev->name, token, IFNAMSIZ);
+
+       /* netdev->ifindex */
+       token = strtok_r(NULL, ":", &saveptr);
+       if (!token) {
+               ERROR("Failed to parse lxc-user-nic output");
+               return -1;
+       }
+
+       ret = lxc_safe_int(token, &netdev->ifindex);
+       if (ret < 0) {
+               ERROR("%s - Failed to convert string \"%s\" to integer",
+                     strerror(-ret), token);
+               return -1;
+       }
+
+       /* netdev->priv.veth_attr.veth1 */
+       token = strtok_r(NULL, ":", &saveptr);
+       if (!token) {
+               ERROR("Failed to parse lxc-user-nic output");
+               return -1;
+       }
+
+       if (strlen(token) >= IFNAMSIZ) {
+               ERROR("Host side veth device name returned by lxc-user-nic is "
+                     "too long");
+               return -E2BIG;
+       }
+       strcpy(netdev->priv.veth_attr.veth1, token);
+
+       /* netdev->priv.veth_attr.ifindex */
+       token = strtok_r(NULL, ":", &saveptr);
+       if (!token) {
+               ERROR("Failed to parse lxc-user-nic output");
+               return -1;
+       }
+
+       ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
+       if (ret < 0) {
+               ERROR("%s - Failed to convert string \"%s\" to integer",
+                     strerror(-ret), token);
+               return -1;
+       }
+
+       return 0;
+}
+
+static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
+                                         struct lxc_netdev *netdev,
+                                         const char *netns_path)
+{
+       int bytes, ret;
+       pid_t child;
+       int pipefd[2];
+       char buffer[MAXPATHLEN] = {0};
+
+       if (netdev->type != LXC_NET_VETH) {
+               ERROR("Network type %d not support for unprivileged use", netdev->type);
+               return -1;
+       }
+
+       ret = pipe(pipefd);
+       if (ret < 0) {
+               SYSERROR("Failed to create pipe");
+               return -1;
+       }
+
+       child = fork();
+       if (child < 0) {
+               SYSERROR("Failed to create new process");
+               close(pipefd[0]);
+               close(pipefd[1]);
+               return -1;
+       }
+
+       if (child == 0) {
+               char *hostveth;
+               int ret;
+
+               close(pipefd[0]);
+
+               ret = dup2(pipefd[1], STDOUT_FILENO);
+               if (ret >= 0)
+                       ret = dup2(pipefd[1], STDERR_FILENO);
+               close(pipefd[1]);
+               if (ret < 0) {
+                       SYSERROR("Failed to duplicate std{err,out} file descriptor");
+                       exit(EXIT_FAILURE);
+               }
+
+               if (netdev->priv.veth_attr.pair[0] != '\0')
+                       hostveth = netdev->priv.veth_attr.pair;
+               else
+                       hostveth = netdev->priv.veth_attr.veth1;
+               if (hostveth[0] == '\0') {
+                       SYSERROR("Host side veth device name is missing");
+                       exit(EXIT_FAILURE);
+               }
+
+               if (netdev->link[0] == '\0') {
+                       SYSERROR("Network link for network device \"%s\" is "
+                                "missing", netdev->priv.veth_attr.veth1);
+                       exit(EXIT_FAILURE);
+               }
+
+               INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
+                    lxcname, netns_path, netdev->link, hostveth);
+               execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
+                      lxcname, netns_path, "veth", netdev->link, hostveth,
+                      (char *)NULL);
+               SYSERROR("Failed to exec lxc-user-nic.");
+               exit(EXIT_FAILURE);
+       }
+
+       close(pipefd[1]);
+
+       bytes = read(pipefd[0], &buffer, MAXPATHLEN);
+       if (bytes < 0) {
+               SYSERROR("Failed to read from pipe file descriptor.");
+               close(pipefd[0]);
+               return -1;
+       }
+       buffer[bytes - 1] = '\0';
+
+       if (wait_for_pid(child) != 0) {
+               ERROR("lxc-user-nic failed to delete requested network: %s",
+                     buffer[0] != '\0' ? buffer : "(null)");
+               close(pipefd[0]);
+               return -1;
+       }
+
+       close(pipefd[0]);
+
+       return 0;
+}
+
+bool lxc_delete_network_unpriv(struct lxc_handler *handler)
+{
+       int ret;
+       struct lxc_list *iterator;
+       struct lxc_list *network = &handler->conf->network;
+       /* strlen("/proc/") = 6
+        * +
+        * LXC_NUMSTRLEN64
+        * +
+        * strlen("/fd/") = 4
+        * +
+        * LXC_NUMSTRLEN64
+        * +
+        * \0
+        */
+       char netns_path[6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1];
+       bool deleted_all = true;
+
+       if (handler->am_root)
+               return true;
+
+       *netns_path = '\0';
+
+       if (handler->netnsfd < 0) {
+               DEBUG("Cannot not guarantee safe deletion of network devices. "
+                     "Manual cleanup maybe needed");
+               return false;
+       }
+
+       ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
+                      getpid(), handler->netnsfd);
+       if (ret < 0 || ret >= sizeof(netns_path))
+               return false;
+
+       lxc_list_for_each(iterator, network) {
+               char *hostveth = NULL;
+               struct lxc_netdev *netdev = iterator->elem;
+
+               /* We can only delete devices whose ifindex we have. If we don't
+                * have the index it means that we didn't create it.
+                */
+               if (!netdev->ifindex)
+                       continue;
+
+               if (netdev->type == LXC_NET_PHYS) {
+                       ret = lxc_netdev_rename_by_index(netdev->ifindex,
+                                                        netdev->link);
+                       if (ret < 0)
+                               WARN("Failed to rename interface with index %d "
+                                    "to its initial name \"%s\"",
+                                    netdev->ifindex, netdev->link);
+                       else
+                               TRACE("Renamed interface with index %d to its "
+                                     "initial name \"%s\"",
+                                     netdev->ifindex, netdev->link);
+                       continue;
+               }
+
+               ret = netdev_deconf[netdev->type](handler, netdev);
+               if (ret < 0)
+                       WARN("Failed to deconfigure network device");
+
+               if (netdev->type != LXC_NET_VETH)
+                       continue;
+
+               if (!is_ovs_bridge(netdev->link))
+                       continue;
+
+               if (netdev->priv.veth_attr.pair[0] != '\0')
+                       hostveth = netdev->priv.veth_attr.pair;
+               else
+                       hostveth = netdev->priv.veth_attr.veth1;
+               if (hostveth[0] == '\0')
+                       continue;
+
+               ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
+                                                    handler->name, netdev,
+                                                    netns_path);
+               if (ret < 0) {
+                       deleted_all = false;
+                       WARN("Failed to remove port \"%s\" from openvswitch "
+                            "bridge \"%s\"", hostveth, netdev->link);
+                       continue;
+               }
+               INFO("Removed interface \"%s\" from \"%s\"", hostveth,
+                    netdev->link);
+       }
+
+       return deleted_all;
+}
+
+int lxc_create_network_priv(struct lxc_handler *handler)
+{
+       struct lxc_list *iterator;
+       struct lxc_list *network = &handler->conf->network;
+
+       if (!handler->am_root)
+               return 0;
+
+       lxc_list_for_each(iterator, network) {
+               struct lxc_netdev *netdev = iterator->elem;
+
+               if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
+                       ERROR("Invalid network configuration type %d", netdev->type);
+                       return -1;
+               }
+
+               if (netdev_conf[netdev->type](handler, netdev)) {
+                       ERROR("Failed to create network device");
+                       return -1;
+               }
+
+       }
+
+       return 0;
+}
+
+int lxc_network_move_created_netdev_priv(const char *lxcpath, char *lxcname,
+                                        struct lxc_list *network, pid_t pid)
+{
+       int ret;
+       char ifname[IFNAMSIZ];
+       struct lxc_list *iterator;
+
+       if (am_unpriv())
+               return 0;
+
+       lxc_list_for_each(iterator, network) {
+               struct lxc_netdev *netdev = iterator->elem;
+
+               if (!netdev->ifindex)
+                       continue;
+
+               /* retrieve the name of the interface */
+               if (!if_indextoname(netdev->ifindex, ifname)) {
+                       ERROR("No interface corresponding to ifindex \"%d\"",
+                             netdev->ifindex);
+                       return -1;
+               }
+
+               ret = lxc_netdev_move_by_name(ifname, pid, NULL);
+               if (ret) {
+                       ERROR("Failed to move network device \"%s\" to "
+                             "network namespace %d: %s", ifname, pid,
+                             strerror(-ret));
+                       return -1;
+               }
+
+               DEBUG("Moved network device \"%s\"/\"%s\" to network namespace "
+                     "of %d",
+                     ifname, netdev->name[0] != '\0' ? netdev->name : "(null)",
+                     pid);
+       }
+
+       return 0;
+}
+
+int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
+                             struct lxc_list *network, pid_t pid)
+{
+       struct lxc_list *iterator;
+
+       if (!am_unpriv())
+               return 0;
+
+       lxc_list_for_each(iterator, network) {
+               struct lxc_netdev *netdev = iterator->elem;
+
+               if (netdev->type == LXC_NET_EMPTY)
+                       continue;
+
+               if (netdev->type == LXC_NET_NONE)
+                       continue;
+
+               if (netdev->type != LXC_NET_VETH) {
+                       ERROR("Networks of type %s are not supported by "
+                             "unprivileged containers",
+                             lxc_net_type_to_str(netdev->type));
+                       return -1;
+               }
+
+               if (netdev->mtu)
+                       INFO("mtu ignored due to insufficient privilege");
+
+               if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev, pid))
+                       return -1;
+       }
+
+       return 0;
+}
+
+bool lxc_delete_network_priv(struct lxc_handler *handler)
+{
+       int ret;
+       struct lxc_list *iterator;
+       struct lxc_list *network = &handler->conf->network;
+       bool deleted_all = true;
+
+       if (!handler->am_root)
+               return true;
+
+       lxc_list_for_each(iterator, network) {
+               char *hostveth = NULL;
+               struct lxc_netdev *netdev = iterator->elem;
+
+               /* We can only delete devices whose ifindex we have. If we don't
+                * have the index it means that we didn't create it.
+                */
+               if (!netdev->ifindex)
+                       continue;
+
+               if (netdev->type == LXC_NET_PHYS) {
+                       ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
+                       if (ret < 0)
+                               WARN("Failed to rename interface with index %d "
+                                    "from \"%s\" to its initial name \"%s\"",
+                                    netdev->ifindex, netdev->name, netdev->link);
+                       else
+                               TRACE("Renamed interface with index %d from "
+                                     "\"%s\" to its initial name \"%s\"",
+                                     netdev->ifindex, netdev->name,
+                                     netdev->link);
+                       continue;
+               }
+
+               ret = netdev_deconf[netdev->type](handler, netdev);
+               if (ret < 0)
+                       WARN("Failed to deconfigure network device");
+
+               /* Recent kernels remove the virtual interfaces when the network
+                * namespace is destroyed but in case we did not move the
+                * interface to the network namespace, we have to destroy it.
+                */
+               ret = lxc_netdev_delete_by_index(netdev->ifindex);
+               if (-ret == ENODEV) {
+                       INFO("Interface \"%s\" with index %d already "
+                                       "deleted or existing in different network "
+                                       "namespace",
+                                       netdev->name[0] != '\0' ? netdev->name : "(null)",
+                                       netdev->ifindex);
+               } else if (ret < 0) {
+                       deleted_all = false;
+                       WARN("Failed to remove interface \"%s\" with "
+                                       "index %d: %s",
+                                       netdev->name[0] != '\0' ? netdev->name : "(null)",
+                                       netdev->ifindex, strerror(-ret));
+                       continue;
+               }
+               INFO("Removed interface \"%s\" with index %d",
+                               netdev->name[0] != '\0' ? netdev->name : "(null)",
+                               netdev->ifindex);
+
+               if (netdev->type != LXC_NET_VETH)
+                       continue;
+
+               /* Explicitly delete host veth device to prevent lingering
+                * devices. We had issues in LXD around this.
+                */
+               if (netdev->priv.veth_attr.pair[0] != '\0')
+                       hostveth = netdev->priv.veth_attr.pair;
+               else
+                       hostveth = netdev->priv.veth_attr.veth1;
+               if (hostveth[0] == '\0')
+                       continue;
+
+               ret = lxc_netdev_delete_by_name(hostveth);
+               if (ret < 0) {
+                       deleted_all = false;
+                       WARN("Failed to remove interface \"%s\" from \"%s\": %s",
+                            hostveth, netdev->link, strerror(-ret));
+                       continue;
+               }
+               INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
+
+               if (!is_ovs_bridge(netdev->link)) {
+                       netdev->priv.veth_attr.veth1[0] = '\0';
+                       continue;
+               }
+
+               /* Delete the openvswitch port. */
+               ret = lxc_ovs_delete_port(netdev->link, hostveth);
+               if (ret < 0)
+                       WARN("Failed to remove port \"%s\" from openvswitch "
+                            "bridge \"%s\"", hostveth, netdev->link);
+               else
+                       INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
+                            hostveth, netdev->link);
+
+               netdev->priv.veth_attr.veth1[0] = '\0';
+       }
+
+       return deleted_all;
+}
+
+int lxc_requests_empty_network(struct lxc_handler *handler)
+{
+       struct lxc_list *network = &handler->conf->network;
+       struct lxc_list *iterator;
+       bool found_none = false, found_nic = false;
+
+       if (lxc_list_empty(network))
+               return 0;
+
+       lxc_list_for_each(iterator, network) {
+               struct lxc_netdev *netdev = iterator->elem;
+
+               if (netdev->type == LXC_NET_NONE)
+                       found_none = true;
+               else
+                       found_nic = true;
+       }
+       if (found_none && !found_nic)
+               return 1;
+       return 0;
+}
+
+/* try to move physical nics to the init netns */
+int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
+{
+       int ret;
+       int oldfd;
+       char ifname[IFNAMSIZ];
+       struct lxc_list *iterator;
+       int netnsfd = handler->netnsfd;
+       struct lxc_conf *conf = handler->conf;
+
+       /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
+        * the parent network namespace. We won't have this capability if we are
+        * unprivileged.
+        */
+       if (!handler->am_root)
+               return 0;
+
+       TRACE("Moving physical network devices back to parent network namespace");
+
+       oldfd = lxc_preserve_ns(getpid(), "net");
+       if (oldfd < 0) {
+               SYSERROR("Failed to preserve network namespace");
+               return -1;
+       }
+
+       ret = setns(netnsfd, CLONE_NEWNET);
+       if (ret < 0) {
+               SYSERROR("Failed to enter network namespace");
+               close(oldfd);
+               return -1;
+       }
+
+       lxc_list_for_each(iterator, &conf->network) {
+               struct lxc_netdev *netdev = iterator->elem;
+
+               if (netdev->type != LXC_NET_PHYS)
+                       continue;
+
+               /* Retrieve the name of the interface in the container's network
+                * namespace.
+                */
+               if (!if_indextoname(netdev->ifindex, ifname)) {
+                       WARN("No interface corresponding to ifindex %d",
+                            netdev->ifindex);
+                       continue;
+               }
+
+               ret = lxc_netdev_move_by_name(ifname, 1, netdev->link);
+               if (ret < 0)
+                       WARN("Error moving network device \"%s\" back to "
+                            "network namespace", ifname);
+               else
+                       TRACE("Moved network device \"%s\" back to network "
+                             "namespace", ifname);
+       }
+
+       ret = setns(oldfd, CLONE_NEWNET);
+       close(oldfd);
+       if (ret < 0) {
+               SYSERROR("Failed to enter network namespace");
+               return -1;
+       }
+
+       return 0;
+}
+
+static int setup_hw_addr(char *hwaddr, const char *ifname)
+{
+       struct sockaddr sockaddr;
+       struct ifreq ifr;
+       int ret, fd, saved_errno;
+
+       ret = lxc_convert_mac(hwaddr, &sockaddr);
+       if (ret) {
+               ERROR("Mac address \"%s\" conversion failed: %s", hwaddr,
+                     strerror(-ret));
+               return -1;
+       }
+
+       memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
+       ifr.ifr_name[IFNAMSIZ-1] = '\0';
+       memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
+
+       fd = socket(AF_INET, SOCK_DGRAM, 0);
+       if (fd < 0)
+               return -1;
+
+       ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
+       saved_errno = errno;
+       close(fd);
+       if (ret)
+               ERROR("Failed to perform ioctl: %s", strerror(saved_errno));
+
+       DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
+             ifr.ifr_name);
+
+       return ret;
+}
+
+static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
+{
+       struct lxc_list *iterator;
+       int err;
+
+       lxc_list_for_each(iterator, ip) {
+               struct lxc_inetdev *inetdev = iterator->elem;
+
+               err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
+                                       &inetdev->bcast, inetdev->prefix);
+               if (err) {
+                       ERROR("Failed to setup ipv4 address for network device "
+                             "with eifindex %d: %s", ifindex, strerror(-err));
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
+{
+       struct lxc_list *iterator;
+       int err;
+
+       lxc_list_for_each(iterator, ip) {
+               struct lxc_inet6dev *inet6dev = iterator->elem;
+
+               err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
+                                       &inet6dev->mcast, &inet6dev->acast,
+                                       inet6dev->prefix);
+               if (err) {
+                       ERROR("Failed to setup ipv6 address for network device "
+                             "with eifindex %d: %s", ifindex, strerror(-err));
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
+static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
+{
+       char ifname[IFNAMSIZ];
+       int err;
+       const char *net_type_name;
+       char *current_ifname = ifname;
+
+       /* empty network namespace */
+       if (!netdev->ifindex) {
+               if (netdev->flags & IFF_UP) {
+                       err = lxc_netdev_up("lo");
+                       if (err) {
+                               ERROR("Failed to set the loopback network "
+                                     "device up: %s",
+                                     strerror(-err));
+                               return -1;
+                       }
+               }
+
+               if (netdev->type == LXC_NET_EMPTY)
+                       return 0;
+
+               if (netdev->type == LXC_NET_NONE)
+                       return 0;
+
+               if (netdev->type != LXC_NET_VETH) {
+                       net_type_name = lxc_net_type_to_str(netdev->type);
+                       ERROR("%s networks are not supported for containers "
+                             "not setup up by privileged users", net_type_name);
+                       return -1;
+               }
+
+               netdev->ifindex = if_nametoindex(netdev->name);
+       }
+
+       /* get the new ifindex in case of physical netdev */
+       if (netdev->type == LXC_NET_PHYS) {
+               netdev->ifindex = if_nametoindex(netdev->link);
+               if (!netdev->ifindex) {
+                       ERROR("Failed to get ifindex for network device \"%s\"",
+                             netdev->link);
+                       return -1;
+               }
+       }
+
+       /* retrieve the name of the interface */
+       if (!if_indextoname(netdev->ifindex, current_ifname)) {
+               ERROR("Failed get name for network device with ifindex %d",
+                     netdev->ifindex);
+               return -1;
+       }
+
+       /* Default: let the system to choose one interface name.
+        * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
+        * netlink will replace the format specifier with an appropriate index.
+        */
+       if (netdev->name[0] == '\0') {
+               if (netdev->type == LXC_NET_PHYS)
+                       strcpy(netdev->name, netdev->link);
+               else
+                       strcpy(netdev->name, "eth%d");
+       }
+
+       /* rename the interface name */
+       if (strcmp(ifname, netdev->name) != 0) {
+               err = lxc_netdev_rename_by_name(ifname, netdev->name);
+               if (err) {
+                       ERROR("Failed to rename network device \"%s\" to "
+                             "\"%s\": %s", ifname, netdev->name, strerror(-err));
+                       return -1;
+               }
+       }
+
+       /* Re-read the name of the interface because its name has changed
+        * and would be automatically allocated by the system
+        */
+       if (!if_indextoname(netdev->ifindex, current_ifname)) {
+               ERROR("Failed get name for network device with ifindex %d",
+                     netdev->ifindex);
+               return -1;
+       }
+
+       /* Now update the recorded name of the network device to reflect the
+        * name of the network device in the child's network namespace. We will
+        * later on send this information back to the parent.
+        */
+       strcpy(netdev->name, current_ifname);
+
+       /* set a mac address */
+       if (netdev->hwaddr) {
+               if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
+                       ERROR("Failed to setup hw address for network device \"%s\"",
+                             current_ifname);
+                       return -1;
+               }
+       }
+
+       /* setup ipv4 addresses on the interface */
+       if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
+               ERROR("Failed to setup ip addresses for network device \"%s\"",
+                     ifname);
+               return -1;
+       }
+
+       /* setup ipv6 addresses on the interface */
+       if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
+               ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
+                     ifname);
+               return -1;
+       }
+
+       /* set the network device up */
+       if (netdev->flags & IFF_UP) {
+               int err;
+
+               err = lxc_netdev_up(current_ifname);
+               if (err) {
+                       ERROR("Failed to set network device \"%s\" up: %s",
+                             current_ifname, strerror(-err));
+                       return -1;
+               }
+
+               /* the network is up, make the loopback up too */
+               err = lxc_netdev_up("lo");
+               if (err) {
+                       ERROR("Failed to set the loopback network device up: %s",
+                             strerror(-err));
+                       return -1;
+               }
+       }
+
+       /* We can only set up the default routes after bringing
+        * up the interface, sine bringing up the interface adds
+        * the link-local routes and we can't add a default
+        * route if the gateway is not reachable. */
+
+       /* setup ipv4 gateway on the interface */
+       if (netdev->ipv4_gateway) {
+               if (!(netdev->flags & IFF_UP)) {
+                       ERROR("Cannot add ipv4 gateway for network device "
+                             "\"%s\" when not bringing up the interface", ifname);
+                       return -1;
+               }
+
+               if (lxc_list_empty(&netdev->ipv4)) {
+                       ERROR("Cannot add ipv4 gateway for network device "
+                             "\"%s\" when not assigning an address", ifname);
+                       return -1;
+               }
+
+               err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
+               if (err) {
+                       err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
+                       if (err) {
+                               ERROR("Failed to add ipv4 dest for network "
+                                     "device \"%s\": %s", ifname, strerror(-err));
+                       }
+
+                       err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
+                       if (err) {
+                               ERROR("Failed to setup ipv4 gateway for "
+                                     "network device \"%s\": %s",
+                                     ifname, strerror(-err));
+                               if (netdev->ipv4_gateway_auto) {
+                                       char buf[INET_ADDRSTRLEN];
+                                       inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
+                                       ERROR("Fried to set autodetected ipv4 gateway \"%s\"", buf);
+                               }
+                               return -1;
+                       }
+               }
+       }
+
+       /* setup ipv6 gateway on the interface */
+       if (netdev->ipv6_gateway) {
+               if (!(netdev->flags & IFF_UP)) {
+                       ERROR("Cannot add ipv6 gateway for network device "
+                             "\"%s\" when not bringing up the interface", ifname);
+                       return -1;
+               }
+
+               if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
+                       ERROR("Cannot add ipv6 gateway for network device "
+                             "\"%s\" when not assigning an address", ifname);
+                       return -1;
+               }
+
+               err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
+               if (err) {
+                       err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
+                       if (err) {
+                               ERROR("Failed to add ipv6 dest for network "
+                                     "device \"%s\": %s", ifname, strerror(-err));
+                       }
+
+                       err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
+                       if (err) {
+                               ERROR("Failed to setup ipv6 gateway for "
+                                     "network device \"%s\": %s", ifname,
+                                     strerror(-err));
+                               if (netdev->ipv6_gateway_auto) {
+                                       char buf[INET6_ADDRSTRLEN];
+                                       inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
+                                       ERROR("Tried to set autodetected ipv6 "
+                                             "gateway for network device "
+                                             "\"%s\"", buf);
+                               }
+                               return -1;
+                       }
+               }
+       }
+
+       DEBUG("Network device \"%s\" has been setup", current_ifname);
+
+       return 0;
+}
+
+int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
+                                         struct lxc_list *network)
+{
+       struct lxc_list *iterator;
+       struct lxc_netdev *netdev;
+
+       lxc_list_for_each(iterator, network) {
+               netdev = iterator->elem;
+
+               /* REMOVE in LXC 3.0 */
+               if (netdev->idx < 0) {
+                       ERROR("WARNING: using \"lxc.network.*\" keys to define "
+                             "networks is DEPRECATED, please switch to using "
+                             "\"lxc.net.[i].* keys\"");
+               }
+
+               if (lxc_setup_netdev_in_child_namespaces(netdev)) {
+                       ERROR("failed to setup netdev");
+                       return -1;
+               }
+       }
+
+       if (!lxc_list_empty(network))
+               INFO("network has been setup");
+
+       return 0;
+}
+
+int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
+{
+       struct lxc_list *iterator;
+       struct lxc_list *network = &handler->conf->network;
+       int data_sock = handler->data_sock[0];
+
+       if (handler->am_root)
+               return 0;
+
+       lxc_list_for_each(iterator, network) {
+               int ret;
+               struct lxc_netdev *netdev = iterator->elem;
+
+               if (netdev->type != LXC_NET_VETH)
+                       continue;
+
+               ret = send(data_sock, netdev->name, IFNAMSIZ, 0);
+               if (ret < 0)
+                       return -1;
+               TRACE("Sent network device name \"%s\" to child", netdev->name);
+       }
+
+       return 0;
+}
+
+int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
+{
+       struct lxc_list *iterator;
+       struct lxc_list *network = &handler->conf->network;
+       int data_sock = handler->data_sock[1];
+
+       if (handler->am_root)
+               return 0;
+
+       lxc_list_for_each(iterator, network) {
+               int ret;
+               struct lxc_netdev *netdev = iterator->elem;
+
+               if (netdev->type != LXC_NET_VETH)
+                       continue;
+
+               ret = recv(data_sock, netdev->name, IFNAMSIZ, 0);
+               if (ret < 0)
+                       return -1;
+               TRACE("Received network device name \"%s\" from parent", netdev->name);
+       }
+
+       return 0;
+}
+
+int lxc_network_send_name_and_ifindex_to_parent(struct lxc_handler *handler)
+{
+       struct lxc_list *iterator, *network;
+       int data_sock = handler->data_sock[0];
+
+       if (!handler->am_root)
+               return 0;
+
+       network = &handler->conf->network;
+       lxc_list_for_each(iterator, network) {
+               int ret;
+               struct lxc_netdev *netdev = iterator->elem;
+
+               /* Send network device name in the child's namespace to parent. */
+               ret = send(data_sock, netdev->name, IFNAMSIZ, 0);
+               if (ret < 0)
+                       return -1;
+
+               /* Send network device ifindex in the child's namespace to
+                * parent.
+                */
+               ret = send(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
+               if (ret < 0)
+                       return -1;
+       }
+
+       TRACE("Sent network device names and ifindeces to parent");
+       return 0;
+}
+
+int lxc_network_recv_name_and_ifindex_from_child(struct lxc_handler *handler)
+{
+       struct lxc_list *iterator, *network;
+       int data_sock = handler->data_sock[1];
+
+       if (!handler->am_root)
+               return 0;
+
+       network = &handler->conf->network;
+       lxc_list_for_each(iterator, network) {
+               int ret;
+               struct lxc_netdev *netdev = iterator->elem;
+
+               /* Receive network device name in the child's namespace to
+                * parent.
+                */
+               ret = recv(data_sock, netdev->name, IFNAMSIZ, 0);
+               if (ret < 0)
+                       return -1;
+
+               /* Receive network device ifindex in the child's namespace to
+                * parent.
+                */
+               ret = recv(data_sock, &netdev->ifindex, sizeof(netdev->ifindex), 0);
+               if (ret < 0)
+                       return -1;
+       }
+
+       return 0;
 }