* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "config.h"
-
+#define _GNU_SOURCE
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include "af_unix.h"
#include "conf.h"
+#include "config.h"
#include "log.h"
#include "network.h"
#include "nl.h"
lxc_log_define(lxc_network, lxc);
+typedef int (*instantiate_cb)(struct lxc_handler *, struct lxc_netdev *);
+
+static int instantiate_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ int bridge_index, err;
+ char *veth1, *veth2;
+ char veth1buf[IFNAMSIZ], veth2buf[IFNAMSIZ];
+ unsigned int mtu = 0;
+
+ if (netdev->priv.veth_attr.pair[0] != '\0') {
+ veth1 = netdev->priv.veth_attr.pair;
+ if (handler->conf->reboot)
+ lxc_netdev_delete_by_name(veth1);
+ } else {
+ err = snprintf(veth1buf, sizeof(veth1buf), "vethXXXXXX");
+ if (err < 0 || (size_t)err >= sizeof(veth1buf))
+ return -1;
+
+ veth1 = lxc_mkifname(veth1buf);
+ if (!veth1)
+ return -1;
+
+ /* store away for deconf */
+ memcpy(netdev->priv.veth_attr.veth1, veth1, IFNAMSIZ);
+ }
+
+ snprintf(veth2buf, sizeof(veth2buf), "vethXXXXXX");
+ veth2 = lxc_mkifname(veth2buf);
+ if (!veth2)
+ goto out_delete;
+
+ err = lxc_veth_create(veth1, veth2);
+ if (err) {
+ ERROR("Failed to create veth pair \"%s\" and \"%s\": %s", veth1,
+ veth2, strerror(-err));
+ goto out_delete;
+ }
+
+ /* changing the high byte of the mac address to 0xfe, the bridge interface
+ * will always keep the host's mac address and not take the mac address
+ * of a container */
+ err = setup_private_host_hw_addr(veth1);
+ if (err) {
+ ERROR("Failed to change mac address of host interface \"%s\": %s",
+ veth1, strerror(-err));
+ goto out_delete;
+ }
+
+ /* Retrieve ifindex of the host's veth device. */
+ netdev->priv.veth_attr.ifindex = if_nametoindex(veth1);
+ if (!netdev->priv.veth_attr.ifindex) {
+ ERROR("Failed to retrieve ifindex for \"%s\"", veth1);
+ goto out_delete;
+ }
+
+ /* Note that we're retrieving the container's ifindex in the host's
+ * network namespace because we need it to move the device from the
+ * host's network namespace to the container's network namespace later
+ * on.
+ */
+ netdev->ifindex = if_nametoindex(veth2);
+ if (!netdev->ifindex) {
+ ERROR("Failed to retrieve ifindex for \"%s\"", veth2);
+ goto out_delete;
+ }
+
+ if (netdev->mtu) {
+ if (lxc_safe_uint(netdev->mtu, &mtu) < 0)
+ WARN("Failed to parse mtu");
+ else
+ INFO("Retrieved mtu %d", mtu);
+ } else if (netdev->link[0] != '\0') {
+ bridge_index = if_nametoindex(netdev->link);
+ if (bridge_index) {
+ mtu = netdev_get_mtu(bridge_index);
+ INFO("Retrieved mtu %d from %s", mtu, netdev->link);
+ } else {
+ mtu = netdev_get_mtu(netdev->ifindex);
+ INFO("Retrieved mtu %d from %s", mtu, veth2);
+ }
+ }
+
+ if (mtu) {
+ err = lxc_netdev_set_mtu(veth1, mtu);
+ if (!err)
+ err = lxc_netdev_set_mtu(veth2, mtu);
+ if (err) {
+ ERROR("Failed to set mtu \"%d\" for veth pair \"%s\" "
+ "and \"%s\": %s",
+ mtu, veth1, veth2, strerror(-err));
+ goto out_delete;
+ }
+ }
+
+ if (netdev->link[0] != '\0') {
+ err = lxc_bridge_attach(netdev->link, veth1);
+ if (err) {
+ ERROR("Failed to attach \"%s\" to bridge \"%s\": %s",
+ veth1, netdev->link, strerror(-err));
+ goto out_delete;
+ }
+ INFO("Attached \"%s\" to bridge \"%s\"", veth1, netdev->link);
+ }
+
+ err = lxc_netdev_up(veth1);
+ if (err) {
+ ERROR("Failed to set \"%s\" up: %s", veth1, strerror(-err));
+ goto out_delete;
+ }
+
+ if (netdev->upscript) {
+ err = run_script(handler->name, "net", netdev->upscript, "up",
+ "veth", veth1, (char*) NULL);
+ if (err)
+ goto out_delete;
+ }
+
+ DEBUG("Instantiated veth \"%s/%s\", index is \"%d\"", veth1, veth2,
+ netdev->ifindex);
+
+ return 0;
+
+out_delete:
+ if (netdev->ifindex != 0)
+ lxc_netdev_delete_by_name(veth1);
+ if (netdev->priv.veth_attr.pair != veth1)
+ free(veth1);
+ free(veth2);
+ return -1;
+}
+
+static int instantiate_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ char peerbuf[IFNAMSIZ], *peer;
+ int err;
+
+ if (netdev->link[0] == '\0') {
+ ERROR("No link for macvlan network device specified");
+ return -1;
+ }
+
+ err = snprintf(peerbuf, sizeof(peerbuf), "mcXXXXXX");
+ if (err < 0 || (size_t)err >= sizeof(peerbuf))
+ return -1;
+
+ peer = lxc_mkifname(peerbuf);
+ if (!peer)
+ return -1;
+
+ err = lxc_macvlan_create(netdev->link, peer,
+ netdev->priv.macvlan_attr.mode);
+ if (err) {
+ ERROR("Failed to create macvlan interface \"%s\" on \"%s\": %s",
+ peer, netdev->link, strerror(-err));
+ goto out;
+ }
+
+ netdev->ifindex = if_nametoindex(peer);
+ if (!netdev->ifindex) {
+ ERROR("Failed to retrieve ifindex for \"%s\"", peer);
+ goto out;
+ }
+
+ if (netdev->upscript) {
+ err = run_script(handler->name, "net", netdev->upscript, "up",
+ "macvlan", netdev->link, (char*) NULL);
+ if (err)
+ goto out;
+ }
+
+ DEBUG("Instantiated macvlan \"%s\" with ifindex is %d and mode %d",
+ peer, netdev->ifindex, netdev->priv.macvlan_attr.mode);
+
+ return 0;
+out:
+ lxc_netdev_delete_by_name(peer);
+ free(peer);
+ return -1;
+}
+
+static int instantiate_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ char peer[IFNAMSIZ];
+ int err;
+ static uint16_t vlan_cntr = 0;
+ unsigned int mtu = 0;
+
+ if (netdev->link[0] == '\0') {
+ ERROR("No link for vlan network device specified");
+ return -1;
+ }
+
+ err = snprintf(peer, sizeof(peer), "vlan%d-%d", netdev->priv.vlan_attr.vid, vlan_cntr++);
+ if (err < 0 || (size_t)err >= sizeof(peer))
+ return -1;
+
+ err = lxc_vlan_create(netdev->link, peer, netdev->priv.vlan_attr.vid);
+ if (err) {
+ ERROR("Failed to create vlan interface \"%s\" on \"%s\": %s",
+ peer, netdev->link, strerror(-err));
+ return -1;
+ }
+
+ netdev->ifindex = if_nametoindex(peer);
+ if (!netdev->ifindex) {
+ ERROR("Failed to retrieve ifindex for \"%s\"", peer);
+ lxc_netdev_delete_by_name(peer);
+ return -1;
+ }
+
+ DEBUG("Instantiated vlan \"%s\" with ifindex is \"%d\" (vlan1000)",
+ peer, netdev->ifindex);
+ if (netdev->mtu) {
+ if (lxc_safe_uint(netdev->mtu, &mtu) < 0) {
+ ERROR("Failed to retrieve mtu from \"%d\"/\"%s\".",
+ netdev->ifindex,
+ netdev->name[0] != '\0' ? netdev->name : "(null)");
+ return -1;
+ }
+ err = lxc_netdev_set_mtu(peer, mtu);
+ if (err) {
+ ERROR("Failed to set mtu \"%s\" for \"%s\": %s",
+ netdev->mtu, peer, strerror(-err));
+ lxc_netdev_delete_by_name(peer);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int instantiate_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ if (netdev->link[0] == '\0') {
+ ERROR("No link for physical interface specified");
+ return -1;
+ }
+
+ /* Note that we're retrieving the container's ifindex in the host's
+ * network namespace because we need it to move the device from the
+ * host's network namespace to the container's network namespace later
+ * on.
+ * Note that netdev->link will contain the name of the physical network
+ * device in the host's namespace.
+ */
+ netdev->ifindex = if_nametoindex(netdev->link);
+ if (!netdev->ifindex) {
+ ERROR("Failed to retrieve ifindex for \"%s\"", netdev->link);
+ return -1;
+ }
+
+ /* Store the ifindex of the host's network device in the host's
+ * namespace.
+ */
+ netdev->priv.phys_attr.ifindex = netdev->ifindex;
+
+ if (netdev->upscript) {
+ int err;
+ err = run_script(handler->name, "net", netdev->upscript,
+ "up", "phys", netdev->link, (char*) NULL);
+ if (err)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int instantiate_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ netdev->ifindex = 0;
+ if (netdev->upscript) {
+ int err;
+ err = run_script(handler->name, "net", netdev->upscript,
+ "up", "empty", (char*) NULL);
+ if (err)
+ return -1;
+ }
+ return 0;
+}
+
+static int instantiate_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ netdev->ifindex = 0;
+ return 0;
+}
+
+static instantiate_cb netdev_conf[LXC_NET_MAXCONFTYPE + 1] = {
+ [LXC_NET_VETH] = instantiate_veth,
+ [LXC_NET_MACVLAN] = instantiate_macvlan,
+ [LXC_NET_VLAN] = instantiate_vlan,
+ [LXC_NET_PHYS] = instantiate_phys,
+ [LXC_NET_EMPTY] = instantiate_empty,
+ [LXC_NET_NONE] = instantiate_none,
+};
+
+static int shutdown_veth(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ char *veth1;
+ int err;
+
+ if (netdev->priv.veth_attr.pair[0] != '\0')
+ veth1 = netdev->priv.veth_attr.pair;
+ else
+ veth1 = netdev->priv.veth_attr.veth1;
+
+ if (netdev->downscript) {
+ err = run_script(handler->name, "net", netdev->downscript,
+ "down", "veth", veth1, (char*) NULL);
+ if (err)
+ return -1;
+ }
+ return 0;
+}
+
+static int shutdown_macvlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ int err;
+
+ if (netdev->downscript) {
+ err = run_script(handler->name, "net", netdev->downscript,
+ "down", "macvlan", netdev->link,
+ (char*) NULL);
+ if (err)
+ return -1;
+ }
+ return 0;
+}
+
+static int shutdown_vlan(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ return 0;
+}
+
+static int shutdown_phys(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ int err;
+
+ if (netdev->downscript) {
+ err = run_script(handler->name, "net", netdev->downscript,
+ "down", "phys", netdev->link, (char*) NULL);
+ if (err)
+ return -1;
+ }
+ return 0;
+}
+
+static int shutdown_empty(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ int err;
+
+ if (netdev->downscript) {
+ err = run_script(handler->name, "net", netdev->downscript,
+ "down", "empty", (char*) NULL);
+ if (err)
+ return -1;
+ }
+ return 0;
+}
+
+static int shutdown_none(struct lxc_handler *handler, struct lxc_netdev *netdev)
+{
+ return 0;
+}
+
+static instantiate_cb netdev_deconf[LXC_NET_MAXCONFTYPE + 1] = {
+ [LXC_NET_VETH] = shutdown_veth,
+ [LXC_NET_MACVLAN] = shutdown_macvlan,
+ [LXC_NET_VLAN] = shutdown_vlan,
+ [LXC_NET_PHYS] = shutdown_phys,
+ [LXC_NET_EMPTY] = shutdown_empty,
+ [LXC_NET_NONE] = shutdown_none,
+};
+
int lxc_netdev_move_by_index(int ifindex, pid_t pid, const char *ifname)
{
int err;
static const char padchar[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
-char *lxc_mkifname(char *template)
+char *lxc_mkifname(const char *template)
{
int ifexists = 0;
size_t i = 0;
FILE *urandom;
struct ifaddrs *ifa, *ifaddr;
+ if (strlen(template) >= IFNAMSIZ)
+ return NULL;
+
/* Get all the network interfaces. */
getifaddrs(&ifaddr);
return 0;
}
+
+int lxc_find_gateway_addresses(struct lxc_handler *handler)
+{
+ struct lxc_list *network = &handler->conf->network;
+ struct lxc_list *iterator;
+ struct lxc_netdev *netdev;
+ int link_index;
+
+ lxc_list_for_each(iterator, network) {
+ netdev = iterator->elem;
+
+ if (!netdev->ipv4_gateway_auto && !netdev->ipv6_gateway_auto)
+ continue;
+
+ if (netdev->type != LXC_NET_VETH && netdev->type != LXC_NET_MACVLAN) {
+ ERROR("Automatic gateway detection is only supported "
+ "for veth and macvlan");
+ return -1;
+ }
+
+ if (netdev->link[0] == '\0') {
+ ERROR("Automatic gateway detection needs a link interface");
+ return -1;
+ }
+
+ link_index = if_nametoindex(netdev->link);
+ if (!link_index)
+ return -EINVAL;
+
+ if (netdev->ipv4_gateway_auto) {
+ if (lxc_ipv4_addr_get(link_index, &netdev->ipv4_gateway)) {
+ ERROR("Failed to automatically find ipv4 gateway "
+ "address from link interface \"%s\"", netdev->link);
+ return -1;
+ }
+ }
+
+ if (netdev->ipv6_gateway_auto) {
+ if (lxc_ipv6_addr_get(link_index, &netdev->ipv6_gateway)) {
+ ERROR("Failed to automatically find ipv6 gateway "
+ "address from link interface \"%s\"", netdev->link);
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+#define LXC_USERNIC_PATH LIBEXECDIR "/lxc/lxc-user-nic"
+static int lxc_create_network_unpriv_exec(const char *lxcpath, char *lxcname,
+ struct lxc_netdev *netdev, pid_t pid)
+{
+ int ret;
+ pid_t child;
+ int bytes, pipefd[2];
+ char *token, *saveptr = NULL;
+ char netdev_link[IFNAMSIZ + 1];
+ char buffer[MAXPATHLEN] = {0};
+
+ if (netdev->type != LXC_NET_VETH) {
+ ERROR("Network type %d not support for unprivileged use", netdev->type);
+ return -1;
+ }
+
+ ret = pipe(pipefd);
+ if (ret < 0) {
+ SYSERROR("Failed to create pipe");
+ return -1;
+ }
+
+ child = fork();
+ if (child < 0) {
+ SYSERROR("Failed to create new process");
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return -1;
+ }
+
+ if (child == 0) {
+ int ret;
+ char pidstr[LXC_NUMSTRLEN64];
+
+ close(pipefd[0]);
+
+ ret = dup2(pipefd[1], STDOUT_FILENO);
+ if (ret >= 0)
+ ret = dup2(pipefd[1], STDERR_FILENO);
+ close(pipefd[1]);
+ if (ret < 0) {
+ SYSERROR("Failed to duplicate std{err,out} file descriptor");
+ exit(EXIT_FAILURE);
+ }
+
+ if (netdev->link[0] != '\0')
+ strncpy(netdev_link, netdev->link, IFNAMSIZ);
+ else
+ strncpy(netdev_link, "none", IFNAMSIZ);
+
+ ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid);
+ if (ret < 0 || ret >= LXC_NUMSTRLEN64)
+ exit(EXIT_FAILURE);
+ pidstr[LXC_NUMSTRLEN64 - 1] = '\0';
+
+ INFO("Execing lxc-user-nic create %s %s %s veth %s %s", lxcpath,
+ lxcname, pidstr, netdev_link,
+ netdev->name[0] != '\0' ? netdev->name : "(null)");
+ if (netdev->name[0] != '\0')
+ execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
+ lxcpath, lxcname, pidstr, "veth", netdev_link,
+ netdev->name, (char *)NULL);
+ else
+ execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "create",
+ lxcpath, lxcname, pidstr, "veth", netdev_link,
+ (char *)NULL);
+ SYSERROR("Failed to execute lxc-user-nic");
+ exit(EXIT_FAILURE);
+ }
+
+ /* close the write-end of the pipe */
+ close(pipefd[1]);
+
+ bytes = read(pipefd[0], &buffer, MAXPATHLEN);
+ if (bytes < 0) {
+ SYSERROR("Failed to read from pipe file descriptor");
+ close(pipefd[0]);
+ return -1;
+ }
+ buffer[bytes - 1] = '\0';
+
+ ret = wait_for_pid(child);
+ close(pipefd[0]);
+ if (ret != 0) {
+ ERROR("lxc-user-nic failed to configure requested network: %s",
+ buffer[0] != '\0' ? buffer : "(null)");
+ return -1;
+ }
+ TRACE("Received output \"%s\" from lxc-user-nic", buffer);
+
+ /* netdev->name */
+ token = strtok_r(buffer, ":", &saveptr);
+ if (!token) {
+ ERROR("Failed to parse lxc-user-nic output");
+ return -1;
+ }
+
+ memset(netdev->name, 0, IFNAMSIZ + 1);
+ strncpy(netdev->name, token, IFNAMSIZ);
+
+ /* netdev->ifindex */
+ token = strtok_r(NULL, ":", &saveptr);
+ if (!token) {
+ ERROR("Failed to parse lxc-user-nic output");
+ return -1;
+ }
+
+ ret = lxc_safe_int(token, &netdev->ifindex);
+ if (ret < 0) {
+ ERROR("%s - Failed to convert string \"%s\" to integer",
+ strerror(-ret), token);
+ return -1;
+ }
+
+ /* netdev->priv.veth_attr.veth1 */
+ token = strtok_r(NULL, ":", &saveptr);
+ if (!token) {
+ ERROR("Failed to parse lxc-user-nic output");
+ return -1;
+ }
+
+ if (strlen(token) >= IFNAMSIZ) {
+ ERROR("Host side veth device name returned by lxc-user-nic is "
+ "too long");
+ return -E2BIG;
+ }
+ strcpy(netdev->priv.veth_attr.veth1, token);
+
+ /* netdev->priv.veth_attr.ifindex */
+ token = strtok_r(NULL, ":", &saveptr);
+ if (!token) {
+ ERROR("Failed to parse lxc-user-nic output");
+ return -1;
+ }
+
+ ret = lxc_safe_int(token, &netdev->priv.veth_attr.ifindex);
+ if (ret < 0) {
+ ERROR("%s - Failed to convert string \"%s\" to integer",
+ strerror(-ret), token);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname,
+ struct lxc_netdev *netdev,
+ const char *netns_path)
+{
+ int bytes, ret;
+ pid_t child;
+ int pipefd[2];
+ char buffer[MAXPATHLEN] = {0};
+
+ if (netdev->type != LXC_NET_VETH) {
+ ERROR("Network type %d not support for unprivileged use", netdev->type);
+ return -1;
+ }
+
+ ret = pipe(pipefd);
+ if (ret < 0) {
+ SYSERROR("Failed to create pipe");
+ return -1;
+ }
+
+ child = fork();
+ if (child < 0) {
+ SYSERROR("Failed to create new process");
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return -1;
+ }
+
+ if (child == 0) {
+ char *hostveth;
+ int ret;
+
+ close(pipefd[0]);
+
+ ret = dup2(pipefd[1], STDOUT_FILENO);
+ if (ret >= 0)
+ ret = dup2(pipefd[1], STDERR_FILENO);
+ close(pipefd[1]);
+ if (ret < 0) {
+ SYSERROR("Failed to duplicate std{err,out} file descriptor");
+ exit(EXIT_FAILURE);
+ }
+
+ if (netdev->priv.veth_attr.pair[0] != '\0')
+ hostveth = netdev->priv.veth_attr.pair;
+ else
+ hostveth = netdev->priv.veth_attr.veth1;
+ if (hostveth[0] == '\0') {
+ SYSERROR("Host side veth device name is missing");
+ exit(EXIT_FAILURE);
+ }
+
+ if (netdev->link[0] == '\0') {
+ SYSERROR("Network link for network device \"%s\" is "
+ "missing", netdev->priv.veth_attr.veth1);
+ exit(EXIT_FAILURE);
+ }
+
+ INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath,
+ lxcname, netns_path, netdev->link, hostveth);
+ execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath,
+ lxcname, netns_path, "veth", netdev->link, hostveth,
+ (char *)NULL);
+ SYSERROR("Failed to exec lxc-user-nic.");
+ exit(EXIT_FAILURE);
+ }
+
+ close(pipefd[1]);
+
+ bytes = read(pipefd[0], &buffer, MAXPATHLEN);
+ if (bytes < 0) {
+ SYSERROR("Failed to read from pipe file descriptor.");
+ close(pipefd[0]);
+ return -1;
+ }
+ buffer[bytes - 1] = '\0';
+
+ if (wait_for_pid(child) != 0) {
+ ERROR("lxc-user-nic failed to delete requested network: %s",
+ buffer[0] != '\0' ? buffer : "(null)");
+ close(pipefd[0]);
+ return -1;
+ }
+
+ close(pipefd[0]);
+
+ return 0;
+}
+
+bool lxc_delete_network_unpriv(struct lxc_handler *handler)
+{
+ int ret;
+ struct lxc_list *iterator;
+ struct lxc_list *network = &handler->conf->network;
+ /* strlen("/proc/") = 6
+ * +
+ * LXC_NUMSTRLEN64
+ * +
+ * strlen("/fd/") = 4
+ * +
+ * LXC_NUMSTRLEN64
+ * +
+ * \0
+ */
+ char netns_path[6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1];
+ bool deleted_all = true;
+
+ if (handler->root)
+ return true;
+
+ *netns_path = '\0';
+
+ if (handler->netnsfd < 0) {
+ DEBUG("Cannot not guarantee safe deletion of network devices. "
+ "Manual cleanup maybe needed");
+ return false;
+ }
+
+ ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d",
+ getpid(), handler->netnsfd);
+ if (ret < 0 || ret >= sizeof(netns_path))
+ return false;
+
+ lxc_list_for_each(iterator, network) {
+ char *hostveth = NULL;
+ struct lxc_netdev *netdev = iterator->elem;
+
+ /* We can only delete devices whose ifindex we have. If we don't
+ * have the index it means that we didn't create it.
+ */
+ if (!netdev->ifindex)
+ continue;
+
+ if (netdev->type == LXC_NET_PHYS) {
+ ret = lxc_netdev_rename_by_index(netdev->ifindex,
+ netdev->link);
+ if (ret < 0)
+ WARN("Failed to rename interface with index %d "
+ "to its initial name \"%s\"",
+ netdev->ifindex, netdev->link);
+ else
+ TRACE("Renamed interface with index %d to its "
+ "initial name \"%s\"",
+ netdev->ifindex, netdev->link);
+ continue;
+ }
+
+ ret = netdev_deconf[netdev->type](handler, netdev);
+ if (ret < 0)
+ WARN("Failed to deconfigure network device");
+
+ if (netdev->type != LXC_NET_VETH)
+ continue;
+
+ if (!is_ovs_bridge(netdev->link))
+ continue;
+
+ if (netdev->priv.veth_attr.pair[0] != '\0')
+ hostveth = netdev->priv.veth_attr.pair;
+ else
+ hostveth = netdev->priv.veth_attr.veth1;
+ if (hostveth[0] == '\0')
+ continue;
+
+ ret = lxc_delete_network_unpriv_exec(handler->lxcpath,
+ handler->name, netdev,
+ netns_path);
+ if (ret < 0) {
+ deleted_all = false;
+ WARN("Failed to remove port \"%s\" from openvswitch "
+ "bridge \"%s\"", hostveth, netdev->link);
+ continue;
+ }
+ INFO("Removed interface \"%s\" from \"%s\"", hostveth,
+ netdev->link);
+ }
+
+ return deleted_all;
+}
+
+int lxc_create_network_priv(struct lxc_handler *handler)
+{
+ struct lxc_list *iterator;
+ struct lxc_list *network = &handler->conf->network;
+
+ if (!handler->root)
+ return 0;
+
+ lxc_list_for_each(iterator, network) {
+ struct lxc_netdev *netdev = iterator->elem;
+
+ if (netdev->type < 0 || netdev->type > LXC_NET_MAXCONFTYPE) {
+ ERROR("Invalid network configuration type %d", netdev->type);
+ return -1;
+ }
+
+ if (netdev_conf[netdev->type](handler, netdev)) {
+ ERROR("Failed to create network device");
+ return -1;
+ }
+
+ }
+
+ return 0;
+}
+
+int lxc_network_move_created_netdev_priv(const char *lxcpath, char *lxcname,
+ struct lxc_list *network, pid_t pid)
+{
+ int ret;
+ char ifname[IFNAMSIZ];
+ struct lxc_list *iterator;
+
+ if (am_unpriv())
+ return 0;
+
+ lxc_list_for_each(iterator, network) {
+ struct lxc_netdev *netdev = iterator->elem;
+
+ if (!netdev->ifindex)
+ continue;
+
+ /* retrieve the name of the interface */
+ if (!if_indextoname(netdev->ifindex, ifname)) {
+ ERROR("No interface corresponding to ifindex \"%d\"",
+ netdev->ifindex);
+ return -1;
+ }
+
+ ret = lxc_netdev_move_by_name(ifname, pid, NULL);
+ if (ret) {
+ ERROR("Failed to move network device \"%s\" to "
+ "network namespace %d: %s", ifname, pid,
+ strerror(-ret));
+ return -1;
+ }
+
+ DEBUG("Moved network device \"%s\"/\"%s\" to network namespace "
+ "of %d",
+ ifname, netdev->name[0] != '\0' ? netdev->name : "(null)",
+ pid);
+ }
+
+ return 0;
+}
+
+int lxc_create_network_unpriv(const char *lxcpath, char *lxcname,
+ struct lxc_list *network, pid_t pid)
+{
+ struct lxc_list *iterator;
+
+ if (!am_unpriv())
+ return 0;
+
+ lxc_list_for_each(iterator, network) {
+ struct lxc_netdev *netdev = iterator->elem;
+
+ if (netdev->type == LXC_NET_EMPTY)
+ continue;
+
+ if (netdev->type == LXC_NET_NONE)
+ continue;
+
+ if (netdev->type != LXC_NET_VETH) {
+ ERROR("Networks of type %s are not supported by "
+ "unprivileged containers",
+ lxc_net_type_to_str(netdev->type));
+ return -1;
+ }
+
+ if (netdev->mtu)
+ INFO("mtu ignored due to insufficient privilege");
+
+ if (lxc_create_network_unpriv_exec(lxcpath, lxcname, netdev, pid))
+ return -1;
+ }
+
+ return 0;
+}
+
+bool lxc_delete_network_priv(struct lxc_handler *handler)
+{
+ int ret;
+ struct lxc_list *iterator;
+ struct lxc_list *network = &handler->conf->network;
+ bool deleted_all = true;
+
+ if (!handler->root)
+ return true;
+
+ lxc_list_for_each(iterator, network) {
+ char *hostveth = NULL;
+ struct lxc_netdev *netdev = iterator->elem;
+
+ /* We can only delete devices whose ifindex we have. If we don't
+ * have the index it means that we didn't create it.
+ */
+ if (!netdev->ifindex)
+ continue;
+
+ if (netdev->type == LXC_NET_PHYS) {
+ ret = lxc_netdev_rename_by_index(netdev->ifindex, netdev->link);
+ if (ret < 0)
+ WARN("Failed to rename interface with index %d "
+ "from \"%s\" to its initial name \"%s\"",
+ netdev->ifindex, netdev->name, netdev->link);
+ else
+ TRACE("Renamed interface with index %d to its "
+ "from \"%s\" to its initial name \"%s\"",
+ netdev->ifindex, netdev->name, netdev->link);
+ continue;
+ }
+
+ ret = netdev_deconf[netdev->type](handler, netdev);
+ if (ret < 0)
+ WARN("Failed to deconfigure network device");
+
+ /* Recent kernels remove the virtual interfaces when the network
+ * namespace is destroyed but in case we did not move the
+ * interface to the network namespace, we have to destroy it.
+ */
+ ret = lxc_netdev_delete_by_index(netdev->ifindex);
+ if (-ret == ENODEV) {
+ INFO("Interface \"%s\" with index %d already "
+ "deleted or existing in different network "
+ "namespace",
+ netdev->name[0] != '\0' ? netdev->name : "(null)",
+ netdev->ifindex);
+ } else if (ret < 0) {
+ deleted_all = false;
+ WARN("Failed to remove interface \"%s\" with "
+ "index %d: %s",
+ netdev->name[0] != '\0' ? netdev->name : "(null)",
+ netdev->ifindex, strerror(-ret));
+ continue;
+ }
+ INFO("Removed interface \"%s\" with index %d",
+ netdev->name[0] != '\0' ? netdev->name : "(null)",
+ netdev->ifindex);
+
+ if (netdev->type != LXC_NET_VETH)
+ continue;
+
+ /* Explicitly delete host veth device to prevent lingering
+ * devices. We had issues in LXD around this.
+ */
+ if (netdev->priv.veth_attr.pair[0] != '\0')
+ hostveth = netdev->priv.veth_attr.pair;
+ else
+ hostveth = netdev->priv.veth_attr.veth1;
+ if (hostveth[0] == '\0')
+ continue;
+
+ ret = lxc_netdev_delete_by_name(hostveth);
+ if (ret < 0) {
+ deleted_all = false;
+ WARN("Failed to remove interface \"%s\" from \"%s\": %s",
+ hostveth, netdev->link, strerror(-ret));
+ continue;
+ }
+ INFO("Removed interface \"%s\" from \"%s\"", hostveth, netdev->link);
+
+ if (!is_ovs_bridge(netdev->link)) {
+ netdev->priv.veth_attr.veth1[0] = '\0';
+ continue;
+ }
+
+ /* Delete the openvswitch port. */
+ ret = lxc_ovs_delete_port(netdev->link, hostveth);
+ if (ret < 0)
+ WARN("Failed to remove port \"%s\" from openvswitch "
+ "bridge \"%s\"", hostveth, netdev->link);
+ else
+ INFO("Removed port \"%s\" from openvswitch bridge \"%s\"",
+ hostveth, netdev->link);
+
+ netdev->priv.veth_attr.veth1[0] = '\0';
+ }
+
+ return deleted_all;
+}
+
+int lxc_requests_empty_network(struct lxc_handler *handler)
+{
+ struct lxc_list *network = &handler->conf->network;
+ struct lxc_list *iterator;
+ bool found_none = false, found_nic = false;
+
+ if (lxc_list_empty(network))
+ return 0;
+
+ lxc_list_for_each(iterator, network) {
+ struct lxc_netdev *netdev = iterator->elem;
+
+ if (netdev->type == LXC_NET_NONE)
+ found_none = true;
+ else
+ found_nic = true;
+ }
+ if (found_none && !found_nic)
+ return 1;
+ return 0;
+}
+
+/* try to move physical nics to the init netns */
+int lxc_restore_phys_nics_to_netns(struct lxc_handler *handler)
+{
+ int ret;
+ int oldfd;
+ char ifname[IFNAMSIZ];
+ struct lxc_list *iterator;
+ int netnsfd = handler->netnsfd;
+ struct lxc_conf *conf = handler->conf;
+
+ /* We need CAP_NET_ADMIN in the parent namespace in order to setns() to
+ * the parent network namespace. We won't have this capability if we are
+ * unprivileged.
+ */
+ if (!handler->root)
+ return 0;
+
+ TRACE("Moving physical network devices back to parent network namespace");
+
+ oldfd = lxc_preserve_ns(getpid(), "net");
+ if (oldfd < 0) {
+ SYSERROR("Failed to preserve network namespace");
+ return -1;
+ }
+
+ ret = setns(netnsfd, CLONE_NEWNET);
+ if (ret < 0) {
+ SYSERROR("Failed to enter network namespace");
+ close(oldfd);
+ return -1;
+ }
+
+ lxc_list_for_each(iterator, &conf->network) {
+ struct lxc_netdev *netdev = iterator->elem;
+
+ if (netdev->type != LXC_NET_PHYS)
+ continue;
+
+ /* Retrieve the name of the interface in the container's network
+ * namespace.
+ */
+ if (!if_indextoname(netdev->ifindex, ifname)) {
+ WARN("No interface corresponding to ifindex %d",
+ netdev->ifindex);
+ continue;
+ }
+
+ ret = lxc_netdev_move_by_name(ifname, 1, netdev->link);
+ if (ret < 0)
+ WARN("Error moving network device \"%s\" back to "
+ "network namespace", ifname);
+ else
+ TRACE("Moved network device \"%s\" back to network "
+ "namespace", ifname);
+ }
+
+ ret = setns(oldfd, CLONE_NEWNET);
+ close(oldfd);
+ if (ret < 0) {
+ SYSERROR("Failed to enter network namespace");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int setup_hw_addr(char *hwaddr, const char *ifname)
+{
+ struct sockaddr sockaddr;
+ struct ifreq ifr;
+ int ret, fd, saved_errno;
+
+ ret = lxc_convert_mac(hwaddr, &sockaddr);
+ if (ret) {
+ ERROR("Mac address \"%s\" conversion failed: %s", hwaddr,
+ strerror(-ret));
+ return -1;
+ }
+
+ memcpy(ifr.ifr_name, ifname, IFNAMSIZ);
+ ifr.ifr_name[IFNAMSIZ-1] = '\0';
+ memcpy((char *) &ifr.ifr_hwaddr, (char *) &sockaddr, sizeof(sockaddr));
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return -1;
+
+ ret = ioctl(fd, SIOCSIFHWADDR, &ifr);
+ saved_errno = errno;
+ close(fd);
+ if (ret)
+ ERROR("Failed to perform ioctl: %s", strerror(saved_errno));
+
+ DEBUG("Mac address \"%s\" on \"%s\" has been setup", hwaddr,
+ ifr.ifr_name);
+
+ return ret;
+}
+
+static int setup_ipv4_addr(struct lxc_list *ip, int ifindex)
+{
+ struct lxc_list *iterator;
+ int err;
+
+ lxc_list_for_each(iterator, ip) {
+ struct lxc_inetdev *inetdev = iterator->elem;
+
+ err = lxc_ipv4_addr_add(ifindex, &inetdev->addr,
+ &inetdev->bcast, inetdev->prefix);
+ if (err) {
+ ERROR("Failed to setup ipv4 address for network device "
+ "with eifindex %d: %s", ifindex, strerror(-err));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int setup_ipv6_addr(struct lxc_list *ip, int ifindex)
+{
+ struct lxc_list *iterator;
+ int err;
+
+ lxc_list_for_each(iterator, ip) {
+ struct lxc_inet6dev *inet6dev = iterator->elem;
+
+ err = lxc_ipv6_addr_add(ifindex, &inet6dev->addr,
+ &inet6dev->mcast, &inet6dev->acast,
+ inet6dev->prefix);
+ if (err) {
+ ERROR("Failed to setup ipv6 address for network device "
+ "with eifindex %d: %s", ifindex, strerror(-err));
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int lxc_setup_netdev_in_child_namespaces(struct lxc_netdev *netdev)
+{
+ char ifname[IFNAMSIZ];
+ int err;
+ const char *net_type_name;
+ char *current_ifname = ifname;
+
+ /* empty network namespace */
+ if (!netdev->ifindex) {
+ if (netdev->flags & IFF_UP) {
+ err = lxc_netdev_up("lo");
+ if (err) {
+ ERROR("Failed to set the loopback network "
+ "device up: %s",
+ strerror(-err));
+ return -1;
+ }
+ }
+
+ if (netdev->type == LXC_NET_EMPTY)
+ return 0;
+
+ if (netdev->type == LXC_NET_NONE)
+ return 0;
+
+ if (netdev->type != LXC_NET_VETH) {
+ net_type_name = lxc_net_type_to_str(netdev->type);
+ ERROR("%s networks are not supported for containers "
+ "not setup up by privileged users", net_type_name);
+ return -1;
+ }
+
+ netdev->ifindex = if_nametoindex(netdev->name);
+ }
+
+ /* get the new ifindex in case of physical netdev */
+ if (netdev->type == LXC_NET_PHYS) {
+ netdev->ifindex = if_nametoindex(netdev->link);
+ if (!netdev->ifindex) {
+ ERROR("Failed to get ifindex for network device \"%s\"",
+ netdev->link);
+ return -1;
+ }
+ }
+
+ /* retrieve the name of the interface */
+ if (!if_indextoname(netdev->ifindex, current_ifname)) {
+ ERROR("Failed get name for network device with ifindex %d",
+ netdev->ifindex);
+ return -1;
+ }
+
+ /* Default: let the system to choose one interface name.
+ * When the IFLA_IFNAME attribute is passed something like "<prefix>%d"
+ * netlink will replace the format specifier with an appropriate index.
+ */
+ if (netdev->name[0] == '\0') {
+ if (netdev->type == LXC_NET_PHYS)
+ strcpy(netdev->name, netdev->link);
+ else
+ strcpy(netdev->name, "eth%d");
+ }
+
+ /* rename the interface name */
+ if (strcmp(ifname, netdev->name) != 0) {
+ err = lxc_netdev_rename_by_name(ifname, netdev->name);
+ if (err) {
+ ERROR("Failed to rename network device \"%s\" to "
+ "\"%s\": %s", ifname, netdev->name, strerror(-err));
+ return -1;
+ }
+ }
+
+ /* Re-read the name of the interface because its name has changed
+ * and would be automatically allocated by the system
+ */
+ if (!if_indextoname(netdev->ifindex, current_ifname)) {
+ ERROR("Failed get name for network device with ifindex %d",
+ netdev->ifindex);
+ return -1;
+ }
+
+ /* Now update the recorded name of the network device to reflect the
+ * name of the network device in the child's network namespace. We will
+ * later on send this information back to the parent.
+ */
+ strcpy(netdev->name, current_ifname);
+
+ /* set a mac address */
+ if (netdev->hwaddr) {
+ if (setup_hw_addr(netdev->hwaddr, current_ifname)) {
+ ERROR("Failed to setup hw address for network device \"%s\"",
+ current_ifname);
+ return -1;
+ }
+ }
+
+ /* setup ipv4 addresses on the interface */
+ if (setup_ipv4_addr(&netdev->ipv4, netdev->ifindex)) {
+ ERROR("Failed to setup ip addresses for network device \"%s\"",
+ ifname);
+ return -1;
+ }
+
+ /* setup ipv6 addresses on the interface */
+ if (setup_ipv6_addr(&netdev->ipv6, netdev->ifindex)) {
+ ERROR("Failed to setup ipv6 addresses for network device \"%s\"",
+ ifname);
+ return -1;
+ }
+
+ /* set the network device up */
+ if (netdev->flags & IFF_UP) {
+ int err;
+
+ err = lxc_netdev_up(current_ifname);
+ if (err) {
+ ERROR("Failed to set network device \"%s\" up: %s",
+ current_ifname, strerror(-err));
+ return -1;
+ }
+
+ /* the network is up, make the loopback up too */
+ err = lxc_netdev_up("lo");
+ if (err) {
+ ERROR("Failed to set the loopback network device up: %s",
+ strerror(-err));
+ return -1;
+ }
+ }
+
+ /* We can only set up the default routes after bringing
+ * up the interface, sine bringing up the interface adds
+ * the link-local routes and we can't add a default
+ * route if the gateway is not reachable. */
+
+ /* setup ipv4 gateway on the interface */
+ if (netdev->ipv4_gateway) {
+ if (!(netdev->flags & IFF_UP)) {
+ ERROR("Cannot add ipv4 gateway for network device "
+ "\"%s\" when not bringing up the interface", ifname);
+ return -1;
+ }
+
+ if (lxc_list_empty(&netdev->ipv4)) {
+ ERROR("Cannot add ipv4 gateway for network device "
+ "\"%s\" when not assigning an address", ifname);
+ return -1;
+ }
+
+ err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
+ if (err) {
+ err = lxc_ipv4_dest_add(netdev->ifindex, netdev->ipv4_gateway);
+ if (err) {
+ ERROR("Failed to add ipv4 dest for network "
+ "device \"%s\": %s", ifname, strerror(-err));
+ }
+
+ err = lxc_ipv4_gateway_add(netdev->ifindex, netdev->ipv4_gateway);
+ if (err) {
+ ERROR("Failed to setup ipv4 gateway for "
+ "network device \"%s\": %s",
+ ifname, strerror(-err));
+ if (netdev->ipv4_gateway_auto) {
+ char buf[INET_ADDRSTRLEN];
+ inet_ntop(AF_INET, netdev->ipv4_gateway, buf, sizeof(buf));
+ ERROR("Fried to set autodetected ipv4 gateway \"%s\"", buf);
+ }
+ return -1;
+ }
+ }
+ }
+
+ /* setup ipv6 gateway on the interface */
+ if (netdev->ipv6_gateway) {
+ if (!(netdev->flags & IFF_UP)) {
+ ERROR("Cannot add ipv6 gateway for network device "
+ "\"%s\" when not bringing up the interface", ifname);
+ return -1;
+ }
+
+ if (lxc_list_empty(&netdev->ipv6) && !IN6_IS_ADDR_LINKLOCAL(netdev->ipv6_gateway)) {
+ ERROR("Cannot add ipv6 gateway for network device "
+ "\"%s\" when not assigning an address", ifname);
+ return -1;
+ }
+
+ err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
+ if (err) {
+ err = lxc_ipv6_dest_add(netdev->ifindex, netdev->ipv6_gateway);
+ if (err) {
+ ERROR("Failed to add ipv6 dest for network "
+ "device \"%s\": %s", ifname, strerror(-err));
+ }
+
+ err = lxc_ipv6_gateway_add(netdev->ifindex, netdev->ipv6_gateway);
+ if (err) {
+ ERROR("Failed to setup ipv6 gateway for "
+ "network device \"%s\": %s", ifname,
+ strerror(-err));
+ if (netdev->ipv6_gateway_auto) {
+ char buf[INET6_ADDRSTRLEN];
+ inet_ntop(AF_INET6, netdev->ipv6_gateway, buf, sizeof(buf));
+ ERROR("Tried to set autodetected ipv6 "
+ "gateway for network device "
+ "\"%s\"", buf);
+ }
+ return -1;
+ }
+ }
+ }
+
+ DEBUG("Network device \"%s\" has been setup", current_ifname);
+
+ return 0;
+}
+
+int lxc_setup_network_in_child_namespaces(const struct lxc_conf *conf,
+ struct lxc_list *network)
+{
+ struct lxc_list *iterator;
+ struct lxc_netdev *netdev;
+
+ lxc_list_for_each(iterator, network) {
+ netdev = iterator->elem;
+
+ /* REMOVE in LXC 3.0 */
+ if (netdev->idx < 0) {
+ ERROR("WARNING: using \"lxc.network.*\" keys to define "
+ "networks is DEPRECATED, please switch to using "
+ "\"lxc.net.[i].* keys\"");
+ }
+
+ if (lxc_setup_netdev_in_child_namespaces(netdev)) {
+ ERROR("failed to setup netdev");
+ return -1;
+ }
+ }
+
+ if (!lxc_list_empty(network))
+ INFO("network has been setup");
+
+ return 0;
+}
+
+int lxc_network_send_veth_names_to_child(struct lxc_handler *handler)
+{
+ struct lxc_list *iterator;
+ struct lxc_list *network = &handler->conf->network;
+ int data_sock = handler->data_sock[0];
+
+ if (handler->root)
+ return 0;
+
+ lxc_list_for_each(iterator, network) {
+ int ret;
+ struct lxc_netdev *netdev = iterator->elem;
+
+ if (netdev->type != LXC_NET_VETH)
+ continue;
+
+ ret = lxc_abstract_unix_send_credential(data_sock, netdev->name,
+ IFNAMSIZ);
+ if (ret < 0) {
+ close(handler->data_sock[0]);
+ close(handler->data_sock[1]);
+ return -1;
+ } else {
+ TRACE("Sent network device name \"%s\" to child",
+ netdev->name);
+ }
+ }
+
+ return 0;
+}
+
+int lxc_network_recv_veth_names_from_parent(struct lxc_handler *handler)
+{
+ struct lxc_list *iterator;
+ struct lxc_list *network = &handler->conf->network;
+ int data_sock = handler->data_sock[1];
+
+ if (handler->root)
+ return 0;
+
+ lxc_list_for_each(iterator, network) {
+ int ret;
+ struct lxc_netdev *netdev = iterator->elem;
+
+ if (netdev->type != LXC_NET_VETH)
+ continue;
+
+ ret = lxc_abstract_unix_rcv_credential(data_sock, netdev->name,
+ IFNAMSIZ);
+ if (ret < 0) {
+ close(handler->data_sock[0]);
+ close(handler->data_sock[1]);
+ return -1;
+ } else {
+ TRACE("Received network device name \"%s\" from parent",
+ netdev->name);
+ }
+ }
+
+ return 0;
+}