From 1bd8d726a7449c7b8b7e0ca84c4bae0ff193f8a8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 31 Aug 2017 01:32:39 +0200 Subject: [PATCH] lxc-user-nic: test privilege over netns on delete When lxc-user-nic is called with the "delete" subcommand we need to make sure that we are actually privileged over the network namespace for which we are supposed to delete devices on the host. To this end we require that path to the affected network namespace is passed. We then setns() to the network namespace and drop privilege to the caller's real user id. Then we try to delete the loopback interface which is not possible. If we are privileged over the network namespace this operation will fail with ENOTSUP. If we are not privileged over the network namespace we will get EPERM. This is the first part of the commit. As of now nothing guarantees that the caller does not just give us a random path to a network namespace it is privileged over. Signed-off-by: Christian Brauner --- src/lxc/lxc_user_nic.c | 116 +++++++++++++++++++++++++++--- src/lxc/network.c | 156 +++++++++++++++++++++++++++++------------ src/lxc/network.h | 3 +- src/lxc/start.c | 49 ++++++++----- 4 files changed, 254 insertions(+), 70 deletions(-) diff --git a/src/lxc/lxc_user_nic.c b/src/lxc/lxc_user_nic.c index 0fb788877..4c446f7c3 100644 --- a/src/lxc/lxc_user_nic.c +++ b/src/lxc/lxc_user_nic.c @@ -976,13 +976,89 @@ struct user_nic_args { #define LXC_USERNIC_CREATE 0 #define LXC_USERNIC_DELETE 1 +static bool is_privileged_over_netns(int netns_fd) +{ + int ret; + uid_t euid, ruid, suid; + bool bret = false; + int ofd = -1; + + ofd = lxc_preserve_ns(getpid(), "net"); + if (ofd < 0) { + usernic_error("Failed opening network namespace path for %d", getpid()); + return false; + } + + ret = getresuid(&ruid, &euid, &suid); + if (ret < 0) { + usernic_error("Failed to retrieve real, effective, and saved " + "user IDs: %s\n", + strerror(errno)); + goto do_partial_cleanup; + } + + ret = setns(netns_fd, CLONE_NEWNET); + if (ret < 0) { + usernic_error("Failed to setns() to network namespace %s\n", + strerror(errno)); + goto do_partial_cleanup; + } + + ret = setresuid(ruid, ruid, 0); + if (ret < 0) { + usernic_error("Failed to drop privilege by setting effective " + "user id and real user id to %d, and saved user " + "ID to 0: %s\n", + ruid, strerror(errno)); + /* It's ok to jump to do_full_cleanup here since setresuid() + * will succeed when trying to set real, effective, and saved to + * values they currently have. + */ + goto do_full_cleanup; + } + + /* Test whether we are privileged over the network namespace. To do this + * we try to delete the loopback interface which is not possible. If we + * are privileged over the network namespace we will get ENOTSUP. If we + * are not privileged over the network namespace we will get EPERM. + */ + ret = lxc_netdev_delete_by_name("lo"); + if (ret == -ENOTSUP) + bret = true; + +do_full_cleanup: + ret = setresuid(ruid, euid, suid); + if (ret < 0) { + usernic_error("Failed to restore privilege by setting " + "effective user id to %d, real user id to %d, " + "and saved user ID to %d: %s\n", ruid, euid, suid, + strerror(errno)); + + bret = false; + } + + ret = setns(ofd, CLONE_NEWNET); + if (ret < 0) { + usernic_error("Failed to setns() to original network namespace " + "of PID %d: %s\n", ofd, strerror(errno)); + + bret = false; + } + +do_partial_cleanup: + + close(ofd); + return bret; +} + int main(int argc, char *argv[]) { int fd, ifindex, n, pid, request, ret; char *me, *newname; + struct user_nic_args args; + int netns_fd = -1; char *cnic = NULL, *nicname = NULL; struct alloted_s *alloted = NULL; - struct user_nic_args args; if (argc < 7 || argc > 8) { usage(argv[0], true); @@ -1027,26 +1103,50 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - ret = lxc_safe_int(args.pid, &pid); - if (ret < 0) { - usernic_error("Could not read pid: %s\n", args.pid); - exit(EXIT_FAILURE); + if (request == LXC_USERNIC_CREATE) { + ret = lxc_safe_int(args.pid, &pid); + if (ret < 0) { + usernic_error("Could not read pid: %s\n", args.pid); + exit(EXIT_FAILURE); + } + } else if (request == LXC_USERNIC_DELETE) { + netns_fd = open(args.pid, O_RDONLY); + if (netns_fd < 0) { + usernic_error("Could not open \"%s\": %s\n", args.pid, + strerror(errno)); + exit(EXIT_FAILURE); + } } if (!create_db_dir(LXC_USERNIC_DB)) { usernic_error("%s", "Failed to create directory for db file\n"); + if (netns_fd >= 0) + close(netns_fd); exit(EXIT_FAILURE); } fd = open_and_lock(LXC_USERNIC_DB); if (fd < 0) { usernic_error("Failed to lock %s\n", LXC_USERNIC_DB); + if (netns_fd >= 0) + close(netns_fd); exit(EXIT_FAILURE); } - if (!may_access_netns(pid)) { - usernic_error("User %s may not modify netns for pid %d\n", me, pid); - exit(EXIT_FAILURE); + if (request == LXC_USERNIC_CREATE) { + if (!may_access_netns(pid)) { + usernic_error("User %s may not modify netns for pid %d\n", me, pid); + exit(EXIT_FAILURE); + } + } else if (request == LXC_USERNIC_DELETE) { + bool has_priv; + has_priv = is_privileged_over_netns(netns_fd); + close(netns_fd); + if (!has_priv) { + usernic_error("%s", "Process is not privileged over " + "network namespace\n"); + exit(EXIT_FAILURE); + } } n = get_alloted(me, args.type, args.link, &alloted); diff --git a/src/lxc/network.c b/src/lxc/network.c index 12b7d697e..315b44d79 100644 --- a/src/lxc/network.c +++ b/src/lxc/network.c @@ -2160,8 +2160,9 @@ static int lxc_create_network_unpriv(const char *lxcpath, char *lxcname, return 0; } -static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, - struct lxc_netdev *netdev, pid_t pid) +static int lxc_delete_network_unpriv_exec(const char *lxcpath, char *lxcname, + struct lxc_netdev *netdev, + const char *netns_path) { int bytes, ret; pid_t child; @@ -2189,7 +2190,6 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, if (child == 0) { int ret; - char pidstr[LXC_NUMSTRLEN64]; close(pipefd[0]); @@ -2206,15 +2206,10 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, SYSERROR("Network link for network device \"%s\" is " "missing", netdev->priv.veth_attr.pair); - ret = snprintf(pidstr, LXC_NUMSTRLEN64, "%d", pid); - if (ret < 0 || ret >= LXC_NUMSTRLEN64) - exit(EXIT_FAILURE); - pidstr[LXC_NUMSTRLEN64 - 1] = '\0'; - INFO("Execing lxc-user-nic delete %s %s %s veth %s %s", lxcpath, - lxcname, pidstr, netdev->link, netdev->priv.veth_attr.pair); + lxcname, netns_path, netdev->link, netdev->priv.veth_attr.pair); execlp(LXC_USERNIC_PATH, LXC_USERNIC_PATH, "delete", lxcpath, - lxcname, pidstr, "veth", netdev->link, + lxcname, netns_path, "veth", netdev->link, netdev->priv.veth_attr.pair, (char *)NULL); SYSERROR("Failed to exec lxc-user-nic."); exit(EXIT_FAILURE); @@ -2242,6 +2237,91 @@ static int lxc_delete_network_unpriv(const char *lxcpath, char *lxcname, return 0; } +bool lxc_delete_network_unpriv(struct lxc_handler *handler) +{ + int ret; + struct lxc_list *iterator; + struct lxc_list *network = &handler->conf->network; + /* strlen("/proc/") = 6 + * + + * LXC_NUMSTRLEN64 + * + + * strlen("/fd/") = 4 + * + + * LXC_NUMSTRLEN64 + * + + * \0 + */ + char netns_path[6 + LXC_NUMSTRLEN64 + 4 + LXC_NUMSTRLEN64 + 1]; + bool deleted_all = true; + + if (!am_unpriv()) + return true; + + *netns_path = '\0'; + + if (handler->netnsfd < 0) { + DEBUG("Cannot not guarantee safe deletion of network devices. " + "Manual cleanup maybe needed"); + return false; + } + + ret = snprintf(netns_path, sizeof(netns_path), "/proc/%d/fd/%d", + getpid(), handler->netnsfd); + if (ret < 0 || ret >= sizeof(netns_path)) + return false; + + lxc_list_for_each(iterator, network) { + char *hostveth = NULL; + struct lxc_netdev *netdev = iterator->elem; + + /* We can only delete devices whose ifindex we have. If we don't + * have the index it means that we didn't create it. + */ + if (!netdev->ifindex) + continue; + + if (netdev->type == LXC_NET_PHYS) { + ret = lxc_netdev_rename_by_index(netdev->ifindex, + netdev->link); + if (ret < 0) + WARN("Failed to rename interface with index %d " + "to its initial name \"%s\"", + netdev->ifindex, netdev->link); + else + TRACE("Renamed interface with index %d to its " + "initial name \"%s\"", + netdev->ifindex, netdev->link); + continue; + } + + ret = netdev_deconf[netdev->type](handler, netdev); + if (ret < 0) + WARN("Failed to deconfigure network device"); + + if (netdev->type != LXC_NET_VETH) + continue; + + if (!is_ovs_bridge(netdev->link)) + continue; + + ret = lxc_delete_network_unpriv_exec(handler->lxcpath, + handler->name, netdev, + netns_path); + if (ret < 0) { + deleted_all = false; + WARN("Failed to remove port \"%s\" from openvswitch " + "bridge \"%s\"", + netdev->priv.veth_attr.pair, netdev->link); + continue; + } + INFO("Removed interface \"%s\" from \"%s\"", hostveth, + netdev->link); + } + + return deleted_all; +} + int lxc_create_network_priv(struct lxc_handler *handler) { bool am_root; @@ -2324,13 +2404,16 @@ int lxc_create_network(const char *lxcpath, char *lxcname, return 0; } -bool lxc_delete_network(struct lxc_handler *handler) +bool lxc_delete_network_priv(struct lxc_handler *handler) { int ret; struct lxc_list *iterator; struct lxc_list *network = &handler->conf->network; bool deleted_all = true; + if (am_unpriv()) + return true; + lxc_list_for_each(iterator, network) { char *hostveth = NULL; struct lxc_netdev *netdev = iterator->elem; @@ -2362,45 +2445,28 @@ bool lxc_delete_network(struct lxc_handler *handler) * namespace is destroyed but in case we did not move the * interface to the network namespace, we have to destroy it. */ - if (!am_unpriv()) { - ret = lxc_netdev_delete_by_index(netdev->ifindex); - if (-ret == ENODEV) { - INFO("Interface \"%s\" with index %d already " - "deleted or existing in different network " - "namespace", - netdev->name ? netdev->name : "(null)", - netdev->ifindex); - } else if (ret < 0) { - deleted_all = false; - WARN("Failed to remove interface \"%s\" with " - "index %d: %s", - netdev->name ? netdev->name : "(null)", - netdev->ifindex, strerror(-ret)); - continue; - } - INFO("Removed interface \"%s\" with index %d", - netdev->name ? netdev->name : "(null)", - netdev->ifindex); + ret = lxc_netdev_delete_by_index(netdev->ifindex); + if (-ret == ENODEV) { + INFO("Interface \"%s\" with index %d already " + "deleted or existing in different network " + "namespace", + netdev->name ? netdev->name : "(null)", + netdev->ifindex); + } else if (ret < 0) { + deleted_all = false; + WARN("Failed to remove interface \"%s\" with " + "index %d: %s", + netdev->name ? netdev->name : "(null)", + netdev->ifindex, strerror(-ret)); + continue; } + INFO("Removed interface \"%s\" with index %d", + netdev->name ? netdev->name : "(null)", + netdev->ifindex); if (netdev->type != LXC_NET_VETH) continue; - if (am_unpriv()) { - if (is_ovs_bridge(netdev->link)) { - ret = lxc_delete_network_unpriv(handler->lxcpath, - handler->name, - netdev, getpid()); - if (ret < 0) - WARN("Failed to remove port \"%s\" " - "from openvswitch bridge \"%s\"", - netdev->priv.veth_attr.pair, - netdev->link); - } - - continue; - } - /* Explicitly delete host veth device to prevent lingering * devices. We had issues in LXD around this. */ diff --git a/src/lxc/network.h b/src/lxc/network.h index d1b8de9b7..27d2c1e74 100644 --- a/src/lxc/network.h +++ b/src/lxc/network.h @@ -224,7 +224,8 @@ extern const char *lxc_net_type_to_str(int type); extern int setup_private_host_hw_addr(char *veth1); extern int netdev_get_mtu(int ifindex); extern int lxc_create_network_priv(struct lxc_handler *handler); -extern bool lxc_delete_network(struct lxc_handler *handler); +extern bool lxc_delete_network_priv(struct lxc_handler *handler); +extern bool lxc_delete_network_unpriv(struct lxc_handler *handler); extern int lxc_find_gateway_addresses(struct lxc_handler *handler); extern int lxc_create_network(const char *lxcpath, char *lxcname, struct lxc_list *network, pid_t pid); diff --git a/src/lxc/start.c b/src/lxc/start.c index ac37a091c..c41b8dfdd 100644 --- a/src/lxc/start.c +++ b/src/lxc/start.c @@ -1312,6 +1312,7 @@ static int lxc_spawn(struct lxc_handler *handler) SYSERROR("Failed to clone a new set of namespaces."); goto out_delete_net; } + for (i = 0; i < LXC_NS_MAX; i++) if (flags & ns_info[i].clone_flag) INFO("Cloned %s.", ns_info[i].flag_name); @@ -1363,6 +1364,12 @@ static int lxc_spawn(struct lxc_handler *handler) if (failed_before_rename) goto out_delete_net; + handler->netnsfd = lxc_preserve_ns(handler->pid, "net"); + if (handler->netnsfd < 0) { + ERROR("Failed to preserve network namespace"); + goto out_delete_net; + } + /* Create the network configuration. */ if (handler->clone_flags & CLONE_NEWNET) { if (lxc_create_network(handler->lxcpath, handler->name, @@ -1437,15 +1444,22 @@ static int lxc_spawn(struct lxc_handler *handler) } lxc_sync_fini(handler); - handler->netnsfd = lxc_preserve_ns(handler->pid, "net"); return 0; out_delete_net: if (cgroups_connected) cgroup_disconnect(); - if (handler->clone_flags & CLONE_NEWNET) - lxc_delete_network(handler); + + if (handler->clone_flags & CLONE_NEWNET) { + DEBUG("Tearing down network devices"); + if (!lxc_delete_network_priv(handler)) + DEBUG("Failed tearing down network devices"); + + if (!lxc_delete_network_unpriv(handler)) + DEBUG("Failed tearing down network devices"); + } + out_abort: lxc_abort(name, handler); lxc_sync_fini(handler); @@ -1454,6 +1468,11 @@ out_abort: handler->pinfd = -1; } + if (handler->netnsfd >= 0) { + close(handler->netnsfd); + handler->netnsfd = -1; + } + return -1; } @@ -1463,7 +1482,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler, { int status; int err = -1; - bool removed_all_netdevs = true; struct lxc_conf *conf = handler->conf; if (lxc_init(name, handler) < 0) { @@ -1509,10 +1527,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler, err = lxc_poll(name, handler); if (err) { ERROR("LXC mainloop exited with error: %d.", err); - if (handler->netnsfd >= 0) { - close(handler->netnsfd); - handler->netnsfd = -1; - } goto out_abort; } @@ -1544,9 +1558,6 @@ int __lxc_start(const char *name, struct lxc_handler *handler, DEBUG("Pushing physical nics back to host namespace"); lxc_restore_phys_nics_to_netns(handler->netnsfd, handler->conf); - DEBUG("Tearing down virtual network devices used by container \"%s\".", name); - removed_all_netdevs = lxc_delete_network(handler); - if (handler->pinfd >= 0) { close(handler->pinfd); handler->pinfd = -1; @@ -1554,12 +1565,18 @@ int __lxc_start(const char *name, struct lxc_handler *handler, lxc_monitor_send_exit_code(name, status, handler->lxcpath); err = lxc_error_set_and_log(handler->pid, status); + out_fini: - if (!removed_all_netdevs) { - DEBUG("Failed tearing down network devices used by container. Trying again!"); - removed_all_netdevs = lxc_delete_network(handler); - if (!removed_all_netdevs) - DEBUG("Failed tearing down network devices used by container. Not trying again!"); + DEBUG("Tearing down network devices"); + if (!lxc_delete_network_priv(handler)) + DEBUG("Failed tearing down network devices"); + + if (!lxc_delete_network_unpriv(handler)) + DEBUG("Failed tearing down network devices"); + + if (handler->netnsfd >= 0) { + close(handler->netnsfd); + handler->netnsfd = -1; } out_detach_blockdev: -- 2.39.2