Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

[mirror_ubuntu-artful-kernel.git] / kernel / cgroup.c
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index d6b729beba4930bbb1a1ddc4a6a6b8533bbf94e0..2ee9ec3051b20774b118a57e4609f30e87bf82be 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -64,6 +64,9 @@
  #include <linux/file.h>
  #include <net/sock.h>
  
+#define CREATE_TRACE_POINTS
+#include <trace/events/cgroup.h>
+
  /*
   * pidlists linger the following amount before being destroyed.  The goal
   * is avoiding frequent destruction in the middle of consecutive read calls
@@ -1176,6 +1179,8 @@ static void cgroup_destroy_root(struct cgroup_root *root)
         struct cgroup *cgrp = &root->cgrp;
         struct cgrp_cset_link *link, *tmp_link;
  
+       trace_cgroup_destroy_root(root);
+
         cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
  
         BUG_ON(atomic_read(&root->nr_cgrps));
@@ -1874,6 +1879,9 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
                 strcpy(root->release_agent_path, opts.release_agent);
                 spin_unlock(&release_agent_path_lock);
         }
+
+       trace_cgroup_remount(root);
+
   out_unlock:
         kfree(opts.release_agent);
         kfree(opts.name);
@@ -2031,6 +2039,8 @@ static int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
         if (ret)
                 goto destroy_root;
  
+       trace_cgroup_setup_root(root);
+
         /*
          * There must be no failure case after here, since rebinding takes
          * care of subsystems' refcounts, which are explicitly dropped in
@@ -2315,22 +2325,18 @@ static struct file_system_type cgroup2_fs_type = {
         .fs_flags = FS_USERNS_MOUNT,
  };
  
-static char *cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
-                                  struct cgroup_namespace *ns)
+static int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
+                                struct cgroup_namespace *ns)
  {
         struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
-       int ret;
  
-       ret = kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
-       if (ret < 0 || ret >= buflen)
-               return NULL;
-       return buf;
+       return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
  }
  
-char *cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
-                    struct cgroup_namespace *ns)
+int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
+                  struct cgroup_namespace *ns)
  {
-       char *ret;
+       int ret;
  
         mutex_lock(&cgroup_mutex);
         spin_lock_irq(&css_set_lock);
@@ -2357,12 +2363,12 @@ EXPORT_SYMBOL_GPL(cgroup_path_ns);
   *
   * Return value is the same as kernfs_path().
   */
-char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
+int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
  {
         struct cgroup_root *root;
         struct cgroup *cgrp;
         int hierarchy_id = 1;
-       char *path = NULL;
+       int ret;
  
         mutex_lock(&cgroup_mutex);
         spin_lock_irq(&css_set_lock);
@@ -2371,16 +2377,15 @@ char *task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
  
         if (root) {
                 cgrp = task_cgroup_from_root(task, root);
-               path = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
+               ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
         } else {
                 /* if no hierarchy exists, everyone is in "/" */
-               if (strlcpy(buf, "/", buflen) < buflen)
-                       path = buf;
+               ret = strlcpy(buf, "/", buflen);
         }
  
         spin_unlock_irq(&css_set_lock);
         mutex_unlock(&cgroup_mutex);
-       return path;
+       return ret;
  }
  EXPORT_SYMBOL_GPL(task_cgroup_path);
  
@@ -2830,6 +2835,10 @@ static int cgroup_attach_task(struct cgroup *dst_cgrp,
                 ret = cgroup_migrate(leader, threadgroup, dst_cgrp->root);
  
         cgroup_migrate_finish(&preloaded_csets);
+
+       if (!ret)
+               trace_cgroup_attach_task(dst_cgrp, leader, threadgroup);
+
         return ret;
  }
  
@@ -3611,6 +3620,8 @@ static int cgroup_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
         mutex_lock(&cgroup_mutex);
  
         ret = kernfs_rename(kn, new_parent, new_name_str);
+       if (!ret)
+               trace_cgroup_rename(cgrp);
  
         mutex_unlock(&cgroup_mutex);
  
@@ -4381,6 +4392,8 @@ int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
  
                 if (task) {
                         ret = cgroup_migrate(task, false, to->root);
+                       if (!ret)
+                               trace_cgroup_transfer_tasks(to, task, false);
                         put_task_struct(task);
                 }
         } while (task && !ret);
@@ -5046,6 +5059,8 @@ static void css_release_work_fn(struct work_struct *work)
                         ss->css_released(css);
         } else {
                 /* cgroup release path */
+               trace_cgroup_release(cgrp);
+
                 cgroup_idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
                 cgrp->id = -1;
  
@@ -5059,6 +5074,8 @@ static void css_release_work_fn(struct work_struct *work)
                 if (cgrp->kn)
                         RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
                                          NULL);
+
+               cgroup_bpf_put(cgrp);
         }
  
         mutex_unlock(&cgroup_mutex);
@@ -5266,6 +5283,9 @@ static struct cgroup *cgroup_create(struct cgroup *parent)
         if (!cgroup_on_dfl(cgrp))
                 cgrp->subtree_control = cgroup_control(cgrp);
  
+       if (parent)
+               cgroup_bpf_inherit(cgrp, parent);
+
         cgroup_propagate_control(cgrp);
  
         /* @cgrp doesn't have dir yet so the following will only create csses */
@@ -5332,6 +5352,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
         if (ret)
                 goto out_destroy;
  
+       trace_cgroup_mkdir(cgrp);
+
         /* let's create and online css's */
         kernfs_activate(kn);
  
@@ -5507,6 +5529,9 @@ static int cgroup_rmdir(struct kernfs_node *kn)
  
         ret = cgroup_destroy_locked(cgrp);
  
+       if (!ret)
+               trace_cgroup_rmdir(cgrp);
+
         cgroup_kn_unlock(kn);
         return ret;
  }
@@ -5627,6 +5652,12 @@ int __init cgroup_init(void)
         BUG_ON(cgroup_init_cftypes(NULL, cgroup_dfl_base_files));
         BUG_ON(cgroup_init_cftypes(NULL, cgroup_legacy_base_files));
  
+       /*
+        * The latency of the synchronize_sched() is too high for cgroups,
+        * avoid it at the cost of forcing all readers into the slow path.
+        */
+       rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
+
         get_user_ns(init_cgroup_ns.user_ns);
  
         mutex_lock(&cgroup_mutex);
@@ -5737,7 +5768,7 @@ core_initcall(cgroup_wq_init);
  int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                      struct pid *pid, struct task_struct *tsk)
  {
-       char *buf, *path;
+       char *buf;
         int retval;
         struct cgroup_root *root;
  
@@ -5780,18 +5811,18 @@ int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
                  * " (deleted)" is appended to the cgroup path.
                  */
                 if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
-                       path = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
+                       retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
                                                 current->nsproxy->cgroup_ns);
-                       if (!path) {
+                       if (retval >= PATH_MAX)
                                 retval = -ENAMETOOLONG;
+                       if (retval < 0)
                                 goto out_unlock;
-                       }
+
+                       seq_puts(m, buf);
                 } else {
-                       path = "/";
+                       seq_puts(m, "/");
                 }
  
-               seq_puts(m, path);
-
                 if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
                         seq_puts(m, " (deleted)\n");
                 else
@@ -6056,8 +6087,9 @@ static void cgroup_release_agent(struct work_struct *work)
  {
         struct cgroup *cgrp =
                 container_of(work, struct cgroup, release_agent_work);
-       char *pathbuf = NULL, *agentbuf = NULL, *path;
+       char *pathbuf = NULL, *agentbuf = NULL;
         char *argv[3], *envp[3];
+       int ret;
  
         mutex_lock(&cgroup_mutex);
  
@@ -6067,13 +6099,13 @@ static void cgroup_release_agent(struct work_struct *work)
                 goto out;
  
         spin_lock_irq(&css_set_lock);
-       path = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
+       ret = cgroup_path_ns_locked(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
         spin_unlock_irq(&css_set_lock);
-       if (!path)
+       if (ret < 0 || ret >= PATH_MAX)
                 goto out;
  
         argv[0] = agentbuf;
-       argv[1] = path;
+       argv[1] = pathbuf;
         argv[2] = NULL;
  
         /* minimal command environment */
@@ -6322,6 +6354,16 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd)
  
  /* cgroup namespaces */
  
+static struct ucounts *inc_cgroup_namespaces(struct user_namespace *ns)
+{
+       return inc_ucount(ns, current_euid(), UCOUNT_CGROUP_NAMESPACES);
+}
+
+static void dec_cgroup_namespaces(struct ucounts *ucounts)
+{
+       dec_ucount(ucounts, UCOUNT_CGROUP_NAMESPACES);
+}
+
  static struct cgroup_namespace *alloc_cgroup_ns(void)
  {
         struct cgroup_namespace *new_ns;
@@ -6343,6 +6385,7 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
  void free_cgroup_ns(struct cgroup_namespace *ns)
  {
         put_css_set(ns->root_cset);
+       dec_cgroup_namespaces(ns->ucounts);
         put_user_ns(ns->user_ns);
         ns_free_inum(&ns->ns);
         kfree(ns);
@@ -6354,6 +6397,7 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
                                         struct cgroup_namespace *old_ns)
  {
         struct cgroup_namespace *new_ns;
+       struct ucounts *ucounts;
         struct css_set *cset;
  
         BUG_ON(!old_ns);
@@ -6367,6 +6411,10 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
         if (!ns_capable(user_ns, CAP_SYS_ADMIN))
                 return ERR_PTR(-EPERM);
  
+       ucounts = inc_cgroup_namespaces(user_ns);
+       if (!ucounts)
+               return ERR_PTR(-ENOSPC);
+
         /* It is not safe to take cgroup_mutex here */
         spin_lock_irq(&css_set_lock);
         cset = task_css_set(current);
@@ -6376,10 +6424,12 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags,
         new_ns = alloc_cgroup_ns();
         if (IS_ERR(new_ns)) {
                 put_css_set(cset);
+               dec_cgroup_namespaces(ucounts);
                 return new_ns;
         }
  
         new_ns->user_ns = get_user_ns(user_ns);
+       new_ns->ucounts = ucounts;
         new_ns->root_cset = cset;
  
         return new_ns;
@@ -6430,12 +6480,18 @@ static void cgroupns_put(struct ns_common *ns)
         put_cgroup_ns(to_cg_ns(ns));
  }
  
+static struct user_namespace *cgroupns_owner(struct ns_common *ns)
+{
+       return to_cg_ns(ns)->user_ns;
+}
+
  const struct proc_ns_operations cgroupns_operations = {
         .name           = "cgroup",
         .type           = CLONE_NEWCGROUP,
         .get            = cgroupns_get,
         .put            = cgroupns_put,
         .install        = cgroupns_install,
+       .owner          = cgroupns_owner,
  };
  
  static __init int cgroup_namespaces_init(void)
@@ -6444,6 +6500,19 @@ static __init int cgroup_namespaces_init(void)
  }
  subsys_initcall(cgroup_namespaces_init);
  
+#ifdef CONFIG_CGROUP_BPF
+void cgroup_bpf_update(struct cgroup *cgrp,
+                      struct bpf_prog *prog,
+                      enum bpf_attach_type type)
+{
+       struct cgroup *parent = cgroup_parent(cgrp);
+
+       mutex_lock(&cgroup_mutex);
+       __cgroup_bpf_update(cgrp, parent, prog, type);
+       mutex_unlock(&cgroup_mutex);
+}
+#endif /* CONFIG_CGROUP_BPF */
+
  #ifdef CONFIG_CGROUP_DEBUG
  static struct cgroup_subsys_state *
  debug_css_alloc(struct cgroup_subsys_state *parent_css)