Merge branch 'for-3.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

[mirror_ubuntu-bionic-kernel.git] / kernel / cgroup.c
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index f2a6795e695be2ab432a7add3a26b7f7d319fd86..7dc8788cfd52dd222856057840a8d0a45a19816e 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1698,7 +1698,7 @@ destroy_root:
  exit_root_id:
         cgroup_exit_root_id(root);
  cancel_ref:
-       percpu_ref_cancel_init(&root_cgrp->self.refcnt);
+       percpu_ref_exit(&root_cgrp->self.refcnt);
  out:
         free_cgrp_cset_links(&tmp_links);
         return ret;
@@ -1708,10 +1708,13 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                          int flags, const char *unused_dev_name,
                          void *data)
  {
+       struct super_block *pinned_sb = NULL;
+       struct cgroup_subsys *ss;
         struct cgroup_root *root;
         struct cgroup_sb_opts opts;
         struct dentry *dentry;
         int ret;
+       int i;
         bool new_sb;
  
         /*
@@ -1737,6 +1740,27 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                 goto out_unlock;
         }
  
+       /*
+        * Destruction of cgroup root is asynchronous, so subsystems may
+        * still be dying after the previous unmount.  Let's drain the
+        * dying subsystems.  We just need to ensure that the ones
+        * unmounted previously finish dying and don't care about new ones
+        * starting.  Testing ref liveness is good enough.
+        */
+       for_each_subsys(ss, i) {
+               if (!(opts.subsys_mask & (1 << i)) ||
+                   ss->root == &cgrp_dfl_root)
+                       continue;
+
+               if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt)) {
+                       mutex_unlock(&cgroup_mutex);
+                       msleep(10);
+                       ret = restart_syscall();
+                       goto out_free;
+               }
+               cgroup_put(&ss->root->cgrp);
+       }
+
         for_each_root(root) {
                 bool name_match = false;
  
@@ -1770,15 +1794,23 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                         pr_warn("new mount options do not match the existing superblock, will be ignored\n");
  
                 /*
-                * A root's lifetime is governed by its root cgroup.
-                * tryget_live failure indicate that the root is being
-                * destroyed.  Wait for destruction to complete so that the
-                * subsystems are free.  We can use wait_queue for the wait
-                * but this path is super cold.  Let's just sleep for a bit
-                * and retry.
+                * We want to reuse @root whose lifetime is governed by its
+                * ->cgrp.  Let's check whether @root is alive and keep it
+                * that way.  As cgroup_kill_sb() can happen anytime, we
+                * want to block it by pinning the sb so that @root doesn't
+                * get killed before mount is complete.
+                *
+                * With the sb pinned, tryget_live can reliably indicate
+                * whether @root can be reused.  If it's being killed,
+                * drain it.  We can use wait_queue for the wait but this
+                * path is super cold.  Let's just sleep a bit and retry.
                  */
-               if (!percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
+               pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
+               if (IS_ERR(pinned_sb) ||
+                   !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
                         mutex_unlock(&cgroup_mutex);
+                       if (!IS_ERR_OR_NULL(pinned_sb))
+                               deactivate_super(pinned_sb);
                         msleep(10);
                         ret = restart_syscall();
                         goto out_free;
@@ -1823,6 +1855,16 @@ out_free:
                                 CGROUP_SUPER_MAGIC, &new_sb);
         if (IS_ERR(dentry) || !new_sb)
                 cgroup_put(&root->cgrp);
+
+       /*
+        * If @pinned_sb, we're reusing an existing root and holding an
+        * extra ref on its sb.  Mount is complete.  Put the extra ref.
+        */
+       if (pinned_sb) {
+               WARN_ON(new_sb);
+               deactivate_super(pinned_sb);
+       }
+
         return dentry;
  }
  
@@ -3470,7 +3512,7 @@ bool css_has_online_children(struct cgroup_subsys_state *css)
  
         rcu_read_lock();
         css_for_each_child(child, css) {
-               if (css->flags & CSS_ONLINE) {
+               if (child->flags & CSS_ONLINE) {
                         ret = true;
                         break;
                 }
@@ -4282,6 +4324,8 @@ static void css_free_work_fn(struct work_struct *work)
                 container_of(work, struct cgroup_subsys_state, destroy_work);
         struct cgroup *cgrp = css->cgroup;
  
+       percpu_ref_exit(&css->refcnt);
+
         if (css->ss) {
                 /* css free path */
                 if (css->parent)
@@ -4483,7 +4527,7 @@ err_list_del:
  err_free_id:
         cgroup_idr_remove(&ss->css_idr, css->id);
  err_free_percpu_ref:
-       percpu_ref_cancel_init(&css->refcnt);
+       percpu_ref_exit(&css->refcnt);
  err_free_css:
         call_rcu(&css->rcu_head, css_free_rcu_fn);
         return err;
@@ -4603,7 +4647,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
  out_free_id:
         cgroup_idr_remove(&root->cgroup_idr, cgrp->id);
  out_cancel_ref:
-       percpu_ref_cancel_init(&cgrp->self.refcnt);
+       percpu_ref_exit(&cgrp->self.refcnt);
  out_free_cgrp:
         kfree(cgrp);
  out_unlock: