git.proxmox.com Git - mirror_ubuntu-zesty-kernel.git/blobdiff - kernel/cgroup.c
cgroup: make interface file "cgroup.sane_behavior" legacy-only
[mirror_ubuntu-zesty-kernel.git] / kernel / cgroup.c
index 7868fc3c0bc59b3223490114ca8e27f83b4c864c..0ea54af6b1333a21b3a03047af60525649f67e97 100644 (file)
@@ -186,7 +186,8 @@ static void cgroup_put(struct cgroup *cgrp);
 static int rebind_subsystems(struct cgroup_root *dst_root,
                             unsigned int ss_mask);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
-static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss);
+static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
+                     bool visible);
 static void css_release(struct percpu_ref *ref);
 static void kill_css(struct cgroup_subsys_state *css);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
@@ -1036,6 +1037,58 @@ static void cgroup_put(struct cgroup *cgrp)
        css_put(&cgrp->self);
 }
 
+/**
+ * cgroup_refresh_child_subsys_mask - update child_subsys_mask
+ * @cgrp: the target cgroup
+ *
+ * On the default hierarchy, a subsystem may request other subsystems to be
+ * enabled together through its ->depends_on mask.  In such cases, more
+ * subsystems than specified in "cgroup.subtree_control" may be enabled.
+ *
+ * This function determines which subsystems need to be enabled given the
+ * current @cgrp->subtree_control and records it in
+ * @cgrp->child_subsys_mask.  The resulting mask is always a superset of
+ * @cgrp->subtree_control and follows the usual hierarchy rules.
+ */
+static void cgroup_refresh_child_subsys_mask(struct cgroup *cgrp)
+{
+       struct cgroup *parent = cgroup_parent(cgrp);
+       unsigned int cur_ss_mask = cgrp->subtree_control;
+       struct cgroup_subsys *ss;
+       int ssid;
+
+       lockdep_assert_held(&cgroup_mutex);
+
+       if (!cgroup_on_dfl(cgrp)) {
+               cgrp->child_subsys_mask = cur_ss_mask;
+               return;
+       }
+
+       while (true) {
+               unsigned int new_ss_mask = cur_ss_mask;
+
+               for_each_subsys(ss, ssid)
+                       if (cur_ss_mask & (1 << ssid))
+                               new_ss_mask |= ss->depends_on;
+
+               /*
+                * Mask out subsystems which aren't available.  This can
+                * happen only if some depended-upon subsystems were bound
+                * to non-default hierarchies.
+                */
+               if (parent)
+                       new_ss_mask &= parent->child_subsys_mask;
+               else
+                       new_ss_mask &= cgrp->root->subsys_mask;
+
+               if (new_ss_mask == cur_ss_mask)
+                       break;
+               cur_ss_mask = new_ss_mask;
+       }
+
+       cgrp->child_subsys_mask = cur_ss_mask;
+}
+
 /**
  * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
  * @kn: the kernfs_node being serviced
@@ -1208,12 +1261,15 @@ static int rebind_subsystems(struct cgroup_root *dst_root, unsigned int ss_mask)
                up_write(&css_set_rwsem);
 
                src_root->subsys_mask &= ~(1 << ssid);
-               src_root->cgrp.child_subsys_mask &= ~(1 << ssid);
+               src_root->cgrp.subtree_control &= ~(1 << ssid);
+               cgroup_refresh_child_subsys_mask(&src_root->cgrp);
 
                /* default hierarchy doesn't enable controllers by default */
                dst_root->subsys_mask |= 1 << ssid;
-               if (dst_root != &cgrp_dfl_root)
-                       dst_root->cgrp.child_subsys_mask |= 1 << ssid;
+               if (dst_root != &cgrp_dfl_root) {
+                       dst_root->cgrp.subtree_control |= 1 << ssid;
+                       cgroup_refresh_child_subsys_mask(&dst_root->cgrp);
+               }
 
                if (ss->bind)
                        ss->bind(css);
@@ -1434,11 +1490,10 @@ static int cgroup_remount(struct kernfs_root *kf_root, int *flags, char *data)
        removed_mask = root->subsys_mask & ~opts.subsys_mask;
 
        /* Don't allow flags or name to change at remount */
-       if (((opts.flags ^ root->flags) & CGRP_ROOT_OPTION_MASK) ||
+       if ((opts.flags ^ root->flags) ||
            (opts.name && strcmp(opts.name, root->name))) {
                pr_err("option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"\n",
-                      opts.flags & CGRP_ROOT_OPTION_MASK, opts.name ?: "",
-                      root->flags & CGRP_ROOT_OPTION_MASK, root->name);
+                      opts.flags, opts.name ?: "", root->flags, root->name);
                ret = -EINVAL;
                goto out_unlock;
        }
@@ -1706,7 +1761,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,
                        goto out_unlock;
                }
 
-               if ((root->flags ^ opts.flags) & CGRP_ROOT_OPTION_MASK) {
+               if (root->flags ^ opts.flags) {
                        if ((root->flags | opts.flags) & CGRP_ROOT_SANE_BEHAVIOR) {
                                pr_err("sane_behavior: new mount options should match the existing superblock\n");
                                ret = -EINVAL;
@@ -2415,9 +2470,7 @@ static int cgroup_release_agent_show(struct seq_file *seq, void *v)
 
 static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
 {
-       struct cgroup *cgrp = seq_css(seq)->cgroup;
-
-       seq_printf(seq, "%d\n", cgroup_sane_behavior(cgrp));
+       seq_puts(seq, "0\n");
        return 0;
 }
 
@@ -2454,7 +2507,7 @@ static int cgroup_controllers_show(struct seq_file *seq, void *v)
 {
        struct cgroup *cgrp = seq_css(seq)->cgroup;
 
-       cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->child_subsys_mask);
+       cgroup_print_ss_mask(seq, cgroup_parent(cgrp)->subtree_control);
        return 0;
 }
 
@@ -2463,7 +2516,7 @@ static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
 {
        struct cgroup *cgrp = seq_css(seq)->cgroup;
 
-       cgroup_print_ss_mask(seq, cgrp->child_subsys_mask);
+       cgroup_print_ss_mask(seq, cgrp->subtree_control);
        return 0;
 }
 
@@ -2569,6 +2622,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
                                            loff_t off)
 {
        unsigned int enable = 0, disable = 0;
+       unsigned int css_enable, css_disable, old_ctrl, new_ctrl;
        struct cgroup *cgrp, *child;
        struct cgroup_subsys *ss;
        char *tok;
@@ -2608,11 +2662,26 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 
        for_each_subsys(ss, ssid) {
                if (enable & (1 << ssid)) {
-                       if (cgrp->child_subsys_mask & (1 << ssid)) {
+                       if (cgrp->subtree_control & (1 << ssid)) {
                                enable &= ~(1 << ssid);
                                continue;
                        }
 
+                       /* unavailable or not enabled on the parent? */
+                       if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
+                           (cgroup_parent(cgrp) &&
+                            !(cgroup_parent(cgrp)->subtree_control & (1 << ssid)))) {
+                               ret = -ENOENT;
+                               goto out_unlock;
+                       }
+
+                       /*
+                        * @ss is already enabled through dependency and
+                        * we'll just make it visible.  Skip draining.
+                        */
+                       if (cgrp->child_subsys_mask & (1 << ssid))
+                               continue;
+
                        /*
                         * Because css offlining is asynchronous, userland
                         * might try to re-enable the same controller while
@@ -2635,23 +2704,15 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
 
                                return restart_syscall();
                        }
-
-                       /* unavailable or not enabled on the parent? */
-                       if (!(cgrp_dfl_root.subsys_mask & (1 << ssid)) ||
-                           (cgroup_parent(cgrp) &&
-                            !(cgroup_parent(cgrp)->child_subsys_mask & (1 << ssid)))) {
-                               ret = -ENOENT;
-                               goto out_unlock;
-                       }
                } else if (disable & (1 << ssid)) {
-                       if (!(cgrp->child_subsys_mask & (1 << ssid))) {
+                       if (!(cgrp->subtree_control & (1 << ssid))) {
                                disable &= ~(1 << ssid);
                                continue;
                        }
 
                        /* a child has it enabled? */
                        cgroup_for_each_live_child(child, cgrp) {
-                               if (child->child_subsys_mask & (1 << ssid)) {
+                               if (child->subtree_control & (1 << ssid)) {
                                        ret = -EBUSY;
                                        goto out_unlock;
                                }
@@ -2665,7 +2726,7 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
        }
 
        /*
-        * Except for the root, child_subsys_mask must be zero for a cgroup
+        * Except for the root, subtree_control must be zero for a cgroup
         * with tasks so that child cgroups don't compete against tasks.
         */
        if (enable && cgroup_parent(cgrp) && !list_empty(&cgrp->cset_links)) {
@@ -2674,36 +2735,75 @@ static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
        }
 
        /*
-        * Create csses for enables and update child_subsys_mask.  This
-        * changes cgroup_e_css() results which in turn makes the
-        * subsequent cgroup_update_dfl_csses() associate all tasks in the
-        * subtree to the updated csses.
+        * Update subsys masks and calculate what needs to be done.  More
+        * subsystems than specified may need to be enabled or disabled
+        * depending on subsystem dependencies.
+        */
+       cgrp->subtree_control |= enable;
+       cgrp->subtree_control &= ~disable;
+
+       old_ctrl = cgrp->child_subsys_mask;
+       cgroup_refresh_child_subsys_mask(cgrp);
+       new_ctrl = cgrp->child_subsys_mask;
+
+       css_enable = ~old_ctrl & new_ctrl;
+       css_disable = old_ctrl & ~new_ctrl;
+       enable |= css_enable;
+       disable |= css_disable;
+
+       /*
+        * Create new csses or make the existing ones visible.  A css is
+        * created invisible if it's being implicitly enabled through
+        * dependency.  An invisible css is made visible when the userland
+        * explicitly enables it.
         */
        for_each_subsys(ss, ssid) {
                if (!(enable & (1 << ssid)))
                        continue;
 
                cgroup_for_each_live_child(child, cgrp) {
-                       ret = create_css(child, ss);
+                       if (css_enable & (1 << ssid))
+                               ret = create_css(child, ss,
+                                       cgrp->subtree_control & (1 << ssid));
+                       else
+                               ret = cgroup_populate_dir(child, 1 << ssid);
                        if (ret)
                                goto err_undo_css;
                }
        }
 
-       cgrp->child_subsys_mask |= enable;
-       cgrp->child_subsys_mask &= ~disable;
-
+       /*
+        * At this point, cgroup_e_css() results reflect the new csses
+        * making the following cgroup_update_dfl_csses() properly update
+        * css associations of all tasks in the subtree.
+        */
        ret = cgroup_update_dfl_csses(cgrp);
        if (ret)
                goto err_undo_css;
 
-       /* all tasks are now migrated away from the old csses, kill them */
+       /*
+        * All tasks are migrated out of disabled csses.  Kill or hide
+        * them.  A css is hidden when the userland requests it to be
+        * disabled while other subsystems are still depending on it.  The
+        * css must not actively control resources and be in the vanilla
+        * state if it's made visible again later.  Controllers which may
+        * be depended upon should provide ->css_reset() for this purpose.
+        */
        for_each_subsys(ss, ssid) {
                if (!(disable & (1 << ssid)))
                        continue;
 
-               cgroup_for_each_live_child(child, cgrp)
-                       kill_css(cgroup_css(child, ss));
+               cgroup_for_each_live_child(child, cgrp) {
+                       struct cgroup_subsys_state *css = cgroup_css(child, ss);
+
+                       if (css_disable & (1 << ssid)) {
+                               kill_css(css);
+                       } else {
+                               cgroup_clear_dir(child, 1 << ssid);
+                               if (ss->css_reset)
+                                       ss->css_reset(css);
+                       }
+               }
        }
 
        kernfs_activate(cgrp->kn);
@@ -2713,8 +2813,9 @@ out_unlock:
        return ret ?: nbytes;
 
 err_undo_css:
-       cgrp->child_subsys_mask &= ~enable;
-       cgrp->child_subsys_mask |= disable;
+       cgrp->subtree_control &= ~enable;
+       cgrp->subtree_control |= disable;
+       cgroup_refresh_child_subsys_mask(cgrp);
 
        for_each_subsys(ss, ssid) {
                if (!(enable & (1 << ssid)))
@@ -2722,8 +2823,14 @@ err_undo_css:
 
                cgroup_for_each_live_child(child, cgrp) {
                        struct cgroup_subsys_state *css = cgroup_css(child, ss);
-                       if (css)
+
+                       if (!css)
+                               continue;
+
+                       if (css_enable & (1 << ssid))
                                kill_css(css);
+                       else
+                               cgroup_clear_dir(child, 1 << ssid);
                }
        }
        goto out_unlock;
@@ -4017,7 +4124,7 @@ static struct cftype cgroup_base_files[] = {
        },
        {
                .name = "cgroup.sane_behavior",
-               .flags = CFTYPE_ONLY_ON_ROOT,
+               .flags = CFTYPE_INSANE | CFTYPE_ONLY_ON_ROOT,
                .seq_show = cgroup_sane_behavior_show,
        },
        {
@@ -4272,12 +4379,14 @@ static void offline_css(struct cgroup_subsys_state *css)
  * create_css - create a cgroup_subsys_state
  * @cgrp: the cgroup new css will be associated with
  * @ss: the subsys of new css
+ * @visible: whether to create control knobs for the new css or not
  *
  * Create a new css associated with @cgrp - @ss pair.  On success, the new
- * css is online and installed in @cgrp with all interface files created.
- * Returns 0 on success, -errno on failure.
+ * css is online and installed in @cgrp with all interface files created if
+ * @visible.  Returns 0 on success, -errno on failure.
  */
-static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
+static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss,
+                     bool visible)
 {
        struct cgroup *parent = cgroup_parent(cgrp);
        struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
@@ -4301,9 +4410,11 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)
                goto err_free_percpu_ref;
        css->id = err;
 
-       err = cgroup_populate_dir(cgrp, 1 << ss->id);
-       if (err)
-               goto err_free_id;
+       if (visible) {
+               err = cgroup_populate_dir(cgrp, 1 << ss->id);
+               if (err)
+                       goto err_free_id;
+       }
 
        /* @css is ready to be brought online now, make it visible */
        list_add_tail_rcu(&css->sibling, &parent_css->children);
@@ -4420,7 +4531,8 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
        /* let's create and online css's */
        for_each_subsys(ss, ssid) {
                if (parent->child_subsys_mask & (1 << ssid)) {
-                       ret = create_css(cgrp, ss);
+                       ret = create_css(cgrp, ss,
+                                        parent->subtree_control & (1 << ssid));
                        if (ret)
                                goto out_destroy;
                }
@@ -4428,10 +4540,12 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
 
        /*
         * On the default hierarchy, a child doesn't automatically inherit
-        * child_subsys_mask from the parent.  Each is configured manually.
+        * subtree_control from the parent.  Each is configured manually.
         */
-       if (!cgroup_on_dfl(cgrp))
-               cgrp->child_subsys_mask = parent->child_subsys_mask;
+       if (!cgroup_on_dfl(cgrp)) {
+               cgrp->subtree_control = parent->subtree_control;
+               cgroup_refresh_child_subsys_mask(cgrp);
+       }
 
        kernfs_activate(kn);