]> git.proxmox.com Git - mirror_ubuntu-jammy-kernel.git/commitdiff
cgroup/cpuset: Fix violation of cpuset locking rule
authorWaiman Long <longman@redhat.com>
Tue, 20 Jul 2021 14:18:28 +0000 (10:18 -0400)
committerTejun Heo <tj@kernel.org>
Mon, 9 Aug 2021 22:53:19 +0000 (12:53 -1000)
The cpuset fields that manage partition root state do not strictly
follow the cpuset locking rule that update to cpuset has to be done
with both the callback_lock and cpuset_mutex held. This is now fixed
by making sure that the locking rule is upheld.

Fixes: 3881b86128d0 ("cpuset: Add an error state to cpuset.sched.partition")
Fixes: 4b842da276a8 ("cpuset: Make CPU hotplug work with partition")
Signed-off-by: Waiman Long <longman@redhat.com>
Signed-off-by: Tejun Heo <tj@kernel.org>
kernel/cgroup/cpuset.c

index 28a784bf64b1d64a4c5155144de1aa2e92d6a6a3..13b5be6df4da2749559d38fae5654aa59dfbfb75 100644 (file)
@@ -1148,6 +1148,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
        struct cpuset *parent = parent_cs(cpuset);
        int adding;     /* Moving cpus from effective_cpus to subparts_cpus */
        int deleting;   /* Moving cpus from subparts_cpus to effective_cpus */
+       int new_prs;
        bool part_error = false;        /* Partition error? */
 
        percpu_rwsem_assert_held(&cpuset_rwsem);
@@ -1183,6 +1184,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
         * A cpumask update cannot make parent's effective_cpus become empty.
         */
        adding = deleting = false;
+       new_prs = cpuset->partition_root_state;
        if (cmd == partcmd_enable) {
                cpumask_copy(tmp->addmask, cpuset->cpus_allowed);
                adding = true;
@@ -1247,11 +1249,11 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
                switch (cpuset->partition_root_state) {
                case PRS_ENABLED:
                        if (part_error)
-                               cpuset->partition_root_state = PRS_ERROR;
+                               new_prs = PRS_ERROR;
                        break;
                case PRS_ERROR:
                        if (!part_error)
-                               cpuset->partition_root_state = PRS_ENABLED;
+                               new_prs = PRS_ENABLED;
                        break;
                }
                /*
@@ -1260,10 +1262,10 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
                part_error = (prev_prs == PRS_ERROR);
        }
 
-       if (!part_error && (cpuset->partition_root_state == PRS_ERROR))
+       if (!part_error && (new_prs == PRS_ERROR))
                return 0;       /* Nothing need to be done */
 
-       if (cpuset->partition_root_state == PRS_ERROR) {
+       if (new_prs == PRS_ERROR) {
                /*
                 * Remove all its cpus from parent's subparts_cpus.
                 */
@@ -1272,7 +1274,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
                                       parent->subparts_cpus);
        }
 
-       if (!adding && !deleting)
+       if (!adding && !deleting && (new_prs == cpuset->partition_root_state))
                return 0;
 
        /*
@@ -1299,6 +1301,9 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
        }
 
        parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
+
+       if (cpuset->partition_root_state != new_prs)
+               cpuset->partition_root_state = new_prs;
        spin_unlock_irq(&callback_lock);
 
        return cmd == partcmd_update;
@@ -1321,6 +1326,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
        struct cpuset *cp;
        struct cgroup_subsys_state *pos_css;
        bool need_rebuild_sched_domains = false;
+       int new_prs;
 
        rcu_read_lock();
        cpuset_for_each_descendant_pre(cp, pos_css, cs) {
@@ -1360,7 +1366,8 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                 * update_tasks_cpumask() again for tasks in the parent
                 * cpuset if the parent's subparts_cpus changes.
                 */
-               if ((cp != cs) && cp->partition_root_state) {
+               new_prs = cp->partition_root_state;
+               if ((cp != cs) && new_prs) {
                        switch (parent->partition_root_state) {
                        case PRS_DISABLED:
                                /*
@@ -1370,7 +1377,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                                 */
                                WARN_ON_ONCE(cp->partition_root_state
                                             != PRS_ERROR);
-                               cp->partition_root_state = PRS_DISABLED;
+                               new_prs = PRS_DISABLED;
 
                                /*
                                 * clear_bit() is an atomic operation and
@@ -1391,11 +1398,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                                /*
                                 * When parent is invalid, it has to be too.
                                 */
-                               cp->partition_root_state = PRS_ERROR;
-                               if (cp->nr_subparts_cpus) {
-                                       cp->nr_subparts_cpus = 0;
-                                       cpumask_clear(cp->subparts_cpus);
-                               }
+                               new_prs = PRS_ERROR;
                                break;
                        }
                }
@@ -1407,8 +1410,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                spin_lock_irq(&callback_lock);
 
                cpumask_copy(cp->effective_cpus, tmp->new_cpus);
-               if (cp->nr_subparts_cpus &&
-                  (cp->partition_root_state != PRS_ENABLED)) {
+               if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) {
                        cp->nr_subparts_cpus = 0;
                        cpumask_clear(cp->subparts_cpus);
                } else if (cp->nr_subparts_cpus) {
@@ -1435,6 +1437,10 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
                                        = cpumask_weight(cp->subparts_cpus);
                        }
                }
+
+               if (new_prs != cp->partition_root_state)
+                       cp->partition_root_state = new_prs;
+
                spin_unlock_irq(&callback_lock);
 
                WARN_ON(!is_in_v2_mode() &&
@@ -1944,25 +1950,25 @@ out:
  */
 static int update_prstate(struct cpuset *cs, int new_prs)
 {
-       int err;
+       int err, old_prs = cs->partition_root_state;
        struct cpuset *parent = parent_cs(cs);
        struct tmpmasks tmpmask;
 
-       if (new_prs == cs->partition_root_state)
+       if (old_prs == new_prs)
                return 0;
 
        /*
         * Cannot force a partial or invalid partition root to a full
         * partition root.
         */
-       if (new_prs && (cs->partition_root_state < 0))
+       if (new_prs && (old_prs == PRS_ERROR))
                return -EINVAL;
 
        if (alloc_cpumasks(NULL, &tmpmask))
                return -ENOMEM;
 
        err = -EINVAL;
-       if (!cs->partition_root_state) {
+       if (!old_prs) {
                /*
                 * Turning on partition root requires setting the
                 * CS_CPU_EXCLUSIVE bit implicitly as well and cpus_allowed
@@ -1981,14 +1987,12 @@ static int update_prstate(struct cpuset *cs, int new_prs)
                        update_flag(CS_CPU_EXCLUSIVE, cs, 0);
                        goto out;
                }
-               cs->partition_root_state = PRS_ENABLED;
        } else {
                /*
                 * Turning off partition root will clear the
                 * CS_CPU_EXCLUSIVE bit.
                 */
-               if (cs->partition_root_state == PRS_ERROR) {
-                       cs->partition_root_state = PRS_DISABLED;
+               if (old_prs == PRS_ERROR) {
                        update_flag(CS_CPU_EXCLUSIVE, cs, 0);
                        err = 0;
                        goto out;
@@ -1999,8 +2003,6 @@ static int update_prstate(struct cpuset *cs, int new_prs)
                if (err)
                        goto out;
 
-               cs->partition_root_state = PRS_DISABLED;
-
                /* Turning off CS_CPU_EXCLUSIVE will not return error */
                update_flag(CS_CPU_EXCLUSIVE, cs, 0);
        }
@@ -2017,6 +2019,12 @@ static int update_prstate(struct cpuset *cs, int new_prs)
 
        rebuild_sched_domains_locked();
 out:
+       if (!err) {
+               spin_lock_irq(&callback_lock);
+               cs->partition_root_state = new_prs;
+               spin_unlock_irq(&callback_lock);
+       }
+
        free_cpumasks(NULL, &tmpmask);
        return err;
 }
@@ -3080,8 +3088,10 @@ retry:
        if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
           (parent->partition_root_state == PRS_ERROR))) {
                if (cs->nr_subparts_cpus) {
+                       spin_lock_irq(&callback_lock);
                        cs->nr_subparts_cpus = 0;
                        cpumask_clear(cs->subparts_cpus);
+                       spin_unlock_irq(&callback_lock);
                        compute_effective_cpumask(&new_cpus, cs, parent);
                }
 
@@ -3095,7 +3105,9 @@ retry:
                     cpumask_empty(&new_cpus)) {
                        update_parent_subparts_cpumask(cs, partcmd_disable,
                                                       NULL, tmp);
+                       spin_lock_irq(&callback_lock);
                        cs->partition_root_state = PRS_ERROR;
+                       spin_unlock_irq(&callback_lock);
                }
                cpuset_force_rebuild();
        }