rcu: Revert "Allow post-unlock reference for rt_mutex" to avoid priority-inversion
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 3ec85cb5d544b8588fd574a80e19bd564079533f..625e26040e6b548f1cdf07c74ed449b378f93c46 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -103,6 +103,8 @@ RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
 static struct rcu_state *rcu_state_p = &rcu_preempt_state;
 
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
+                              bool wake);
 
 /*
  * Tell them what RCU they are running.
@@ -306,6 +308,15 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
        return np;
 }
 
+/*
+ * Return true if the specified rcu_node structure has tasks that were
+ * preempted within an RCU read-side critical section.
+ */
+static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
+{
+       return !list_empty(&rnp->blkd_tasks);
+}
+
 /*
  * Handle special cases during rcu_read_unlock(), such as needing to
  * notify RCU core processing or task having blocked during the RCU
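[Annotation] The new rcu_preempt_has_tasks() helper above is simply an emptiness check on the node's ->blkd_tasks list: the kernel's circular list_head is empty exactly when the head points back at itself. The sketch below is a minimal userspace model of that check, using a cut-down stand-in for struct list_head and a hypothetical one-field rcu_node, not the kernel's definitions:

    #include <stdbool.h>
    #include <stdio.h>

    /* Simplified stand-in for the kernel's circular struct list_head. */
    struct list_head {
        struct list_head *next, *prev;
    };

    static void INIT_LIST_HEAD(struct list_head *head)
    {
        head->next = head;
        head->prev = head;
    }

    /* An empty circular list is one whose head points back at itself. */
    static bool list_empty(const struct list_head *head)
    {
        return head->next == head;
    }

    /* Illustrative cut-down rcu_node: only the field the helper looks at. */
    struct rcu_node {
        struct list_head blkd_tasks;
    };

    /* Mirrors the helper added by the patch: "has tasks" == list non-empty. */
    static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
    {
        return !list_empty(&rnp->blkd_tasks);
    }

    int main(void)
    {
        struct rcu_node rnp;

        INIT_LIST_HEAD(&rnp.blkd_tasks);
        printf("has tasks: %d\n", rcu_preempt_has_tasks(&rnp));  /* prints 0 */
        return 0;
    }

The wrapper buys readability rather than behavior: every open-coded !list_empty(&rnp->blkd_tasks) elsewhere in this file becomes a named question about blocked readers, as the later hunks show.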
@@ -313,9 +324,10 @@ static struct list_head *rcu_next_node_entry(struct task_struct *t,
  */
 void rcu_read_unlock_special(struct task_struct *t)
 {
-       int empty;
-       int empty_exp;
-       int empty_exp_now;
+       bool empty;
+       bool empty_exp;
+       bool empty_norm;
+       bool empty_exp_now;
        unsigned long flags;
        struct list_head *np;
 #ifdef CONFIG_RCU_BOOST
@@ -367,7 +379,8 @@ void rcu_read_unlock_special(struct task_struct *t)
                                break;
                        raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
                }
-               empty = !rcu_preempt_blocked_readers_cgp(rnp);
+               empty = !rcu_preempt_has_tasks(rnp);
+               empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
                empty_exp = !rcu_preempted_readers_exp(rnp);
                smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
                np = rcu_next_node_entry(t, rnp);
@@ -386,6 +399,14 @@ void rcu_read_unlock_special(struct task_struct *t)
                drop_boost_mutex = rt_mutex_owner(&rnp->boost_mtx) == t;
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
+               /*
+                * If this was the last task on the list, go see if we
+                * need to propagate ->qsmaskinit bit clearing up the
+                * rcu_node tree.
+                */
+               if (!empty && !rcu_preempt_has_tasks(rnp))
+                       rcu_cleanup_dead_rnp(rnp);
+
                /*
                 * If this was the last task on the current list, and if
                 * we aren't waiting on any CPUs, report the quiescent state.
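[Annotation] The comment added above calls rcu_cleanup_dead_rnp() when the last blocked task leaves a leaf whose CPUs are already offline, so that ->qsmaskinit clearing propagates up the rcu_node tree. The sketch below is only a rough userspace model of that propagation idea, with illustrative fields and walk; it is not the kernel's rcu_cleanup_dead_rnp() implementation:

    #include <stdio.h>

    /*
     * Rough model of an rcu_node hierarchy: each node owns one bit
     * (grpmask) in its parent's qsmaskinit. Field names follow the
     * kernel's, but this is an illustrative sketch only.
     */
    struct rcu_node {
        unsigned long qsmaskinit;   /* bits of still-initialized children */
        unsigned long grpmask;      /* this node's bit in its parent */
        struct rcu_node *parent;
    };

    /*
     * When a leaf has neither online CPUs nor blocked tasks left, clear
     * its bit upward; stop at the first ancestor that still has other
     * live children and therefore must stay in the grace-period machinery.
     */
    static void propagate_empty_leaf(struct rcu_node *rnp)
    {
        for (struct rcu_node *p = rnp->parent; p; rnp = p, p = p->parent) {
            p->qsmaskinit &= ~rnp->grpmask;
            if (p->qsmaskinit)
                break;  /* parent still has live children */
        }
    }

    int main(void)
    {
        struct rcu_node root = { .qsmaskinit = 0x3 };
        struct rcu_node leaf = { .qsmaskinit = 0, .grpmask = 0x1, .parent = &root };

        propagate_empty_leaf(&leaf);
        printf("root qsmaskinit: %#lx\n", root.qsmaskinit);  /* prints 0x2 */
        return 0;
    }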
@@ -393,7 +414,7 @@ void rcu_read_unlock_special(struct task_struct *t)
                 * so we must take a snapshot of the expedited state.
                 */
                empty_exp_now = !rcu_preempted_readers_exp(rnp);
-               if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
+               if (!empty_norm && !rcu_preempt_blocked_readers_cgp(rnp)) {
                        trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
                                                         rnp->gpnum,
                                                         0, rnp->qsmask,
@@ -408,10 +429,8 @@ void rcu_read_unlock_special(struct task_struct *t)
 
 #ifdef CONFIG_RCU_BOOST
                /* Unboost if we were boosted. */
-               if (drop_boost_mutex) {
+               if (drop_boost_mutex)
                        rt_mutex_unlock(&rnp->boost_mtx);
-                       complete(&rnp->boost_completion);
-               }
 #endif /* #ifdef CONFIG_RCU_BOOST */
 
                /*
@@ -519,99 +538,13 @@ static int rcu_print_task_stall(struct rcu_node *rnp)
 static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
 {
        WARN_ON_ONCE(rcu_preempt_blocked_readers_cgp(rnp));
-       if (!list_empty(&rnp->blkd_tasks))
+       if (rcu_preempt_has_tasks(rnp))
                rnp->gp_tasks = rnp->blkd_tasks.next;
        WARN_ON_ONCE(rnp->qsmask);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-/*
- * Handle tasklist migration for case in which all CPUs covered by the
- * specified rcu_node have gone offline.  Move them up to the root
- * rcu_node.  The reason for not just moving them to the immediate
- * parent is to remove the need for rcu_read_unlock_special() to
- * make more than two attempts to acquire the target rcu_node's lock.
- * Returns true if there were tasks blocking the current RCU grace
- * period.
- *
- * Returns 1 if there was previously a task blocking the current grace
- * period on the specified rcu_node structure.
- *
- * The caller must hold rnp->lock with irqs disabled.
- */
-static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
-                                    struct rcu_node *rnp,
-                                    struct rcu_data *rdp)
-{
-       struct list_head *lp;
-       struct list_head *lp_root;
-       int retval = 0;
-       struct rcu_node *rnp_root = rcu_get_root(rsp);
-       struct task_struct *t;
-
-       if (rnp == rnp_root) {
-               WARN_ONCE(1, "Last CPU thought to be offlined?");
-               return 0;  /* Shouldn't happen: at least one CPU online. */
-       }
-
-       /* If we are on an internal node, complain bitterly. */
-       WARN_ON_ONCE(rnp != rdp->mynode);
-
-       /*
-        * Move tasks up to root rcu_node.  Don't try to get fancy for
-        * this corner-case operation -- just put this node's tasks
-        * at the head of the root node's list, and update the root node's
-        * ->gp_tasks and ->exp_tasks pointers to those of this node's,
-        * if non-NULL.  This might result in waiting for more tasks than
-        * absolutely necessary, but this is a good performance/complexity
-        * tradeoff.
-        */
-       if (rcu_preempt_blocked_readers_cgp(rnp) && rnp->qsmask == 0)
-               retval |= RCU_OFL_TASKS_NORM_GP;
-       if (rcu_preempted_readers_exp(rnp))
-               retval |= RCU_OFL_TASKS_EXP_GP;
-       lp = &rnp->blkd_tasks;
-       lp_root = &rnp_root->blkd_tasks;
-       while (!list_empty(lp)) {
-               t = list_entry(lp->next, typeof(*t), rcu_node_entry);
-               raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-               smp_mb__after_unlock_lock();
-               list_del(&t->rcu_node_entry);
-               t->rcu_blocked_node = rnp_root;
-               list_add(&t->rcu_node_entry, lp_root);
-               if (&t->rcu_node_entry == rnp->gp_tasks)
-                       rnp_root->gp_tasks = rnp->gp_tasks;
-               if (&t->rcu_node_entry == rnp->exp_tasks)
-                       rnp_root->exp_tasks = rnp->exp_tasks;
-#ifdef CONFIG_RCU_BOOST
-               if (&t->rcu_node_entry == rnp->boost_tasks)
-                       rnp_root->boost_tasks = rnp->boost_tasks;
-#endif /* #ifdef CONFIG_RCU_BOOST */
-               raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
-       }
-
-       rnp->gp_tasks = NULL;
-       rnp->exp_tasks = NULL;
-#ifdef CONFIG_RCU_BOOST
-       rnp->boost_tasks = NULL;
-       /*
-        * In case root is being boosted and leaf was not.  Make sure
-        * that we boost the tasks blocking the current grace period
-        * in this case.
-        */
-       raw_spin_lock(&rnp_root->lock); /* irqs already disabled */
-       smp_mb__after_unlock_lock();
-       if (rnp_root->boost_tasks != NULL &&
-           rnp_root->boost_tasks != rnp_root->gp_tasks &&
-           rnp_root->boost_tasks != rnp_root->exp_tasks)
-               rnp_root->boost_tasks = rnp_root->gp_tasks;
-       raw_spin_unlock(&rnp_root->lock); /* irqs still disabled */
-#endif /* #ifdef CONFIG_RCU_BOOST */
-
-       return retval;
-}
-
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
 
 /*
@@ -771,7 +704,7 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
        smp_mb__after_unlock_lock();
-       if (list_empty(&rnp->blkd_tasks)) {
+       if (!rcu_preempt_has_tasks(rnp)) {
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
        } else {
                rnp->exp_tasks = rnp->blkd_tasks.next;
@@ -960,11 +893,12 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-/* Because preemptible RCU does not exist, no quieting of tasks. */
-static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, unsigned long flags)
-       __releases(rnp->lock)
+/*
+ * Because there is no preemptible RCU, there can be no readers blocked.
+ */
+static bool rcu_preempt_has_tasks(struct rcu_node *rnp)
 {
-       raw_spin_unlock_irqrestore(&rnp->lock, flags);
+       return false;
 }
 
 #endif /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -996,23 +930,6 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp)
        WARN_ON_ONCE(rnp->qsmask);
 }
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Because preemptible RCU does not exist, it never needs to migrate
- * tasks that were blocked within RCU read-side critical sections, and
- * such non-existent tasks cannot possibly have been blocking the current
- * grace period.
- */
-static int rcu_preempt_offline_tasks(struct rcu_state *rsp,
-                                    struct rcu_node *rnp,
-                                    struct rcu_data *rdp)
-{
-       return 0;
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Because preemptible RCU does not exist, it never has any callbacks
  * to check.
@@ -1031,20 +948,6 @@ void synchronize_rcu_expedited(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);
 
-#ifdef CONFIG_HOTPLUG_CPU
-
-/*
- * Because preemptible RCU does not exist, there is never any need to
- * report on tasks preempted in RCU read-side critical sections during
- * expedited RCU grace periods.
- */
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
-                              bool wake)
-{
-}
-
-#endif /* #ifdef CONFIG_HOTPLUG_CPU */
-
 /*
  * Because preemptible RCU does not exist, rcu_barrier() is just
  * another name for rcu_barrier_sched().
@@ -1080,7 +983,7 @@ void exit_rcu(void)
 
 static void rcu_initiate_boost_trace(struct rcu_node *rnp)
 {
-       if (list_empty(&rnp->blkd_tasks))
+       if (!rcu_preempt_has_tasks(rnp))
                rnp->n_balk_blkd_tasks++;
        else if (rnp->exp_tasks == NULL && rnp->gp_tasks == NULL)
                rnp->n_balk_exp_gp_tasks++;
@@ -1127,7 +1030,8 @@ static int rcu_boost(struct rcu_node *rnp)
        struct task_struct *t;
        struct list_head *tb;
 
-       if (rnp->exp_tasks == NULL && rnp->boost_tasks == NULL)
+       if (ACCESS_ONCE(rnp->exp_tasks) == NULL &&
+           ACCESS_ONCE(rnp->boost_tasks) == NULL)
                return 0;  /* Nothing left to boost. */
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
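[Annotation] The hunk above makes the lockless "nothing to boost" fast path in rcu_boost() read ->exp_tasks and ->boost_tasks through ACCESS_ONCE(), i.e. one volatile load each, before deciding whether to take ->lock and recheck. Below is a minimal sketch of that check-then-recheck pattern with illustrative names and a simplified ACCESS_ONCE() (the kernel's macro is essentially this volatile cast):

    #include <pthread.h>
    #include <stdio.h>

    /* Simplified ACCESS_ONCE(): force a single, untorn load via volatile. */
    #define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static void *exp_tasks;     /* stand-in for rnp->exp_tasks */
    static void *boost_tasks;   /* stand-in for rnp->boost_tasks */

    /* Lockless fast path first, then a recheck under the lock. */
    static int boost_work_pending(void)
    {
        if (ACCESS_ONCE(exp_tasks) == NULL &&
            ACCESS_ONCE(boost_tasks) == NULL)
            return 0;           /* nothing to boost, skip the lock */

        pthread_mutex_lock(&lock);
        int pending = (exp_tasks != NULL || boost_tasks != NULL);
        pthread_mutex_unlock(&lock);
        return pending;
    }

    int main(void)
    {
        printf("pending: %d\n", boost_work_pending());  /* prints 0 */
        return 0;
    }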
@@ -1175,15 +1079,11 @@ static int rcu_boost(struct rcu_node *rnp)
         */
        t = container_of(tb, struct task_struct, rcu_node_entry);
        rt_mutex_init_proxy_locked(&rnp->boost_mtx, t);
-       init_completion(&rnp->boost_completion);
        raw_spin_unlock_irqrestore(&rnp->lock, flags);
        /* Lock only for side effect: boosts task t's priority. */
        rt_mutex_lock(&rnp->boost_mtx);
        rt_mutex_unlock(&rnp->boost_mtx);  /* Then keep lockdep happy. */
 
-       /* Wait for boostee to be done w/boost_mtx before reinitializing. */
-       wait_for_completion(&rnp->boost_completion);
-
        return ACCESS_ONCE(rnp->exp_tasks) != NULL ||
               ACCESS_ONCE(rnp->boost_tasks) != NULL;
 }
@@ -1416,12 +1316,8 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
        for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
                if ((mask & 0x1) && cpu != outgoingcpu)
                        cpumask_set_cpu(cpu, cm);
-       if (cpumask_weight(cm) == 0) {
+       if (cpumask_weight(cm) == 0)
                cpumask_setall(cm);
-               for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++)
-                       cpumask_clear_cpu(cpu, cm);
-               WARN_ON_ONCE(cpumask_weight(cm) == 0);
-       }
        set_cpus_allowed_ptr(t, cm);
        free_cpumask_var(cm);
 }
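[Annotation] The affinity hunk above keeps the boost kthread on its node's CPUs minus the outgoing one, and if that leaves no CPUs it now simply allows the kthread to run anywhere, rather than the old setall-then-clear fallback whose result could itself be empty (hence the removed WARN_ON_ONCE). A toy single-word model of that decision follows; the kernel uses cpumask_var_t and set_cpus_allowed_ptr() rather than a plain bitmask:

    #include <stdio.h>

    /* Pretend the machine has 8 CPUs; a real cpumask is wider than one word. */
    #define ALL_CPUS 0xffUL

    /*
     * Bind the boost kthread to its node's CPUs minus the outgoing one;
     * if nothing is left, fall back to "anywhere" rather than an empty mask.
     */
    static unsigned long boost_affinity(unsigned long node_cpus, int outgoingcpu)
    {
        unsigned long cm = node_cpus;

        if (outgoingcpu >= 0)
            cm &= ~(1UL << outgoingcpu);
        if (cm == 0)
            cm = ALL_CPUS;
        return cm;
    }

    int main(void)
    {
        /* Node covers CPUs 2 and 3; CPU 3 is going away -> stay on CPU 2. */
        printf("%#lx\n", boost_affinity(0x0c, 3));  /* prints 0x4 */
        /* Node's only CPU is going away -> fall back to all CPUs. */
        printf("%#lx\n", boost_affinity(0x08, 3));  /* prints 0xff */
        return 0;
    }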
@@ -1446,12 +1342,8 @@ static void __init rcu_spawn_boost_kthreads(void)
        for_each_possible_cpu(cpu)
                per_cpu(rcu_cpu_has_work, cpu) = 0;
        BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec));
-       rnp = rcu_get_root(rcu_state_p);
-       (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
-       if (NUM_RCU_NODES > 1) {
-               rcu_for_each_leaf_node(rcu_state_p, rnp)
-                       (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
-       }
+       rcu_for_each_leaf_node(rcu_state_p, rnp)
+               (void)rcu_spawn_one_boost_kthread(rcu_state_p, rnp);
 }
 
 static void rcu_prepare_kthreads(int cpu)