Merge branches 'doc.2019.12.10a', 'exp.2019.12.09a', 'fixes.2020.01.24a', 'kfree_rcu...

author Paul E. McKenney <paulmck@kernel.org>

Fri, 24 Jan 2020 18:37:27 +0000 (10:37 -0800)

committer Paul E. McKenney <paulmck@kernel.org>

Fri, 24 Jan 2020 18:37:27 +0000 (10:37 -0800)
author Paul E. McKenney <paulmck@kernel.org>
Fri, 24 Jan 2020 18:37:27 +0000 (10:37 -0800)
committer Paul E. McKenney <paulmck@kernel.org>
Fri, 24 Jan 2020 18:37:27 +0000 (10:37 -0800)
diff --cc include/linux/rcupdate.h
Simple merge
diff --cc include/trace/events/rcu.h
Simple merge
diff --cc kernel/rcu/rcu.h
Simple merge
diff --cc kernel/rcu/rcutorture.c
Simple merge
diff --cc kernel/rcu/srcutree.c
Simple merge
diff --cc kernel/rcu/tree.c

index 1694a6b57ad8c96e5614a385789084e0e963f837,6145e08a14072f118d39ca776fa943368d6abba9,878f62f218e905010818a3cb009a08906438fc64,31d2d9255d959955356e376ef0ae9b8596a6393a,1694a6b57ad8c96e5614a385789084e0e963f837,b0e0612392a961e3aaf8a6626445931b8f0f9ec4,1694a6b57ad8c96e5614a385789084e0e963f837..d91c9156fab2ef0ad64ef1b31a77c3a10626d254
--- 1/kernel/rcu/tree.c
--- 2/kernel/rcu/tree.c
--- 3/kernel/rcu/tree.c
--- 4/kernel/rcu/tree.c
--- 5/kernel/rcu/tree.c
--- 6/kernel/rcu/tree.c
--- 7/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@@@@@@@ -2683,12 -2684,12 -2669,12 -2691,165 -2683,12 -2689,12 -2683,12 +2684,165 @@@@@@@@ void call_rcu(struct rcu_head *head, rc
        }
        EXPORT_SYMBOL_GPL(call_rcu);
        
--  - - * Queue an RCU callback for lazy invocation after a grace period.
--  - - * This will likely be later named something like "call_rcu_lazy()",
--  - - * but this change will require some way of tagging the lazy RCU
--  - - * callbacks in the list of pending callbacks. Until then, this
--  - - * function may only be called from __kfree_rcu().
+++ +++
+++ +++/* Maximum number of jiffies to wait before draining a batch. */
+++ +++#define KFREE_DRAIN_JIFFIES (HZ / 50)
+++ +++#define KFREE_N_BATCHES 2
+++ +++
+++ +++/**
+++ +++ * struct kfree_rcu_cpu_work - single batch of kfree_rcu() requests
+++ +++ * @rcu_work: Let queue_rcu_work() invoke workqueue handler after grace period
+++ +++ * @head_free: List of kfree_rcu() objects waiting for a grace period
+++ +++ * @krcp: Pointer to @kfree_rcu_cpu structure
+++ +++ */
+++ +++
+++ +++struct kfree_rcu_cpu_work {
+++ +++ struct rcu_work rcu_work;
+++ +++ struct rcu_head *head_free;
+++ +++ struct kfree_rcu_cpu *krcp;
+++ +++};
+++ +++
+++ +++/**
+++ +++ * struct kfree_rcu_cpu - batch up kfree_rcu() requests for RCU grace period
+++ +++ * @head: List of kfree_rcu() objects not yet waiting for a grace period
+++ +++ * @krw_arr: Array of batches of kfree_rcu() objects waiting for a grace period
+++ +++ * @lock: Synchronize access to this structure
+++ +++ * @monitor_work: Promote @head to @head_free after KFREE_DRAIN_JIFFIES
+++ +++ * @monitor_todo: Tracks whether a @monitor_work delayed work is pending
+++ +++ * @initialized: The @lock and @rcu_work fields have been initialized
+++ +++ *
+++ +++ * This is a per-CPU structure.  The reason that it is not included in
+++ +++ * the rcu_data structure is to permit this code to be extracted from
+++ +++ * the RCU files.  Such extraction could allow further optimization of
+++ +++ * the interactions with the slab allocators.
+++ +++ */
+++ +++struct kfree_rcu_cpu {
+++ +++ struct rcu_head *head;
+++ +++ struct kfree_rcu_cpu_work krw_arr[KFREE_N_BATCHES];
+++ +++ spinlock_t lock;
+++ +++ struct delayed_work monitor_work;
+++ +++ bool monitor_todo;
+++ +++ bool initialized;
+++ +++};
+++ +++
+++ +++static DEFINE_PER_CPU(struct kfree_rcu_cpu, krc);
+++ +++
+++ +++/*
+++ +++ * This function is invoked in workqueue context after a grace period.
+++ +++ * It frees all the objects queued on ->head_free.
+++ +++ */
+++ +++static void kfree_rcu_work(struct work_struct *work)
+++ +++{
+++ +++ unsigned long flags;
+++ +++ struct rcu_head *head, *next;
+++ +++ struct kfree_rcu_cpu *krcp;
+++ +++ struct kfree_rcu_cpu_work *krwp;
+++ +++
+++ +++ krwp = container_of(to_rcu_work(work),
+++ +++                     struct kfree_rcu_cpu_work, rcu_work);
+++ +++ krcp = krwp->krcp;
+++ +++ spin_lock_irqsave(&krcp->lock, flags);
+++ +++ head = krwp->head_free;
+++ +++ krwp->head_free = NULL;
+++ +++ spin_unlock_irqrestore(&krcp->lock, flags);
+++ +++
+++ +++ // List "head" is now private, so traverse locklessly.
+++ +++ for (; head; head = next) {
+++ +++         unsigned long offset = (unsigned long)head->func;
+++ +++
+++ +++         next = head->next;
+++ +++         // Potentially optimize with kfree_bulk in future.
+++ +++         debug_rcu_head_unqueue(head);
+++ +++         rcu_lock_acquire(&rcu_callback_map);
+++ +++         trace_rcu_invoke_kfree_callback(rcu_state.name, head, offset);
+++ +++
+++ +++         if (!WARN_ON_ONCE(!__is_kfree_rcu_offset(offset))) {
+++ +++                 /* Could be optimized with kfree_bulk() in future. */
+++ +++                 kfree((void *)head - offset);
+++ +++         }
+++ +++
+++ +++         rcu_lock_release(&rcu_callback_map);
+++ +++         cond_resched_tasks_rcu_qs();
+++ +++ }
+++ +++}
+++ +++
+  +  + /*
-  -     * Queue an RCU callback for lazy invocation after a grace period.
-  -     * This will likely be later named something like "call_rcu_lazy()",
-  -     * but this change will require some way of tagging the lazy RCU
-  -     * callbacks in the list of pending callbacks. Until then, this
-  -     * function may only be called from __kfree_rcu().
+++ +++ * Schedule the kfree batch RCU work to run in workqueue context after a GP.
+++ +++ *
+++ +++ * This function is invoked by kfree_rcu_monitor() when the KFREE_DRAIN_JIFFIES
+++ +++ * timeout has been reached.
+++ +++ */
+++ +++static inline bool queue_kfree_rcu_work(struct kfree_rcu_cpu *krcp)
+++ +++{
+++ +++ int i;
+++ +++ struct kfree_rcu_cpu_work *krwp = NULL;
+++ +++
+++ +++ lockdep_assert_held(&krcp->lock);
+++ +++ for (i = 0; i < KFREE_N_BATCHES; i++)
+++ +++         if (!krcp->krw_arr[i].head_free) {
+++ +++                 krwp = &(krcp->krw_arr[i]);
+++ +++                 break;
+++ +++         }
+++ +++
+++ +++ // If a previous RCU batch is in progress, we cannot immediately
+++ +++ // queue another one, so return false to tell caller to retry.
+++ +++ if (!krwp)
+++ +++         return false;
+++ +++
+++ +++ krwp->head_free = krcp->head;
+++ +++ krcp->head = NULL;
+++ +++ INIT_RCU_WORK(&krwp->rcu_work, kfree_rcu_work);
+++ +++ queue_rcu_work(system_wq, &krwp->rcu_work);
+++ +++ return true;
+++ +++}
+++ +++
+++ +++static inline void kfree_rcu_drain_unlock(struct kfree_rcu_cpu *krcp,
+++ +++                                   unsigned long flags)
+++ +++{
+++ +++ // Attempt to start a new batch.
+++ +++ krcp->monitor_todo = false;
+++ +++ if (queue_kfree_rcu_work(krcp)) {
+++ +++         // Success! Our job is done here.
+++ +++         spin_unlock_irqrestore(&krcp->lock, flags);
+++ +++         return;
+++ +++ }
+++ +++
+++ +++ // Previous RCU batch still in progress, try again later.
+++ +++ krcp->monitor_todo = true;
+++ +++ schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+++ +++ spin_unlock_irqrestore(&krcp->lock, flags);
+++ +++}
+++ +++
++  +++/*
-     -  * Queue an RCU callback for lazy invocation after a grace period.
-     -  * This will likely be later named something like "call_rcu_lazy()",
-     -  * but this change will require some way of tagging the lazy RCU
-     -  * callbacks in the list of pending callbacks. Until then, this
-     -  * function may only be called from __kfree_rcu().
+++ +++ * This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
+++ +++ * It invokes kfree_rcu_drain_unlock() to attempt to start another batch.
+++ +++ */
+++ +++static void kfree_rcu_monitor(struct work_struct *work)
+++ +++{
+++ +++ unsigned long flags;
+++ +++ struct kfree_rcu_cpu *krcp = container_of(work, struct kfree_rcu_cpu,
+++ +++                                          monitor_work.work);
+++ +++
+++ +++ spin_lock_irqsave(&krcp->lock, flags);
+++ +++ if (krcp->monitor_todo)
+++ +++         kfree_rcu_drain_unlock(krcp, flags);
+++ +++ else
+++ +++         spin_unlock_irqrestore(&krcp->lock, flags);
+++ +++}
+++ +++
+++ + +/*
+++ +++ * Queue a request for lazy invocation of kfree() after a grace period.
+++ +++ *
+++ +++ * Each kfree_call_rcu() request is added to a batch. The batch will be drained
+++ +++ * every KFREE_DRAIN_JIFFIES number of jiffies. All the objects in the batch
+++ +++ * will be kfree'd in workqueue context. This allows us to:
+++ +++ *
+++ +++ * 1.    Batch requests together to reduce the number of grace periods during
+++ +++ *       heavy kfree_rcu() load.
+++ +++ *
+++ +++ * 2.    It makes it possible to use kfree_bulk() on a large number of
+++ +++ *       kfree_rcu() requests thus reducing cache misses and the per-object
+++ +++ *       overhead of kfree().
         */
        void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
        {
@@@@@@@@ -2696,11 -2697,11 -2682,11 -2886,31 -2696,11 -2702,11 -2696,11 +2879,31 @@@@@@@@ unlock_return
        }
        EXPORT_SYMBOL_GPL(kfree_call_rcu);
        
+++ +++void __init kfree_rcu_scheduler_running(void)
+++ +++{
+++ +++ int cpu;
+++ +++ unsigned long flags;
+++ +++
+++ +++ for_each_online_cpu(cpu) {
+++ +++         struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
+++ +++
+++ +++         spin_lock_irqsave(&krcp->lock, flags);
+++ +++         if (!krcp->head || krcp->monitor_todo) {
+++ +++                 spin_unlock_irqrestore(&krcp->lock, flags);
+++ +++                 continue;
+++ +++         }
+++ +++         krcp->monitor_todo = true;
+++ +++         schedule_delayed_work_on(cpu, &krcp->monitor_work,
+++ +++                                  KFREE_DRAIN_JIFFIES);
+++ +++         spin_unlock_irqrestore(&krcp->lock, flags);
+++ +++ }
+++ +++}
+++ +++
        /*
         * During early boot, any blocking grace-period wait automatically
----- - * implies a grace period.  Later on, this is never the case for PREEMPT.
+++++ + * implies a grace period.  Later on, this is never the case for PREEMPTION.
         *
----- - * Howevr, because a context switch is a grace period for !PREEMPT, any
+++++ + * Howevr, because a context switch is a grace period for !PREEMPTION, any
         * blocking grace-period wait automatically implies a grace period if
         * there is only one CPU online at any point time during execution of
         * either synchronize_rcu() or synchronize_rcu_expedited().  It is OK to
diff --cc kernel/rcu/tree.h

index 055c31781d3ae1a6665136da5a66dd178083f559,f9253ed406ba48f4d2716e31ab6975fb0518d845,ce90c68c184b48457b55babc9863f33d265686c2,15405420b40c197faecc7d3235e78a3380e9886c,055c31781d3ae1a6665136da5a66dd178083f559,055c31781d3ae1a6665136da5a66dd178083f559,055c31781d3ae1a6665136da5a66dd178083f559..0c87e4c161c2fa9f382afc811378bbec84d8e01a
--- 1/kernel/rcu/tree.h
--- 2/kernel/rcu/tree.h
--- 3/kernel/rcu/tree.h
--- 4/kernel/rcu/tree.h
--- 5/kernel/rcu/tree.h
--- 6/kernel/rcu/tree.h
--- 7/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@@@@@@@ -182,8 -182,9 -181,8 -182,7 -182,8 -182,8 -182,8 +181,8 @@@@@@@@ struct rcu_data 
         bool rcu_need_heavy_qs;         /* GP old, so heavy quiescent state! */
         bool rcu_urgent_qs;             /* GP old need light quiescent state. */
         bool rcu_forced_tick;           /* Forced tick to provide QS. */
+ +++++ bool rcu_forced_tick_exp;       /*   ... provide QS to expedited GP. */
        #ifdef CONFIG_RCU_FAST_NO_HZ
--- --- bool all_lazy;                  /* All CPU's CBs lazy at idle start? */
         unsigned long last_accelerate;  /* Last jiffy CBs were accelerated. */
         unsigned long last_advance_all; /* Last jiffy CBs were all advanced. */
         int tick_nohz_enabled_snap;     /* Previously seen value from sysfs. */
diff --cc kernel/rcu/tree_exp.h
Simple merge
diff --cc kernel/rcu/tree_plugin.h
Simple merge
diff --cc kernel/rcu/tree_stall.h
Simple merge
diff --cc kernel/rcu/update.c
Simple merge
author	Paul E. McKenney <paulmck@kernel.org>
	Fri, 24 Jan 2020 18:37:27 +0000 (10:37 -0800)
committer	Paul E. McKenney <paulmck@kernel.org>
	Fri, 24 Jan 2020 18:37:27 +0000 (10:37 -0800)
		1	2	3	4	5	6	7
include/linux/rcupdate.h	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history
include/trace/events/rcu.h	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history
kernel/rcu/rcu.h	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history
kernel/rcu/rcutorture.c	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history
kernel/rcu/srcutree.c	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history
kernel/rcu/tree.c	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history
kernel/rcu/tree.h	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history
kernel/rcu/tree_exp.h	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history
kernel/rcu/tree_plugin.h	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history
kernel/rcu/tree_stall.h	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history
kernel/rcu/update.c	patch |	diff1 |	diff2 |	diff3 |	diff4 |	diff5 |	diff6 |	diff7 |	blob | history