1 /*
2 * Sleepable Read-Copy Update mechanism for mutual exclusion.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, you can access it online at
16 * http://www.gnu.org/licenses/gpl-2.0.html.
17 *
18 * Copyright (C) IBM Corporation, 2006
19 * Copyright (C) Fujitsu, 2012
20 *
21 * Author: Paul McKenney <paulmck@us.ibm.com>
22 * Lai Jiangshan <laijs@cn.fujitsu.com>
23 *
24 * For detailed explanation of Read-Copy Update mechanism see -
25 * Documentation/RCU/ *.txt
26 *
27 */
28
29 #include <linux/export.h>
30 #include <linux/mutex.h>
31 #include <linux/percpu.h>
32 #include <linux/preempt.h>
33 #include <linux/rcupdate_wait.h>
34 #include <linux/sched.h>
35 #include <linux/smp.h>
36 #include <linux/delay.h>
37 #include <linux/module.h>
38 #include <linux/srcu.h>
39
40 #include "rcu.h"
41 #include "rcu_segcblist.h"
42
43 /* Holdoff in nanoseconds for auto-expediting. */
44 #define DEFAULT_SRCU_EXP_HOLDOFF (25 * 1000)
45 static ulong exp_holdoff = DEFAULT_SRCU_EXP_HOLDOFF;
46 module_param(exp_holdoff, ulong, 0444);
47
48 /* Overflow-check frequency. N bits roughly says every 2**N grace periods. */
49 static ulong counter_wrap_check = (ULONG_MAX >> 2);
50 module_param(counter_wrap_check, ulong, 0444);
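/*
 * Illustrative note (not part of this file's logic): because this file
 * is built into the kernel, these module_param() knobs are normally set
 * at boot on the kernel command line with the "srcutree." prefix, and
 * can be read back under /sys/module/srcutree/parameters/. Example
 * values below are purely illustrative:
 *
 *	srcutree.exp_holdoff=30000
 *	srcutree.counter_wrap_check=255
 */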
51
52 static void srcu_invoke_callbacks(struct work_struct *work);
53 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay);
54
55 /*
56 * Initialize SRCU combining tree. Note that statically allocated
57 * srcu_struct structures might already have srcu_read_lock() and
58 * srcu_read_unlock() running against them. So if the is_static parameter
59 * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[].
60 */
61 static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static)
62 {
63 int cpu;
64 int i;
65 int level = 0;
66 int levelspread[RCU_NUM_LVLS];
67 struct srcu_data *sdp;
68 struct srcu_node *snp;
69 struct srcu_node *snp_first;
70
71 /* Work out the overall tree geometry. */
72 sp->level[0] = &sp->node[0];
73 for (i = 1; i < rcu_num_lvls; i++)
74 sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1];
75 rcu_init_levelspread(levelspread, num_rcu_lvl);
76
77 /* Each pass through this loop initializes one srcu_node structure. */
78 rcu_for_each_node_breadth_first(sp, snp) {
79 raw_spin_lock_init(&ACCESS_PRIVATE(snp, lock));
80 WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) !=
81 ARRAY_SIZE(snp->srcu_data_have_cbs));
82 for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) {
83 snp->srcu_have_cbs[i] = 0;
84 snp->srcu_data_have_cbs[i] = 0;
85 }
86 snp->srcu_gp_seq_needed_exp = 0;
87 snp->grplo = -1;
88 snp->grphi = -1;
89 if (snp == &sp->node[0]) {
90 /* Root node, special case. */
91 snp->srcu_parent = NULL;
92 continue;
93 }
94
95 /* Non-root node. */
96 if (snp == sp->level[level + 1])
97 level++;
98 snp->srcu_parent = sp->level[level - 1] +
99 (snp - sp->level[level]) /
100 levelspread[level - 1];
101 }
102
103 /*
104 * Initialize the per-CPU srcu_data array, which feeds into the
105 * leaves of the srcu_node tree.
106 */
107 WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) !=
108 ARRAY_SIZE(sdp->srcu_unlock_count));
109 level = rcu_num_lvls - 1;
110 snp_first = sp->level[level];
111 for_each_possible_cpu(cpu) {
112 sdp = per_cpu_ptr(sp->sda, cpu);
113 raw_spin_lock_init(&ACCESS_PRIVATE(sdp, lock));
114 rcu_segcblist_init(&sdp->srcu_cblist);
115 sdp->srcu_cblist_invoking = false;
116 sdp->srcu_gp_seq_needed = sp->srcu_gp_seq;
117 sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq;
118 sdp->mynode = &snp_first[cpu / levelspread[level]];
119 for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) {
120 if (snp->grplo < 0)
121 snp->grplo = cpu;
122 snp->grphi = cpu;
123 }
124 sdp->cpu = cpu;
125 INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks);
126 sdp->sp = sp;
127 sdp->grpmask = 1 << (cpu - sdp->mynode->grplo);
128 if (is_static)
129 continue;
130
131 /* Dynamically allocated, better be no srcu_read_locks()! */
132 for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) {
133 sdp->srcu_lock_count[i] = 0;
134 sdp->srcu_unlock_count[i] = 0;
135 }
136 }
137 }
138
139 /*
140 * Initialize non-compile-time initialized fields, including the
141 * associated srcu_node and srcu_data structures. The is_static
142 * parameter is passed through to init_srcu_struct_nodes(), and
143 * also tells us that ->sda has already been wired up to srcu_data.
144 */
145 static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static)
146 {
147 mutex_init(&sp->srcu_cb_mutex);
148 mutex_init(&sp->srcu_gp_mutex);
149 sp->srcu_idx = 0;
150 sp->srcu_gp_seq = 0;
151 sp->srcu_barrier_seq = 0;
152 mutex_init(&sp->srcu_barrier_mutex);
153 atomic_set(&sp->srcu_barrier_cpu_cnt, 0);
154 INIT_DELAYED_WORK(&sp->work, process_srcu);
155 if (!is_static)
156 sp->sda = alloc_percpu(struct srcu_data);
157 init_srcu_struct_nodes(sp, is_static);
158 sp->srcu_gp_seq_needed_exp = 0;
159 sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
160 smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */
161 return sp->sda ? 0 : -ENOMEM;
162 }
163
164 #ifdef CONFIG_DEBUG_LOCK_ALLOC
165
166 int __init_srcu_struct(struct srcu_struct *sp, const char *name,
167 struct lock_class_key *key)
168 {
169 /* Don't re-initialize a lock while it is held. */
170 debug_check_no_locks_freed((void *)sp, sizeof(*sp));
171 lockdep_init_map(&sp->dep_map, name, key, 0);
172 raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
173 return init_srcu_struct_fields(sp, false);
174 }
175 EXPORT_SYMBOL_GPL(__init_srcu_struct);
176
177 #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
178
179 /**
180 * init_srcu_struct - initialize a sleep-RCU structure
181 * @sp: structure to initialize.
182 *
183 * Must invoke this on a given srcu_struct before passing that srcu_struct
184 * to any other function. Each srcu_struct represents a separate domain
185 * of SRCU protection.
186 */
187 int init_srcu_struct(struct srcu_struct *sp)
188 {
189 raw_spin_lock_init(&ACCESS_PRIVATE(sp, lock));
190 return init_srcu_struct_fields(sp, false);
191 }
192 EXPORT_SYMBOL_GPL(init_srcu_struct);
193
194 #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
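/*
 * Illustrative usage sketch (hypothetical names, not part of this file):
 * statically allocated SRCU domains are typically created with
 * DEFINE_SRCU() or DEFINE_STATIC_SRCU() and need no explicit
 * initialization, while a dynamically allocated srcu_struct must be
 * paired with init_srcu_struct() and cleanup_srcu_struct(), roughly as
 * follows:
 *
 *	DEFINE_STATIC_SRCU(my_static_srcu);
 *
 *	struct srcu_struct my_srcu;
 *
 *	int my_setup(void)
 *	{
 *		return init_srcu_struct(&my_srcu);	// 0 or -ENOMEM.
 *	}
 *
 *	void my_teardown(void)
 *	{
 *		cleanup_srcu_struct(&my_srcu);
 *	}
 */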
195
196 /*
197 * First-use initialization of statically allocated srcu_struct
198 * structure. Wiring up the combining tree is more than can be
199 * done with compile-time initialization, so this check is added
200 * to each update-side SRCU primitive. Use sp->lock, which -is-
201 * compile-time initialized, to resolve races involving multiple
202 * CPUs trying to garner first-use privileges.
203 */
204 static void check_init_srcu_struct(struct srcu_struct *sp)
205 {
206 unsigned long flags;
207
208 WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT);
209 /* The smp_load_acquire() pairs with the smp_store_release(). */
210 if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/
211 return; /* Already initialized. */
212 raw_spin_lock_irqsave_rcu_node(sp, flags);
213 if (!rcu_seq_state(sp->srcu_gp_seq_needed)) {
214 raw_spin_unlock_irqrestore_rcu_node(sp, flags);
215 return;
216 }
217 init_srcu_struct_fields(sp, true);
218 raw_spin_unlock_irqrestore_rcu_node(sp, flags);
219 }
220
221 /*
222 * Returns approximate total of the readers' ->srcu_lock_count[] values
223 * for the rank of per-CPU counters specified by idx.
224 */
225 static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx)
226 {
227 int cpu;
228 unsigned long sum = 0;
229
230 for_each_possible_cpu(cpu) {
231 struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
232
233 sum += READ_ONCE(cpuc->srcu_lock_count[idx]);
234 }
235 return sum;
236 }
237
238 /*
239 * Returns approximate total of the readers' ->srcu_unlock_count[] values
240 * for the rank of per-CPU counters specified by idx.
241 */
242 static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx)
243 {
244 int cpu;
245 unsigned long sum = 0;
246
247 for_each_possible_cpu(cpu) {
248 struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
249
250 sum += READ_ONCE(cpuc->srcu_unlock_count[idx]);
251 }
252 return sum;
253 }
254
255 /*
256 * Return true if the number of pre-existing readers is determined to
257 * be zero.
258 */
259 static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx)
260 {
261 unsigned long unlocks;
262
263 unlocks = srcu_readers_unlock_idx(sp, idx);
264
265 /*
266 * Make sure that a lock is always counted if the corresponding
267 * unlock is counted. Needs to be a smp_mb() as the read side may
268 * contain a read from a variable that is written to before the
269 * synchronize_srcu() in the write side. In this case smp_mb()s
270 * A and B act like the store buffering pattern.
271 *
272 * This smp_mb() also pairs with smp_mb() C to prevent accesses
273 * after the synchronize_srcu() from being executed before the
274 * grace period ends.
275 */
276 smp_mb(); /* A */
277
278 /*
279 * If the locks are the same as the unlocks, then there must have
280 * been no readers on this index at some time in between. This does
281 * not mean that there are no more readers, as one could have read
282 * the current index but not have incremented the lock counter yet.
283 *
284 * So suppose that the updater is preempted here for so long
285 * that more than ULONG_MAX non-nested readers come and go in
286 * the meantime. It turns out that this cannot result in overflow
287 * because if a reader modifies its unlock count after we read it
288 * above, then that reader's next load of ->srcu_idx is guaranteed
289 * to get the new value, which will cause it to operate on the
290 * other bank of counters, where it cannot contribute to the
291 * overflow of these counters. This means that there is a maximum
292 * of 2*NR_CPUS increments, which cannot overflow given current
293 * systems, especially not on 64-bit systems.
294 *
295 * OK, how about nesting? This does impose a limit on nesting
296 * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient,
297 * especially on 64-bit systems.
298 */
299 return srcu_readers_lock_idx(sp, idx) == unlocks;
300 }
301
302 /**
303  * srcu_readers_active - returns true if there are readers, and false
304  * otherwise
305 * @sp: which srcu_struct to count active readers (holding srcu_read_lock).
306 *
307 * Note that this is not an atomic primitive, and can therefore suffer
308 * severe errors when invoked on an active srcu_struct. That said, it
309 * can be useful as an error check at cleanup time.
310 */
311 static bool srcu_readers_active(struct srcu_struct *sp)
312 {
313 int cpu;
314 unsigned long sum = 0;
315
316 for_each_possible_cpu(cpu) {
317 struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu);
318
319 sum += READ_ONCE(cpuc->srcu_lock_count[0]);
320 sum += READ_ONCE(cpuc->srcu_lock_count[1]);
321 sum -= READ_ONCE(cpuc->srcu_unlock_count[0]);
322 sum -= READ_ONCE(cpuc->srcu_unlock_count[1]);
323 }
324 return sum;
325 }
326
327 #define SRCU_INTERVAL 1
328
329 /*
330 * Return grace-period delay, zero if there are expedited grace
331 * periods pending, SRCU_INTERVAL otherwise.
332 */
333 static unsigned long srcu_get_delay(struct srcu_struct *sp)
334 {
335 if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq),
336 READ_ONCE(sp->srcu_gp_seq_needed_exp)))
337 return 0;
338 return SRCU_INTERVAL;
339 }
340
341 /**
342 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
343 * @sp: structure to clean up.
344 *
345 * Must invoke this after you are finished using a given srcu_struct that
346 * was initialized via init_srcu_struct(), else you leak memory.
347 */
348 void cleanup_srcu_struct(struct srcu_struct *sp)
349 {
350 int cpu;
351
352 if (WARN_ON(!srcu_get_delay(sp)))
353 return; /* Leakage unless caller handles error. */
354 if (WARN_ON(srcu_readers_active(sp)))
355 return; /* Leakage unless caller handles error. */
356 flush_delayed_work(&sp->work);
357 for_each_possible_cpu(cpu)
358 flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
359 if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
360 WARN_ON(srcu_readers_active(sp))) {
361 pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
362 return; /* Caller forgot to stop doing call_srcu()? */
363 }
364 free_percpu(sp->sda);
365 sp->sda = NULL;
366 }
367 EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
368
369 /*
370 * Counts the new reader in the appropriate per-CPU element of the
371 * srcu_struct.
372 * Returns an index that must be passed to the matching srcu_read_unlock().
373 */
374 int __srcu_read_lock(struct srcu_struct *sp)
375 {
376 int idx;
377
378 idx = READ_ONCE(sp->srcu_idx) & 0x1;
379 this_cpu_inc(sp->sda->srcu_lock_count[idx]);
380 smp_mb(); /* B */ /* Avoid leaking the critical section. */
381 return idx;
382 }
383 EXPORT_SYMBOL_GPL(__srcu_read_lock);
384
385 /*
386 * Removes the count for the old reader from the appropriate per-CPU
387 * element of the srcu_struct. Note that this may well be a different
388 * CPU than that which was incremented by the corresponding srcu_read_lock().
389 */
390 void __srcu_read_unlock(struct srcu_struct *sp, int idx)
391 {
392 smp_mb(); /* C */ /* Avoid leaking the critical section. */
393 this_cpu_inc(sp->sda->srcu_unlock_count[idx]);
394 }
395 EXPORT_SYMBOL_GPL(__srcu_read_unlock);
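/*
 * Illustrative read-side usage (hypothetical srcu_struct and pointer,
 * not part of this file): readers normally go through the
 * srcu_read_lock() and srcu_read_unlock() wrappers, which map onto the
 * per-CPU counter increments above, and the returned index must be
 * passed back unchanged:
 *
 *	int idx;
 *
 *	idx = srcu_read_lock(&my_srcu);
 *	p = srcu_dereference(my_ptr, &my_srcu);
 *	if (p)
 *		do_something_with(p);	// May block, unlike plain RCU.
 *	srcu_read_unlock(&my_srcu, idx);
 *
 * The read-side critical section may sleep, but it must not invoke
 * synchronize_srcu() on the same srcu_struct, on pain of deadlock.
 */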
396
397 /*
398 * We use an adaptive strategy for synchronize_srcu() and especially for
399 * synchronize_srcu_expedited(). We spin for a fixed time period
400 * (defined below) to allow SRCU readers to exit their read-side critical
401 * sections. If there are still some readers after a few microseconds,
402 * we repeatedly block for 1-millisecond time periods.
403 */
404 #define SRCU_RETRY_CHECK_DELAY 5
405
406 /*
407 * Start an SRCU grace period.
408 */
409 static void srcu_gp_start(struct srcu_struct *sp)
410 {
411 struct srcu_data *sdp = this_cpu_ptr(sp->sda);
412 int state;
413
414 lockdep_assert_held(&sp->lock);
415 WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
416 rcu_segcblist_advance(&sdp->srcu_cblist,
417 rcu_seq_current(&sp->srcu_gp_seq));
418 (void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
419 rcu_seq_snap(&sp->srcu_gp_seq));
420 smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */
421 rcu_seq_start(&sp->srcu_gp_seq);
422 state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
423 WARN_ON_ONCE(state != SRCU_STATE_SCAN1);
424 }
425
426 /*
427 * Track online CPUs to guide callback workqueue placement.
428 */
429 DEFINE_PER_CPU(bool, srcu_online);
430
431 void srcu_online_cpu(unsigned int cpu)
432 {
433 WRITE_ONCE(per_cpu(srcu_online, cpu), true);
434 }
435
436 void srcu_offline_cpu(unsigned int cpu)
437 {
438 WRITE_ONCE(per_cpu(srcu_online, cpu), false);
439 }
440
441 /*
442 * Place the workqueue handler on the specified CPU if online, otherwise
443  * just run it wherever.  This is useful for placing workqueue handlers
444 * that are to invoke the specified CPU's callbacks.
445 */
446 static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
447 struct delayed_work *dwork,
448 unsigned long delay)
449 {
450 bool ret;
451
452 preempt_disable();
453 if (READ_ONCE(per_cpu(srcu_online, cpu)))
454 ret = queue_delayed_work_on(cpu, wq, dwork, delay);
455 else
456 ret = queue_delayed_work(wq, dwork, delay);
457 preempt_enable();
458 return ret;
459 }
460
461 /*
462 * Schedule callback invocation for the specified srcu_data structure,
463 * if possible, on the corresponding CPU.
464 */
465 static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay)
466 {
467 srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq,
468 &sdp->work, delay);
469 }
470
471 /*
472 * Schedule callback invocation for all srcu_data structures associated
473 * with the specified srcu_node structure that have callbacks for the
474 * just-completed grace period, the one corresponding to idx. If possible,
475 * schedule this invocation on the corresponding CPUs.
476 */
477 static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp,
478 unsigned long mask, unsigned long delay)
479 {
480 int cpu;
481
482 for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
483 if (!(mask & (1 << (cpu - snp->grplo))))
484 continue;
485 srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay);
486 }
487 }
488
489 /*
490 * Note the end of an SRCU grace period. Initiates callback invocation
491 * and starts a new grace period if needed.
492 *
493 * The ->srcu_cb_mutex acquisition does not protect any data, but
494 * instead prevents more than one grace period from starting while we
495 * are initiating callback invocation. This allows the ->srcu_have_cbs[]
496 * array to have a finite number of elements.
497 */
498 static void srcu_gp_end(struct srcu_struct *sp)
499 {
500 unsigned long cbdelay;
501 bool cbs;
502 int cpu;
503 unsigned long flags;
504 unsigned long gpseq;
505 int idx;
506 int idxnext;
507 unsigned long mask;
508 struct srcu_data *sdp;
509 struct srcu_node *snp;
510
511 /* Prevent more than one additional grace period. */
512 mutex_lock(&sp->srcu_cb_mutex);
513
514 /* End the current grace period. */
515 raw_spin_lock_irq_rcu_node(sp);
516 idx = rcu_seq_state(sp->srcu_gp_seq);
517 WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
518 cbdelay = srcu_get_delay(sp);
519 sp->srcu_last_gp_end = ktime_get_mono_fast_ns();
520 rcu_seq_end(&sp->srcu_gp_seq);
521 gpseq = rcu_seq_current(&sp->srcu_gp_seq);
522 if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq))
523 sp->srcu_gp_seq_needed_exp = gpseq;
524 raw_spin_unlock_irq_rcu_node(sp);
525 mutex_unlock(&sp->srcu_gp_mutex);
526 /* A new grace period can start at this point. But only one. */
527
528 /* Initiate callback invocation as needed. */
529 idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs);
530 idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs);
531 rcu_for_each_node_breadth_first(sp, snp) {
532 raw_spin_lock_irq_rcu_node(snp);
533 cbs = false;
534 if (snp >= sp->level[rcu_num_lvls - 1])
535 cbs = snp->srcu_have_cbs[idx] == gpseq;
536 snp->srcu_have_cbs[idx] = gpseq;
537 rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1);
538 if (ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, gpseq))
539 snp->srcu_gp_seq_needed_exp = gpseq;
540 mask = snp->srcu_data_have_cbs[idx];
541 snp->srcu_data_have_cbs[idx] = 0;
542 raw_spin_unlock_irq_rcu_node(snp);
543 if (cbs)
544 srcu_schedule_cbs_snp(sp, snp, mask, cbdelay);
545
546 /* Occasionally prevent srcu_data counter wrap. */
547 if (!(gpseq & counter_wrap_check))
548 for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) {
549 sdp = per_cpu_ptr(sp->sda, cpu);
550 raw_spin_lock_irqsave_rcu_node(sdp, flags);
551 if (ULONG_CMP_GE(gpseq,
552 sdp->srcu_gp_seq_needed + 100))
553 sdp->srcu_gp_seq_needed = gpseq;
554 raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
555 }
556 }
557
558 /* Callback initiation done, allow grace periods after next. */
559 mutex_unlock(&sp->srcu_cb_mutex);
560
561 /* Start a new grace period if needed. */
562 raw_spin_lock_irq_rcu_node(sp);
563 gpseq = rcu_seq_current(&sp->srcu_gp_seq);
564 if (!rcu_seq_state(gpseq) &&
565 ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) {
566 srcu_gp_start(sp);
567 raw_spin_unlock_irq_rcu_node(sp);
568 /* Throttle expedited grace periods: Should be rare! */
569 srcu_reschedule(sp, rcu_seq_ctr(gpseq) & 0x3ff
570 ? 0 : SRCU_INTERVAL);
571 } else {
572 raw_spin_unlock_irq_rcu_node(sp);
573 }
574 }
575
576 /*
577 * Funnel-locking scheme to scalably mediate many concurrent expedited
578 * grace-period requests. This function is invoked for the first known
579 * expedited request for a grace period that has already been requested,
580 * but without expediting. To start a completely new grace period,
581 * whether expedited or not, use srcu_funnel_gp_start() instead.
582 */
583 static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp,
584 unsigned long s)
585 {
586 unsigned long flags;
587
588 for (; snp != NULL; snp = snp->srcu_parent) {
589 if (rcu_seq_done(&sp->srcu_gp_seq, s) ||
590 ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s))
591 return;
592 raw_spin_lock_irqsave_rcu_node(snp, flags);
593 if (ULONG_CMP_GE(snp->srcu_gp_seq_needed_exp, s)) {
594 raw_spin_unlock_irqrestore_rcu_node(snp, flags);
595 return;
596 }
597 WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s);
598 raw_spin_unlock_irqrestore_rcu_node(snp, flags);
599 }
600 raw_spin_lock_irqsave_rcu_node(sp, flags);
601 if (!ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
602 sp->srcu_gp_seq_needed_exp = s;
603 raw_spin_unlock_irqrestore_rcu_node(sp, flags);
604 }
605
606 /*
607 * Funnel-locking scheme to scalably mediate many concurrent grace-period
608 * requests. The winner has to do the work of actually starting grace
609 * period s. Losers must either ensure that their desired grace-period
610 * number is recorded on at least their leaf srcu_node structure, or they
611 * must take steps to invoke their own callbacks.
612 */
613 static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp,
614 unsigned long s, bool do_norm)
615 {
616 unsigned long flags;
617 int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs);
618 struct srcu_node *snp = sdp->mynode;
619 unsigned long snp_seq;
620
621 /* Each pass through the loop does one level of the srcu_node tree. */
622 for (; snp != NULL; snp = snp->srcu_parent) {
623 if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode)
624 return; /* GP already done and CBs recorded. */
625 raw_spin_lock_irqsave_rcu_node(snp, flags);
626 if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) {
627 snp_seq = snp->srcu_have_cbs[idx];
628 if (snp == sdp->mynode && snp_seq == s)
629 snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
630 raw_spin_unlock_irqrestore_rcu_node(snp, flags);
631 if (snp == sdp->mynode && snp_seq != s) {
632 srcu_schedule_cbs_sdp(sdp, do_norm
633 ? SRCU_INTERVAL
634 : 0);
635 return;
636 }
637 if (!do_norm)
638 srcu_funnel_exp_start(sp, snp, s);
639 return;
640 }
641 snp->srcu_have_cbs[idx] = s;
642 if (snp == sdp->mynode)
643 snp->srcu_data_have_cbs[idx] |= sdp->grpmask;
644 if (!do_norm && ULONG_CMP_LT(snp->srcu_gp_seq_needed_exp, s))
645 snp->srcu_gp_seq_needed_exp = s;
646 raw_spin_unlock_irqrestore_rcu_node(snp, flags);
647 }
648
649 /* Top of tree, must ensure the grace period will be started. */
650 raw_spin_lock_irqsave_rcu_node(sp, flags);
651 if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) {
652 /*
653 * Record need for grace period s. Pair with load
654 * acquire setting up for initialization.
655 */
656 smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/
657 }
658 if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s))
659 sp->srcu_gp_seq_needed_exp = s;
660
661 /* If grace period not already done and none in progress, start it. */
662 if (!rcu_seq_done(&sp->srcu_gp_seq, s) &&
663 rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) {
664 WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed));
665 srcu_gp_start(sp);
666 queue_delayed_work(system_power_efficient_wq, &sp->work,
667 srcu_get_delay(sp));
668 }
669 raw_spin_unlock_irqrestore_rcu_node(sp, flags);
670 }
671
672 /*
673 * Wait until all readers counted by array index idx complete, but
674 * loop an additional time if there is an expedited grace period pending.
675 * The caller must ensure that ->srcu_idx is not changed while checking.
676 */
677 static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount)
678 {
679 for (;;) {
680 if (srcu_readers_active_idx_check(sp, idx))
681 return true;
682 if (--trycount + !srcu_get_delay(sp) <= 0)
683 return false;
684 udelay(SRCU_RETRY_CHECK_DELAY);
685 }
686 }
687
688 /*
689 * Increment the ->srcu_idx counter so that future SRCU readers will
690 * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows
691 * us to wait for pre-existing readers in a starvation-free manner.
692 */
693 static void srcu_flip(struct srcu_struct *sp)
694 {
695 /*
696 * Ensure that if this updater saw a given reader's increment
697 * from __srcu_read_lock(), that reader was using an old value
698 * of ->srcu_idx. Also ensure that if a given reader sees the
699 * new value of ->srcu_idx, this updater's earlier scans cannot
700 * have seen that reader's increments (which is OK, because this
701 * grace period need not wait on that reader).
702 */
703 smp_mb(); /* E */ /* Pairs with B and C. */
704
705 WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1);
706
707 /*
708 * Ensure that if the updater misses an __srcu_read_unlock()
709 * increment, that task's next __srcu_read_lock() will see the
710 * above counter update. Note that both this memory barrier
711 * and the one in srcu_readers_active_idx_check() provide the
712 * guarantee for __srcu_read_lock().
713 */
714 smp_mb(); /* D */ /* Pairs with C. */
715 }
716
717 /*
718 * If SRCU is likely idle, return true, otherwise return false.
719 *
720  * Note that it is OK for several current from-idle requests for a new
721  * grace period to specify expediting, because they will all end up
722  * requesting the same grace period anyhow.  So no loss.
723 *
724 * Note also that if any CPU (including the current one) is still invoking
725 * callbacks, this function will nevertheless say "idle". This is not
726 * ideal, but the overhead of checking all CPUs' callback lists is even
727 * less ideal, especially on large systems. Furthermore, the wakeup
728 * can happen before the callback is fully removed, so we have no choice
729 * but to accept this type of error.
730 *
731 * This function is also subject to counter-wrap errors, but let's face
732 * it, if this function was preempted for enough time for the counters
733 * to wrap, it really doesn't matter whether or not we expedite the grace
734 * period. The extra overhead of a needlessly expedited grace period is
735  * negligible when amortized over that time period, and the extra latency
736 * of a needlessly non-expedited grace period is similarly negligible.
737 */
738 static bool srcu_might_be_idle(struct srcu_struct *sp)
739 {
740 unsigned long curseq;
741 unsigned long flags;
742 struct srcu_data *sdp;
743 unsigned long t;
744
745 /* If the local srcu_data structure has callbacks, not idle. */
746 local_irq_save(flags);
747 sdp = this_cpu_ptr(sp->sda);
748 if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) {
749 local_irq_restore(flags);
750 return false; /* Callbacks already present, so not idle. */
751 }
752 local_irq_restore(flags);
753
754 /*
755  * No local callbacks, so probabilistically probe global state.
756  * Exact information would require acquiring locks, which would
757  * kill scalability, hence the probabilistic nature of the probe.
758 */
759
760 /* First, see if enough time has passed since the last GP. */
761 t = ktime_get_mono_fast_ns();
762 if (exp_holdoff == 0 ||
763 time_in_range_open(t, sp->srcu_last_gp_end,
764 sp->srcu_last_gp_end + exp_holdoff))
765 return false; /* Too soon after last GP. */
766
767 /* Next, check for probable idleness. */
768 curseq = rcu_seq_current(&sp->srcu_gp_seq);
769 smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */
770 if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed)))
771 return false; /* Grace period in progress, so not idle. */
772 smp_mb(); /* Order ->srcu_gp_seq with prior access. */
773 if (curseq != rcu_seq_current(&sp->srcu_gp_seq))
774 return false; /* GP # changed, so not idle. */
775 return true; /* With reasonable probability, idle! */
776 }
777
778 /*
779 * SRCU callback function to leak a callback.
780 */
781 static void srcu_leak_callback(struct rcu_head *rhp)
782 {
783 }
784
785 /*
786 * Enqueue an SRCU callback on the srcu_data structure associated with
787 * the current CPU and the specified srcu_struct structure, initiating
788 * grace-period processing if it is not already running.
789 *
790 * Note that all CPUs must agree that the grace period extended beyond
791  * all pre-existing SRCU read-side critical sections.  On systems with
792 * more than one CPU, this means that when "func()" is invoked, each CPU
793 * is guaranteed to have executed a full memory barrier since the end of
794 * its last corresponding SRCU read-side critical section whose beginning
795  * preceded the call to call_srcu().  It also means that each CPU executing
796 * an SRCU read-side critical section that continues beyond the start of
797 * "func()" must have executed a memory barrier after the call_rcu()
798 * but before the beginning of that SRCU read-side critical section.
799 * Note that these guarantees include CPUs that are offline, idle, or
800 * executing in user mode, as well as CPUs that are executing in the kernel.
801 *
802  * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the
803 * resulting SRCU callback function "func()", then both CPU A and CPU
804 * B are guaranteed to execute a full memory barrier during the time
805  * interval between the call to call_srcu() and the invocation of "func()".
806 * This guarantee applies even if CPU A and CPU B are the same CPU (but
807 * again only if the system has more than one CPU).
808 *
809 * Of course, these guarantees apply only for invocations of call_srcu(),
810 * srcu_read_lock(), and srcu_read_unlock() that are all passed the same
811 * srcu_struct structure.
812 */
813 void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
814 rcu_callback_t func, bool do_norm)
815 {
816 unsigned long flags;
817 bool needexp = false;
818 bool needgp = false;
819 unsigned long s;
820 struct srcu_data *sdp;
821
822 check_init_srcu_struct(sp);
823 if (debug_rcu_head_queue(rhp)) {
824 /* Probable double call_srcu(), so leak the callback. */
825 WRITE_ONCE(rhp->func, srcu_leak_callback);
826 WARN_ONCE(1, "call_srcu(): Leaked duplicate callback\n");
827 return;
828 }
829 rhp->func = func;
830 local_irq_save(flags);
831 sdp = this_cpu_ptr(sp->sda);
832 raw_spin_lock_rcu_node(sdp);
833 rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false);
834 rcu_segcblist_advance(&sdp->srcu_cblist,
835 rcu_seq_current(&sp->srcu_gp_seq));
836 s = rcu_seq_snap(&sp->srcu_gp_seq);
837 (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s);
838 if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) {
839 sdp->srcu_gp_seq_needed = s;
840 needgp = true;
841 }
842 if (!do_norm && ULONG_CMP_LT(sdp->srcu_gp_seq_needed_exp, s)) {
843 sdp->srcu_gp_seq_needed_exp = s;
844 needexp = true;
845 }
846 raw_spin_unlock_irqrestore_rcu_node(sdp, flags);
847 if (needgp)
848 srcu_funnel_gp_start(sp, sdp, s, do_norm);
849 else if (needexp)
850 srcu_funnel_exp_start(sp, sdp->mynode, s);
851 }
852
853 /**
854 * call_srcu() - Queue a callback for invocation after an SRCU grace period
855  * @sp: srcu_struct on which to queue the callback
856 * @head: structure to be used for queueing the SRCU callback.
857 * @func: function to be invoked after the SRCU grace period
858 *
859 * The callback function will be invoked some time after a full SRCU
860 * grace period elapses, in other words after all pre-existing SRCU
861 * read-side critical sections have completed. However, the callback
862 * function might well execute concurrently with other SRCU read-side
863 * critical sections that started after call_srcu() was invoked. SRCU
864 * read-side critical sections are delimited by srcu_read_lock() and
865 * srcu_read_unlock(), and may be nested.
866 *
867 * The callback will be invoked from process context, but must nevertheless
868 * be fast and must not block.
869 */
870 void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp,
871 rcu_callback_t func)
872 {
873 __call_srcu(sp, rhp, func, true);
874 }
875 EXPORT_SYMBOL_GPL(call_srcu);
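/*
 * Illustrative call_srcu() usage (hypothetical types and names, not part
 * of this file): the rcu_head is typically embedded in the protected
 * structure, and the callback uses container_of() to recover and free
 * that structure once all pre-existing SRCU readers have finished:
 *
 *	struct foo {
 *		struct rcu_head rh;
 *		int data;
 *	};
 *
 *	static void foo_reclaim(struct rcu_head *rhp)
 *	{
 *		kfree(container_of(rhp, struct foo, rh));
 *	}
 *
 *	// After unpublishing old_foo so no new readers can find it:
 *	call_srcu(&my_srcu, &old_foo->rh, foo_reclaim);
 */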
876
877 /*
878 * Helper function for synchronize_srcu() and synchronize_srcu_expedited().
879 */
880 static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm)
881 {
882 struct rcu_synchronize rcu;
883
884 RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) ||
885 lock_is_held(&rcu_bh_lock_map) ||
886 lock_is_held(&rcu_lock_map) ||
887 lock_is_held(&rcu_sched_lock_map),
888 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section");
889
890 if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
891 return;
892 might_sleep();
893 check_init_srcu_struct(sp);
894 init_completion(&rcu.completion);
895 init_rcu_head_on_stack(&rcu.head);
896 __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm);
897 wait_for_completion(&rcu.completion);
898 destroy_rcu_head_on_stack(&rcu.head);
899 }
900
901 /**
902 * synchronize_srcu_expedited - Brute-force SRCU grace period
903 * @sp: srcu_struct with which to synchronize.
904 *
905 * Wait for an SRCU grace period to elapse, but be more aggressive about
906 * spinning rather than blocking when waiting.
907 *
908 * Note that synchronize_srcu_expedited() has the same deadlock and
909 * memory-ordering properties as does synchronize_srcu().
910 */
911 void synchronize_srcu_expedited(struct srcu_struct *sp)
912 {
913 __synchronize_srcu(sp, rcu_gp_is_normal());
914 }
915 EXPORT_SYMBOL_GPL(synchronize_srcu_expedited);
916
917 /**
918 * synchronize_srcu - wait for prior SRCU read-side critical-section completion
919 * @sp: srcu_struct with which to synchronize.
920 *
921  * Wait for the counts of both index values to drain to zero.  To avoid
922  * possible starvation of synchronize_srcu(), it first waits for the count
923  * of the index=((->srcu_idx & 1) ^ 1) to drain to zero,
924  * and then flips ->srcu_idx and waits for the count of the other index.
925 *
926 * Can block; must be called from process context.
927 *
928 * Note that it is illegal to call synchronize_srcu() from the corresponding
929 * SRCU read-side critical section; doing so will result in deadlock.
930 * However, it is perfectly legal to call synchronize_srcu() on one
931 * srcu_struct from some other srcu_struct's read-side critical section,
932 * as long as the resulting graph of srcu_structs is acyclic.
933 *
934 * There are memory-ordering constraints implied by synchronize_srcu().
935 * On systems with more than one CPU, when synchronize_srcu() returns,
936 * each CPU is guaranteed to have executed a full memory barrier since
937  * the end of its last corresponding SRCU read-side critical section
938 * whose beginning preceded the call to synchronize_srcu(). In addition,
939 * each CPU having an SRCU read-side critical section that extends beyond
940 * the return from synchronize_srcu() is guaranteed to have executed a
941 * full memory barrier after the beginning of synchronize_srcu() and before
942 * the beginning of that SRCU read-side critical section. Note that these
943 * guarantees include CPUs that are offline, idle, or executing in user mode,
944 * as well as CPUs that are executing in the kernel.
945 *
946 * Furthermore, if CPU A invoked synchronize_srcu(), which returned
947 * to its caller on CPU B, then both CPU A and CPU B are guaranteed
948 * to have executed a full memory barrier during the execution of
949 * synchronize_srcu(). This guarantee applies even if CPU A and CPU B
950 * are the same CPU, but again only if the system has more than one CPU.
951 *
952 * Of course, these memory-ordering guarantees apply only when
953 * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are
954 * passed the same srcu_struct structure.
955 *
956 * If SRCU is likely idle, expedite the first request. This semantic
957 * was provided by Classic SRCU, and is relied upon by its users, so TREE
958 * SRCU must also provide it. Note that detecting idleness is heuristic
959 * and subject to both false positives and negatives.
960 */
961 void synchronize_srcu(struct srcu_struct *sp)
962 {
963 if (srcu_might_be_idle(sp) || rcu_gp_is_expedited())
964 synchronize_srcu_expedited(sp);
965 else
966 __synchronize_srcu(sp, true);
967 }
968 EXPORT_SYMBOL_GPL(synchronize_srcu);
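/*
 * Illustrative updater pattern (hypothetical names, not part of this
 * file): unpublish the old item under the update-side lock, wait for all
 * pre-existing SRCU readers with synchronize_srcu(), then free it
 * directly instead of using call_srcu():
 *
 *	old = rcu_dereference_protected(my_ptr, lockdep_is_held(&my_lock));
 *	rcu_assign_pointer(my_ptr, new);
 *	synchronize_srcu(&my_srcu);	// All pre-existing readers done.
 *	kfree(old);
 */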
969
970 /*
971 * Callback function for srcu_barrier() use.
972 */
973 static void srcu_barrier_cb(struct rcu_head *rhp)
974 {
975 struct srcu_data *sdp;
976 struct srcu_struct *sp;
977
978 sdp = container_of(rhp, struct srcu_data, srcu_barrier_head);
979 sp = sdp->sp;
980 if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt))
981 complete(&sp->srcu_barrier_completion);
982 }
983
984 /**
985 * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete.
986 * @sp: srcu_struct on which to wait for in-flight callbacks.
987 */
988 void srcu_barrier(struct srcu_struct *sp)
989 {
990 int cpu;
991 struct srcu_data *sdp;
992 unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq);
993
994 check_init_srcu_struct(sp);
995 mutex_lock(&sp->srcu_barrier_mutex);
996 if (rcu_seq_done(&sp->srcu_barrier_seq, s)) {
997 smp_mb(); /* Force ordering following return. */
998 mutex_unlock(&sp->srcu_barrier_mutex);
999 return; /* Someone else did our work for us. */
1000 }
1001 rcu_seq_start(&sp->srcu_barrier_seq);
1002 init_completion(&sp->srcu_barrier_completion);
1003
1004 /* Initial count prevents reaching zero until all CBs are posted. */
1005 atomic_set(&sp->srcu_barrier_cpu_cnt, 1);
1006
1007 /*
1008 * Each pass through this loop enqueues a callback, but only
1009 * on CPUs already having callbacks enqueued. Note that if
1010  * a CPU already has callbacks enqueued, it must have already
1011 * registered the need for a future grace period, so all we
1012 * need do is enqueue a callback that will use the same
1013 * grace period as the last callback already in the queue.
1014 */
1015 for_each_possible_cpu(cpu) {
1016 sdp = per_cpu_ptr(sp->sda, cpu);
1017 raw_spin_lock_irq_rcu_node(sdp);
1018 atomic_inc(&sp->srcu_barrier_cpu_cnt);
1019 sdp->srcu_barrier_head.func = srcu_barrier_cb;
1020 debug_rcu_head_queue(&sdp->srcu_barrier_head);
1021 if (!rcu_segcblist_entrain(&sdp->srcu_cblist,
1022 &sdp->srcu_barrier_head, 0)) {
1023 debug_rcu_head_unqueue(&sdp->srcu_barrier_head);
1024 atomic_dec(&sp->srcu_barrier_cpu_cnt);
1025 }
1026 raw_spin_unlock_irq_rcu_node(sdp);
1027 }
1028
1029 /* Remove the initial count, at which point reaching zero can happen. */
1030 if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt))
1031 complete(&sp->srcu_barrier_completion);
1032 wait_for_completion(&sp->srcu_barrier_completion);
1033
1034 rcu_seq_end(&sp->srcu_barrier_seq);
1035 mutex_unlock(&sp->srcu_barrier_mutex);
1036 }
1037 EXPORT_SYMBOL_GPL(srcu_barrier);
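/*
 * Illustrative shutdown sequence (hypothetical names, not part of this
 * file): before invoking cleanup_srcu_struct() on a domain that has used
 * call_srcu(), first prevent any further call_srcu() invocations, then
 * wait for the already-queued callbacks with srcu_barrier():
 *
 *	stop_queueing_my_callbacks();	// Hypothetical quiescing step.
 *	srcu_barrier(&my_srcu);
 *	cleanup_srcu_struct(&my_srcu);
 */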
1038
1039 /**
1040 * srcu_batches_completed - return batches completed.
1041 * @sp: srcu_struct on which to report batch completion.
1042 *
1043 * Report the number of batches, correlated with, but not necessarily
1044 * precisely the same as, the number of grace periods that have elapsed.
1045 */
1046 unsigned long srcu_batches_completed(struct srcu_struct *sp)
1047 {
1048 return sp->srcu_idx;
1049 }
1050 EXPORT_SYMBOL_GPL(srcu_batches_completed);
1051
1052 /*
1053 * Core SRCU state machine. Push state bits of ->srcu_gp_seq
1054 * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has
1055 * completed in that state.
1056 */
1057 static void srcu_advance_state(struct srcu_struct *sp)
1058 {
1059 int idx;
1060
1061 mutex_lock(&sp->srcu_gp_mutex);
1062
1063 /*
1064 * Because readers might be delayed for an extended period after
1065 * fetching ->srcu_idx for their index, at any point in time there
1066 * might well be readers using both idx=0 and idx=1. We therefore
1067 * need to wait for readers to clear from both index values before
1068 * invoking a callback.
1069 *
1070 * The load-acquire ensures that we see the accesses performed
1071 * by the prior grace period.
1072 */
1073 idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */
1074 if (idx == SRCU_STATE_IDLE) {
1075 raw_spin_lock_irq_rcu_node(sp);
1076 if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
1077 WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq));
1078 raw_spin_unlock_irq_rcu_node(sp);
1079 mutex_unlock(&sp->srcu_gp_mutex);
1080 return;
1081 }
1082 idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq));
1083 if (idx == SRCU_STATE_IDLE)
1084 srcu_gp_start(sp);
1085 raw_spin_unlock_irq_rcu_node(sp);
1086 if (idx != SRCU_STATE_IDLE) {
1087 mutex_unlock(&sp->srcu_gp_mutex);
1088 return; /* Someone else started the grace period. */
1089 }
1090 }
1091
1092 if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) {
1093 idx = 1 ^ (sp->srcu_idx & 1);
1094 if (!try_check_zero(sp, idx, 1)) {
1095 mutex_unlock(&sp->srcu_gp_mutex);
1096 return; /* readers present, retry later. */
1097 }
1098 srcu_flip(sp);
1099 rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2);
1100 }
1101
1102 if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) {
1103
1104 /*
1105 * SRCU read-side critical sections are normally short,
1106 * so check at least twice in quick succession after a flip.
1107 */
1108 idx = 1 ^ (sp->srcu_idx & 1);
1109 if (!try_check_zero(sp, idx, 2)) {
1110 mutex_unlock(&sp->srcu_gp_mutex);
1111 return; /* readers present, retry later. */
1112 }
1113 srcu_gp_end(sp); /* Releases ->srcu_gp_mutex. */
1114 }
1115 }
1116
1117 /*
1118 * Invoke a limited number of SRCU callbacks that have passed through
1119 * their grace period. If there are more to do, SRCU will reschedule
1120 * the workqueue. Note that needed memory barriers have been executed
1121 * in this task's context by srcu_readers_active_idx_check().
1122 */
1123 static void srcu_invoke_callbacks(struct work_struct *work)
1124 {
1125 bool more;
1126 struct rcu_cblist ready_cbs;
1127 struct rcu_head *rhp;
1128 struct srcu_data *sdp;
1129 struct srcu_struct *sp;
1130
1131 sdp = container_of(work, struct srcu_data, work.work);
1132 sp = sdp->sp;
1133 rcu_cblist_init(&ready_cbs);
1134 raw_spin_lock_irq_rcu_node(sdp);
1135 rcu_segcblist_advance(&sdp->srcu_cblist,
1136 rcu_seq_current(&sp->srcu_gp_seq));
1137 if (sdp->srcu_cblist_invoking ||
1138 !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) {
1139 raw_spin_unlock_irq_rcu_node(sdp);
1140 return; /* Someone else on the job or nothing to do. */
1141 }
1142
1143 /* We are on the job! Extract and invoke ready callbacks. */
1144 sdp->srcu_cblist_invoking = true;
1145 rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs);
1146 raw_spin_unlock_irq_rcu_node(sdp);
1147 rhp = rcu_cblist_dequeue(&ready_cbs);
1148 for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) {
1149 debug_rcu_head_unqueue(rhp);
1150 local_bh_disable();
1151 rhp->func(rhp);
1152 local_bh_enable();
1153 }
1154
1155 /*
1156 * Update counts, accelerate new callbacks, and if needed,
1157 * schedule another round of callback invocation.
1158 */
1159 raw_spin_lock_irq_rcu_node(sdp);
1160 rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs);
1161 (void)rcu_segcblist_accelerate(&sdp->srcu_cblist,
1162 rcu_seq_snap(&sp->srcu_gp_seq));
1163 sdp->srcu_cblist_invoking = false;
1164 more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist);
1165 raw_spin_unlock_irq_rcu_node(sdp);
1166 if (more)
1167 srcu_schedule_cbs_sdp(sdp, 0);
1168 }
1169
1170 /*
1171 * Finished one round of SRCU grace period. Start another if there are
1172 * more SRCU callbacks queued, otherwise put SRCU into not-running state.
1173 */
1174 static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay)
1175 {
1176 bool pushgp = true;
1177
1178 raw_spin_lock_irq_rcu_node(sp);
1179 if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) {
1180 if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) {
1181 /* All requests fulfilled, time to go idle. */
1182 pushgp = false;
1183 }
1184 } else if (!rcu_seq_state(sp->srcu_gp_seq)) {
1185 /* Outstanding request and no GP. Start one. */
1186 srcu_gp_start(sp);
1187 }
1188 raw_spin_unlock_irq_rcu_node(sp);
1189
1190 if (pushgp)
1191 queue_delayed_work(system_power_efficient_wq, &sp->work, delay);
1192 }
1193
1194 /*
1195 * This is the work-queue function that handles SRCU grace periods.
1196 */
1197 void process_srcu(struct work_struct *work)
1198 {
1199 struct srcu_struct *sp;
1200
1201 sp = container_of(work, struct srcu_struct, work.work);
1202
1203 srcu_advance_state(sp);
1204 srcu_reschedule(sp, srcu_get_delay(sp));
1205 }
1206 EXPORT_SYMBOL_GPL(process_srcu);
1207
1208 void srcutorture_get_gp_data(enum rcutorture_type test_type,
1209 struct srcu_struct *sp, int *flags,
1210 unsigned long *gpnum, unsigned long *completed)
1211 {
1212 if (test_type != SRCU_FLAVOR)
1213 return;
1214 *flags = 0;
1215 *completed = rcu_seq_ctr(sp->srcu_gp_seq);
1216 *gpnum = rcu_seq_ctr(sp->srcu_gp_seq_needed);
1217 }
1218 EXPORT_SYMBOL_GPL(srcutorture_get_gp_data);
1219
1220 static int __init srcu_bootup_announce(void)
1221 {
1222 pr_info("Hierarchical SRCU implementation.\n");
1223 if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
1224 pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
1225 return 0;
1226 }
1227 early_initcall(srcu_bootup_announce);