/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Definitions for the 'struct ptr_ring' data structure.
 *
 * Author:
 *	Michael S. Tsirkin <mst@redhat.com>
 *
 * Copyright (C) 2016 Red Hat, Inc.
 *
 * This is a limited-size FIFO maintaining pointers in FIFO order, with
 * one CPU producing entries and another consuming entries from the FIFO.
 *
 * This implementation tries to minimize cache contention when there is a
 * single producer and a single consumer CPU.
 */

#ifndef _LINUX_PTR_RING_H
#define _LINUX_PTR_RING_H 1

#ifdef __KERNEL__
#include <linux/spinlock.h>
#include <linux/cache.h>
#include <linux/types.h>
#include <linux/compiler.h>
#include <linux/slab.h>
#include <asm/errno.h>
#endif

struct ptr_ring {
	int producer ____cacheline_aligned_in_smp;
	spinlock_t producer_lock;
	int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */
	int consumer_tail; /* next entry to invalidate */
	spinlock_t consumer_lock;
	/* Shared consumer/producer data */
	/* Read-only by both the producer and the consumer */
	int size ____cacheline_aligned_in_smp; /* max entries in queue */
	int batch; /* number of entries to consume in a batch */
	void **queue;
};
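
/*
 * Typical usage (an illustrative sketch only, with hypothetical names such as
 * my_ring, obj, handle() and drop()): one CPU produces into the ring while
 * another consumes from it, each side taking only its own lock through the
 * helpers declared below.
 *
 *	struct ptr_ring my_ring;
 *
 *	if (ptr_ring_init(&my_ring, 512, GFP_KERNEL))
 *		return -ENOMEM;
 *
 *	producer:
 *		if (ptr_ring_produce(&my_ring, obj))
 *			drop(obj);	(ring full: -ENOSPC)
 *	consumer:
 *		while ((obj = ptr_ring_consume(&my_ring)))
 *			handle(obj);
 *
 *	ptr_ring_cleanup(&my_ring, NULL);
 */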

/* Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax().
 *
 * NB: this is unlike __ptr_ring_empty in that callers must hold producer_lock:
 * see e.g. ptr_ring_full.
 */
static inline bool __ptr_ring_full(struct ptr_ring *r)
{
	return r->queue[r->producer];
}
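
/*
 * Example (an illustrative sketch, not a prescribed pattern): polling for
 * space with __ptr_ring_full must be done under producer_lock, and the
 * cpu_relax() provides the compiler barrier required above:
 *
 *	spin_lock(&r->producer_lock);
 *	while (__ptr_ring_full(r))
 *		cpu_relax();
 *	__ptr_ring_produce(r, ptr);
 *	spin_unlock(&r->producer_lock);
 */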

static inline bool ptr_ring_full(struct ptr_ring *r)
{
	bool ret;

	spin_lock(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock(&r->producer_lock);

	return ret;
}

static inline bool ptr_ring_full_irq(struct ptr_ring *r)
{
	bool ret;

	spin_lock_irq(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock_irq(&r->producer_lock);

	return ret;
}

static inline bool ptr_ring_full_any(struct ptr_ring *r)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&r->producer_lock, flags);
	ret = __ptr_ring_full(r);
	spin_unlock_irqrestore(&r->producer_lock, flags);

	return ret;
}

static inline bool ptr_ring_full_bh(struct ptr_ring *r)
{
	bool ret;

	spin_lock_bh(&r->producer_lock);
	ret = __ptr_ring_full(r);
	spin_unlock_bh(&r->producer_lock);

	return ret;
}

/* Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax(). Callers must hold producer_lock.
 * Callers are responsible for making sure the pointer that is being queued
 * points to valid data.
 */
static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
	if (unlikely(!r->size) || r->queue[r->producer])
		return -ENOSPC;

	/* Make sure the pointer we are storing points to valid data. */
	/* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */
	smp_wmb();

	WRITE_ONCE(r->queue[r->producer++], ptr);
	if (unlikely(r->producer >= r->size))
		r->producer = 0;
	return 0;
}

/*
 * Note: resize (below) nests producer lock within consumer lock, so if you
 * consume in interrupt or BH context, you must disable interrupts/BH when
 * calling this.
 */
static inline int ptr_ring_produce(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock(&r->producer_lock);

	return ret;
}

static inline int ptr_ring_produce_irq(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock_irq(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_irq(&r->producer_lock);

	return ret;
}

static inline int ptr_ring_produce_any(struct ptr_ring *r, void *ptr)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&r->producer_lock, flags);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_irqrestore(&r->producer_lock, flags);

	return ret;
}

static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr)
{
	int ret;

	spin_lock_bh(&r->producer_lock);
	ret = __ptr_ring_produce(r, ptr);
	spin_unlock_bh(&r->producer_lock);

	return ret;
}

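/*
 * Example (an illustrative sketch, with a hypothetical drop()): a producer
 * whose ring is consumed from BH context uses the _bh variant, and treats
 * -ENOSPC as "ring full":
 *
 *	if (ptr_ring_produce_bh(&my_ring, obj) == -ENOSPC)
 *		drop(obj);
 */
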
static inline void *__ptr_ring_peek(struct ptr_ring *r)
{
	if (likely(r->size))
		return READ_ONCE(r->queue[r->consumer_head]);
	return NULL;
}

/*
 * Test ring empty status without taking any locks.
 *
 * NB: This is only safe to call if the ring is never resized.
 *
 * However, if some other CPU consumes ring entries at the same time, the
 * value returned is not guaranteed to be correct.
 *
 * In this case - to avoid incorrectly detecting the ring
 * as empty - the CPU consuming the ring entries is responsible
 * for either consuming all ring entries until the ring is empty,
 * or synchronizing with some other CPU and causing it to
 * re-test __ptr_ring_empty and/or consume the ring entries
 * after the synchronization point.
 *
 * Note: callers invoking this in a loop must use a compiler barrier,
 * for example cpu_relax().
 */
static inline bool __ptr_ring_empty(struct ptr_ring *r)
{
	if (likely(r->size))
		return !r->queue[READ_ONCE(r->consumer_head)];
	return true;
}
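
/*
 * Example (an illustrative sketch, valid only for rings that are never
 * resized): skip the lock on the common empty path, then let the locked
 * helper re-check before actually consuming:
 *
 *	if (__ptr_ring_empty(r))
 *		return NULL;
 *	return ptr_ring_consume(r);
 */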

static inline bool ptr_ring_empty(struct ptr_ring *r)
{
	bool ret;

	spin_lock(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock(&r->consumer_lock);

	return ret;
}

static inline bool ptr_ring_empty_irq(struct ptr_ring *r)
{
	bool ret;

	spin_lock_irq(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock_irq(&r->consumer_lock);

	return ret;
}

static inline bool ptr_ring_empty_any(struct ptr_ring *r)
{
	unsigned long flags;
	bool ret;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ret = __ptr_ring_empty(r);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ret;
}

static inline bool ptr_ring_empty_bh(struct ptr_ring *r)
{
	bool ret;

	spin_lock_bh(&r->consumer_lock);
	ret = __ptr_ring_empty(r);
	spin_unlock_bh(&r->consumer_lock);

	return ret;
}

/* Must only be called after __ptr_ring_peek returned !NULL */
static inline void __ptr_ring_discard_one(struct ptr_ring *r)
{
	/* Fundamentally, what we want to do is update consumer
	 * index and zero out the entry so producer can reuse it.
	 * Doing it naively at each consume would be as simple as:
	 *       consumer = r->consumer;
	 *       r->queue[consumer++] = NULL;
	 *       if (unlikely(consumer >= r->size))
	 *               consumer = 0;
	 *       r->consumer = consumer;
	 * but that is suboptimal when the ring is full as producer is writing
	 * out new entries in the same cache line.  Defer these updates until a
	 * batch of entries has been consumed.
	 */
	/* Note: we must keep consumer_head valid at all times for __ptr_ring_empty
	 * to work correctly.
	 */
	int consumer_head = r->consumer_head;
	int head = consumer_head++;

	/* Once we have processed enough entries invalidate them in
	 * the ring all at once so producer can reuse their space in the ring.
	 * We also do this when we reach end of the ring - not mandatory
	 * but helps keep the implementation simple.
	 */
	if (unlikely(consumer_head - r->consumer_tail >= r->batch ||
		     consumer_head >= r->size)) {
		/* Zero out entries in reverse order: this way the cache line
		 * that the producer might currently be reading is touched last;
		 * the producer won't make progress and touch other cache lines
		 * besides the first one until we write out all entries.
		 */
		while (likely(head >= r->consumer_tail))
			r->queue[head--] = NULL;
		r->consumer_tail = consumer_head;
	}
	if (unlikely(consumer_head >= r->size)) {
		consumer_head = 0;
		r->consumer_tail = 0;
	}
	/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
	WRITE_ONCE(r->consumer_head, consumer_head);
}
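
/*
 * Illustrative arithmetic (assuming a batch of 16): consumer_head still
 * advances on every consume, but the shared queue slots are only NULLed out
 * once per 16 consumed entries (a burst of 16 stores, in reverse order)
 * rather than on every single consume.
 */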

static inline void *__ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	/* The READ_ONCE in __ptr_ring_peek guarantees that anyone
	 * accessing data through the pointer is up to date. Pairs
	 * with smp_wmb in __ptr_ring_produce.
	 */
	ptr = __ptr_ring_peek(r);
	if (ptr)
		__ptr_ring_discard_one(r);

	return ptr;
}

static inline int __ptr_ring_consume_batched(struct ptr_ring *r,
					     void **array, int n)
{
	void *ptr;
	int i;

	for (i = 0; i < n; i++) {
		ptr = __ptr_ring_consume(r);
		if (!ptr)
			break;
		array[i] = ptr;
	}

	return i;
}
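
/*
 * Example (an illustrative sketch, with a hypothetical handle()): draining up
 * to 16 entries in a single locked section and processing them outside the
 * lock:
 *
 *	void *objs[16];
 *	int i, n;
 *
 *	spin_lock(&r->consumer_lock);
 *	n = __ptr_ring_consume_batched(r, objs, 16);
 *	spin_unlock(&r->consumer_lock);
 *	for (i = 0; i < n; i++)
 *		handle(objs[i]);
 *
 * The locked wrappers below (e.g. ptr_ring_consume_batched) take the lock
 * themselves.
 */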

/*
 * Note: resize (below) nests producer lock within consumer lock, so if you
 * call this in interrupt or BH context, you must disable interrupts/BH when
 * producing.
 */
static inline void *ptr_ring_consume(struct ptr_ring *r)
{
	void *ptr;

	spin_lock(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock(&r->consumer_lock);

	return ptr;
}

static inline void *ptr_ring_consume_irq(struct ptr_ring *r)
{
	void *ptr;

	spin_lock_irq(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock_irq(&r->consumer_lock);

	return ptr;
}

static inline void *ptr_ring_consume_any(struct ptr_ring *r)
{
	unsigned long flags;
	void *ptr;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ptr = __ptr_ring_consume(r);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ptr;
}

static inline void *ptr_ring_consume_bh(struct ptr_ring *r)
{
	void *ptr;

	spin_lock_bh(&r->consumer_lock);
	ptr = __ptr_ring_consume(r);
	spin_unlock_bh(&r->consumer_lock);

	return ptr;
}

static inline int ptr_ring_consume_batched(struct ptr_ring *r,
					   void **array, int n)
{
	int ret;

	spin_lock(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock(&r->consumer_lock);

	return ret;
}

static inline int ptr_ring_consume_batched_irq(struct ptr_ring *r,
					       void **array, int n)
{
	int ret;

	spin_lock_irq(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_irq(&r->consumer_lock);

	return ret;
}

static inline int ptr_ring_consume_batched_any(struct ptr_ring *r,
					       void **array, int n)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&r->consumer_lock, flags);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_irqrestore(&r->consumer_lock, flags);

	return ret;
}

static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
					      void **array, int n)
{
	int ret;

	spin_lock_bh(&r->consumer_lock);
	ret = __ptr_ring_consume_batched(r, array, n);
	spin_unlock_bh(&r->consumer_lock);

	return ret;
}

/* Cast to structure type and call a function without discarding from FIFO.
 * Function must return a value.
 * Callers must take consumer_lock.
 */
#define __PTR_RING_PEEK_CALL(r, f) ((f)(__ptr_ring_peek(r)))

#define PTR_RING_PEEK_CALL(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_IRQ(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_irq(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irq(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_BH(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	\
	spin_lock_bh(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_bh(&(r)->consumer_lock); \
	__PTR_RING_PEEK_CALL_v; \
})

#define PTR_RING_PEEK_CALL_ANY(r, f) ({ \
	typeof((f)(NULL)) __PTR_RING_PEEK_CALL_v; \
	unsigned long __PTR_RING_PEEK_CALL_f; \
	\
	spin_lock_irqsave(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v = __PTR_RING_PEEK_CALL(r, f); \
	spin_unlock_irqrestore(&(r)->consumer_lock, __PTR_RING_PEEK_CALL_f); \
	__PTR_RING_PEEK_CALL_v; \
})
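
/*
 * Example (an illustrative sketch, with a hypothetical struct my_obj and
 * helper my_obj_len; note the function must tolerate a NULL argument, since
 * __ptr_ring_peek can return NULL on an empty ring):
 *
 *	static int my_obj_len(void *obj)
 *	{
 *		return obj ? ((struct my_obj *)obj)->len : 0;
 *	}
 *
 *	len = PTR_RING_PEEK_CALL(&my_ring, my_obj_len);
 */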

/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
 * documentation for vmalloc for which of them are legal.
 */
static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
{
	if (size > KMALLOC_MAX_SIZE / sizeof(void *))
		return NULL;
	return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
}

static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
{
	r->size = size;
	r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue));
	/* We need to set batch at least to 1 to make logic
	 * in __ptr_ring_discard_one work correctly.
	 * Batching too much (because ring is small) would cause a lot of
	 * burstiness. Needs tuning; for now fall back to a batch of 1,
	 * i.e. no batching.
	 */
	if (r->batch > r->size / 2 || !r->batch)
		r->batch = 1;
}
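
/*
 * Illustrative arithmetic (assuming a 64-byte cache line and 8-byte
 * pointers): batch = 64 * 2 / 8 = 16, so the consumer invalidates entries
 * 16 at a time; rings with fewer than 32 entries fall back to a batch of 1.
 */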

static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp)
{
	r->queue = __ptr_ring_init_queue_alloc(size, gfp);
	if (!r->queue)
		return -ENOMEM;

	__ptr_ring_set_size(r, size);
	r->producer = r->consumer_head = r->consumer_tail = 0;
	spin_lock_init(&r->producer_lock);
	spin_lock_init(&r->consumer_lock);

	return 0;
}

/*
 * Return entries into ring. Destroy entries that don't fit.
 *
 * Note: this is expected to be a rare slow path operation.
 *
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline void ptr_ring_unconsume(struct ptr_ring *r, void **batch, int n,
				      void (*destroy)(void *))
{
	unsigned long flags;
	int head;

	spin_lock_irqsave(&r->consumer_lock, flags);
	spin_lock(&r->producer_lock);

	if (!r->size)
		goto done;

	/*
	 * Clean out buffered entries (for simplicity). This way the following
	 * code can test entries for NULL and assume non-NULL entries are
	 * valid.
	 */
	head = r->consumer_head - 1;
	while (likely(head >= r->consumer_tail))
		r->queue[head--] = NULL;
	r->consumer_tail = r->consumer_head;

	/*
	 * Go over entries in batch, start moving head back and copy entries.
	 * Stop when we run into previously unconsumed entries.
	 */
	while (n) {
		head = r->consumer_head - 1;
		if (head < 0)
			head = r->size - 1;
		if (r->queue[head]) {
			/* This batch entry will have to be destroyed. */
			goto done;
		}
		r->queue[head] = batch[--n];
		r->consumer_tail = head;
		/* matching READ_ONCE in __ptr_ring_empty for lockless tests */
		WRITE_ONCE(r->consumer_head, head);
	}

done:
	/* Destroy all entries left in the batch. */
	while (n)
		destroy(batch[--n]);
	spin_unlock(&r->producer_lock);
	spin_unlock_irqrestore(&r->consumer_lock, flags);
}
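
/*
 * Example (an illustrative sketch, with hypothetical process() and
 * my_obj_free()): a consumer that pulled a batch but could only handle part
 * of it can hand the unprocessed tail back in order:
 *
 *	n = ptr_ring_consume_batched(&my_ring, objs, 16);
 *	done = process(objs, n);
 *	if (done < n)
 *		ptr_ring_unconsume(&my_ring, objs + done, n - done,
 *				   my_obj_free);
 */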

static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue,
					   int size, gfp_t gfp,
					   void (*destroy)(void *))
{
	int producer = 0;
	void **old;
	void *ptr;

	while ((ptr = __ptr_ring_consume(r)))
		if (producer < size)
			queue[producer++] = ptr;
		else if (destroy)
			destroy(ptr);

	if (producer >= size)
		producer = 0;
	__ptr_ring_set_size(r, size);
	r->producer = producer;
	r->consumer_head = 0;
	r->consumer_tail = 0;
	old = r->queue;
	r->queue = queue;

	return old;
}

/*
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
				  void (*destroy)(void *))
{
	unsigned long flags;
	void **queue = __ptr_ring_init_queue_alloc(size, gfp);
	void **old;

	if (!queue)
		return -ENOMEM;

	spin_lock_irqsave(&(r)->consumer_lock, flags);
	spin_lock(&(r)->producer_lock);

	old = __ptr_ring_swap_queue(r, queue, size, gfp, destroy);

	spin_unlock(&(r)->producer_lock);
	spin_unlock_irqrestore(&(r)->consumer_lock, flags);

	kvfree(old);

	return 0;
}
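
/*
 * Example (an illustrative sketch, with hypothetical new_size and
 * my_obj_free()): growing or shrinking a live ring; entries that do not fit
 * in the new queue are passed to the destroy callback, and on allocation
 * failure the old ring is left untouched:
 *
 *	if (ptr_ring_resize(&my_ring, new_size, GFP_KERNEL, my_obj_free))
 *		return -ENOMEM;
 */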

/*
 * Note: producer lock is nested within consumer lock, so if you
 * resize you must make sure all uses nest correctly.
 * In particular if you consume ring in interrupt or BH context, you must
 * disable interrupts/BH when doing so.
 */
static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
					   unsigned int nrings,
					   int size,
					   gfp_t gfp, void (*destroy)(void *))
{
	unsigned long flags;
	void ***queues;
	int i;

	queues = kmalloc_array(nrings, sizeof(*queues), gfp);
	if (!queues)
		goto noqueues;

	for (i = 0; i < nrings; ++i) {
		queues[i] = __ptr_ring_init_queue_alloc(size, gfp);
		if (!queues[i])
			goto nomem;
	}

	for (i = 0; i < nrings; ++i) {
		spin_lock_irqsave(&(rings[i])->consumer_lock, flags);
		spin_lock(&(rings[i])->producer_lock);
		queues[i] = __ptr_ring_swap_queue(rings[i], queues[i],
						  size, gfp, destroy);
		spin_unlock(&(rings[i])->producer_lock);
		spin_unlock_irqrestore(&(rings[i])->consumer_lock, flags);
	}

	for (i = 0; i < nrings; ++i)
		kvfree(queues[i]);

	kfree(queues);

	return 0;

nomem:
	while (--i >= 0)
		kvfree(queues[i]);

	kfree(queues);

noqueues:
	return -ENOMEM;
}

static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
{
	void *ptr;

	if (destroy)
		while ((ptr = ptr_ring_consume(r)))
			destroy(ptr);
	kvfree(r->queue);
}
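
/*
 * Example (an illustrative sketch, with a hypothetical my_obj_free()): if the
 * ring may still hold entries at teardown, pass a destroy callback so each
 * leftover pointer is released before the queue array itself is freed:
 *
 *	ptr_ring_cleanup(&my_ring, my_obj_free);
 */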

#endif /* _LINUX_PTR_RING_H */