/* SPDX-License-Identifier: GPL-2.0 */
/* XDP user-space ring structure
 * Copyright(c) 2018 Intel Corporation.
 */

#ifndef _LINUX_XSK_QUEUE_H
#define _LINUX_XSK_QUEUE_H

#include <linux/types.h>
#include <linux/if_xdp.h>
#include <net/xdp_sock.h>

b3a9e0be BT |
/* Ring header shared between kernel and user space. Producer and
 * consumer indices are placed on separate cache lines to avoid false
 * sharing between the producing and consuming sides.
 */
struct xdp_ring {
	u32 producer ____cacheline_aligned_in_smp;
	u32 consumer ____cacheline_aligned_in_smp;
	u32 flags;
};
18 | ||
19 | /* Used for the RX and TX queues for packets */ | |
20 | struct xdp_rxtx_ring { | |
21 | struct xdp_ring ptrs; | |
22 | struct xdp_desc desc[0] ____cacheline_aligned_in_smp; | |
23 | }; | |
24 | ||
25 | /* Used for the fill and completion queues for buffers */ | |
26 | struct xdp_umem_ring { | |
27 | struct xdp_ring ptrs; | |
bbff2f32 | 28 | u64 desc[0] ____cacheline_aligned_in_smp; |
b3a9e0be BT |
29 | }; |
30 | ||
/* Kernel-side state for one AF_XDP ring (Rx, Tx, fill or completion).
 * The ring shared with user space is pointed to by @ring; everything
 * else is private kernel bookkeeping.
 */
struct xsk_queue {
	u64 chunk_mask;		/* masks an address down to its chunk base */
	u64 size;		/* umem size in bytes; addresses must be below this */
	u32 ring_mask;		/* index wrap mask (nentries - 1; assumes power of two) */
	u32 nentries;		/* number of ring entries */
	u32 cached_prod;	/* local snapshot of the producer index */
	u32 cached_cons;	/* local snapshot of the consumer index */
	struct xdp_ring *ring;	/* the shared ring itself */
	u64 invalid_descs;	/* stat: descriptors that failed validation */
};
41 | ||
/* The structure of the shared state of the rings are the same as the
 * ring buffer in kernel/events/ring_buffer.c. For the Rx and completion
 * ring, the kernel is the producer and user space is the consumer. For
 * the Tx and fill rings, the kernel is the consumer and user space is
 * the producer.
 *
 * producer                         consumer
 *
 * if (LOAD ->consumer) {           LOAD ->producer
 *                    (A)           smp_rmb()       (C)
 *    STORE $data                   LOAD $data
 *    smp_wmb()       (B)           smp_mb()        (D)
 *    STORE ->producer              STORE ->consumer
 * }
 *
 * (A) pairs with (D), and (B) pairs with (C).
 *
 * Starting with (B), it protects the data from being written after
 * the producer pointer. If this barrier was missing, the consumer
 * could observe the producer pointer being set and thus load the data
 * before the producer has written the new data. The consumer would in
 * this case load the old data.
 *
 * (C) protects the consumer from speculatively loading the data before
 * the producer pointer actually has been read. If we do not have this
 * barrier, some architectures could load old data as speculative loads
 * are not discarded as the CPU does not know there is a dependency
 * between ->producer and data.
 *
 * (A) is a control dependency that separates the load of ->consumer
 * from the stores of $data. In case ->consumer indicates there is no
 * room in the buffer to store $data we do not. So no barrier is needed.
 *
 * (D) protects the load of the data to be observed to happen after the
 * store of the consumer pointer. If we did not have this memory
 * barrier, the producer could observe the consumer pointer being set
 * and overwrite the data with a new value before the consumer got the
 * chance to read the old value. The consumer would thus miss reading
 * the old entry and very likely read the new entry twice, once right
 * now and again after circling through the ring.
 */
83 | ||
/* The operations on the rings are the following:
 *
 * producer                           consumer
 *
 * RESERVE entries                    PEEK in the ring for entries
 * WRITE data into the ring           READ data from the ring
 * SUBMIT entries                     RELEASE entries
 *
 * The producer reserves one or more entries in the ring. It can then
 * fill in these entries and finally submit them so that they can be
 * seen and read by the consumer.
 *
 * The consumer peeks into the ring to see if the producer has written
 * any new entries. If so, the consumer can then read these entries
 * and when it is done reading them release them back to the producer
 * so that the producer can use these slots to fill in new entries.
 *
 * The function names below reflect these operations.
 */

/* Functions that read and validate content from consumer rings. */

03896ef1 MK |
106 | static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem, |
107 | u64 addr, | |
108 | u64 length) | |
c05cd364 KL |
109 | { |
110 | bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE; | |
111 | bool next_pg_contig = | |
112 | (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr & | |
113 | XSK_NEXT_PG_CONTIG_MASK; | |
114 | ||
115 | return cross_pg && !next_pg_contig; | |
116 | } | |
117 | ||
03896ef1 MK |
118 | static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q, |
119 | u64 addr, | |
c05cd364 KL |
120 | u64 length, |
121 | struct xdp_umem *umem) | |
122 | { | |
123 | u64 base_addr = xsk_umem_extract_addr(addr); | |
124 | ||
125 | addr = xsk_umem_add_offset_to_addr(addr); | |
126 | if (base_addr >= q->size || addr >= q->size || | |
03896ef1 | 127 | xskq_cons_crosses_non_contig_pg(umem, addr, length)) { |
c05cd364 KL |
128 | q->invalid_descs++; |
129 | return false; | |
130 | } | |
131 | ||
132 | return true; | |
133 | } | |
134 | ||
15d8c916 MK |
135 | static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr) |
136 | { | |
137 | if (addr >= q->size) { | |
138 | q->invalid_descs++; | |
139 | return false; | |
140 | } | |
141 | ||
142 | return true; | |
143 | } | |
144 | ||
03896ef1 MK |
145 | static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr, |
146 | struct xdp_umem *umem) | |
c497176c | 147 | { |
c5ed924b MK |
148 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; |
149 | ||
150 | while (q->cached_cons != q->cached_prod) { | |
151 | u32 idx = q->cached_cons & q->ring_mask; | |
c497176c | 152 | |
c34787fc | 153 | *addr = ring->desc[idx] & q->chunk_mask; |
c05cd364 KL |
154 | |
155 | if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { | |
03896ef1 | 156 | if (xskq_cons_is_valid_unaligned(q, *addr, |
c05cd364 KL |
157 | umem->chunk_size_nohr, |
158 | umem)) | |
03896ef1 | 159 | return true; |
c05cd364 KL |
160 | goto out; |
161 | } | |
162 | ||
03896ef1 MK |
163 | if (xskq_cons_is_valid_addr(q, *addr)) |
164 | return true; | |
c497176c | 165 | |
c05cd364 | 166 | out: |
c5ed924b | 167 | q->cached_cons++; |
c497176c BT |
168 | } |
169 | ||
03896ef1 | 170 | return false; |
c497176c BT |
171 | } |
172 | ||
03896ef1 MK |
173 | static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q, |
174 | struct xdp_desc *d, | |
175 | struct xdp_umem *umem) | |
35fcde7f | 176 | { |
c05cd364 | 177 | if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { |
03896ef1 | 178 | if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem)) |
c05cd364 KL |
179 | return false; |
180 | ||
181 | if (d->len > umem->chunk_size_nohr || d->options) { | |
182 | q->invalid_descs++; | |
183 | return false; | |
184 | } | |
185 | ||
186 | return true; | |
187 | } | |
188 | ||
03896ef1 | 189 | if (!xskq_cons_is_valid_addr(q, d->addr)) |
35fcde7f | 190 | return false; |
35fcde7f | 191 | |
c57b557b BT |
192 | if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) || |
193 | d->options) { | |
35fcde7f MK |
194 | q->invalid_descs++; |
195 | return false; | |
196 | } | |
197 | ||
198 | return true; | |
199 | } | |
200 | ||
03896ef1 MK |
201 | static inline bool xskq_cons_read_desc(struct xsk_queue *q, |
202 | struct xdp_desc *desc, | |
203 | struct xdp_umem *umem) | |
35fcde7f | 204 | { |
c5ed924b | 205 | while (q->cached_cons != q->cached_prod) { |
35fcde7f | 206 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; |
c5ed924b | 207 | u32 idx = q->cached_cons & q->ring_mask; |
35fcde7f | 208 | |
c34787fc | 209 | *desc = ring->desc[idx]; |
03896ef1 MK |
210 | if (xskq_cons_is_valid_desc(q, desc, umem)) |
211 | return true; | |
35fcde7f | 212 | |
c5ed924b | 213 | q->cached_cons++; |
35fcde7f MK |
214 | } |
215 | ||
03896ef1 | 216 | return false; |
35fcde7f MK |
217 | } |
218 | ||
/* Functions for consumers */

/* Publish the cached consumer index to the shared ring, telling the
 * producer these entries may be overwritten. The full barrier (D,
 * matches A) orders prior reads of ring data before the store.
 */
static inline void __xskq_cons_release(struct xsk_queue *q)
{
	smp_mb(); /* D, matches A */
	WRITE_ONCE(q->ring->consumer, q->cached_cons);
}
226 | ||
/* Refresh the cached producer index from the shared ring. The read
 * barrier (C, matches B) prevents ring data from being loaded
 * speculatively before the producer pointer is read.
 */
static inline void __xskq_cons_peek(struct xsk_queue *q)
{
	/* Refresh the local pointer */
	q->cached_prod = READ_ONCE(q->ring->producer);
	smp_rmb(); /* C, matches B */
}
233 | ||
/* Release everything consumed so far, then pick up any newly produced
 * entries from the shared ring.
 */
static inline void xskq_cons_get_entries(struct xsk_queue *q)
{
	__xskq_cons_release(q);
	__xskq_cons_peek(q);
}
239 | ||
240 | static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt) | |
241 | { | |
242 | u32 entries = q->cached_prod - q->cached_cons; | |
243 | ||
244 | if (entries >= cnt) | |
245 | return true; | |
246 | ||
247 | __xskq_cons_peek(q); | |
248 | entries = q->cached_prod - q->cached_cons; | |
249 | ||
250 | return entries >= cnt; | |
251 | } | |
252 | ||
/* Peek at the next valid fill/completion address without releasing the
 * entry. Fetches new entries from the shared ring only when the cached
 * view is exhausted. Returns true and sets *addr on success.
 */
static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr,
				       struct xdp_umem *umem)
{
	if (q->cached_prod == q->cached_cons)
		xskq_cons_get_entries(q);
	return xskq_cons_read_addr(q, addr, umem);
}
260 | ||
03896ef1 MK |
/* Peek at the next valid Tx descriptor without releasing the entry.
 * Fetches new entries from the shared ring only when the cached view is
 * exhausted. Returns true and fills *desc on success.
 */
static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
				       struct xdp_desc *desc,
				       struct xdp_umem *umem)
{
	if (q->cached_prod == q->cached_cons)
		xskq_cons_get_entries(q);
	return xskq_cons_read_desc(q, desc, umem);
}
269 | ||
15d8c916 MK |
/* Release one previously peeked entry. */
static inline void xskq_cons_release(struct xsk_queue *q)
{
	/* To improve performance, only update local state here.
	 * Reflect this to global state when we get new entries
	 * from the ring in xskq_cons_get_entries().
	 */
	q->cached_cons++;
}
278 | ||
/* Return true if the ring is completely full as seen from the shared
 * state, i.e. the producer is a full ring ahead of the consumer.
 */
static inline bool xskq_cons_is_full(struct xsk_queue *q)
{
	/* No barriers needed since data is not accessed */
	return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) ==
		q->nentries;
}
285 | ||
/* Functions for producers */

288 | static inline bool xskq_prod_is_full(struct xsk_queue *q) | |
289 | { | |
290 | u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons); | |
291 | ||
292 | if (free_entries) | |
293 | return false; | |
294 | ||
295 | /* Refresh the local tail pointer */ | |
296 | q->cached_cons = READ_ONCE(q->ring->consumer); | |
297 | free_entries = q->nentries - (q->cached_prod - q->cached_cons); | |
298 | ||
299 | return !free_entries; | |
300 | } | |
301 | ||
302 | static inline int xskq_prod_reserve(struct xsk_queue *q) | |
303 | { | |
304 | if (xskq_prod_is_full(q)) | |
305 | return -ENOSPC; | |
306 | ||
307 | /* A, matches D */ | |
308 | q->cached_prod++; | |
309 | return 0; | |
310 | } | |
311 | ||
312 | static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) | |
313 | { | |
314 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; | |
315 | ||
316 | if (xskq_prod_is_full(q)) | |
317 | return -ENOSPC; | |
318 | ||
319 | /* A, matches D */ | |
320 | ring->desc[q->cached_prod++ & q->ring_mask] = addr; | |
321 | return 0; | |
322 | } | |
323 | ||
59e35e55 MK |
324 | static inline int xskq_prod_reserve_desc(struct xsk_queue *q, |
325 | u64 addr, u32 len) | |
c497176c BT |
326 | { |
327 | struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; | |
59e35e55 | 328 | u32 idx; |
c497176c | 329 | |
df0ae6f7 | 330 | if (xskq_prod_is_full(q)) |
c497176c BT |
331 | return -ENOSPC; |
332 | ||
f63666de | 333 | /* A, matches D */ |
d7012f05 | 334 | idx = q->cached_prod++ & q->ring_mask; |
bbff2f32 | 335 | ring->desc[idx].addr = addr; |
c497176c | 336 | ring->desc[idx].len = len; |
c497176c BT |
337 | |
338 | return 0; | |
339 | } | |
340 | ||
/* Publish all entries up to @idx to the consumer. The write barrier
 * (B, matches C) orders the writes of ring data before the store of the
 * producer pointer that announces them.
 */
static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
{
	smp_wmb(); /* B, matches C */

	WRITE_ONCE(q->ring->producer, idx);
}
347 | ||
/* Submit all locally reserved entries to the consumer. */
static inline void xskq_prod_submit(struct xsk_queue *q)
{
	__xskq_prod_submit(q, q->cached_prod);
}
352 | ||
353 | static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr) | |
354 | { | |
355 | struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; | |
356 | u32 idx = q->ring->producer; | |
357 | ||
358 | ring->desc[idx++ & q->ring_mask] = addr; | |
359 | ||
360 | __xskq_prod_submit(q, idx); | |
361 | } | |
362 | ||
/* Submit @nb_entries already-written entries to the consumer, advancing
 * the shared producer pointer past them.
 */
static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
{
	__xskq_prod_submit(q, q->ring->producer + nb_entries);
}
367 | ||
/* Return true if the ring is empty as seen from the shared state. */
static inline bool xskq_prod_is_empty(struct xsk_queue *q)
{
	/* No barriers needed since data is not accessed */
	return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer);
}
373 | ||
/* For both producers and consumers */

376 | static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) | |
377 | { | |
378 | return q ? q->invalid_descs : 0; | |
379 | } | |
380 | ||
void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);

/* Executed by the core when the entire UMEM gets freed */
void xsk_reuseq_destroy(struct xdp_umem *umem);

#endif /* _LINUX_XSK_QUEUE_H */