net/xdp/xsk_queue.h

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /* XDP user-space ring structure
   3  * Copyright(c) 2018 Intel Corporation.
   4  */
   5
   6 #ifndef _LINUX_XSK_QUEUE_H
   7 #define _LINUX_XSK_QUEUE_H
   8
   9 #include <linux/types.h>
  10 #include <linux/if_xdp.h>
  11 #include <net/xdp_sock.h>
  12
/* Shared ring header: the producer and consumer indices that the barrier
 * protocol described further down in this file operates on. Each index
 * is placed on its own cache line on SMP builds.
 */
struct xdp_ring {
        /* Index published by the producing side of the ring. */
        u32 producer ____cacheline_aligned_in_smp;
        /* Index published by the consuming side of the ring. */
        u32 consumer ____cacheline_aligned_in_smp;
        /* Not referenced by any helper in this header. */
        u32 flags;
};
  18
/* Used for the RX and TX queues for packets: the ring header followed by
 * a cache-line-aligned array of packet descriptors.
 */
struct xdp_rxtx_ring {
        /* Producer/consumer indices for this ring. */
        struct xdp_ring ptrs;
        /* Descriptor slots; indexed with (index & ring_mask) by the helpers. */
        struct xdp_desc desc[] ____cacheline_aligned_in_smp;
};
  24
/* Used for the fill and completion queues for buffers: the ring header
 * followed by a cache-line-aligned array of 64-bit buffer addresses.
 */
struct xdp_umem_ring {
        /* Producer/consumer indices for this ring. */
        struct xdp_ring ptrs;
        /* Address slots; indexed with (index & ring_mask) by the helpers. */
        u64 desc[] ____cacheline_aligned_in_smp;
};
  30
/* Kernel-side bookkeeping for one ring. The helpers in this header work
 * on the cached indices and only touch the shared ring when they have to.
 */
struct xsk_queue {
        /* Mask applied to addresses read from the ring / used to compare chunks. */
        u64 chunk_mask;
        /* Exclusive upper bound for addresses accepted by the validators. */
        u64 size;
        /* Mask that turns a free-running index into a slot number. */
        u32 ring_mask;
        /* Number of slots in the ring. */
        u32 nentries;
        /* Local copy of the producer index. */
        u32 cached_prod;
        /* Local copy of the consumer index. */
        u32 cached_cons;
        /* The shared ring; cast to the rxtx or umem layout by the helpers. */
        struct xdp_ring *ring;
        /* Count of ring entries that failed validation. */
        u64 invalid_descs;
};
  41
   42 /* The structure of the shared state of the rings is the same as the
  43  * ring buffer in kernel/events/ring_buffer.c. For the Rx and completion
  44  * ring, the kernel is the producer and user space is the consumer. For
  45  * the Tx and fill rings, the kernel is the consumer and user space is
  46  * the producer.
  47  *
  48  * producer                         consumer
  49  *
  50  * if (LOAD ->consumer) {           LOAD ->producer
  51  *                    (A)           smp_rmb()       (C)
  52  *    STORE $data                   LOAD $data
  53  *    smp_wmb()       (B)           smp_mb()        (D)
  54  *    STORE ->producer              STORE ->consumer
  55  * }
  56  *
  57  * (A) pairs with (D), and (B) pairs with (C).
  58  *
  59  * Starting with (B), it protects the data from being written after
  60  * the producer pointer. If this barrier was missing, the consumer
  61  * could observe the producer pointer being set and thus load the data
  62  * before the producer has written the new data. The consumer would in
  63  * this case load the old data.
  64  *
  65  * (C) protects the consumer from speculatively loading the data before
  66  * the producer pointer actually has been read. If we do not have this
  67  * barrier, some architectures could load old data as speculative loads
  68  * are not discarded as the CPU does not know there is a dependency
  69  * between ->producer and data.
  70  *
  71  * (A) is a control dependency that separates the load of ->consumer
  72  * from the stores of $data. In case ->consumer indicates there is no
  73  * room in the buffer to store $data we do not. So no barrier is needed.
  74  *
   75  * (D) prevents the load of the data from being observed to happen after the
  76  * store of the consumer pointer. If we did not have this memory
  77  * barrier, the producer could observe the consumer pointer being set
  78  * and overwrite the data with a new value before the consumer got the
  79  * chance to read the old value. The consumer would thus miss reading
  80  * the old entry and very likely read the new entry twice, once right
  81  * now and again after circling through the ring.
  82  */
  83
  84 /* The operations on the rings are the following:
  85  *
  86  * producer                           consumer
  87  *
  88  * RESERVE entries                    PEEK in the ring for entries
  89  * WRITE data into the ring           READ data from the ring
  90  * SUBMIT entries                     RELEASE entries
  91  *
  92  * The producer reserves one or more entries in the ring. It can then
  93  * fill in these entries and finally submit them so that they can be
  94  * seen and read by the consumer.
  95  *
  96  * The consumer peeks into the ring to see if the producer has written
   97  * any new entries. If so, the consumer can then read these entries
  98  * and when it is done reading them release them back to the producer
  99  * so that the producer can use these slots to fill in new entries.
 100  *
 101  * The function names below reflect these operations.
 102  */
 103
 104 /* Functions that read and validate content from consumer rings. */
 105
 106 static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem,
 107                                                    u64 addr,
 108                                                    u64 length)
 109 {
 110         bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
 111         bool next_pg_contig =
 112                 (unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr &
 113                         XSK_NEXT_PG_CONTIG_MASK;
 114
 115         return cross_pg && !next_pg_contig;
 116 }
 117
 118 static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
 119                                                 u64 addr,
 120                                                 u64 length,
 121                                                 struct xdp_umem *umem)
 122 {
 123         u64 base_addr = xsk_umem_extract_addr(addr);
 124
 125         addr = xsk_umem_add_offset_to_addr(addr);
 126         if (base_addr >= q->size || addr >= q->size ||
 127             xskq_cons_crosses_non_contig_pg(umem, addr, length)) {
 128                 q->invalid_descs++;
 129                 return false;
 130         }
 131
 132         return true;
 133 }
 134
 135 static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr)
 136 {
 137         if (addr >= q->size) {
 138                 q->invalid_descs++;
 139                 return false;
 140         }
 141
 142         return true;
 143 }
 144
 145 static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr,
 146                                        struct xdp_umem *umem)
 147 {
 148         struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 149
 150         while (q->cached_cons != q->cached_prod) {
 151                 u32 idx = q->cached_cons & q->ring_mask;
 152
 153                 *addr = ring->desc[idx] & q->chunk_mask;
 154
 155                 if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
 156                         if (xskq_cons_is_valid_unaligned(q, *addr,
 157                                                          umem->chunk_size_nohr,
 158                                                          umem))
 159                                 return true;
 160                         goto out;
 161                 }
 162
 163                 if (xskq_cons_is_valid_addr(q, *addr))
 164                         return true;
 165
 166 out:
 167                 q->cached_cons++;
 168         }
 169
 170         return false;
 171 }
 172
 173 static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
 174                                            struct xdp_desc *d,
 175                                            struct xdp_umem *umem)
 176 {
 177         if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
 178                 if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem))
 179                         return false;
 180
 181                 if (d->len > umem->chunk_size_nohr || d->options) {
 182                         q->invalid_descs++;
 183                         return false;
 184                 }
 185
 186                 return true;
 187         }
 188
 189         if (!xskq_cons_is_valid_addr(q, d->addr))
 190                 return false;
 191
 192         if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) ||
 193             d->options) {
 194                 q->invalid_descs++;
 195                 return false;
 196         }
 197
 198         return true;
 199 }
 200
 201 static inline bool xskq_cons_read_desc(struct xsk_queue *q,
 202                                        struct xdp_desc *desc,
 203                                        struct xdp_umem *umem)
 204 {
 205         while (q->cached_cons != q->cached_prod) {
 206                 struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
 207                 u32 idx = q->cached_cons & q->ring_mask;
 208
 209                 *desc = ring->desc[idx];
 210                 if (xskq_cons_is_valid_desc(q, desc, umem))
 211                         return true;
 212
 213                 q->cached_cons++;
 214         }
 215
 216         return false;
 217 }
 218
 219 /* Functions for consumers */
 220
 221 static inline void __xskq_cons_release(struct xsk_queue *q)
 222 {
 223         smp_mb(); /* D, matches A */
 224         WRITE_ONCE(q->ring->consumer, q->cached_cons);
 225 }
 226
 227 static inline void __xskq_cons_peek(struct xsk_queue *q)
 228 {
 229         /* Refresh the local pointer */
 230         q->cached_prod = READ_ONCE(q->ring->producer);
 231         smp_rmb(); /* C, matches B */
 232 }
 233
/* Publish the cached consumer index to the shared ring, then refresh the
 * cached producer index from it.
 */
static inline void xskq_cons_get_entries(struct xsk_queue *q)
{
        __xskq_cons_release(q);
        __xskq_cons_peek(q);
}
 239
 240 static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
 241 {
 242         u32 entries = q->cached_prod - q->cached_cons;
 243
 244         if (entries >= cnt)
 245                 return true;
 246
 247         __xskq_cons_peek(q);
 248         entries = q->cached_prod - q->cached_cons;
 249
 250         return entries >= cnt;
 251 }
 252
 253 static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr,
 254                                        struct xdp_umem *umem)
 255 {
 256         if (q->cached_prod == q->cached_cons)
 257                 xskq_cons_get_entries(q);
 258         return xskq_cons_read_addr(q, addr, umem);
 259 }
 260
 261 static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
 262                                        struct xdp_desc *desc,
 263                                        struct xdp_umem *umem)
 264 {
 265         if (q->cached_prod == q->cached_cons)
 266                 xskq_cons_get_entries(q);
 267         return xskq_cons_read_desc(q, desc, umem);
 268 }
 269
 270 static inline void xskq_cons_release(struct xsk_queue *q)
 271 {
 272         /* To improve performance, only update local state here.
 273          * Reflect this to global state when we get new entries
 274          * from the ring in xskq_cons_get_entries() and whenever
 275          * Rx or Tx processing are completed in the NAPI loop.
 276          */
 277         q->cached_cons++;
 278 }
 279
 280 static inline bool xskq_cons_is_full(struct xsk_queue *q)
 281 {
 282         /* No barriers needed since data is not accessed */
 283         return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) ==
 284                 q->nentries;
 285 }
 286
 287 /* Functions for producers */
 288
 289 static inline bool xskq_prod_is_full(struct xsk_queue *q)
 290 {
 291         u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons);
 292
 293         if (free_entries)
 294                 return false;
 295
 296         /* Refresh the local tail pointer */
 297         q->cached_cons = READ_ONCE(q->ring->consumer);
 298         free_entries = q->nentries - (q->cached_prod - q->cached_cons);
 299
 300         return !free_entries;
 301 }
 302
 303 static inline int xskq_prod_reserve(struct xsk_queue *q)
 304 {
 305         if (xskq_prod_is_full(q))
 306                 return -ENOSPC;
 307
 308         /* A, matches D */
 309         q->cached_prod++;
 310         return 0;
 311 }
 312
 313 static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr)
 314 {
 315         struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 316
 317         if (xskq_prod_is_full(q))
 318                 return -ENOSPC;
 319
 320         /* A, matches D */
 321         ring->desc[q->cached_prod++ & q->ring_mask] = addr;
 322         return 0;
 323 }
 324
 325 static inline int xskq_prod_reserve_desc(struct xsk_queue *q,
 326                                          u64 addr, u32 len)
 327 {
 328         struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring;
 329         u32 idx;
 330
 331         if (xskq_prod_is_full(q))
 332                 return -ENOSPC;
 333
 334         /* A, matches D */
 335         idx = q->cached_prod++ & q->ring_mask;
 336         ring->desc[idx].addr = addr;
 337         ring->desc[idx].len = len;
 338
 339         return 0;
 340 }
 341
 342 static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx)
 343 {
 344         smp_wmb(); /* B, matches C */
 345
 346         WRITE_ONCE(q->ring->producer, idx);
 347 }
 348
/* Publish the cached producer index to the shared ring. */
static inline void xskq_prod_submit(struct xsk_queue *q)
{
        __xskq_prod_submit(q, q->cached_prod);
}
 353
 354 static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr)
 355 {
 356         struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 357         u32 idx = q->ring->producer;
 358
 359         ring->desc[idx++ & q->ring_mask] = addr;
 360
 361         __xskq_prod_submit(q, idx);
 362 }
 363
 364 static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries)
 365 {
 366         __xskq_prod_submit(q, q->ring->producer + nb_entries);
 367 }
 368
 369 static inline bool xskq_prod_is_empty(struct xsk_queue *q)
 370 {
 371         /* No barriers needed since data is not accessed */
 372         return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer);
 373 }
 374
 375 /* For both producers and consumers */
 376
 377 static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
 378 {
 379         return q ? q->invalid_descs : 0;
 380 }
 381
/* Queue set-up and tear-down routines. Their definitions are not part of
 * this header.
 * NOTE(review): xskq_set_umem() presumably stores @size and @chunk_mask in
 * the queue fields of the same names - confirm against the definition.
 */
void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
void xskq_destroy(struct xsk_queue *q_ops);

/* Executed by the core when the entire UMEM gets freed */
void xsk_reuseq_destroy(struct xdp_umem *umem);
 388
 389 #endif /* _LINUX_XSK_QUEUE_H */