// SPDX-License-Identifier: GPL-2.0-only
/* net/core/xdp.c
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 */
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/bug.h>
#include <net/page_pool.h>

#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
#include <net/xdp_sock_drv.h>
#define REG_STATE_NEW		0x0
#define REG_STATE_REGISTERED	0x1
#define REG_STATE_UNREGISTERED	0x2
#define REG_STATE_UNUSED	0x3
static DEFINE_IDA(mem_id_pool);
static DEFINE_MUTEX(mem_id_lock);
#define MEM_ID_MAX 0xFFFE
#define MEM_ID_MIN 1
static int mem_id_next = MEM_ID_MIN;

static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;
static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 *k = data;
	const u32 key = *k;

	BUILD_BUG_ON(sizeof_field(struct xdp_mem_allocator, mem.id)
		     != sizeof(u32));

	/* Use cyclic increasing ID as direct hash key */
	return key;
}
static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct xdp_mem_allocator *xa = ptr;
	u32 mem_id = *(u32 *)arg->key;

	return xa->mem.id != mem_id;
}
static const struct rhashtable_params mem_id_rht_params = {
	.head_offset = offsetof(struct xdp_mem_allocator, node),
	.key_offset  = offsetof(struct xdp_mem_allocator, mem.id),
	.key_len = sizeof_field(struct xdp_mem_allocator, mem.id),
	.max_size = MEM_ID_MAX,
	.automatic_shrinking = true,
	.hashfn    = xdp_mem_id_hashfn,
	.obj_cmpfn = xdp_mem_id_cmp,
};
static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
{
	struct xdp_mem_allocator *xa;

	xa = container_of(rcu, struct xdp_mem_allocator, rcu);

	/* Allow this ID to be reused */
	ida_simple_remove(&mem_id_pool, xa->mem.id);

	kfree(xa);
}
static void mem_xa_remove(struct xdp_mem_allocator *xa)
{
	trace_mem_disconnect(xa);

	if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
		call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
}
static void mem_allocator_disconnect(void *allocator)
{
	struct xdp_mem_allocator *xa;
	struct rhashtable_iter iter;

	mutex_lock(&mem_id_lock);

	rhashtable_walk_enter(mem_id_ht, &iter);
	do {
		rhashtable_walk_start(&iter);

		while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) {
			if (xa->allocator == allocator)
				mem_xa_remove(xa);
		}

		rhashtable_walk_stop(&iter);

	} while (xa == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);

	mutex_unlock(&mem_id_lock);
}
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
	struct xdp_mem_allocator *xa;
	int type = xdp_rxq->mem.type;
	int id = xdp_rxq->mem.id;

	/* Reset mem info to defaults */
	xdp_rxq->mem.id = 0;
	xdp_rxq->mem.type = 0;

	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return;
	}

	if (id == 0)
		return;

	if (type == MEM_TYPE_PAGE_POOL) {
		rcu_read_lock();
		xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
		page_pool_destroy(xa->page_pool);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
	/* Simplify driver cleanup code paths, allow unreg "unused" */
	if (xdp_rxq->reg_state == REG_STATE_UNUSED)
		return;

	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");

	xdp_rxq_info_unreg_mem_model(xdp_rxq);

	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
	xdp_rxq->dev = NULL;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);
static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
{
	memset(xdp_rxq, 0, sizeof(*xdp_rxq));
}
/* Returns 0 on success, negative on failure */
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		     struct net_device *dev, u32 queue_index, unsigned int napi_id)
{
	if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
		WARN(1, "Driver promised not to register this");
		return -EINVAL;
	}

	if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
		WARN(1, "Missing unregister, handled but fix driver");
		xdp_rxq_info_unreg(xdp_rxq);
	}

	if (!dev) {
		WARN(1, "Missing net_device from driver");
		return -ENODEV;
	}

	/* State either UNREGISTERED or NEW */
	xdp_rxq_info_init(xdp_rxq);
	xdp_rxq->dev = dev;
	xdp_rxq->queue_index = queue_index;
	xdp_rxq->napi_id = napi_id;

	xdp_rxq->reg_state = REG_STATE_REGISTERED;
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
	xdp_rxq->reg_state = REG_STATE_UNUSED;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);
bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
{
	return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);
static int __mem_id_init_hash_table(void)
{
	struct rhashtable *rht;
	int ret;

	if (unlikely(mem_id_init))
		return 0;

	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
	if (!rht)
		return -ENOMEM;

	ret = rhashtable_init(rht, &mem_id_rht_params);
	if (ret < 0) {
		kfree(rht);
		return ret;
	}
	mem_id_ht = rht;
	smp_mb(); /* mutex lock should provide enough pairing */
	mem_id_init = true;

	return 0;
}
/* Allocate a cyclic ID that maps to allocator pointer.
 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
 *
 * Caller must lock mem_id_lock.
 */
static int __mem_id_cyclic_get(gfp_t gfp)
{
	int retries = 1;
	int id;

again:
	id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
	if (id < 0) {
		if (id == -ENOSPC) {
			/* Cyclic allocator, reset next id */
			if (retries--) {
				mem_id_next = MEM_ID_MIN;
				goto again;
			}
		}
		return id; /* errno */
	}
	mem_id_next = id + 1;

	return id;
}
static bool __is_supported_mem_type(enum xdp_mem_type type)
{
	if (type == MEM_TYPE_PAGE_POOL)
		return is_page_pool_compiled_in();

	if (type >= MEM_TYPE_MAX)
		return false;

	return true;
}
int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
			       enum xdp_mem_type type, void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;
	gfp_t gfp = GFP_KERNEL;
	int id, errno, ret;
	void *ptr;

	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return -EFAULT;
	}

	if (!__is_supported_mem_type(type))
		return -EOPNOTSUPP;

	xdp_rxq->mem.type = type;

	if (!allocator) {
		if (type == MEM_TYPE_PAGE_POOL)
			return -EINVAL; /* Setup time check page_pool req */
		return 0;
	}

	/* Delay init of rhashtable to save memory if feature isn't used */
	if (!mem_id_init) {
		mutex_lock(&mem_id_lock);
		ret = __mem_id_init_hash_table();
		mutex_unlock(&mem_id_lock);
		if (ret < 0) {
			WARN_ON(1);
			return ret;
		}
	}

	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
	if (!xdp_alloc)
		return -ENOMEM;

	mutex_lock(&mem_id_lock);
	id = __mem_id_cyclic_get(gfp);
	if (id < 0) {
		errno = id;
		goto err;
	}
	xdp_rxq->mem.id = id;
	xdp_alloc->mem = xdp_rxq->mem;
	xdp_alloc->allocator = allocator;

	/* Insert allocator into ID lookup table */
	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
	if (IS_ERR(ptr)) {
		ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
		xdp_rxq->mem.id = 0;
		errno = PTR_ERR(ptr);
		goto err;
	}

	if (type == MEM_TYPE_PAGE_POOL)
		page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);

	mutex_unlock(&mem_id_lock);

	trace_mem_connect(xdp_alloc, xdp_rxq);
	return 0;
err:
	mutex_unlock(&mem_id_lock);
	kfree(xdp_alloc);
	return errno;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection. The @napi_direct boolean
 * is used for those call sites. Thus, allowing for faster recycling
 * of xdp_frames/pages in those cases.
 */
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
			 struct xdp_buff *xdp)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

	switch (mem->type) {
	case MEM_TYPE_PAGE_POOL:
		rcu_read_lock();
		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		page = virt_to_head_page(data);
		if (napi_direct && xdp_return_frame_no_direct())
			napi_direct = false;
		page_pool_put_full_page(xa->page_pool, page, napi_direct);
		rcu_read_unlock();
		break;
	case MEM_TYPE_PAGE_SHARED:
		page_frag_free(data);
		break;
	case MEM_TYPE_PAGE_ORDER0:
		page = virt_to_page(data); /* Assumes order0 page*/
		put_page(page);
		break;
	case MEM_TYPE_XSK_BUFF_POOL:
		/* NB! Only valid from an xdp_buff! */
		xsk_buff_free(xdp);
		break;
	default:
		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
		WARN(1, "Incorrect XDP memory type (%d) usage", mem->type);
		break;
	}
}
void xdp_return_frame(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);
void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, true, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
/* XDP bulk APIs introduce a defer/flush mechanism to return
 * pages belonging to the same xdp_mem_allocator object
 * (identified via the mem.id field) in bulk to optimize
 * I-cache and D-cache.
 * The bulk queue size is set to 16 to be aligned to how
 * XDP_REDIRECT bulking works. The bulk is flushed when
 * it is full or when mem.id changes.
 * xdp_frame_bulk is usually stored/allocated on the function
 * call-stack to avoid locking penalties.
 */
void xdp_flush_frame_bulk(struct xdp_frame_bulk *bq)
{
	struct xdp_mem_allocator *xa = bq->xa;

	if (unlikely(!xa || !bq->count))
		return;

	page_pool_put_page_bulk(xa->page_pool, bq->q, bq->count);
	/* bq->xa is not cleared to save lookup, if mem.id same in next bulk */
	bq->count = 0;
}
EXPORT_SYMBOL_GPL(xdp_flush_frame_bulk);
/* Must be called with rcu_read_lock held */
void xdp_return_frame_bulk(struct xdp_frame *xdpf,
			   struct xdp_frame_bulk *bq)
{
	struct xdp_mem_info *mem = &xdpf->mem;
	struct xdp_mem_allocator *xa;

	if (mem->type != MEM_TYPE_PAGE_POOL) {
		__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
		return;
	}

	xa = bq->xa;
	if (unlikely(!xa)) {
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		bq->count = 0;
		bq->xa = xa;
	}

	if (bq->count == XDP_BULK_QUEUE_SIZE)
		xdp_flush_frame_bulk(bq);

	if (unlikely(mem->id != xa->mem.id)) {
		xdp_flush_frame_bulk(bq);
		bq->xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
	}

	bq->q[bq->count++] = xdpf->data;
}
EXPORT_SYMBOL_GPL(xdp_return_frame_bulk);
void xdp_return_buff(struct xdp_buff *xdp)
{
	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
}
/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

	rcu_read_lock();
	xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
	page = virt_to_head_page(data);
	if (xa)
		page_pool_release_page(xa->page_pool, page);
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(__xdp_release_frame);
void xdp_attachment_setup(struct xdp_attachment_info *info,
			  struct netdev_bpf *bpf)
{
	if (info->prog)
		bpf_prog_put(info->prog);
	info->prog = bpf->prog;
	info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
{
	unsigned int metasize, totsize;
	void *addr, *data_to_copy;
	struct xdp_frame *xdpf;
	struct page *page;

	/* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */
	metasize = xdp_data_meta_unsupported(xdp) ? 0 :
		   xdp->data - xdp->data_meta;
	totsize = xdp->data_end - xdp->data + metasize;

	if (sizeof(*xdpf) + totsize > PAGE_SIZE)
		return NULL;

	page = dev_alloc_page();
	if (!page)
		return NULL;

	addr = page_to_virt(page);
	xdpf = addr;
	memset(xdpf, 0, sizeof(*xdpf));

	addr += sizeof(*xdpf);
	data_to_copy = metasize ? xdp->data_meta : xdp->data;
	memcpy(addr, data_to_copy, totsize);

	xdpf->data = addr + metasize;
	xdpf->len = totsize - metasize;
	xdpf->headroom = 0;
	xdpf->metasize = metasize;
	xdpf->frame_sz = PAGE_SIZE;
	xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;

	xsk_buff_free(xdp);
	return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
/* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */
void xdp_warn(const char *msg, const char *func, const int line)
{
	WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg);
}
EXPORT_SYMBOL_GPL(xdp_warn);
int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
{
	n_skb = kmem_cache_alloc_bulk(skbuff_head_cache, gfp,
				      n_skb, skbs);
	if (unlikely(!n_skb))
		return -ENOMEM;

	return 0;
}
EXPORT_SYMBOL_GPL(xdp_alloc_skb_bulk);
struct sk_buff *__xdp_build_skb_from_frame(struct xdp_frame *xdpf,
					   struct sk_buff *skb,
					   struct net_device *dev)
{
	unsigned int headroom, frame_size;
	void *hard_start;

	/* Part of headroom was reserved to xdpf */
	headroom = sizeof(*xdpf) + xdpf->headroom;

	/* Memory size backing xdp_frame data already has reserved
	 * room for build_skb to place skb_shared_info in tailroom.
	 */
	frame_size = xdpf->frame_sz;

	hard_start = xdpf->data - headroom;
	skb = build_skb_around(skb, hard_start, frame_size);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, headroom);
	__skb_put(skb, xdpf->len);
	if (xdpf->metasize)
		skb_metadata_set(skb, xdpf->metasize);

	/* Essential SKB info: protocol and skb->dev */
	skb->protocol = eth_type_trans(skb, dev);

	/* Optional SKB info, currently missing:
	 * - HW checksum info		(skb->ip_summed)
	 * - HW RX hash			(skb_set_hash)
	 * - RX ring dev queue index	(skb_record_rx_queue)
	 */

	/* Until page_pool gets an SKB return path, release DMA here */
	xdp_release_frame(xdpf);

	/* Allow SKB to reuse area used by xdp_frame */
	xdp_scrub_frame(xdpf);

	return skb;
}
EXPORT_SYMBOL_GPL(__xdp_build_skb_from_frame);
struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
					 struct net_device *dev)
{
	struct sk_buff *skb;

	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
	if (unlikely(!skb))
		return NULL;

	memset(skb, 0, offsetof(struct sk_buff, tail));

	return __xdp_build_skb_from_frame(xdpf, skb, dev);
}
EXPORT_SYMBOL_GPL(xdp_build_skb_from_frame);
struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
{
	unsigned int headroom, totalsize;
	struct xdp_frame *nxdpf;
	struct page *page;
	void *addr;

	headroom = xdpf->headroom + sizeof(*xdpf);
	totalsize = headroom + xdpf->len;

	if (unlikely(totalsize > PAGE_SIZE))
		return NULL;
	page = dev_alloc_page();
	if (!page)
		return NULL;
	addr = page_to_virt(page);

	memcpy(addr, xdpf, totalsize);

	nxdpf = addr;
	nxdpf->data = addr + headroom;
	nxdpf->frame_sz = PAGE_SIZE;
	nxdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
	nxdpf->mem.id = 0;

	return nxdpf;
}