// SPDX-License-Identifier: GPL-2.0-only
/* net/core/xdp.c
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 */
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <net/page_pool.h>

#include <net/xdp.h>

#define REG_STATE_NEW          0x0
#define REG_STATE_REGISTERED   0x1
#define REG_STATE_UNREGISTERED 0x2
#define REG_STATE_UNUSED       0x3

static DEFINE_IDA(mem_id_pool);
static DEFINE_MUTEX(mem_id_lock);
#define MEM_ID_MAX 0xFFFE
#define MEM_ID_MIN 1
static int mem_id_next = MEM_ID_MIN;

static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;

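/* Each registered memory-model allocator is tracked by one of these.
 * The entry is keyed by mem.id in the mem_id_ht rhashtable, so the RX
 * return path can map an xdp_mem_info back to its allocator.  The
 * defer_* members drive the delayed-work retry/warning logic used when
 * an allocator cannot be removed immediately because packet-pages are
 * still in flight.
 */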
struct xdp_mem_allocator {
        struct xdp_mem_info mem;
        union {
                void *allocator;
                struct page_pool *page_pool;
                struct zero_copy_allocator *zc_alloc;
        };
        struct rhash_head node;
        struct rcu_head rcu;
        struct delayed_work defer_wq;
        unsigned long defer_start;
        unsigned long defer_warn;
        int disconnect_cnt;
};

static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
        const u32 *k = data;
        const u32 key = *k;

        BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id)
                     != sizeof(u32));

        /* Use cyclic increasing ID as direct hash key */
        return key;
}

static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
                          const void *ptr)
{
        const struct xdp_mem_allocator *xa = ptr;
        u32 mem_id = *(u32 *)arg->key;

        return xa->mem.id != mem_id;
}

static const struct rhashtable_params mem_id_rht_params = {
        .nelem_hint = 64,
        .head_offset = offsetof(struct xdp_mem_allocator, node),
        .key_offset = offsetof(struct xdp_mem_allocator, mem.id),
        .key_len = FIELD_SIZEOF(struct xdp_mem_allocator, mem.id),
        .max_size = MEM_ID_MAX,
        .min_size = 8,
        .automatic_shrinking = true,
        .hashfn = xdp_mem_id_hashfn,
        .obj_cmpfn = xdp_mem_id_cmp,
};

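/* RCU callback that performs the final teardown of an allocator entry,
 * once no RCU readers can still see it.  For MEM_TYPE_PAGE_POOL this is
 * also where page_pool_free() releases the pool itself; the fields are
 * then poisoned to make use-after-free bugs easier to spot.
 */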
static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
{
        struct xdp_mem_allocator *xa;

        xa = container_of(rcu, struct xdp_mem_allocator, rcu);

        /* Allocator has indicated it's safe to remove before this is called */
        if (xa->mem.type == MEM_TYPE_PAGE_POOL)
                page_pool_free(xa->page_pool);

        /* Allow this ID to be reused */
        ida_simple_remove(&mem_id_pool, xa->mem.id);

        /* Poison memory */
        xa->mem.id = 0xFFFF;
        xa->mem.type = 0xF0F0;
        xa->allocator = (void *)0xDEAD9001;

        kfree(xa);
}

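/* Try to disconnect the allocator identified by @id from the RX path.
 * Returns true when the entry could be removed (or did not exist).  For
 * MEM_TYPE_PAGE_POOL, page_pool_request_shutdown() reports whether
 * packet-pages are still in flight; with @force the entry is removed
 * regardless.
 */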
static bool __mem_id_disconnect(int id, bool force)
{
        struct xdp_mem_allocator *xa;
        bool safe_to_remove = true;

        mutex_lock(&mem_id_lock);

        xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
        if (!xa) {
                mutex_unlock(&mem_id_lock);
                WARN(1, "Request remove non-existing id(%d), driver bug?", id);
                return true;
        }
        xa->disconnect_cnt++;

        /* Detects in-flight packet-pages for page_pool */
        if (xa->mem.type == MEM_TYPE_PAGE_POOL)
                safe_to_remove = page_pool_request_shutdown(xa->page_pool);

        /* TODO: Tracepoint will be added here in next-patch */

        if ((safe_to_remove || force) &&
            !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
                call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);

        mutex_unlock(&mem_id_lock);
        return (safe_to_remove || force);
}

#define DEFER_TIME (msecs_to_jiffies(1000))
#define DEFER_WARN_INTERVAL (30 * HZ)
#define DEFER_MAX_RETRIES 120

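/* Delayed-work handler that keeps retrying the disconnect.  It
 * reschedules itself every DEFER_TIME (1 sec), prints a stall warning
 * every DEFER_WARN_INTERVAL (30 sec), and after DEFER_MAX_RETRIES
 * attempts gives up waiting and forces the removal.
 */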
static void mem_id_disconnect_defer_retry(struct work_struct *wq)
{
        struct delayed_work *dwq = to_delayed_work(wq);
        struct xdp_mem_allocator *xa = container_of(dwq, typeof(*xa), defer_wq);
        bool force = false;

        if (xa->disconnect_cnt > DEFER_MAX_RETRIES)
                force = true;

        if (__mem_id_disconnect(xa->mem.id, force))
                return;

        /* Periodic warning */
        if (time_after_eq(jiffies, xa->defer_warn)) {
                int sec = (s32)((u32)jiffies - (u32)xa->defer_start) / HZ;

                pr_warn("%s() stalled mem.id=%u shutdown %d attempts %d sec\n",
                        __func__, xa->mem.id, xa->disconnect_cnt, sec);
                xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;
        }

        /* Still not ready to be disconnected, retry later */
        schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
}

void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
        struct xdp_mem_allocator *xa;
        int id = xdp_rxq->mem.id;

        if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
                WARN(1, "Missing register, driver bug");
                return;
        }

        if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL &&
            xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) {
                return;
        }

        if (id == 0)
                return;

        if (__mem_id_disconnect(id, false))
                return;

        /* Could not disconnect, defer new disconnect attempt to later */
        mutex_lock(&mem_id_lock);

        xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
        if (!xa) {
                mutex_unlock(&mem_id_lock);
                return;
        }
        xa->defer_start = jiffies;
        xa->defer_warn = jiffies + DEFER_WARN_INTERVAL;

        INIT_DELAYED_WORK(&xa->defer_wq, mem_id_disconnect_defer_retry);
        mutex_unlock(&mem_id_lock);
        schedule_delayed_work(&xa->defer_wq, DEFER_TIME);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);

/* This unregister operation also cleans up and destroys the allocator.
 * The page_pool_free() operation is only called once it is safe to
 * remove the allocator, which may be deferred to a workqueue.
 */
void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
        /* Simplify driver cleanup code paths, allow unreg "unused" */
        if (xdp_rxq->reg_state == REG_STATE_UNUSED)
                return;

        WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");

        xdp_rxq_info_unreg_mem_model(xdp_rxq);

        xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
        xdp_rxq->dev = NULL;

        /* Reset mem info to defaults */
        xdp_rxq->mem.id = 0;
        xdp_rxq->mem.type = 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);

static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
{
        memset(xdp_rxq, 0, sizeof(*xdp_rxq));
}

/* Returns 0 on success, negative on failure */
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
                     struct net_device *dev, u32 queue_index)
{
        if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
                WARN(1, "Driver promised not to register this");
                return -EINVAL;
        }

        if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
                WARN(1, "Missing unregister, handled but fix driver");
                xdp_rxq_info_unreg(xdp_rxq);
        }

        if (!dev) {
                WARN(1, "Missing net_device from driver");
                return -ENODEV;
        }

        /* State either UNREGISTERED or NEW */
        xdp_rxq_info_init(xdp_rxq);
        xdp_rxq->dev = dev;
        xdp_rxq->queue_index = queue_index;

        xdp_rxq->reg_state = REG_STATE_REGISTERED;
        return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);
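/* Illustrative sketch (not part of this file): a driver typically calls
 * this from its RX-ring setup path and pairs it with a memory-model
 * registration; the "rxq" and "priv" names below are placeholders.
 *
 *      err = xdp_rxq_info_reg(&rxq->xdp_rxq, priv->netdev, rxq->index);
 *      if (err)
 *              goto err_free_ring;
 *      err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
 *                                       MEM_TYPE_PAGE_SHARED, NULL);
 *      if (err)
 *              xdp_rxq_info_unreg(&rxq->xdp_rxq);
 *
 * On teardown the driver simply calls xdp_rxq_info_unreg(&rxq->xdp_rxq).
 */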

void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
        xdp_rxq->reg_state = REG_STATE_UNUSED;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);

bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
{
        return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);

static int __mem_id_init_hash_table(void)
{
        struct rhashtable *rht;
        int ret;

        if (unlikely(mem_id_init))
                return 0;

        rht = kzalloc(sizeof(*rht), GFP_KERNEL);
        if (!rht)
                return -ENOMEM;

        ret = rhashtable_init(rht, &mem_id_rht_params);
        if (ret < 0) {
                kfree(rht);
                return ret;
        }
        mem_id_ht = rht;
        smp_mb(); /* mutex lock should provide enough pairing */
        mem_id_init = true;

        return 0;
}

/* Allocate a cyclic ID that maps to allocator pointer.
 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
 *
 * Caller must lock mem_id_lock.
 */
static int __mem_id_cyclic_get(gfp_t gfp)
{
        int retries = 1;
        int id;

again:
        id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
        if (id < 0) {
                if (id == -ENOSPC) {
                        /* Cyclic allocator, reset next id */
                        if (retries--) {
                                mem_id_next = MEM_ID_MIN;
                                goto again;
                        }
                }
                return id; /* errno */
        }
        mem_id_next = id + 1;

        return id;
}
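/* Note on the cyclic behaviour above: IDs are handed out in increasing
 * order starting at mem_id_next.  When ida_simple_get() finds no free ID
 * up to MEM_ID_MAX (-ENOSPC), mem_id_next is reset to MEM_ID_MIN and the
 * allocation is retried exactly once before the error is returned.
 */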

static bool __is_supported_mem_type(enum xdp_mem_type type)
{
        if (type == MEM_TYPE_PAGE_POOL)
                return is_page_pool_compiled_in();

        if (type >= MEM_TYPE_MAX)
                return false;

        return true;
}

int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
                               enum xdp_mem_type type, void *allocator)
{
        struct xdp_mem_allocator *xdp_alloc;
        gfp_t gfp = GFP_KERNEL;
        int id, errno, ret;
        void *ptr;

        if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
                WARN(1, "Missing register, driver bug");
                return -EFAULT;
        }

        if (!__is_supported_mem_type(type))
                return -EOPNOTSUPP;

        xdp_rxq->mem.type = type;

        if (!allocator) {
                if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY)
                        return -EINVAL; /* Setup time check page_pool req */
                return 0;
        }

        /* Delay init of rhashtable to save memory if feature isn't used */
        if (!mem_id_init) {
                mutex_lock(&mem_id_lock);
                ret = __mem_id_init_hash_table();
                mutex_unlock(&mem_id_lock);
                if (ret < 0) {
                        WARN_ON(1);
                        return ret;
                }
        }

        xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
        if (!xdp_alloc)
                return -ENOMEM;

        mutex_lock(&mem_id_lock);
        id = __mem_id_cyclic_get(gfp);
        if (id < 0) {
                errno = id;
                goto err;
        }
        xdp_rxq->mem.id = id;
        xdp_alloc->mem = xdp_rxq->mem;
        xdp_alloc->allocator = allocator;

        /* Insert allocator into ID lookup table */
        ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
        if (IS_ERR(ptr)) {
                ida_simple_remove(&mem_id_pool, xdp_rxq->mem.id);
                xdp_rxq->mem.id = 0;
                errno = PTR_ERR(ptr);
                goto err;
        }

        mutex_unlock(&mem_id_lock);

        return 0;
err:
        mutex_unlock(&mem_id_lock);
        kfree(xdp_alloc);
        return errno;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
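/* Illustrative sketch (not part of this file): a driver that recycles
 * RX pages through page_pool registers the pool as the memory model;
 * "pp_params", "pp" and "rxq" are placeholders.
 *
 *      pp = page_pool_create(&pp_params);
 *      if (IS_ERR(pp))
 *              return PTR_ERR(pp);
 *      err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq,
 *                                       MEM_TYPE_PAGE_POOL, pp);
 *      if (err)
 *              page_pool_free(pp); /* release the pool again */
 *
 * After this, frames returned via xdp_return_frame() find the pool
 * through mem.id and recycle their pages into it.
 */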

/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection. The @napi_direct boolean
 * is used for those call sites, allowing faster recycling of
 * xdp_frames/pages in those cases.
 */
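/* Dispatch on mem->type: MEM_TYPE_PAGE_POOL recycles the page into its
 * page_pool (directly into the NAPI cache when napi_direct allows it),
 * MEM_TYPE_PAGE_SHARED drops a page_frag reference, MEM_TYPE_PAGE_ORDER0
 * puts the page, and MEM_TYPE_ZERO_COPY hands the buffer back to the
 * zero-copy allocator via its free() callback.
 */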
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
                         unsigned long handle)
{
        struct xdp_mem_allocator *xa;
        struct page *page;

        switch (mem->type) {
        case MEM_TYPE_PAGE_POOL:
                rcu_read_lock();
                /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
                xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
                page = virt_to_head_page(data);
                if (likely(xa)) {
                        napi_direct &= !xdp_return_frame_no_direct();
                        page_pool_put_page(xa->page_pool, page, napi_direct);
                } else {
                        /* Hopefully the stack trace shows who to blame for late return */
                        WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id);
                        put_page(page);
                }
                rcu_read_unlock();
                break;
        case MEM_TYPE_PAGE_SHARED:
                page_frag_free(data);
                break;
        case MEM_TYPE_PAGE_ORDER0:
                page = virt_to_page(data); /* Assumes order0 page */
                put_page(page);
                break;
        case MEM_TYPE_ZERO_COPY:
                /* NB! Only valid from an xdp_buff! */
                rcu_read_lock();
                /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
                xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
                xa->zc_alloc->free(xa->zc_alloc, handle);
                rcu_read_unlock();
        default:
                /* Not possible, checked in xdp_rxq_info_reg_mem_model() */
                break;
        }
}

void xdp_return_frame(struct xdp_frame *xdpf)
{
        __xdp_return(xdpf->data, &xdpf->mem, false, 0);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);

void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
        __xdp_return(xdpf->data, &xdpf->mem, true, 0);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);

void xdp_return_buff(struct xdp_buff *xdp)
{
        __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
}
EXPORT_SYMBOL_GPL(xdp_return_buff);

/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
{
        struct xdp_mem_allocator *xa;
        struct page *page;

        rcu_read_lock();
        xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
        page = virt_to_head_page(data);
        if (xa)
                page_pool_release_page(xa->page_pool, page);
        rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(__xdp_release_frame);

int xdp_attachment_query(struct xdp_attachment_info *info,
                         struct netdev_bpf *bpf)
{
        bpf->prog_id = info->prog ? info->prog->aux->id : 0;
        bpf->prog_flags = info->prog ? info->flags : 0;
        return 0;
}
EXPORT_SYMBOL_GPL(xdp_attachment_query);

bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
                             struct netdev_bpf *bpf)
{
        if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) {
                NL_SET_ERR_MSG(bpf->extack,
                               "program loaded with different flags");
                return false;
        }
        return true;
}
EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok);
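/* Illustrative sketch (not part of this file): drivers typically use the
 * xdp_attachment_* helpers from their ndo_bpf() XDP_SETUP_PROG handler;
 * "my_xdp_setup" and "priv" are placeholders for driver code holding an
 * xdp_attachment_info.
 *
 *      static int my_xdp_setup(struct my_priv *priv, struct netdev_bpf *bpf)
 *      {
 *              if (!xdp_attachment_flags_ok(&priv->xdp, bpf))
 *                      return -EBUSY;
 *              ... (re)configure hardware for bpf->prog ...
 *              xdp_attachment_setup(&priv->xdp, bpf);
 *              return 0;
 *      }
 */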

void xdp_attachment_setup(struct xdp_attachment_info *info,
                          struct netdev_bpf *bpf)
{
        if (info->prog)
                bpf_prog_put(info->prog);
        info->prog = bpf->prog;
        info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);
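/* Convert a zero-copy xdp_buff into a standalone xdp_frame by copying
 * its payload (and metadata, when supported) into a freshly allocated
 * order-0 page.  Zero-copy buffers are backed by the umem and cannot
 * leave the driver, so the copy is unavoidable; the original buff is
 * handed back to its allocator via xdp_return_buff().
 */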
struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
{
        unsigned int metasize, totsize;
        void *addr, *data_to_copy;
        struct xdp_frame *xdpf;
        struct page *page;

        /* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */
        metasize = xdp_data_meta_unsupported(xdp) ? 0 :
                   xdp->data - xdp->data_meta;
        totsize = xdp->data_end - xdp->data + metasize;

        if (sizeof(*xdpf) + totsize > PAGE_SIZE)
                return NULL;

        page = dev_alloc_page();
        if (!page)
                return NULL;

        addr = page_to_virt(page);
        xdpf = addr;
        memset(xdpf, 0, sizeof(*xdpf));

        addr += sizeof(*xdpf);
        data_to_copy = metasize ? xdp->data_meta : xdp->data;
        memcpy(addr, data_to_copy, totsize);

        xdpf->data = addr + metasize;
        xdpf->len = totsize - metasize;
        xdpf->headroom = 0;
        xdpf->metasize = metasize;
        xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;

        xdp_return_buff(xdp);
        return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);