// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

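/* Sockets that share a umem are kept on the umem's xsk_list. The RCU
 * list helpers are used so readers can traverse the list without taking
 * xsk_list_lock; writers are serialized by the spinlock.
 */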
void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_add_rcu(&xs->list, &umem->xsk_list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_del_rcu(&xs->list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

/* The umem is stored both in the _rx struct and the _tx struct as we do
 * not know if the device has more tx queues than rx, or the opposite.
 * This might also change during run time.
 */
static void xdp_reg_umem_at_qid(struct net_device *dev, struct xdp_umem *umem,
				u16 queue_id)
{
	if (queue_id < dev->real_num_rx_queues)
		dev->_rx[queue_id].umem = umem;
	if (queue_id < dev->real_num_tx_queues)
		dev->_tx[queue_id].umem = umem;
}

struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
				       u16 queue_id)
{
	if (queue_id < dev->real_num_rx_queues)
		return dev->_rx[queue_id].umem;
	if (queue_id < dev->real_num_tx_queues)
		return dev->_tx[queue_id].umem;

	return NULL;
}

static void xdp_clear_umem_at_qid(struct net_device *dev, u16 queue_id)
{
	if (queue_id < dev->real_num_rx_queues)
		dev->_rx[queue_id].umem = NULL;
	if (queue_id < dev->real_num_tx_queues)
		dev->_tx[queue_id].umem = NULL;
}

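/* Bind a umem to a device's rx/tx queue pair. In copy mode only the
 * queue registration is needed. For zero-copy the driver must implement
 * both ndo_bpf and ndo_xsk_async_xmit and accept the XDP_SETUP_XSK_UMEM
 * command; if it does not and XDP_ZEROCOPY was not explicitly requested,
 * we silently fall back to copy mode.
 */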
int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
			u16 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err = 0;

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	rtnl_lock();
	if (xdp_get_umem_from_qid(dev, queue_id)) {
		err = -EBUSY;
		goto out_rtnl_unlock;
	}

	xdp_reg_umem_at_qid(dev, umem, queue_id);
	umem->dev = dev;
	umem->queue_id = queue_id;
	if (force_copy)
		/* For copy-mode, we are done. */
		goto out_rtnl_unlock;

	if (!dev->netdev_ops->ndo_bpf ||
	    !dev->netdev_ops->ndo_xsk_async_xmit) {
		err = -EOPNOTSUPP;
		goto err_unreg_umem;
	}

	bpf.command = XDP_SETUP_XSK_UMEM;
	bpf.xsk.umem = umem;
	bpf.xsk.queue_id = queue_id;

	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
	if (err)
		goto err_unreg_umem;
	rtnl_unlock();

	dev_hold(dev);
	umem->zc = true;
	return 0;

err_unreg_umem:
	xdp_clear_umem_at_qid(dev, queue_id);
	if (!force_zc)
		err = 0; /* fallback to copy mode */
out_rtnl_unlock:
	rtnl_unlock();
	return err;
}

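/* Undo xdp_umem_assign_dev(): tell the driver to tear down the
 * zero-copy umem (if one was set up), clear the queue registration and
 * drop the device reference taken for zero-copy mode.
 */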
static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
	struct netdev_bpf bpf;
	int err;

	if (umem->zc) {
		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = NULL;
		bpf.xsk.queue_id = umem->queue_id;

		rtnl_lock();
		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
		rtnl_unlock();

		if (err)
			WARN(1, "failed to disable umem!\n");
	}

	if (umem->dev) {
		rtnl_lock();
		xdp_clear_umem_at_qid(umem->dev, umem->queue_id);
		rtnl_unlock();
	}

	if (umem->zc) {
		dev_put(umem->dev);
		umem->zc = false;
	}
}

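/* Release the pinned user pages. They are marked dirty since packet
 * data may have been written to them.
 */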
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	for (i = 0; i < umem->npgs; i++) {
		struct page *page = umem->pgs[i];

		set_page_dirty_lock(page);
		put_page(page);
	}

	kfree(umem->pgs);
	umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

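/* Final teardown of a umem: detach it from the device, destroy the fill
 * and completion rings, unpin and unaccount the user pages and free the
 * umem itself.
 */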
static void xdp_umem_release(struct xdp_umem *umem)
{
	struct task_struct *task;
	struct mm_struct *mm;

	xdp_umem_clear_dev(umem);

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xsk_reuseq_destroy(umem);

	xdp_umem_unpin_pages(umem);

	task = get_pid_task(umem->pid, PIDTYPE_PID);
	put_pid(umem->pid);
	if (!task)
		goto out;
	mm = get_task_mm(task);
	put_task_struct(task);
	if (!mm)
		goto out;

	mmput(mm);
	kfree(umem->pages);
	umem->pages = NULL;

	xdp_umem_unaccount_pages(umem);
out:
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

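/* Drop a reference to the umem. The release path can sleep (rtnl_lock,
 * mmput), so the last reference schedules the actual teardown on a
 * workqueue instead of doing it inline.
 */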
void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}
}

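/* Pin the umem's user pages in memory with get_user_pages() so they can
 * back packet buffers. On partial success the already pinned pages are
 * released again and -ENOMEM is returned.
 */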
static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
			    GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	npgs = get_user_pages(umem->address, umem->npgs,
			      gup_flags, &umem->pgs[0], NULL);
	up_write(&current->mm->mmap_sem);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

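/* Charge the pinned pages against the user's RLIMIT_MEMLOCK limit,
 * unless the caller has CAP_IPC_LOCK. The accounting is done with a
 * cmpxchg loop on the user's locked_vm counter.
 */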
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

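/* Validate a registration request and initialize the umem. The chunk
 * size must be a power of two between XDP_UMEM_MIN_CHUNK_SIZE and
 * PAGE_SIZE, the memory area must be page aligned, the chunks must fill
 * whole pages, and headroom plus XDP_PACKET_HEADROOM must fit within a
 * chunk. On success the pages are accounted against RLIMIT_MEMLOCK,
 * pinned, and their kernel addresses stored in umem->pages.
 */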
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	unsigned int chunks, chunks_per_page;
	u64 addr = mr->addr, size = mr->len;
	int size_chk, err, i;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	chunks = (unsigned int)div_u64(size, chunk_size);
	if (chunks == 0)
		return -EINVAL;

	chunks_per_page = PAGE_SIZE / chunk_size;
	if (chunks < chunks_per_page || chunks % chunks_per_page)
		return -EINVAL;

	headroom = ALIGN(headroom, 64);

	size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
	if (size_chk < 0)
		return -EINVAL;

	umem->pid = get_task_pid(current, PIDTYPE_PID);
	umem->address = (unsigned long)addr;
	umem->chunk_mask = ~((u64)chunk_size - 1);
	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size_nohr = chunk_size - headroom;
	umem->npgs = size / PAGE_SIZE;
	umem->pgs = NULL;
	umem->user = NULL;
	INIT_LIST_HEAD(&umem->xsk_list);
	spin_lock_init(&umem->xsk_list_lock);

	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		goto out;

	err = xdp_umem_pin_pages(umem);
	if (err)
		goto out_account;

	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
	if (!umem->pages) {
		err = -ENOMEM;
		goto out_account;
	}

	for (i = 0; i < umem->npgs; i++)
		umem->pages[i].addr = page_address(umem->pgs[i]);

	return 0;

out_account:
	xdp_umem_unaccount_pages(umem);
out:
	put_pid(umem->pid);
	return err;
}

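/* Allocate a umem and register the memory area described by @mr.
 * Returns the new umem on success, an ERR_PTR() on failure.
 */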
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = xdp_umem_reg(umem, mr);
	if (err) {
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}

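/* A umem is only considered valid once both its fill ring and its
 * completion ring have been created.
 */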
bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}