]>
Commit | Line | Data |
---|---|---|
ae24345d YS |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright (c) 2020 Facebook */ | |
3 | ||
4 | #include <linux/fs.h> | |
ac51d99b | 5 | #include <linux/anon_inodes.h> |
ae24345d YS |
6 | #include <linux/filter.h> |
7 | #include <linux/bpf.h> | |
8 | ||
9 | struct bpf_iter_target_info { | |
10 | struct list_head list; | |
15172a46 | 11 | const struct bpf_iter_reg *reg_info; |
15d83c4d | 12 | u32 btf_id; /* cached value */ |
ae24345d YS |
13 | }; |
14 | ||
de4e05ca YS |
15 | struct bpf_iter_link { |
16 | struct bpf_link link; | |
17 | struct bpf_iter_target_info *tinfo; | |
18 | }; | |
19 | ||
ac51d99b YS |
20 | struct bpf_iter_priv_data { |
21 | struct bpf_iter_target_info *tinfo; | |
22 | struct bpf_prog *prog; | |
23 | u64 session_id; | |
24 | u64 seq_num; | |
25 | bool done_stop; | |
26 | u8 target_private[] __aligned(8); | |
27 | }; | |
28 | ||
ae24345d YS |
29 | static struct list_head targets = LIST_HEAD_INIT(targets); |
30 | static DEFINE_MUTEX(targets_mutex); | |
31 | ||
2057c92b YS |
32 | /* protect bpf_iter_link changes */ |
33 | static DEFINE_MUTEX(link_mutex); | |
34 | ||
ac51d99b YS |
35 | /* incremented on every opened seq_file */ |
36 | static atomic64_t session_id; | |
37 | ||
367ec3e4 YS |
38 | static int prepare_seq_file(struct file *file, struct bpf_iter_link *link); |
39 | ||
e5158d98 YS |
40 | static void bpf_iter_inc_seq_num(struct seq_file *seq) |
41 | { | |
42 | struct bpf_iter_priv_data *iter_priv; | |
43 | ||
44 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
45 | target_private); | |
46 | iter_priv->seq_num++; | |
47 | } | |
48 | ||
49 | static void bpf_iter_dec_seq_num(struct seq_file *seq) | |
50 | { | |
51 | struct bpf_iter_priv_data *iter_priv; | |
52 | ||
53 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
54 | target_private); | |
55 | iter_priv->seq_num--; | |
56 | } | |
57 | ||
58 | static void bpf_iter_done_stop(struct seq_file *seq) | |
59 | { | |
60 | struct bpf_iter_priv_data *iter_priv; | |
61 | ||
62 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
63 | target_private); | |
64 | iter_priv->done_stop = true; | |
65 | } | |
66 | ||
fd4f12bc YS |
67 | /* bpf_seq_read, a customized and simpler version for bpf iterator. |
68 | * no_llseek is assumed for this file. | |
69 | * The following are differences from seq_read(): | |
70 | * . fixed buffer size (PAGE_SIZE) | |
71 | * . assuming no_llseek | |
72 | * . stop() may call bpf program, handling potential overflow there | |
73 | */ | |
74 | static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, | |
75 | loff_t *ppos) | |
76 | { | |
77 | struct seq_file *seq = file->private_data; | |
78 | size_t n, offs, copied = 0; | |
79 | int err = 0; | |
80 | void *p; | |
81 | ||
82 | mutex_lock(&seq->lock); | |
83 | ||
84 | if (!seq->buf) { | |
85 | seq->size = PAGE_SIZE; | |
86 | seq->buf = kmalloc(seq->size, GFP_KERNEL); | |
87 | if (!seq->buf) { | |
88 | err = -ENOMEM; | |
89 | goto done; | |
90 | } | |
91 | } | |
92 | ||
93 | if (seq->count) { | |
94 | n = min(seq->count, size); | |
95 | err = copy_to_user(buf, seq->buf + seq->from, n); | |
96 | if (err) { | |
97 | err = -EFAULT; | |
98 | goto done; | |
99 | } | |
100 | seq->count -= n; | |
101 | seq->from += n; | |
102 | copied = n; | |
103 | goto done; | |
104 | } | |
105 | ||
106 | seq->from = 0; | |
107 | p = seq->op->start(seq, &seq->index); | |
108 | if (!p) | |
109 | goto stop; | |
110 | if (IS_ERR(p)) { | |
111 | err = PTR_ERR(p); | |
112 | seq->op->stop(seq, p); | |
113 | seq->count = 0; | |
114 | goto done; | |
115 | } | |
116 | ||
117 | err = seq->op->show(seq, p); | |
118 | if (err > 0) { | |
e5158d98 YS |
119 | /* object is skipped, decrease seq_num, so next |
120 | * valid object can reuse the same seq_num. | |
121 | */ | |
122 | bpf_iter_dec_seq_num(seq); | |
fd4f12bc YS |
123 | seq->count = 0; |
124 | } else if (err < 0 || seq_has_overflowed(seq)) { | |
125 | if (!err) | |
126 | err = -E2BIG; | |
127 | seq->op->stop(seq, p); | |
128 | seq->count = 0; | |
129 | goto done; | |
130 | } | |
131 | ||
132 | while (1) { | |
133 | loff_t pos = seq->index; | |
134 | ||
135 | offs = seq->count; | |
136 | p = seq->op->next(seq, p, &seq->index); | |
137 | if (pos == seq->index) { | |
138 | pr_info_ratelimited("buggy seq_file .next function %ps " | |
139 | "did not updated position index\n", | |
140 | seq->op->next); | |
141 | seq->index++; | |
142 | } | |
143 | ||
144 | if (IS_ERR_OR_NULL(p)) | |
145 | break; | |
146 | ||
e5158d98 YS |
147 | /* got a valid next object, increase seq_num */ |
148 | bpf_iter_inc_seq_num(seq); | |
149 | ||
fd4f12bc YS |
150 | if (seq->count >= size) |
151 | break; | |
152 | ||
153 | err = seq->op->show(seq, p); | |
154 | if (err > 0) { | |
e5158d98 | 155 | bpf_iter_dec_seq_num(seq); |
fd4f12bc YS |
156 | seq->count = offs; |
157 | } else if (err < 0 || seq_has_overflowed(seq)) { | |
158 | seq->count = offs; | |
159 | if (offs == 0) { | |
160 | if (!err) | |
161 | err = -E2BIG; | |
162 | seq->op->stop(seq, p); | |
163 | goto done; | |
164 | } | |
165 | break; | |
166 | } | |
167 | } | |
168 | stop: | |
169 | offs = seq->count; | |
170 | /* bpf program called if !p */ | |
171 | seq->op->stop(seq, p); | |
e5158d98 YS |
172 | if (!p) { |
173 | if (!seq_has_overflowed(seq)) { | |
174 | bpf_iter_done_stop(seq); | |
175 | } else { | |
176 | seq->count = offs; | |
177 | if (offs == 0) { | |
178 | err = -E2BIG; | |
179 | goto done; | |
180 | } | |
fd4f12bc YS |
181 | } |
182 | } | |
183 | ||
184 | n = min(seq->count, size); | |
185 | err = copy_to_user(buf, seq->buf, n); | |
186 | if (err) { | |
187 | err = -EFAULT; | |
188 | goto done; | |
189 | } | |
190 | copied = n; | |
191 | seq->count -= n; | |
192 | seq->from = n; | |
193 | done: | |
194 | if (!copied) | |
195 | copied = err; | |
196 | else | |
197 | *ppos += copied; | |
198 | mutex_unlock(&seq->lock); | |
199 | return copied; | |
200 | } | |
201 | ||
367ec3e4 YS |
202 | static int iter_open(struct inode *inode, struct file *file) |
203 | { | |
204 | struct bpf_iter_link *link = inode->i_private; | |
205 | ||
206 | return prepare_seq_file(file, link); | |
207 | } | |
208 | ||
ac51d99b YS |
209 | static int iter_release(struct inode *inode, struct file *file) |
210 | { | |
211 | struct bpf_iter_priv_data *iter_priv; | |
212 | struct seq_file *seq; | |
213 | ||
214 | seq = file->private_data; | |
215 | if (!seq) | |
216 | return 0; | |
217 | ||
218 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
219 | target_private); | |
220 | ||
15172a46 YS |
221 | if (iter_priv->tinfo->reg_info->fini_seq_private) |
222 | iter_priv->tinfo->reg_info->fini_seq_private(seq->private); | |
ac51d99b YS |
223 | |
224 | bpf_prog_put(iter_priv->prog); | |
225 | seq->private = iter_priv; | |
226 | ||
227 | return seq_release_private(inode, file); | |
228 | } | |
229 | ||
367ec3e4 YS |
230 | const struct file_operations bpf_iter_fops = { |
231 | .open = iter_open, | |
ac51d99b YS |
232 | .llseek = no_llseek, |
233 | .read = bpf_seq_read, | |
234 | .release = iter_release, | |
235 | }; | |
236 | ||
15172a46 YS |
237 | /* The argument reg_info will be cached in bpf_iter_target_info. |
238 | * The common practice is to declare target reg_info as | |
239 | * a const static variable and passed as an argument to | |
240 | * bpf_iter_reg_target(). | |
241 | */ | |
242 | int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info) | |
ae24345d YS |
243 | { |
244 | struct bpf_iter_target_info *tinfo; | |
245 | ||
246 | tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL); | |
247 | if (!tinfo) | |
248 | return -ENOMEM; | |
249 | ||
15172a46 | 250 | tinfo->reg_info = reg_info; |
ae24345d YS |
251 | INIT_LIST_HEAD(&tinfo->list); |
252 | ||
253 | mutex_lock(&targets_mutex); | |
254 | list_add(&tinfo->list, &targets); | |
255 | mutex_unlock(&targets_mutex); | |
256 | ||
257 | return 0; | |
258 | } | |
259 | ||
ab2ee4fc | 260 | void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info) |
ae24345d YS |
261 | { |
262 | struct bpf_iter_target_info *tinfo; | |
263 | bool found = false; | |
264 | ||
265 | mutex_lock(&targets_mutex); | |
266 | list_for_each_entry(tinfo, &targets, list) { | |
ab2ee4fc | 267 | if (reg_info == tinfo->reg_info) { |
ae24345d YS |
268 | list_del(&tinfo->list); |
269 | kfree(tinfo); | |
270 | found = true; | |
271 | break; | |
272 | } | |
273 | } | |
274 | mutex_unlock(&targets_mutex); | |
275 | ||
276 | WARN_ON(found == false); | |
277 | } | |
15d83c4d YS |
278 | |
279 | static void cache_btf_id(struct bpf_iter_target_info *tinfo, | |
280 | struct bpf_prog *prog) | |
281 | { | |
282 | tinfo->btf_id = prog->aux->attach_btf_id; | |
283 | } | |
284 | ||
285 | bool bpf_iter_prog_supported(struct bpf_prog *prog) | |
286 | { | |
287 | const char *attach_fname = prog->aux->attach_func_name; | |
288 | u32 prog_btf_id = prog->aux->attach_btf_id; | |
289 | const char *prefix = BPF_ITER_FUNC_PREFIX; | |
290 | struct bpf_iter_target_info *tinfo; | |
291 | int prefix_len = strlen(prefix); | |
292 | bool supported = false; | |
293 | ||
294 | if (strncmp(attach_fname, prefix, prefix_len)) | |
295 | return false; | |
296 | ||
297 | mutex_lock(&targets_mutex); | |
298 | list_for_each_entry(tinfo, &targets, list) { | |
299 | if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) { | |
300 | supported = true; | |
301 | break; | |
302 | } | |
15172a46 | 303 | if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) { |
15d83c4d YS |
304 | cache_btf_id(tinfo, prog); |
305 | supported = true; | |
306 | break; | |
307 | } | |
308 | } | |
309 | mutex_unlock(&targets_mutex); | |
310 | ||
311 | return supported; | |
312 | } | |
de4e05ca YS |
313 | |
/* ->release() callback of the iterator link; intentionally does nothing. */
static void bpf_iter_link_release(struct bpf_link *link)
{
}
317 | ||
318 | static void bpf_iter_link_dealloc(struct bpf_link *link) | |
319 | { | |
320 | struct bpf_iter_link *iter_link = | |
321 | container_of(link, struct bpf_iter_link, link); | |
322 | ||
323 | kfree(iter_link); | |
324 | } | |
325 | ||
2057c92b YS |
326 | static int bpf_iter_link_replace(struct bpf_link *link, |
327 | struct bpf_prog *new_prog, | |
328 | struct bpf_prog *old_prog) | |
329 | { | |
330 | int ret = 0; | |
331 | ||
332 | mutex_lock(&link_mutex); | |
333 | if (old_prog && link->prog != old_prog) { | |
334 | ret = -EPERM; | |
335 | goto out_unlock; | |
336 | } | |
337 | ||
338 | if (link->prog->type != new_prog->type || | |
339 | link->prog->expected_attach_type != new_prog->expected_attach_type || | |
340 | link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) { | |
341 | ret = -EINVAL; | |
342 | goto out_unlock; | |
343 | } | |
344 | ||
345 | old_prog = xchg(&link->prog, new_prog); | |
346 | bpf_prog_put(old_prog); | |
347 | ||
348 | out_unlock: | |
349 | mutex_unlock(&link_mutex); | |
350 | return ret; | |
351 | } | |
352 | ||
de4e05ca YS |
353 | static const struct bpf_link_ops bpf_iter_link_lops = { |
354 | .release = bpf_iter_link_release, | |
355 | .dealloc = bpf_iter_link_dealloc, | |
2057c92b | 356 | .update_prog = bpf_iter_link_replace, |
de4e05ca YS |
357 | }; |
358 | ||
367ec3e4 YS |
359 | bool bpf_link_is_iter(struct bpf_link *link) |
360 | { | |
361 | return link->ops == &bpf_iter_link_lops; | |
362 | } | |
363 | ||
de4e05ca YS |
364 | int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) |
365 | { | |
366 | struct bpf_link_primer link_primer; | |
367 | struct bpf_iter_target_info *tinfo; | |
368 | struct bpf_iter_link *link; | |
369 | bool existed = false; | |
370 | u32 prog_btf_id; | |
371 | int err; | |
372 | ||
373 | if (attr->link_create.target_fd || attr->link_create.flags) | |
374 | return -EINVAL; | |
375 | ||
376 | prog_btf_id = prog->aux->attach_btf_id; | |
377 | mutex_lock(&targets_mutex); | |
378 | list_for_each_entry(tinfo, &targets, list) { | |
379 | if (tinfo->btf_id == prog_btf_id) { | |
380 | existed = true; | |
381 | break; | |
382 | } | |
383 | } | |
384 | mutex_unlock(&targets_mutex); | |
385 | if (!existed) | |
386 | return -ENOENT; | |
387 | ||
388 | link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); | |
389 | if (!link) | |
390 | return -ENOMEM; | |
391 | ||
392 | bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); | |
393 | link->tinfo = tinfo; | |
394 | ||
395 | err = bpf_link_prime(&link->link, &link_primer); | |
396 | if (err) { | |
397 | kfree(link); | |
398 | return err; | |
399 | } | |
400 | ||
401 | return bpf_link_settle(&link_primer); | |
402 | } | |
ac51d99b YS |
403 | |
404 | static void init_seq_meta(struct bpf_iter_priv_data *priv_data, | |
405 | struct bpf_iter_target_info *tinfo, | |
406 | struct bpf_prog *prog) | |
407 | { | |
408 | priv_data->tinfo = tinfo; | |
409 | priv_data->prog = prog; | |
410 | priv_data->session_id = atomic64_inc_return(&session_id); | |
411 | priv_data->seq_num = 0; | |
412 | priv_data->done_stop = false; | |
413 | } | |
414 | ||
415 | static int prepare_seq_file(struct file *file, struct bpf_iter_link *link) | |
416 | { | |
417 | struct bpf_iter_priv_data *priv_data; | |
418 | struct bpf_iter_target_info *tinfo; | |
419 | struct bpf_prog *prog; | |
420 | u32 total_priv_dsize; | |
421 | struct seq_file *seq; | |
422 | int err = 0; | |
423 | ||
424 | mutex_lock(&link_mutex); | |
425 | prog = link->link.prog; | |
426 | bpf_prog_inc(prog); | |
427 | mutex_unlock(&link_mutex); | |
428 | ||
429 | tinfo = link->tinfo; | |
430 | total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + | |
15172a46 YS |
431 | tinfo->reg_info->seq_priv_size; |
432 | priv_data = __seq_open_private(file, tinfo->reg_info->seq_ops, | |
433 | total_priv_dsize); | |
ac51d99b YS |
434 | if (!priv_data) { |
435 | err = -ENOMEM; | |
436 | goto release_prog; | |
437 | } | |
438 | ||
15172a46 YS |
439 | if (tinfo->reg_info->init_seq_private) { |
440 | err = tinfo->reg_info->init_seq_private(priv_data->target_private); | |
ac51d99b YS |
441 | if (err) |
442 | goto release_seq_file; | |
443 | } | |
444 | ||
445 | init_seq_meta(priv_data, tinfo, prog); | |
446 | seq = file->private_data; | |
447 | seq->private = priv_data->target_private; | |
448 | ||
449 | return 0; | |
450 | ||
451 | release_seq_file: | |
452 | seq_release_private(file->f_inode, file); | |
453 | file->private_data = NULL; | |
454 | release_prog: | |
455 | bpf_prog_put(prog); | |
456 | return err; | |
457 | } | |
458 | ||
459 | int bpf_iter_new_fd(struct bpf_link *link) | |
460 | { | |
461 | struct file *file; | |
462 | unsigned int flags; | |
463 | int err, fd; | |
464 | ||
465 | if (link->ops != &bpf_iter_link_lops) | |
466 | return -EINVAL; | |
467 | ||
468 | flags = O_RDONLY | O_CLOEXEC; | |
469 | fd = get_unused_fd_flags(flags); | |
470 | if (fd < 0) | |
471 | return fd; | |
472 | ||
473 | file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags); | |
474 | if (IS_ERR(file)) { | |
475 | err = PTR_ERR(file); | |
476 | goto free_fd; | |
477 | } | |
478 | ||
479 | err = prepare_seq_file(file, | |
480 | container_of(link, struct bpf_iter_link, link)); | |
481 | if (err) | |
482 | goto free_file; | |
483 | ||
484 | fd_install(fd, file); | |
485 | return fd; | |
486 | ||
487 | free_file: | |
488 | fput(file); | |
489 | free_fd: | |
490 | put_unused_fd(fd); | |
491 | return err; | |
492 | } | |
e5158d98 YS |
493 | |
494 | struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop) | |
495 | { | |
496 | struct bpf_iter_priv_data *iter_priv; | |
497 | struct seq_file *seq; | |
498 | void *seq_priv; | |
499 | ||
500 | seq = meta->seq; | |
501 | if (seq->file->f_op != &bpf_iter_fops) | |
502 | return NULL; | |
503 | ||
504 | seq_priv = seq->private; | |
505 | iter_priv = container_of(seq_priv, struct bpf_iter_priv_data, | |
506 | target_private); | |
507 | ||
508 | if (in_stop && iter_priv->done_stop) | |
509 | return NULL; | |
510 | ||
511 | meta->session_id = iter_priv->session_id; | |
512 | meta->seq_num = iter_priv->seq_num; | |
513 | ||
514 | return iter_priv->prog; | |
515 | } | |
516 | ||
517 | int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) | |
518 | { | |
519 | int ret; | |
520 | ||
521 | rcu_read_lock(); | |
522 | migrate_disable(); | |
523 | ret = BPF_PROG_RUN(prog, ctx); | |
524 | migrate_enable(); | |
525 | rcu_read_unlock(); | |
526 | ||
2e3ed68b YS |
527 | /* bpf program can only return 0 or 1: |
528 | * 0 : okay | |
529 | * 1 : retry the same object | |
530 | * The bpf_iter_run_prog() return value | |
531 | * will be seq_ops->show() return value. | |
532 | */ | |
e5158d98 YS |
533 | return ret == 0 ? 0 : -EAGAIN; |
534 | } |