]>
Commit | Line | Data |
---|---|---|
ae24345d YS |
1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* Copyright (c) 2020 Facebook */ | |
3 | ||
4 | #include <linux/fs.h> | |
ac51d99b | 5 | #include <linux/anon_inodes.h> |
ae24345d YS |
6 | #include <linux/filter.h> |
7 | #include <linux/bpf.h> | |
8 | ||
9 | struct bpf_iter_target_info { | |
10 | struct list_head list; | |
15172a46 | 11 | const struct bpf_iter_reg *reg_info; |
15d83c4d | 12 | u32 btf_id; /* cached value */ |
ae24345d YS |
13 | }; |
14 | ||
de4e05ca YS |
15 | struct bpf_iter_link { |
16 | struct bpf_link link; | |
a5cbe05a | 17 | struct bpf_iter_aux_info aux; |
de4e05ca YS |
18 | struct bpf_iter_target_info *tinfo; |
19 | }; | |
20 | ||
ac51d99b YS |
21 | struct bpf_iter_priv_data { |
22 | struct bpf_iter_target_info *tinfo; | |
a5cbe05a | 23 | const struct bpf_iter_seq_info *seq_info; |
ac51d99b YS |
24 | struct bpf_prog *prog; |
25 | u64 session_id; | |
26 | u64 seq_num; | |
27 | bool done_stop; | |
28 | u8 target_private[] __aligned(8); | |
29 | }; | |
30 | ||
ae24345d YS |
31 | static struct list_head targets = LIST_HEAD_INIT(targets); |
32 | static DEFINE_MUTEX(targets_mutex); | |
33 | ||
2057c92b YS |
34 | /* protect bpf_iter_link changes */ |
35 | static DEFINE_MUTEX(link_mutex); | |
36 | ||
ac51d99b YS |
37 | /* incremented on every opened seq_file */ |
38 | static atomic64_t session_id; | |
39 | ||
a5cbe05a YS |
40 | static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, |
41 | const struct bpf_iter_seq_info *seq_info); | |
367ec3e4 | 42 | |
e5158d98 YS |
43 | static void bpf_iter_inc_seq_num(struct seq_file *seq) |
44 | { | |
45 | struct bpf_iter_priv_data *iter_priv; | |
46 | ||
47 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
48 | target_private); | |
49 | iter_priv->seq_num++; | |
50 | } | |
51 | ||
52 | static void bpf_iter_dec_seq_num(struct seq_file *seq) | |
53 | { | |
54 | struct bpf_iter_priv_data *iter_priv; | |
55 | ||
56 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
57 | target_private); | |
58 | iter_priv->seq_num--; | |
59 | } | |
60 | ||
61 | static void bpf_iter_done_stop(struct seq_file *seq) | |
62 | { | |
63 | struct bpf_iter_priv_data *iter_priv; | |
64 | ||
65 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
66 | target_private); | |
67 | iter_priv->done_stop = true; | |
68 | } | |
69 | ||
cf83b2d2 YS |
70 | static bool bpf_iter_support_resched(struct seq_file *seq) |
71 | { | |
72 | struct bpf_iter_priv_data *iter_priv; | |
73 | ||
74 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
75 | target_private); | |
76 | return iter_priv->tinfo->reg_info->feature & BPF_ITER_RESCHED; | |
77 | } | |
78 | ||
e679654a YS |
/* Upper bound on the number of objects one bpf_seq_read() call visits
 * before it bails out.
 */
#define MAX_ITER_OBJECTS	1000000
81 | ||
fd4f12bc YS |
82 | /* bpf_seq_read, a customized and simpler version for bpf iterator. |
83 | * no_llseek is assumed for this file. | |
84 | * The following are differences from seq_read(): | |
85 | * . fixed buffer size (PAGE_SIZE) | |
86 | * . assuming no_llseek | |
87 | * . stop() may call bpf program, handling potential overflow there | |
88 | */ | |
89 | static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size, | |
90 | loff_t *ppos) | |
91 | { | |
92 | struct seq_file *seq = file->private_data; | |
93 | size_t n, offs, copied = 0; | |
e679654a | 94 | int err = 0, num_objs = 0; |
cf83b2d2 | 95 | bool can_resched; |
fd4f12bc YS |
96 | void *p; |
97 | ||
98 | mutex_lock(&seq->lock); | |
99 | ||
100 | if (!seq->buf) { | |
af653209 AM |
101 | seq->size = PAGE_SIZE << 3; |
102 | seq->buf = kvmalloc(seq->size, GFP_KERNEL); | |
fd4f12bc YS |
103 | if (!seq->buf) { |
104 | err = -ENOMEM; | |
105 | goto done; | |
106 | } | |
107 | } | |
108 | ||
109 | if (seq->count) { | |
110 | n = min(seq->count, size); | |
111 | err = copy_to_user(buf, seq->buf + seq->from, n); | |
112 | if (err) { | |
113 | err = -EFAULT; | |
114 | goto done; | |
115 | } | |
116 | seq->count -= n; | |
117 | seq->from += n; | |
118 | copied = n; | |
119 | goto done; | |
120 | } | |
121 | ||
122 | seq->from = 0; | |
123 | p = seq->op->start(seq, &seq->index); | |
124 | if (!p) | |
125 | goto stop; | |
126 | if (IS_ERR(p)) { | |
127 | err = PTR_ERR(p); | |
128 | seq->op->stop(seq, p); | |
129 | seq->count = 0; | |
130 | goto done; | |
131 | } | |
132 | ||
133 | err = seq->op->show(seq, p); | |
134 | if (err > 0) { | |
e5158d98 YS |
135 | /* object is skipped, decrease seq_num, so next |
136 | * valid object can reuse the same seq_num. | |
137 | */ | |
138 | bpf_iter_dec_seq_num(seq); | |
fd4f12bc YS |
139 | seq->count = 0; |
140 | } else if (err < 0 || seq_has_overflowed(seq)) { | |
141 | if (!err) | |
142 | err = -E2BIG; | |
143 | seq->op->stop(seq, p); | |
144 | seq->count = 0; | |
145 | goto done; | |
146 | } | |
147 | ||
cf83b2d2 | 148 | can_resched = bpf_iter_support_resched(seq); |
fd4f12bc YS |
149 | while (1) { |
150 | loff_t pos = seq->index; | |
151 | ||
e679654a | 152 | num_objs++; |
fd4f12bc YS |
153 | offs = seq->count; |
154 | p = seq->op->next(seq, p, &seq->index); | |
155 | if (pos == seq->index) { | |
156 | pr_info_ratelimited("buggy seq_file .next function %ps " | |
157 | "did not updated position index\n", | |
158 | seq->op->next); | |
159 | seq->index++; | |
160 | } | |
161 | ||
162 | if (IS_ERR_OR_NULL(p)) | |
163 | break; | |
164 | ||
e5158d98 YS |
165 | /* got a valid next object, increase seq_num */ |
166 | bpf_iter_inc_seq_num(seq); | |
167 | ||
fd4f12bc YS |
168 | if (seq->count >= size) |
169 | break; | |
170 | ||
e679654a YS |
171 | if (num_objs >= MAX_ITER_OBJECTS) { |
172 | if (offs == 0) { | |
173 | err = -EAGAIN; | |
174 | seq->op->stop(seq, p); | |
175 | goto done; | |
176 | } | |
177 | break; | |
178 | } | |
179 | ||
fd4f12bc YS |
180 | err = seq->op->show(seq, p); |
181 | if (err > 0) { | |
e5158d98 | 182 | bpf_iter_dec_seq_num(seq); |
fd4f12bc YS |
183 | seq->count = offs; |
184 | } else if (err < 0 || seq_has_overflowed(seq)) { | |
185 | seq->count = offs; | |
186 | if (offs == 0) { | |
187 | if (!err) | |
188 | err = -E2BIG; | |
189 | seq->op->stop(seq, p); | |
190 | goto done; | |
191 | } | |
192 | break; | |
193 | } | |
cf83b2d2 YS |
194 | |
195 | if (can_resched) | |
196 | cond_resched(); | |
fd4f12bc YS |
197 | } |
198 | stop: | |
199 | offs = seq->count; | |
200 | /* bpf program called if !p */ | |
201 | seq->op->stop(seq, p); | |
e5158d98 YS |
202 | if (!p) { |
203 | if (!seq_has_overflowed(seq)) { | |
204 | bpf_iter_done_stop(seq); | |
205 | } else { | |
206 | seq->count = offs; | |
207 | if (offs == 0) { | |
208 | err = -E2BIG; | |
209 | goto done; | |
210 | } | |
fd4f12bc YS |
211 | } |
212 | } | |
213 | ||
214 | n = min(seq->count, size); | |
215 | err = copy_to_user(buf, seq->buf, n); | |
216 | if (err) { | |
217 | err = -EFAULT; | |
218 | goto done; | |
219 | } | |
220 | copied = n; | |
221 | seq->count -= n; | |
222 | seq->from = n; | |
223 | done: | |
224 | if (!copied) | |
225 | copied = err; | |
226 | else | |
227 | *ppos += copied; | |
228 | mutex_unlock(&seq->lock); | |
229 | return copied; | |
230 | } | |
231 | ||
a5cbe05a YS |
232 | static const struct bpf_iter_seq_info * |
233 | __get_seq_info(struct bpf_iter_link *link) | |
234 | { | |
235 | const struct bpf_iter_seq_info *seq_info; | |
236 | ||
237 | if (link->aux.map) { | |
238 | seq_info = link->aux.map->ops->iter_seq_info; | |
239 | if (seq_info) | |
240 | return seq_info; | |
241 | } | |
242 | ||
243 | return link->tinfo->reg_info->seq_info; | |
244 | } | |
245 | ||
367ec3e4 YS |
246 | static int iter_open(struct inode *inode, struct file *file) |
247 | { | |
248 | struct bpf_iter_link *link = inode->i_private; | |
249 | ||
a5cbe05a | 250 | return prepare_seq_file(file, link, __get_seq_info(link)); |
367ec3e4 YS |
251 | } |
252 | ||
ac51d99b YS |
253 | static int iter_release(struct inode *inode, struct file *file) |
254 | { | |
255 | struct bpf_iter_priv_data *iter_priv; | |
256 | struct seq_file *seq; | |
257 | ||
258 | seq = file->private_data; | |
259 | if (!seq) | |
260 | return 0; | |
261 | ||
262 | iter_priv = container_of(seq->private, struct bpf_iter_priv_data, | |
263 | target_private); | |
264 | ||
a5cbe05a YS |
265 | if (iter_priv->seq_info->fini_seq_private) |
266 | iter_priv->seq_info->fini_seq_private(seq->private); | |
ac51d99b YS |
267 | |
268 | bpf_prog_put(iter_priv->prog); | |
269 | seq->private = iter_priv; | |
270 | ||
271 | return seq_release_private(inode, file); | |
272 | } | |
273 | ||
367ec3e4 YS |
274 | const struct file_operations bpf_iter_fops = { |
275 | .open = iter_open, | |
ac51d99b YS |
276 | .llseek = no_llseek, |
277 | .read = bpf_seq_read, | |
278 | .release = iter_release, | |
279 | }; | |
280 | ||
15172a46 YS |
281 | /* The argument reg_info will be cached in bpf_iter_target_info. |
282 | * The common practice is to declare target reg_info as | |
283 | * a const static variable and passed as an argument to | |
284 | * bpf_iter_reg_target(). | |
285 | */ | |
286 | int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info) | |
ae24345d YS |
287 | { |
288 | struct bpf_iter_target_info *tinfo; | |
289 | ||
17d8beda | 290 | tinfo = kzalloc(sizeof(*tinfo), GFP_KERNEL); |
ae24345d YS |
291 | if (!tinfo) |
292 | return -ENOMEM; | |
293 | ||
15172a46 | 294 | tinfo->reg_info = reg_info; |
ae24345d YS |
295 | INIT_LIST_HEAD(&tinfo->list); |
296 | ||
297 | mutex_lock(&targets_mutex); | |
298 | list_add(&tinfo->list, &targets); | |
299 | mutex_unlock(&targets_mutex); | |
300 | ||
301 | return 0; | |
302 | } | |
303 | ||
ab2ee4fc | 304 | void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info) |
ae24345d YS |
305 | { |
306 | struct bpf_iter_target_info *tinfo; | |
307 | bool found = false; | |
308 | ||
309 | mutex_lock(&targets_mutex); | |
310 | list_for_each_entry(tinfo, &targets, list) { | |
ab2ee4fc | 311 | if (reg_info == tinfo->reg_info) { |
ae24345d YS |
312 | list_del(&tinfo->list); |
313 | kfree(tinfo); | |
314 | found = true; | |
315 | break; | |
316 | } | |
317 | } | |
318 | mutex_unlock(&targets_mutex); | |
319 | ||
320 | WARN_ON(found == false); | |
321 | } | |
15d83c4d YS |
322 | |
323 | static void cache_btf_id(struct bpf_iter_target_info *tinfo, | |
324 | struct bpf_prog *prog) | |
325 | { | |
326 | tinfo->btf_id = prog->aux->attach_btf_id; | |
327 | } | |
328 | ||
329 | bool bpf_iter_prog_supported(struct bpf_prog *prog) | |
330 | { | |
331 | const char *attach_fname = prog->aux->attach_func_name; | |
332 | u32 prog_btf_id = prog->aux->attach_btf_id; | |
333 | const char *prefix = BPF_ITER_FUNC_PREFIX; | |
334 | struct bpf_iter_target_info *tinfo; | |
335 | int prefix_len = strlen(prefix); | |
336 | bool supported = false; | |
337 | ||
338 | if (strncmp(attach_fname, prefix, prefix_len)) | |
339 | return false; | |
340 | ||
341 | mutex_lock(&targets_mutex); | |
342 | list_for_each_entry(tinfo, &targets, list) { | |
343 | if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) { | |
344 | supported = true; | |
345 | break; | |
346 | } | |
15172a46 | 347 | if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) { |
15d83c4d YS |
348 | cache_btf_id(tinfo, prog); |
349 | supported = true; | |
350 | break; | |
351 | } | |
352 | } | |
353 | mutex_unlock(&targets_mutex); | |
354 | ||
3c32cc1b YS |
355 | if (supported) { |
356 | prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size; | |
357 | prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info; | |
358 | } | |
359 | ||
15d83c4d YS |
360 | return supported; |
361 | } | |
de4e05ca | 362 | |
3cee6fb8 MKL |
363 | const struct bpf_func_proto * |
364 | bpf_iter_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |
365 | { | |
366 | const struct bpf_iter_target_info *tinfo; | |
367 | const struct bpf_func_proto *fn = NULL; | |
368 | ||
369 | mutex_lock(&targets_mutex); | |
370 | list_for_each_entry(tinfo, &targets, list) { | |
371 | if (tinfo->btf_id == prog->aux->attach_btf_id) { | |
372 | const struct bpf_iter_reg *reg_info; | |
373 | ||
374 | reg_info = tinfo->reg_info; | |
375 | if (reg_info->get_func_proto) | |
376 | fn = reg_info->get_func_proto(func_id, prog); | |
377 | break; | |
378 | } | |
379 | } | |
380 | mutex_unlock(&targets_mutex); | |
381 | ||
382 | return fn; | |
383 | } | |
384 | ||
de4e05ca YS |
385 | static void bpf_iter_link_release(struct bpf_link *link) |
386 | { | |
a5cbe05a YS |
387 | struct bpf_iter_link *iter_link = |
388 | container_of(link, struct bpf_iter_link, link); | |
389 | ||
5e7b3020 YS |
390 | if (iter_link->tinfo->reg_info->detach_target) |
391 | iter_link->tinfo->reg_info->detach_target(&iter_link->aux); | |
de4e05ca YS |
392 | } |
393 | ||
394 | static void bpf_iter_link_dealloc(struct bpf_link *link) | |
395 | { | |
396 | struct bpf_iter_link *iter_link = | |
397 | container_of(link, struct bpf_iter_link, link); | |
398 | ||
399 | kfree(iter_link); | |
400 | } | |
401 | ||
2057c92b YS |
402 | static int bpf_iter_link_replace(struct bpf_link *link, |
403 | struct bpf_prog *new_prog, | |
404 | struct bpf_prog *old_prog) | |
405 | { | |
406 | int ret = 0; | |
407 | ||
408 | mutex_lock(&link_mutex); | |
409 | if (old_prog && link->prog != old_prog) { | |
410 | ret = -EPERM; | |
411 | goto out_unlock; | |
412 | } | |
413 | ||
414 | if (link->prog->type != new_prog->type || | |
415 | link->prog->expected_attach_type != new_prog->expected_attach_type || | |
416 | link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) { | |
417 | ret = -EINVAL; | |
418 | goto out_unlock; | |
419 | } | |
420 | ||
421 | old_prog = xchg(&link->prog, new_prog); | |
422 | bpf_prog_put(old_prog); | |
423 | ||
424 | out_unlock: | |
425 | mutex_unlock(&link_mutex); | |
426 | return ret; | |
427 | } | |
428 | ||
6b0a249a YS |
429 | static void bpf_iter_link_show_fdinfo(const struct bpf_link *link, |
430 | struct seq_file *seq) | |
431 | { | |
432 | struct bpf_iter_link *iter_link = | |
433 | container_of(link, struct bpf_iter_link, link); | |
434 | bpf_iter_show_fdinfo_t show_fdinfo; | |
435 | ||
436 | seq_printf(seq, | |
437 | "target_name:\t%s\n", | |
438 | iter_link->tinfo->reg_info->target); | |
439 | ||
440 | show_fdinfo = iter_link->tinfo->reg_info->show_fdinfo; | |
441 | if (show_fdinfo) | |
442 | show_fdinfo(&iter_link->aux, seq); | |
443 | } | |
444 | ||
445 | static int bpf_iter_link_fill_link_info(const struct bpf_link *link, | |
446 | struct bpf_link_info *info) | |
447 | { | |
448 | struct bpf_iter_link *iter_link = | |
449 | container_of(link, struct bpf_iter_link, link); | |
450 | char __user *ubuf = u64_to_user_ptr(info->iter.target_name); | |
451 | bpf_iter_fill_link_info_t fill_link_info; | |
452 | u32 ulen = info->iter.target_name_len; | |
453 | const char *target_name; | |
454 | u32 target_len; | |
455 | ||
456 | if (!ulen ^ !ubuf) | |
457 | return -EINVAL; | |
458 | ||
459 | target_name = iter_link->tinfo->reg_info->target; | |
460 | target_len = strlen(target_name); | |
461 | info->iter.target_name_len = target_len + 1; | |
462 | ||
463 | if (ubuf) { | |
464 | if (ulen >= target_len + 1) { | |
465 | if (copy_to_user(ubuf, target_name, target_len + 1)) | |
466 | return -EFAULT; | |
467 | } else { | |
468 | char zero = '\0'; | |
469 | ||
470 | if (copy_to_user(ubuf, target_name, ulen - 1)) | |
471 | return -EFAULT; | |
472 | if (put_user(zero, ubuf + ulen - 1)) | |
473 | return -EFAULT; | |
474 | return -ENOSPC; | |
475 | } | |
476 | } | |
477 | ||
478 | fill_link_info = iter_link->tinfo->reg_info->fill_link_info; | |
479 | if (fill_link_info) | |
480 | return fill_link_info(&iter_link->aux, info); | |
481 | ||
482 | return 0; | |
483 | } | |
484 | ||
de4e05ca YS |
485 | static const struct bpf_link_ops bpf_iter_link_lops = { |
486 | .release = bpf_iter_link_release, | |
487 | .dealloc = bpf_iter_link_dealloc, | |
2057c92b | 488 | .update_prog = bpf_iter_link_replace, |
6b0a249a YS |
489 | .show_fdinfo = bpf_iter_link_show_fdinfo, |
490 | .fill_link_info = bpf_iter_link_fill_link_info, | |
de4e05ca YS |
491 | }; |
492 | ||
367ec3e4 YS |
493 | bool bpf_link_is_iter(struct bpf_link *link) |
494 | { | |
495 | return link->ops == &bpf_iter_link_lops; | |
496 | } | |
497 | ||
af2ac3e1 AS |
498 | int bpf_iter_link_attach(const union bpf_attr *attr, bpfptr_t uattr, |
499 | struct bpf_prog *prog) | |
de4e05ca YS |
500 | { |
501 | struct bpf_link_primer link_primer; | |
502 | struct bpf_iter_target_info *tinfo; | |
5e7b3020 | 503 | union bpf_iter_link_info linfo; |
de4e05ca | 504 | struct bpf_iter_link *link; |
5e7b3020 | 505 | u32 prog_btf_id, linfo_len; |
de4e05ca | 506 | bool existed = false; |
af2ac3e1 | 507 | bpfptr_t ulinfo; |
de4e05ca YS |
508 | int err; |
509 | ||
5e7b3020 YS |
510 | if (attr->link_create.target_fd || attr->link_create.flags) |
511 | return -EINVAL; | |
512 | ||
513 | memset(&linfo, 0, sizeof(union bpf_iter_link_info)); | |
514 | ||
af2ac3e1 | 515 | ulinfo = make_bpfptr(attr->link_create.iter_info, uattr.is_kernel); |
5e7b3020 | 516 | linfo_len = attr->link_create.iter_info_len; |
af2ac3e1 | 517 | if (bpfptr_is_null(ulinfo) ^ !linfo_len) |
5e7b3020 YS |
518 | return -EINVAL; |
519 | ||
af2ac3e1 | 520 | if (!bpfptr_is_null(ulinfo)) { |
5e7b3020 YS |
521 | err = bpf_check_uarg_tail_zero(ulinfo, sizeof(linfo), |
522 | linfo_len); | |
523 | if (err) | |
524 | return err; | |
525 | linfo_len = min_t(u32, linfo_len, sizeof(linfo)); | |
af2ac3e1 | 526 | if (copy_from_bpfptr(&linfo, ulinfo, linfo_len)) |
5e7b3020 YS |
527 | return -EFAULT; |
528 | } | |
529 | ||
de4e05ca YS |
530 | prog_btf_id = prog->aux->attach_btf_id; |
531 | mutex_lock(&targets_mutex); | |
532 | list_for_each_entry(tinfo, &targets, list) { | |
533 | if (tinfo->btf_id == prog_btf_id) { | |
534 | existed = true; | |
535 | break; | |
536 | } | |
537 | } | |
538 | mutex_unlock(&targets_mutex); | |
539 | if (!existed) | |
540 | return -ENOENT; | |
541 | ||
542 | link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN); | |
543 | if (!link) | |
544 | return -ENOMEM; | |
545 | ||
546 | bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog); | |
547 | link->tinfo = tinfo; | |
548 | ||
549 | err = bpf_link_prime(&link->link, &link_primer); | |
550 | if (err) { | |
551 | kfree(link); | |
552 | return err; | |
553 | } | |
554 | ||
5e7b3020 YS |
555 | if (tinfo->reg_info->attach_target) { |
556 | err = tinfo->reg_info->attach_target(prog, &linfo, &link->aux); | |
a5cbe05a | 557 | if (err) { |
5e7b3020 YS |
558 | bpf_link_cleanup(&link_primer); |
559 | return err; | |
a5cbe05a | 560 | } |
a5cbe05a YS |
561 | } |
562 | ||
de4e05ca YS |
563 | return bpf_link_settle(&link_primer); |
564 | } | |
ac51d99b YS |
565 | |
566 | static void init_seq_meta(struct bpf_iter_priv_data *priv_data, | |
567 | struct bpf_iter_target_info *tinfo, | |
a5cbe05a | 568 | const struct bpf_iter_seq_info *seq_info, |
ac51d99b YS |
569 | struct bpf_prog *prog) |
570 | { | |
571 | priv_data->tinfo = tinfo; | |
a5cbe05a | 572 | priv_data->seq_info = seq_info; |
ac51d99b YS |
573 | priv_data->prog = prog; |
574 | priv_data->session_id = atomic64_inc_return(&session_id); | |
575 | priv_data->seq_num = 0; | |
576 | priv_data->done_stop = false; | |
577 | } | |
578 | ||
a5cbe05a YS |
579 | static int prepare_seq_file(struct file *file, struct bpf_iter_link *link, |
580 | const struct bpf_iter_seq_info *seq_info) | |
ac51d99b YS |
581 | { |
582 | struct bpf_iter_priv_data *priv_data; | |
583 | struct bpf_iter_target_info *tinfo; | |
584 | struct bpf_prog *prog; | |
585 | u32 total_priv_dsize; | |
586 | struct seq_file *seq; | |
587 | int err = 0; | |
588 | ||
589 | mutex_lock(&link_mutex); | |
590 | prog = link->link.prog; | |
591 | bpf_prog_inc(prog); | |
592 | mutex_unlock(&link_mutex); | |
593 | ||
594 | tinfo = link->tinfo; | |
595 | total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) + | |
a5cbe05a YS |
596 | seq_info->seq_priv_size; |
597 | priv_data = __seq_open_private(file, seq_info->seq_ops, | |
15172a46 | 598 | total_priv_dsize); |
ac51d99b YS |
599 | if (!priv_data) { |
600 | err = -ENOMEM; | |
601 | goto release_prog; | |
602 | } | |
603 | ||
a5cbe05a YS |
604 | if (seq_info->init_seq_private) { |
605 | err = seq_info->init_seq_private(priv_data->target_private, &link->aux); | |
ac51d99b YS |
606 | if (err) |
607 | goto release_seq_file; | |
608 | } | |
609 | ||
a5cbe05a | 610 | init_seq_meta(priv_data, tinfo, seq_info, prog); |
ac51d99b YS |
611 | seq = file->private_data; |
612 | seq->private = priv_data->target_private; | |
613 | ||
614 | return 0; | |
615 | ||
616 | release_seq_file: | |
617 | seq_release_private(file->f_inode, file); | |
618 | file->private_data = NULL; | |
619 | release_prog: | |
620 | bpf_prog_put(prog); | |
621 | return err; | |
622 | } | |
623 | ||
624 | int bpf_iter_new_fd(struct bpf_link *link) | |
625 | { | |
a5cbe05a | 626 | struct bpf_iter_link *iter_link; |
ac51d99b YS |
627 | struct file *file; |
628 | unsigned int flags; | |
629 | int err, fd; | |
630 | ||
631 | if (link->ops != &bpf_iter_link_lops) | |
632 | return -EINVAL; | |
633 | ||
634 | flags = O_RDONLY | O_CLOEXEC; | |
635 | fd = get_unused_fd_flags(flags); | |
636 | if (fd < 0) | |
637 | return fd; | |
638 | ||
639 | file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags); | |
640 | if (IS_ERR(file)) { | |
641 | err = PTR_ERR(file); | |
642 | goto free_fd; | |
643 | } | |
644 | ||
a5cbe05a YS |
645 | iter_link = container_of(link, struct bpf_iter_link, link); |
646 | err = prepare_seq_file(file, iter_link, __get_seq_info(iter_link)); | |
ac51d99b YS |
647 | if (err) |
648 | goto free_file; | |
649 | ||
650 | fd_install(fd, file); | |
651 | return fd; | |
652 | ||
653 | free_file: | |
654 | fput(file); | |
655 | free_fd: | |
656 | put_unused_fd(fd); | |
657 | return err; | |
658 | } | |
e5158d98 YS |
659 | |
660 | struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop) | |
661 | { | |
662 | struct bpf_iter_priv_data *iter_priv; | |
663 | struct seq_file *seq; | |
664 | void *seq_priv; | |
665 | ||
666 | seq = meta->seq; | |
667 | if (seq->file->f_op != &bpf_iter_fops) | |
668 | return NULL; | |
669 | ||
670 | seq_priv = seq->private; | |
671 | iter_priv = container_of(seq_priv, struct bpf_iter_priv_data, | |
672 | target_private); | |
673 | ||
674 | if (in_stop && iter_priv->done_stop) | |
675 | return NULL; | |
676 | ||
677 | meta->session_id = iter_priv->session_id; | |
678 | meta->seq_num = iter_priv->seq_num; | |
679 | ||
680 | return iter_priv->prog; | |
681 | } | |
682 | ||
683 | int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx) | |
684 | { | |
685 | int ret; | |
686 | ||
687 | rcu_read_lock(); | |
688 | migrate_disable(); | |
fb7dd8bc | 689 | ret = bpf_prog_run(prog, ctx); |
e5158d98 YS |
690 | migrate_enable(); |
691 | rcu_read_unlock(); | |
692 | ||
2e3ed68b YS |
693 | /* bpf program can only return 0 or 1: |
694 | * 0 : okay | |
695 | * 1 : retry the same object | |
696 | * The bpf_iter_run_prog() return value | |
697 | * will be seq_ops->show() return value. | |
698 | */ | |
e5158d98 YS |
699 | return ret == 0 ? 0 : -EAGAIN; |
700 | } | |
69c087ba YS |
701 | |
702 | BPF_CALL_4(bpf_for_each_map_elem, struct bpf_map *, map, void *, callback_fn, | |
703 | void *, callback_ctx, u64, flags) | |
704 | { | |
705 | return map->ops->map_for_each_callback(map, callback_fn, callback_ctx, flags); | |
706 | } | |
707 | ||
708 | const struct bpf_func_proto bpf_for_each_map_elem_proto = { | |
709 | .func = bpf_for_each_map_elem, | |
710 | .gpl_only = false, | |
711 | .ret_type = RET_INTEGER, | |
712 | .arg1_type = ARG_CONST_MAP_PTR, | |
713 | .arg2_type = ARG_PTR_TO_FUNC, | |
714 | .arg3_type = ARG_PTR_TO_STACK_OR_NULL, | |
715 | .arg4_type = ARG_ANYTHING, | |
716 | }; |