]>
Commit | Line | Data |
---|---|---|
30070984 DM |
1 | /* |
2 | * Functions to manage eBPF programs attached to cgroups | |
3 | * | |
4 | * Copyright (c) 2016 Daniel Mack | |
5 | * | |
6 | * This file is subject to the terms and conditions of version 2 of the GNU | |
7 | * General Public License. See the file COPYING in the main directory of the | |
8 | * Linux distribution for more details. | |
9 | */ | |
10 | ||
11 | #include <linux/kernel.h> | |
12 | #include <linux/atomic.h> | |
13 | #include <linux/cgroup.h> | |
7b146ceb | 14 | #include <linux/filter.h> |
30070984 | 15 | #include <linux/slab.h> |
7b146ceb | 16 | #include <linux/sysctl.h> |
808649fb | 17 | #include <linux/string.h> |
30070984 DM |
18 | #include <linux/bpf.h> |
19 | #include <linux/bpf-cgroup.h> | |
20 | #include <net/sock.h> | |
21 | ||
22 | DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); | |
23 | EXPORT_SYMBOL(cgroup_bpf_enabled_key); | |
24 | ||
25 | /** | |
26 | * cgroup_bpf_put() - put references of all bpf programs | |
27 | * @cgrp: the cgroup to modify | |
28 | */ | |
29 | void cgroup_bpf_put(struct cgroup *cgrp) | |
30 | { | |
8bad74f9 | 31 | enum bpf_cgroup_storage_type stype; |
30070984 DM |
32 | unsigned int type; |
33 | ||
324bda9e AS |
34 | for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { |
35 | struct list_head *progs = &cgrp->bpf.progs[type]; | |
36 | struct bpf_prog_list *pl, *tmp; | |
37 | ||
38 | list_for_each_entry_safe(pl, tmp, progs, node) { | |
39 | list_del(&pl->node); | |
40 | bpf_prog_put(pl->prog); | |
8bad74f9 RG |
41 | for_each_cgroup_storage_type(stype) { |
42 | bpf_cgroup_storage_unlink(pl->storage[stype]); | |
43 | bpf_cgroup_storage_free(pl->storage[stype]); | |
44 | } | |
324bda9e | 45 | kfree(pl); |
30070984 DM |
46 | static_branch_dec(&cgroup_bpf_enabled_key); |
47 | } | |
324bda9e AS |
48 | bpf_prog_array_free(cgrp->bpf.effective[type]); |
49 | } | |
50 | } | |
51 | ||
52 | /* count number of elements in the list. | |
53 | * it's slow but the list cannot be long | |
54 | */ | |
55 | static u32 prog_list_length(struct list_head *head) | |
56 | { | |
57 | struct bpf_prog_list *pl; | |
58 | u32 cnt = 0; | |
59 | ||
60 | list_for_each_entry(pl, head, node) { | |
61 | if (!pl->prog) | |
62 | continue; | |
63 | cnt++; | |
30070984 | 64 | } |
324bda9e AS |
65 | return cnt; |
66 | } | |
67 | ||
68 | /* if parent has non-overridable prog attached, | |
69 | * disallow attaching new programs to the descendent cgroup. | |
70 | * if parent has overridable or multi-prog, allow attaching | |
71 | */ | |
72 | static bool hierarchy_allows_attach(struct cgroup *cgrp, | |
73 | enum bpf_attach_type type, | |
74 | u32 new_flags) | |
75 | { | |
76 | struct cgroup *p; | |
77 | ||
78 | p = cgroup_parent(cgrp); | |
79 | if (!p) | |
80 | return true; | |
81 | do { | |
82 | u32 flags = p->bpf.flags[type]; | |
83 | u32 cnt; | |
84 | ||
85 | if (flags & BPF_F_ALLOW_MULTI) | |
86 | return true; | |
87 | cnt = prog_list_length(&p->bpf.progs[type]); | |
88 | WARN_ON_ONCE(cnt > 1); | |
89 | if (cnt == 1) | |
90 | return !!(flags & BPF_F_ALLOW_OVERRIDE); | |
91 | p = cgroup_parent(p); | |
92 | } while (p); | |
93 | return true; | |
94 | } | |
95 | ||
96 | /* compute a chain of effective programs for a given cgroup: | |
97 | * start from the list of programs in this cgroup and add | |
98 | * all parent programs. | |
99 | * Note that parent's F_ALLOW_OVERRIDE-type program is yielding | |
100 | * to programs in this cgroup | |
101 | */ | |
102 | static int compute_effective_progs(struct cgroup *cgrp, | |
103 | enum bpf_attach_type type, | |
104 | struct bpf_prog_array __rcu **array) | |
105 | { | |
8bad74f9 | 106 | enum bpf_cgroup_storage_type stype; |
3960f4fd | 107 | struct bpf_prog_array *progs; |
324bda9e AS |
108 | struct bpf_prog_list *pl; |
109 | struct cgroup *p = cgrp; | |
110 | int cnt = 0; | |
111 | ||
112 | /* count number of effective programs by walking parents */ | |
113 | do { | |
114 | if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) | |
115 | cnt += prog_list_length(&p->bpf.progs[type]); | |
116 | p = cgroup_parent(p); | |
117 | } while (p); | |
118 | ||
119 | progs = bpf_prog_array_alloc(cnt, GFP_KERNEL); | |
120 | if (!progs) | |
121 | return -ENOMEM; | |
122 | ||
123 | /* populate the array with effective progs */ | |
124 | cnt = 0; | |
125 | p = cgrp; | |
126 | do { | |
394e40a2 RG |
127 | if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) |
128 | continue; | |
129 | ||
130 | list_for_each_entry(pl, &p->bpf.progs[type], node) { | |
131 | if (!pl->prog) | |
132 | continue; | |
133 | ||
134 | progs->items[cnt].prog = pl->prog; | |
8bad74f9 RG |
135 | for_each_cgroup_storage_type(stype) |
136 | progs->items[cnt].cgroup_storage[stype] = | |
137 | pl->storage[stype]; | |
394e40a2 RG |
138 | cnt++; |
139 | } | |
140 | } while ((p = cgroup_parent(p))); | |
324bda9e | 141 | |
3960f4fd | 142 | rcu_assign_pointer(*array, progs); |
324bda9e AS |
143 | return 0; |
144 | } | |
145 | ||
146 | static void activate_effective_progs(struct cgroup *cgrp, | |
147 | enum bpf_attach_type type, | |
148 | struct bpf_prog_array __rcu *array) | |
149 | { | |
150 | struct bpf_prog_array __rcu *old_array; | |
151 | ||
152 | old_array = xchg(&cgrp->bpf.effective[type], array); | |
153 | /* free prog array after grace period, since __cgroup_bpf_run_*() | |
154 | * might be still walking the array | |
155 | */ | |
156 | bpf_prog_array_free(old_array); | |
30070984 DM |
157 | } |
158 | ||
159 | /** | |
160 | * cgroup_bpf_inherit() - inherit effective programs from parent | |
161 | * @cgrp: the cgroup to modify | |
30070984 | 162 | */ |
324bda9e | 163 | int cgroup_bpf_inherit(struct cgroup *cgrp) |
30070984 | 164 | { |
324bda9e AS |
165 | /* has to use marco instead of const int, since compiler thinks |
166 | * that array below is variable length | |
167 | */ | |
168 | #define NR ARRAY_SIZE(cgrp->bpf.effective) | |
169 | struct bpf_prog_array __rcu *arrays[NR] = {}; | |
170 | int i; | |
30070984 | 171 | |
324bda9e AS |
172 | for (i = 0; i < NR; i++) |
173 | INIT_LIST_HEAD(&cgrp->bpf.progs[i]); | |
30070984 | 174 | |
324bda9e AS |
175 | for (i = 0; i < NR; i++) |
176 | if (compute_effective_progs(cgrp, i, &arrays[i])) | |
177 | goto cleanup; | |
178 | ||
179 | for (i = 0; i < NR; i++) | |
180 | activate_effective_progs(cgrp, i, arrays[i]); | |
181 | ||
182 | return 0; | |
183 | cleanup: | |
184 | for (i = 0; i < NR; i++) | |
185 | bpf_prog_array_free(arrays[i]); | |
186 | return -ENOMEM; | |
30070984 DM |
187 | } |
188 | ||
85fc4b16 RG |
189 | static int update_effective_progs(struct cgroup *cgrp, |
190 | enum bpf_attach_type type) | |
191 | { | |
192 | struct cgroup_subsys_state *css; | |
193 | int err; | |
194 | ||
195 | /* allocate and recompute effective prog arrays */ | |
196 | css_for_each_descendant_pre(css, &cgrp->self) { | |
197 | struct cgroup *desc = container_of(css, struct cgroup, self); | |
198 | ||
199 | err = compute_effective_progs(desc, type, &desc->bpf.inactive); | |
200 | if (err) | |
201 | goto cleanup; | |
202 | } | |
203 | ||
204 | /* all allocations were successful. Activate all prog arrays */ | |
205 | css_for_each_descendant_pre(css, &cgrp->self) { | |
206 | struct cgroup *desc = container_of(css, struct cgroup, self); | |
207 | ||
208 | activate_effective_progs(desc, type, desc->bpf.inactive); | |
209 | desc->bpf.inactive = NULL; | |
210 | } | |
211 | ||
212 | return 0; | |
213 | ||
214 | cleanup: | |
215 | /* oom while computing effective. Free all computed effective arrays | |
216 | * since they were not activated | |
217 | */ | |
218 | css_for_each_descendant_pre(css, &cgrp->self) { | |
219 | struct cgroup *desc = container_of(css, struct cgroup, self); | |
220 | ||
221 | bpf_prog_array_free(desc->bpf.inactive); | |
222 | desc->bpf.inactive = NULL; | |
223 | } | |
224 | ||
225 | return err; | |
226 | } | |
227 | ||
324bda9e AS |
228 | #define BPF_CGROUP_MAX_PROGS 64 |
229 | ||
30070984 | 230 | /** |
324bda9e | 231 | * __cgroup_bpf_attach() - Attach the program to a cgroup, and |
30070984 DM |
232 | * propagate the change to descendants |
233 | * @cgrp: The cgroup which descendants to traverse | |
324bda9e AS |
234 | * @prog: A program to attach |
235 | * @type: Type of attach operation | |
1832f4ef | 236 | * @flags: Option flags |
30070984 DM |
237 | * |
238 | * Must be called with cgroup_mutex held. | |
239 | */ | |
324bda9e AS |
240 | int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, |
241 | enum bpf_attach_type type, u32 flags) | |
30070984 | 242 | { |
324bda9e AS |
243 | struct list_head *progs = &cgrp->bpf.progs[type]; |
244 | struct bpf_prog *old_prog = NULL; | |
8bad74f9 RG |
245 | struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE], |
246 | *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL}; | |
247 | enum bpf_cgroup_storage_type stype; | |
324bda9e AS |
248 | struct bpf_prog_list *pl; |
249 | bool pl_was_allocated; | |
324bda9e AS |
250 | int err; |
251 | ||
252 | if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) | |
253 | /* invalid combination */ | |
254 | return -EINVAL; | |
255 | ||
256 | if (!hierarchy_allows_attach(cgrp, type, flags)) | |
7f677633 AS |
257 | return -EPERM; |
258 | ||
324bda9e AS |
259 | if (!list_empty(progs) && cgrp->bpf.flags[type] != flags) |
260 | /* Disallow attaching non-overridable on top | |
261 | * of existing overridable in this cgroup. | |
262 | * Disallow attaching multi-prog if overridable or none | |
7f677633 AS |
263 | */ |
264 | return -EPERM; | |
265 | ||
324bda9e AS |
266 | if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) |
267 | return -E2BIG; | |
268 | ||
8bad74f9 RG |
269 | for_each_cgroup_storage_type(stype) { |
270 | storage[stype] = bpf_cgroup_storage_alloc(prog, stype); | |
271 | if (IS_ERR(storage[stype])) { | |
272 | storage[stype] = NULL; | |
273 | for_each_cgroup_storage_type(stype) | |
274 | bpf_cgroup_storage_free(storage[stype]); | |
275 | return -ENOMEM; | |
276 | } | |
277 | } | |
d7bf2c10 | 278 | |
324bda9e | 279 | if (flags & BPF_F_ALLOW_MULTI) { |
d7bf2c10 RG |
280 | list_for_each_entry(pl, progs, node) { |
281 | if (pl->prog == prog) { | |
324bda9e | 282 | /* disallow attaching the same prog twice */ |
8bad74f9 RG |
283 | for_each_cgroup_storage_type(stype) |
284 | bpf_cgroup_storage_free(storage[stype]); | |
324bda9e | 285 | return -EINVAL; |
d7bf2c10 RG |
286 | } |
287 | } | |
324bda9e AS |
288 | |
289 | pl = kmalloc(sizeof(*pl), GFP_KERNEL); | |
d7bf2c10 | 290 | if (!pl) { |
8bad74f9 RG |
291 | for_each_cgroup_storage_type(stype) |
292 | bpf_cgroup_storage_free(storage[stype]); | |
324bda9e | 293 | return -ENOMEM; |
d7bf2c10 RG |
294 | } |
295 | ||
324bda9e AS |
296 | pl_was_allocated = true; |
297 | pl->prog = prog; | |
8bad74f9 RG |
298 | for_each_cgroup_storage_type(stype) |
299 | pl->storage[stype] = storage[stype]; | |
324bda9e AS |
300 | list_add_tail(&pl->node, progs); |
301 | } else { | |
302 | if (list_empty(progs)) { | |
303 | pl = kmalloc(sizeof(*pl), GFP_KERNEL); | |
d7bf2c10 | 304 | if (!pl) { |
8bad74f9 RG |
305 | for_each_cgroup_storage_type(stype) |
306 | bpf_cgroup_storage_free(storage[stype]); | |
324bda9e | 307 | return -ENOMEM; |
d7bf2c10 | 308 | } |
324bda9e AS |
309 | pl_was_allocated = true; |
310 | list_add_tail(&pl->node, progs); | |
311 | } else { | |
312 | pl = list_first_entry(progs, typeof(*pl), node); | |
313 | old_prog = pl->prog; | |
8bad74f9 RG |
314 | for_each_cgroup_storage_type(stype) { |
315 | old_storage[stype] = pl->storage[stype]; | |
316 | bpf_cgroup_storage_unlink(old_storage[stype]); | |
317 | } | |
324bda9e AS |
318 | pl_was_allocated = false; |
319 | } | |
320 | pl->prog = prog; | |
8bad74f9 RG |
321 | for_each_cgroup_storage_type(stype) |
322 | pl->storage[stype] = storage[stype]; | |
7f677633 | 323 | } |
30070984 | 324 | |
324bda9e | 325 | cgrp->bpf.flags[type] = flags; |
7f677633 | 326 | |
85fc4b16 RG |
327 | err = update_effective_progs(cgrp, type); |
328 | if (err) | |
329 | goto cleanup; | |
324bda9e AS |
330 | |
331 | static_branch_inc(&cgroup_bpf_enabled_key); | |
8bad74f9 RG |
332 | for_each_cgroup_storage_type(stype) { |
333 | if (!old_storage[stype]) | |
334 | continue; | |
335 | bpf_cgroup_storage_free(old_storage[stype]); | |
336 | } | |
30070984 DM |
337 | if (old_prog) { |
338 | bpf_prog_put(old_prog); | |
339 | static_branch_dec(&cgroup_bpf_enabled_key); | |
340 | } | |
8bad74f9 RG |
341 | for_each_cgroup_storage_type(stype) |
342 | bpf_cgroup_storage_link(storage[stype], cgrp, type); | |
7f677633 | 343 | return 0; |
324bda9e AS |
344 | |
345 | cleanup: | |
324bda9e AS |
346 | /* and cleanup the prog list */ |
347 | pl->prog = old_prog; | |
8bad74f9 RG |
348 | for_each_cgroup_storage_type(stype) { |
349 | bpf_cgroup_storage_free(pl->storage[stype]); | |
350 | pl->storage[stype] = old_storage[stype]; | |
351 | bpf_cgroup_storage_link(old_storage[stype], cgrp, type); | |
352 | } | |
324bda9e AS |
353 | if (pl_was_allocated) { |
354 | list_del(&pl->node); | |
355 | kfree(pl); | |
356 | } | |
357 | return err; | |
358 | } | |
359 | ||
360 | /** | |
361 | * __cgroup_bpf_detach() - Detach the program from a cgroup, and | |
362 | * propagate the change to descendants | |
363 | * @cgrp: The cgroup which descendants to traverse | |
364 | * @prog: A program to detach or NULL | |
365 | * @type: Type of detach operation | |
366 | * | |
367 | * Must be called with cgroup_mutex held. | |
368 | */ | |
369 | int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, | |
1832f4ef | 370 | enum bpf_attach_type type) |
324bda9e AS |
371 | { |
372 | struct list_head *progs = &cgrp->bpf.progs[type]; | |
8bad74f9 | 373 | enum bpf_cgroup_storage_type stype; |
324bda9e AS |
374 | u32 flags = cgrp->bpf.flags[type]; |
375 | struct bpf_prog *old_prog = NULL; | |
324bda9e AS |
376 | struct bpf_prog_list *pl; |
377 | int err; | |
378 | ||
379 | if (flags & BPF_F_ALLOW_MULTI) { | |
380 | if (!prog) | |
381 | /* to detach MULTI prog the user has to specify valid FD | |
382 | * of the program to be detached | |
383 | */ | |
384 | return -EINVAL; | |
385 | } else { | |
386 | if (list_empty(progs)) | |
387 | /* report error when trying to detach and nothing is attached */ | |
388 | return -ENOENT; | |
389 | } | |
390 | ||
391 | if (flags & BPF_F_ALLOW_MULTI) { | |
392 | /* find the prog and detach it */ | |
393 | list_for_each_entry(pl, progs, node) { | |
394 | if (pl->prog != prog) | |
395 | continue; | |
396 | old_prog = prog; | |
397 | /* mark it deleted, so it's ignored while | |
398 | * recomputing effective | |
399 | */ | |
400 | pl->prog = NULL; | |
401 | break; | |
402 | } | |
403 | if (!old_prog) | |
404 | return -ENOENT; | |
405 | } else { | |
406 | /* to maintain backward compatibility NONE and OVERRIDE cgroups | |
407 | * allow detaching with invalid FD (prog==NULL) | |
408 | */ | |
409 | pl = list_first_entry(progs, typeof(*pl), node); | |
410 | old_prog = pl->prog; | |
411 | pl->prog = NULL; | |
412 | } | |
413 | ||
85fc4b16 RG |
414 | err = update_effective_progs(cgrp, type); |
415 | if (err) | |
416 | goto cleanup; | |
324bda9e AS |
417 | |
418 | /* now can actually delete it from this cgroup list */ | |
419 | list_del(&pl->node); | |
8bad74f9 RG |
420 | for_each_cgroup_storage_type(stype) { |
421 | bpf_cgroup_storage_unlink(pl->storage[stype]); | |
422 | bpf_cgroup_storage_free(pl->storage[stype]); | |
423 | } | |
324bda9e AS |
424 | kfree(pl); |
425 | if (list_empty(progs)) | |
426 | /* last program was detached, reset flags to zero */ | |
427 | cgrp->bpf.flags[type] = 0; | |
428 | ||
429 | bpf_prog_put(old_prog); | |
430 | static_branch_dec(&cgroup_bpf_enabled_key); | |
431 | return 0; | |
432 | ||
433 | cleanup: | |
324bda9e AS |
434 | /* and restore back old_prog */ |
435 | pl->prog = old_prog; | |
436 | return err; | |
30070984 DM |
437 | } |
438 | ||
468e2f64 AS |
439 | /* Must be called with cgroup_mutex held to avoid races. */ |
440 | int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, | |
441 | union bpf_attr __user *uattr) | |
442 | { | |
443 | __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); | |
444 | enum bpf_attach_type type = attr->query.attach_type; | |
445 | struct list_head *progs = &cgrp->bpf.progs[type]; | |
446 | u32 flags = cgrp->bpf.flags[type]; | |
447 | int cnt, ret = 0, i; | |
448 | ||
449 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) | |
450 | cnt = bpf_prog_array_length(cgrp->bpf.effective[type]); | |
451 | else | |
452 | cnt = prog_list_length(progs); | |
453 | ||
454 | if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) | |
455 | return -EFAULT; | |
456 | if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) | |
457 | return -EFAULT; | |
458 | if (attr->query.prog_cnt == 0 || !prog_ids || !cnt) | |
459 | /* return early if user requested only program count + flags */ | |
460 | return 0; | |
461 | if (attr->query.prog_cnt < cnt) { | |
462 | cnt = attr->query.prog_cnt; | |
463 | ret = -ENOSPC; | |
464 | } | |
465 | ||
466 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { | |
467 | return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type], | |
468 | prog_ids, cnt); | |
469 | } else { | |
470 | struct bpf_prog_list *pl; | |
471 | u32 id; | |
472 | ||
473 | i = 0; | |
474 | list_for_each_entry(pl, progs, node) { | |
475 | id = pl->prog->aux->id; | |
476 | if (copy_to_user(prog_ids + i, &id, sizeof(id))) | |
477 | return -EFAULT; | |
478 | if (++i == cnt) | |
479 | break; | |
480 | } | |
481 | } | |
482 | return ret; | |
483 | } | |
484 | ||
fdb5c453 SY |
485 | int cgroup_bpf_prog_attach(const union bpf_attr *attr, |
486 | enum bpf_prog_type ptype, struct bpf_prog *prog) | |
487 | { | |
488 | struct cgroup *cgrp; | |
489 | int ret; | |
490 | ||
491 | cgrp = cgroup_get_from_fd(attr->target_fd); | |
492 | if (IS_ERR(cgrp)) | |
493 | return PTR_ERR(cgrp); | |
494 | ||
495 | ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, | |
496 | attr->attach_flags); | |
497 | cgroup_put(cgrp); | |
498 | return ret; | |
499 | } | |
500 | ||
501 | int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) | |
502 | { | |
503 | struct bpf_prog *prog; | |
504 | struct cgroup *cgrp; | |
505 | int ret; | |
506 | ||
507 | cgrp = cgroup_get_from_fd(attr->target_fd); | |
508 | if (IS_ERR(cgrp)) | |
509 | return PTR_ERR(cgrp); | |
510 | ||
511 | prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); | |
512 | if (IS_ERR(prog)) | |
513 | prog = NULL; | |
514 | ||
515 | ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); | |
516 | if (prog) | |
517 | bpf_prog_put(prog); | |
518 | ||
519 | cgroup_put(cgrp); | |
520 | return ret; | |
521 | } | |
522 | ||
523 | int cgroup_bpf_prog_query(const union bpf_attr *attr, | |
524 | union bpf_attr __user *uattr) | |
525 | { | |
526 | struct cgroup *cgrp; | |
527 | int ret; | |
528 | ||
529 | cgrp = cgroup_get_from_fd(attr->query.target_fd); | |
530 | if (IS_ERR(cgrp)) | |
531 | return PTR_ERR(cgrp); | |
532 | ||
533 | ret = cgroup_bpf_query(cgrp, attr, uattr); | |
534 | ||
535 | cgroup_put(cgrp); | |
536 | return ret; | |
537 | } | |
538 | ||
30070984 | 539 | /** |
b2cd1257 | 540 | * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering |
8f917bba | 541 | * @sk: The socket sending or receiving traffic |
30070984 DM |
542 | * @skb: The skb that is being sent or received |
543 | * @type: The type of program to be exectuted | |
544 | * | |
545 | * If no socket is passed, or the socket is not of type INET or INET6, | |
546 | * this function does nothing and returns 0. | |
547 | * | |
548 | * The program type passed in via @type must be suitable for network | |
549 | * filtering. No further check is performed to assert that. | |
550 | * | |
551 | * This function will return %-EPERM if any if an attached program was found | |
552 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
553 | */ | |
b2cd1257 DA |
554 | int __cgroup_bpf_run_filter_skb(struct sock *sk, |
555 | struct sk_buff *skb, | |
556 | enum bpf_attach_type type) | |
30070984 | 557 | { |
324bda9e AS |
558 | unsigned int offset = skb->data - skb_network_header(skb); |
559 | struct sock *save_sk; | |
b39b5f41 | 560 | void *saved_data_end; |
30070984 | 561 | struct cgroup *cgrp; |
324bda9e | 562 | int ret; |
30070984 DM |
563 | |
564 | if (!sk || !sk_fullsock(sk)) | |
565 | return 0; | |
566 | ||
324bda9e | 567 | if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) |
30070984 DM |
568 | return 0; |
569 | ||
570 | cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
324bda9e AS |
571 | save_sk = skb->sk; |
572 | skb->sk = sk; | |
573 | __skb_push(skb, offset); | |
b39b5f41 SL |
574 | |
575 | /* compute pointers for the bpf prog */ | |
576 | bpf_compute_and_save_data_end(skb, &saved_data_end); | |
577 | ||
324bda9e | 578 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, |
6cab5e90 | 579 | __bpf_prog_run_save_cb); |
b39b5f41 | 580 | bpf_restore_data_end(skb, saved_data_end); |
324bda9e AS |
581 | __skb_pull(skb, offset); |
582 | skb->sk = save_sk; | |
583 | return ret == 1 ? 0 : -EPERM; | |
30070984 | 584 | } |
b2cd1257 | 585 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); |
61023658 DA |
586 | |
587 | /** | |
588 | * __cgroup_bpf_run_filter_sk() - Run a program on a sock | |
589 | * @sk: sock structure to manipulate | |
590 | * @type: The type of program to be exectuted | |
591 | * | |
592 | * socket is passed is expected to be of type INET or INET6. | |
593 | * | |
594 | * The program type passed in via @type must be suitable for sock | |
595 | * filtering. No further check is performed to assert that. | |
596 | * | |
597 | * This function will return %-EPERM if any if an attached program was found | |
598 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
599 | */ | |
600 | int __cgroup_bpf_run_filter_sk(struct sock *sk, | |
601 | enum bpf_attach_type type) | |
602 | { | |
603 | struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
324bda9e | 604 | int ret; |
61023658 | 605 | |
324bda9e AS |
606 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); |
607 | return ret == 1 ? 0 : -EPERM; | |
61023658 DA |
608 | } |
609 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); | |
40304b2a | 610 | |
4fbac77d AI |
611 | /** |
612 | * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and | |
613 | * provided by user sockaddr | |
614 | * @sk: sock struct that will use sockaddr | |
615 | * @uaddr: sockaddr struct provided by user | |
616 | * @type: The type of program to be exectuted | |
1cedee13 | 617 | * @t_ctx: Pointer to attach type specific context |
4fbac77d AI |
618 | * |
619 | * socket is expected to be of type INET or INET6. | |
620 | * | |
621 | * This function will return %-EPERM if an attached program is found and | |
622 | * returned value != 1 during execution. In all other cases, 0 is returned. | |
623 | */ | |
624 | int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, | |
625 | struct sockaddr *uaddr, | |
1cedee13 AI |
626 | enum bpf_attach_type type, |
627 | void *t_ctx) | |
4fbac77d AI |
628 | { |
629 | struct bpf_sock_addr_kern ctx = { | |
630 | .sk = sk, | |
631 | .uaddr = uaddr, | |
1cedee13 | 632 | .t_ctx = t_ctx, |
4fbac77d | 633 | }; |
1cedee13 | 634 | struct sockaddr_storage unspec; |
4fbac77d AI |
635 | struct cgroup *cgrp; |
636 | int ret; | |
637 | ||
638 | /* Check socket family since not all sockets represent network | |
639 | * endpoint (e.g. AF_UNIX). | |
640 | */ | |
641 | if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) | |
642 | return 0; | |
643 | ||
1cedee13 AI |
644 | if (!ctx.uaddr) { |
645 | memset(&unspec, 0, sizeof(unspec)); | |
646 | ctx.uaddr = (struct sockaddr *)&unspec; | |
647 | } | |
648 | ||
4fbac77d AI |
649 | cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); |
650 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); | |
651 | ||
652 | return ret == 1 ? 0 : -EPERM; | |
653 | } | |
654 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); | |
655 | ||
40304b2a LB |
656 | /** |
657 | * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock | |
658 | * @sk: socket to get cgroup from | |
659 | * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains | |
660 | * sk with connection information (IP addresses, etc.) May not contain | |
661 | * cgroup info if it is a req sock. | |
662 | * @type: The type of program to be exectuted | |
663 | * | |
664 | * socket passed is expected to be of type INET or INET6. | |
665 | * | |
666 | * The program type passed in via @type must be suitable for sock_ops | |
667 | * filtering. No further check is performed to assert that. | |
668 | * | |
669 | * This function will return %-EPERM if any if an attached program was found | |
670 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
671 | */ | |
672 | int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, | |
673 | struct bpf_sock_ops_kern *sock_ops, | |
674 | enum bpf_attach_type type) | |
675 | { | |
676 | struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
324bda9e | 677 | int ret; |
40304b2a | 678 | |
324bda9e AS |
679 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops, |
680 | BPF_PROG_RUN); | |
681 | return ret == 1 ? 0 : -EPERM; | |
40304b2a LB |
682 | } |
683 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); | |
ebc614f6 RG |
684 | |
685 | int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, | |
686 | short access, enum bpf_attach_type type) | |
687 | { | |
688 | struct cgroup *cgrp; | |
689 | struct bpf_cgroup_dev_ctx ctx = { | |
690 | .access_type = (access << 16) | dev_type, | |
691 | .major = major, | |
692 | .minor = minor, | |
693 | }; | |
694 | int allow = 1; | |
695 | ||
696 | rcu_read_lock(); | |
697 | cgrp = task_dfl_cgroup(current); | |
698 | allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, | |
699 | BPF_PROG_RUN); | |
700 | rcu_read_unlock(); | |
701 | ||
702 | return !allow; | |
703 | } | |
704 | EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); | |
705 | ||
706 | static const struct bpf_func_proto * | |
b1cd609d | 707 | cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
ebc614f6 RG |
708 | { |
709 | switch (func_id) { | |
710 | case BPF_FUNC_map_lookup_elem: | |
711 | return &bpf_map_lookup_elem_proto; | |
712 | case BPF_FUNC_map_update_elem: | |
713 | return &bpf_map_update_elem_proto; | |
714 | case BPF_FUNC_map_delete_elem: | |
715 | return &bpf_map_delete_elem_proto; | |
716 | case BPF_FUNC_get_current_uid_gid: | |
717 | return &bpf_get_current_uid_gid_proto; | |
cd339431 RG |
718 | case BPF_FUNC_get_local_storage: |
719 | return &bpf_get_local_storage_proto; | |
5bf7a60b YS |
720 | case BPF_FUNC_get_current_cgroup_id: |
721 | return &bpf_get_current_cgroup_id_proto; | |
ebc614f6 RG |
722 | case BPF_FUNC_trace_printk: |
723 | if (capable(CAP_SYS_ADMIN)) | |
724 | return bpf_get_trace_printk_proto(); | |
c8dc7980 | 725 | /* fall through */ |
ebc614f6 RG |
726 | default: |
727 | return NULL; | |
728 | } | |
729 | } | |
730 | ||
b1cd609d AI |
731 | static const struct bpf_func_proto * |
732 | cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |
733 | { | |
734 | return cgroup_base_func_proto(func_id, prog); | |
735 | } | |
736 | ||
ebc614f6 RG |
737 | static bool cgroup_dev_is_valid_access(int off, int size, |
738 | enum bpf_access_type type, | |
5e43f899 | 739 | const struct bpf_prog *prog, |
ebc614f6 RG |
740 | struct bpf_insn_access_aux *info) |
741 | { | |
06ef0ccb YS |
742 | const int size_default = sizeof(__u32); |
743 | ||
ebc614f6 RG |
744 | if (type == BPF_WRITE) |
745 | return false; | |
746 | ||
747 | if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) | |
748 | return false; | |
749 | /* The verifier guarantees that size > 0. */ | |
750 | if (off % size != 0) | |
751 | return false; | |
06ef0ccb YS |
752 | |
753 | switch (off) { | |
754 | case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type): | |
755 | bpf_ctx_record_field_size(info, size_default); | |
756 | if (!bpf_ctx_narrow_access_ok(off, size, size_default)) | |
757 | return false; | |
758 | break; | |
759 | default: | |
760 | if (size != size_default) | |
761 | return false; | |
762 | } | |
ebc614f6 RG |
763 | |
764 | return true; | |
765 | } | |
766 | ||
767 | const struct bpf_prog_ops cg_dev_prog_ops = { | |
768 | }; | |
769 | ||
770 | const struct bpf_verifier_ops cg_dev_verifier_ops = { | |
771 | .get_func_proto = cgroup_dev_func_proto, | |
772 | .is_valid_access = cgroup_dev_is_valid_access, | |
773 | }; | |
7b146ceb AI |
774 | |
775 | /** | |
776 | * __cgroup_bpf_run_filter_sysctl - Run a program on sysctl | |
777 | * | |
778 | * @head: sysctl table header | |
779 | * @table: sysctl table | |
780 | * @write: sysctl is being read (= 0) or written (= 1) | |
4e63acdf AI |
781 | * @buf: pointer to buffer passed by user space |
782 | * @pcount: value-result argument: value is size of buffer pointed to by @buf, | |
783 | * result is size of @new_buf if program set new value, initial value | |
784 | * otherwise | |
e1550bfe AI |
785 | * @ppos: value-result argument: value is position at which read from or write |
786 | * to sysctl is happening, result is new position if program overrode it, | |
787 | * initial value otherwise | |
4e63acdf AI |
788 | * @new_buf: pointer to pointer to new buffer that will be allocated if program |
789 | * overrides new value provided by user space on sysctl write | |
790 | * NOTE: it's caller responsibility to free *new_buf if it was set | |
7b146ceb AI |
791 | * @type: type of program to be executed |
792 | * | |
793 | * Program is run when sysctl is being accessed, either read or written, and | |
794 | * can allow or deny such access. | |
795 | * | |
796 | * This function will return %-EPERM if an attached program is found and | |
797 | * returned value != 1 during execution. In all other cases 0 is returned. | |
798 | */ | |
799 | int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, | |
800 | struct ctl_table *table, int write, | |
4e63acdf | 801 | void __user *buf, size_t *pcount, |
e1550bfe AI |
802 | loff_t *ppos, void **new_buf, |
803 | enum bpf_attach_type type) | |
7b146ceb AI |
804 | { |
805 | struct bpf_sysctl_kern ctx = { | |
806 | .head = head, | |
807 | .table = table, | |
808 | .write = write, | |
e1550bfe | 809 | .ppos = ppos, |
1d11b301 AI |
810 | .cur_val = NULL, |
811 | .cur_len = PAGE_SIZE, | |
4e63acdf AI |
812 | .new_val = NULL, |
813 | .new_len = 0, | |
814 | .new_updated = 0, | |
7b146ceb AI |
815 | }; |
816 | struct cgroup *cgrp; | |
817 | int ret; | |
818 | ||
1d11b301 AI |
819 | ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL); |
820 | if (ctx.cur_val) { | |
821 | mm_segment_t old_fs; | |
822 | loff_t pos = 0; | |
823 | ||
824 | old_fs = get_fs(); | |
825 | set_fs(KERNEL_DS); | |
826 | if (table->proc_handler(table, 0, (void __user *)ctx.cur_val, | |
827 | &ctx.cur_len, &pos)) { | |
828 | /* Let BPF program decide how to proceed. */ | |
829 | ctx.cur_len = 0; | |
830 | } | |
831 | set_fs(old_fs); | |
832 | } else { | |
833 | /* Let BPF program decide how to proceed. */ | |
834 | ctx.cur_len = 0; | |
835 | } | |
836 | ||
4e63acdf AI |
837 | if (write && buf && *pcount) { |
838 | /* BPF program should be able to override new value with a | |
839 | * buffer bigger than provided by user. | |
840 | */ | |
841 | ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL); | |
842 | ctx.new_len = min(PAGE_SIZE, *pcount); | |
843 | if (!ctx.new_val || | |
844 | copy_from_user(ctx.new_val, buf, ctx.new_len)) | |
845 | /* Let BPF program decide how to proceed. */ | |
846 | ctx.new_len = 0; | |
847 | } | |
848 | ||
7b146ceb AI |
849 | rcu_read_lock(); |
850 | cgrp = task_dfl_cgroup(current); | |
851 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); | |
852 | rcu_read_unlock(); | |
853 | ||
1d11b301 AI |
854 | kfree(ctx.cur_val); |
855 | ||
4e63acdf AI |
856 | if (ret == 1 && ctx.new_updated) { |
857 | *new_buf = ctx.new_val; | |
858 | *pcount = ctx.new_len; | |
859 | } else { | |
860 | kfree(ctx.new_val); | |
861 | } | |
862 | ||
7b146ceb AI |
863 | return ret == 1 ? 0 : -EPERM; |
864 | } | |
865 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); | |
866 | ||
808649fb AI |
867 | static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, |
868 | size_t *lenp) | |
869 | { | |
870 | ssize_t tmp_ret = 0, ret; | |
871 | ||
872 | if (dir->header.parent) { | |
873 | tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp); | |
874 | if (tmp_ret < 0) | |
875 | return tmp_ret; | |
876 | } | |
877 | ||
878 | ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp); | |
879 | if (ret < 0) | |
880 | return ret; | |
881 | *bufp += ret; | |
882 | *lenp -= ret; | |
883 | ret += tmp_ret; | |
884 | ||
885 | /* Avoid leading slash. */ | |
886 | if (!ret) | |
887 | return ret; | |
888 | ||
889 | tmp_ret = strscpy(*bufp, "/", *lenp); | |
890 | if (tmp_ret < 0) | |
891 | return tmp_ret; | |
892 | *bufp += tmp_ret; | |
893 | *lenp -= tmp_ret; | |
894 | ||
895 | return ret + tmp_ret; | |
896 | } | |
897 | ||
898 | BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf, | |
899 | size_t, buf_len, u64, flags) | |
900 | { | |
901 | ssize_t tmp_ret = 0, ret; | |
902 | ||
903 | if (!buf) | |
904 | return -EINVAL; | |
905 | ||
906 | if (!(flags & BPF_F_SYSCTL_BASE_NAME)) { | |
907 | if (!ctx->head) | |
908 | return -EINVAL; | |
909 | tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len); | |
910 | if (tmp_ret < 0) | |
911 | return tmp_ret; | |
912 | } | |
913 | ||
914 | ret = strscpy(buf, ctx->table->procname, buf_len); | |
915 | ||
916 | return ret < 0 ? ret : tmp_ret + ret; | |
917 | } | |
918 | ||
919 | static const struct bpf_func_proto bpf_sysctl_get_name_proto = { | |
920 | .func = bpf_sysctl_get_name, | |
921 | .gpl_only = false, | |
922 | .ret_type = RET_INTEGER, | |
923 | .arg1_type = ARG_PTR_TO_CTX, | |
924 | .arg2_type = ARG_PTR_TO_MEM, | |
925 | .arg3_type = ARG_CONST_SIZE, | |
926 | .arg4_type = ARG_ANYTHING, | |
927 | }; | |
928 | ||
1d11b301 AI |
/* Copy a sysctl value of @src_len bytes from @src into @dst of @dst_len bytes.
 *
 * If the value fits with room to spare, the remainder of @dst is zero-filled
 * and @src_len is returned. If it does not (src_len >= dst_len), the first
 * dst_len bytes are copied, the last byte of @dst is overwritten with '\0'
 * and -E2BIG is returned.
 *
 * Returns -EINVAL when @dst is NULL, or when there is no source value (in
 * which case @dst is zeroed first), and -E2BIG when @dst_len is zero.
 */
static int copy_sysctl_value(char *dst, size_t dst_len, char *src,
			     size_t src_len)
{
	if (!dst)
		return -EINVAL;

	if (!dst_len)
		return -E2BIG;

	if (!src || !src_len) {
		memset(dst, 0, dst_len);
		return -EINVAL;
	}

	if (src_len < dst_len) {
		/* Whole value fits: copy it and zero the unused tail. */
		memcpy(dst, src, src_len);
		memset(dst + src_len, '\0', dst_len - src_len);
		return src_len;
	}

	/* Value is truncated: fill the buffer and force termination. */
	memcpy(dst, src, dst_len);
	dst[dst_len - 1] = '\0';

	return -E2BIG;
}
954 | ||
955 | BPF_CALL_3(bpf_sysctl_get_current_value, struct bpf_sysctl_kern *, ctx, | |
956 | char *, buf, size_t, buf_len) | |
957 | { | |
958 | return copy_sysctl_value(buf, buf_len, ctx->cur_val, ctx->cur_len); | |
959 | } | |
960 | ||
961 | static const struct bpf_func_proto bpf_sysctl_get_current_value_proto = { | |
962 | .func = bpf_sysctl_get_current_value, | |
963 | .gpl_only = false, | |
964 | .ret_type = RET_INTEGER, | |
965 | .arg1_type = ARG_PTR_TO_CTX, | |
966 | .arg2_type = ARG_PTR_TO_UNINIT_MEM, | |
967 | .arg3_type = ARG_CONST_SIZE, | |
968 | }; | |
969 | ||
4e63acdf AI |
970 | BPF_CALL_3(bpf_sysctl_get_new_value, struct bpf_sysctl_kern *, ctx, char *, buf, |
971 | size_t, buf_len) | |
972 | { | |
973 | if (!ctx->write) { | |
974 | if (buf && buf_len) | |
975 | memset(buf, '\0', buf_len); | |
976 | return -EINVAL; | |
977 | } | |
978 | return copy_sysctl_value(buf, buf_len, ctx->new_val, ctx->new_len); | |
979 | } | |
980 | ||
981 | static const struct bpf_func_proto bpf_sysctl_get_new_value_proto = { | |
982 | .func = bpf_sysctl_get_new_value, | |
983 | .gpl_only = false, | |
984 | .ret_type = RET_INTEGER, | |
985 | .arg1_type = ARG_PTR_TO_CTX, | |
986 | .arg2_type = ARG_PTR_TO_UNINIT_MEM, | |
987 | .arg3_type = ARG_CONST_SIZE, | |
988 | }; | |
989 | ||
990 | BPF_CALL_3(bpf_sysctl_set_new_value, struct bpf_sysctl_kern *, ctx, | |
991 | const char *, buf, size_t, buf_len) | |
992 | { | |
993 | if (!ctx->write || !ctx->new_val || !ctx->new_len || !buf || !buf_len) | |
994 | return -EINVAL; | |
995 | ||
996 | if (buf_len > PAGE_SIZE - 1) | |
997 | return -E2BIG; | |
998 | ||
999 | memcpy(ctx->new_val, buf, buf_len); | |
1000 | ctx->new_len = buf_len; | |
1001 | ctx->new_updated = 1; | |
1002 | ||
1003 | return 0; | |
1004 | } | |
1005 | ||
1006 | static const struct bpf_func_proto bpf_sysctl_set_new_value_proto = { | |
1007 | .func = bpf_sysctl_set_new_value, | |
1008 | .gpl_only = false, | |
1009 | .ret_type = RET_INTEGER, | |
1010 | .arg1_type = ARG_PTR_TO_CTX, | |
1011 | .arg2_type = ARG_PTR_TO_MEM, | |
1012 | .arg3_type = ARG_CONST_SIZE, | |
1013 | }; | |
1014 | ||
7b146ceb AI |
1015 | static const struct bpf_func_proto * |
1016 | sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) | |
1017 | { | |
808649fb | 1018 | switch (func_id) { |
d7a4cb9b AI |
1019 | case BPF_FUNC_strtol: |
1020 | return &bpf_strtol_proto; | |
1021 | case BPF_FUNC_strtoul: | |
1022 | return &bpf_strtoul_proto; | |
808649fb AI |
1023 | case BPF_FUNC_sysctl_get_name: |
1024 | return &bpf_sysctl_get_name_proto; | |
1d11b301 AI |
1025 | case BPF_FUNC_sysctl_get_current_value: |
1026 | return &bpf_sysctl_get_current_value_proto; | |
4e63acdf AI |
1027 | case BPF_FUNC_sysctl_get_new_value: |
1028 | return &bpf_sysctl_get_new_value_proto; | |
1029 | case BPF_FUNC_sysctl_set_new_value: | |
1030 | return &bpf_sysctl_set_new_value_proto; | |
808649fb AI |
1031 | default: |
1032 | return cgroup_base_func_proto(func_id, prog); | |
1033 | } | |
7b146ceb AI |
1034 | } |
1035 | ||
1036 | static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type, | |
1037 | const struct bpf_prog *prog, | |
1038 | struct bpf_insn_access_aux *info) | |
1039 | { | |
1040 | const int size_default = sizeof(__u32); | |
1041 | ||
e1550bfe | 1042 | if (off < 0 || off + size > sizeof(struct bpf_sysctl) || off % size) |
7b146ceb AI |
1043 | return false; |
1044 | ||
1045 | switch (off) { | |
1046 | case offsetof(struct bpf_sysctl, write): | |
e1550bfe AI |
1047 | if (type != BPF_READ) |
1048 | return false; | |
7b146ceb AI |
1049 | bpf_ctx_record_field_size(info, size_default); |
1050 | return bpf_ctx_narrow_access_ok(off, size, size_default); | |
e1550bfe AI |
1051 | case offsetof(struct bpf_sysctl, file_pos): |
1052 | if (type == BPF_READ) { | |
1053 | bpf_ctx_record_field_size(info, size_default); | |
1054 | return bpf_ctx_narrow_access_ok(off, size, size_default); | |
1055 | } else { | |
1056 | return size == size_default; | |
1057 | } | |
7b146ceb AI |
1058 | default: |
1059 | return false; | |
1060 | } | |
1061 | } | |
1062 | ||
1063 | static u32 sysctl_convert_ctx_access(enum bpf_access_type type, | |
1064 | const struct bpf_insn *si, | |
1065 | struct bpf_insn *insn_buf, | |
1066 | struct bpf_prog *prog, u32 *target_size) | |
1067 | { | |
1068 | struct bpf_insn *insn = insn_buf; | |
1069 | ||
1070 | switch (si->off) { | |
1071 | case offsetof(struct bpf_sysctl, write): | |
1072 | *insn++ = BPF_LDX_MEM( | |
1073 | BPF_SIZE(si->code), si->dst_reg, si->src_reg, | |
1074 | bpf_target_off(struct bpf_sysctl_kern, write, | |
1075 | FIELD_SIZEOF(struct bpf_sysctl_kern, | |
1076 | write), | |
1077 | target_size)); | |
1078 | break; | |
e1550bfe AI |
1079 | case offsetof(struct bpf_sysctl, file_pos): |
1080 | /* ppos is a pointer so it should be accessed via indirect | |
1081 | * loads and stores. Also for stores additional temporary | |
1082 | * register is used since neither src_reg nor dst_reg can be | |
1083 | * overridden. | |
1084 | */ | |
1085 | if (type == BPF_WRITE) { | |
1086 | int treg = BPF_REG_9; | |
1087 | ||
1088 | if (si->src_reg == treg || si->dst_reg == treg) | |
1089 | --treg; | |
1090 | if (si->src_reg == treg || si->dst_reg == treg) | |
1091 | --treg; | |
1092 | *insn++ = BPF_STX_MEM( | |
1093 | BPF_DW, si->dst_reg, treg, | |
1094 | offsetof(struct bpf_sysctl_kern, tmp_reg)); | |
1095 | *insn++ = BPF_LDX_MEM( | |
1096 | BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), | |
1097 | treg, si->dst_reg, | |
1098 | offsetof(struct bpf_sysctl_kern, ppos)); | |
1099 | *insn++ = BPF_STX_MEM( | |
1100 | BPF_SIZEOF(u32), treg, si->src_reg, 0); | |
1101 | *insn++ = BPF_LDX_MEM( | |
1102 | BPF_DW, treg, si->dst_reg, | |
1103 | offsetof(struct bpf_sysctl_kern, tmp_reg)); | |
1104 | } else { | |
1105 | *insn++ = BPF_LDX_MEM( | |
1106 | BPF_FIELD_SIZEOF(struct bpf_sysctl_kern, ppos), | |
1107 | si->dst_reg, si->src_reg, | |
1108 | offsetof(struct bpf_sysctl_kern, ppos)); | |
1109 | *insn++ = BPF_LDX_MEM( | |
1110 | BPF_SIZE(si->code), si->dst_reg, si->dst_reg, 0); | |
1111 | } | |
1112 | *target_size = sizeof(u32); | |
1113 | break; | |
7b146ceb AI |
1114 | } |
1115 | ||
1116 | return insn - insn_buf; | |
1117 | } | |
1118 | ||
1119 | const struct bpf_verifier_ops cg_sysctl_verifier_ops = { | |
1120 | .get_func_proto = sysctl_func_proto, | |
1121 | .is_valid_access = sysctl_is_valid_access, | |
1122 | .convert_ctx_access = sysctl_convert_ctx_access, | |
1123 | }; | |
1124 | ||
1125 | const struct bpf_prog_ops cg_sysctl_prog_ops = { | |
1126 | }; |