]>
Commit | Line | Data |
---|---|---|
30070984 DM |
1 | /* |
2 | * Functions to manage eBPF programs attached to cgroups | |
3 | * | |
4 | * Copyright (c) 2016 Daniel Mack | |
5 | * | |
6 | * This file is subject to the terms and conditions of version 2 of the GNU | |
7 | * General Public License. See the file COPYING in the main directory of the | |
8 | * Linux distribution for more details. | |
9 | */ | |
10 | ||
11 | #include <linux/kernel.h> | |
12 | #include <linux/atomic.h> | |
13 | #include <linux/cgroup.h> | |
14 | #include <linux/slab.h> | |
15 | #include <linux/bpf.h> | |
16 | #include <linux/bpf-cgroup.h> | |
17 | #include <net/sock.h> | |
18 | ||
/* Static key counting attached cgroup-bpf programs: this file bumps it in
 * __cgroup_bpf_attach() and drops it again in __cgroup_bpf_detach() and
 * cgroup_bpf_put().
 */
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
21 | ||
22 | /** | |
23 | * cgroup_bpf_put() - put references of all bpf programs | |
24 | * @cgrp: the cgroup to modify | |
25 | */ | |
26 | void cgroup_bpf_put(struct cgroup *cgrp) | |
27 | { | |
8bad74f9 | 28 | enum bpf_cgroup_storage_type stype; |
30070984 DM |
29 | unsigned int type; |
30 | ||
324bda9e AS |
31 | for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { |
32 | struct list_head *progs = &cgrp->bpf.progs[type]; | |
33 | struct bpf_prog_list *pl, *tmp; | |
34 | ||
35 | list_for_each_entry_safe(pl, tmp, progs, node) { | |
36 | list_del(&pl->node); | |
37 | bpf_prog_put(pl->prog); | |
8bad74f9 RG |
38 | for_each_cgroup_storage_type(stype) { |
39 | bpf_cgroup_storage_unlink(pl->storage[stype]); | |
40 | bpf_cgroup_storage_free(pl->storage[stype]); | |
41 | } | |
324bda9e | 42 | kfree(pl); |
30070984 DM |
43 | static_branch_dec(&cgroup_bpf_enabled_key); |
44 | } | |
324bda9e AS |
45 | bpf_prog_array_free(cgrp->bpf.effective[type]); |
46 | } | |
47 | } | |
48 | ||
49 | /* count number of elements in the list. | |
50 | * it's slow but the list cannot be long | |
51 | */ | |
52 | static u32 prog_list_length(struct list_head *head) | |
53 | { | |
54 | struct bpf_prog_list *pl; | |
55 | u32 cnt = 0; | |
56 | ||
57 | list_for_each_entry(pl, head, node) { | |
58 | if (!pl->prog) | |
59 | continue; | |
60 | cnt++; | |
30070984 | 61 | } |
324bda9e AS |
62 | return cnt; |
63 | } | |
64 | ||
65 | /* if parent has non-overridable prog attached, | |
66 | * disallow attaching new programs to the descendent cgroup. | |
67 | * if parent has overridable or multi-prog, allow attaching | |
68 | */ | |
69 | static bool hierarchy_allows_attach(struct cgroup *cgrp, | |
70 | enum bpf_attach_type type, | |
71 | u32 new_flags) | |
72 | { | |
73 | struct cgroup *p; | |
74 | ||
75 | p = cgroup_parent(cgrp); | |
76 | if (!p) | |
77 | return true; | |
78 | do { | |
79 | u32 flags = p->bpf.flags[type]; | |
80 | u32 cnt; | |
81 | ||
82 | if (flags & BPF_F_ALLOW_MULTI) | |
83 | return true; | |
84 | cnt = prog_list_length(&p->bpf.progs[type]); | |
85 | WARN_ON_ONCE(cnt > 1); | |
86 | if (cnt == 1) | |
87 | return !!(flags & BPF_F_ALLOW_OVERRIDE); | |
88 | p = cgroup_parent(p); | |
89 | } while (p); | |
90 | return true; | |
91 | } | |
92 | ||
93 | /* compute a chain of effective programs for a given cgroup: | |
94 | * start from the list of programs in this cgroup and add | |
95 | * all parent programs. | |
96 | * Note that parent's F_ALLOW_OVERRIDE-type program is yielding | |
97 | * to programs in this cgroup | |
98 | */ | |
99 | static int compute_effective_progs(struct cgroup *cgrp, | |
100 | enum bpf_attach_type type, | |
101 | struct bpf_prog_array __rcu **array) | |
102 | { | |
8bad74f9 | 103 | enum bpf_cgroup_storage_type stype; |
3960f4fd | 104 | struct bpf_prog_array *progs; |
324bda9e AS |
105 | struct bpf_prog_list *pl; |
106 | struct cgroup *p = cgrp; | |
107 | int cnt = 0; | |
108 | ||
109 | /* count number of effective programs by walking parents */ | |
110 | do { | |
111 | if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) | |
112 | cnt += prog_list_length(&p->bpf.progs[type]); | |
113 | p = cgroup_parent(p); | |
114 | } while (p); | |
115 | ||
116 | progs = bpf_prog_array_alloc(cnt, GFP_KERNEL); | |
117 | if (!progs) | |
118 | return -ENOMEM; | |
119 | ||
120 | /* populate the array with effective progs */ | |
121 | cnt = 0; | |
122 | p = cgrp; | |
123 | do { | |
394e40a2 RG |
124 | if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) |
125 | continue; | |
126 | ||
127 | list_for_each_entry(pl, &p->bpf.progs[type], node) { | |
128 | if (!pl->prog) | |
129 | continue; | |
130 | ||
131 | progs->items[cnt].prog = pl->prog; | |
8bad74f9 RG |
132 | for_each_cgroup_storage_type(stype) |
133 | progs->items[cnt].cgroup_storage[stype] = | |
134 | pl->storage[stype]; | |
394e40a2 RG |
135 | cnt++; |
136 | } | |
137 | } while ((p = cgroup_parent(p))); | |
324bda9e | 138 | |
3960f4fd | 139 | rcu_assign_pointer(*array, progs); |
324bda9e AS |
140 | return 0; |
141 | } | |
142 | ||
143 | static void activate_effective_progs(struct cgroup *cgrp, | |
144 | enum bpf_attach_type type, | |
145 | struct bpf_prog_array __rcu *array) | |
146 | { | |
147 | struct bpf_prog_array __rcu *old_array; | |
148 | ||
149 | old_array = xchg(&cgrp->bpf.effective[type], array); | |
150 | /* free prog array after grace period, since __cgroup_bpf_run_*() | |
151 | * might be still walking the array | |
152 | */ | |
153 | bpf_prog_array_free(old_array); | |
30070984 DM |
154 | } |
155 | ||
156 | /** | |
157 | * cgroup_bpf_inherit() - inherit effective programs from parent | |
158 | * @cgrp: the cgroup to modify | |
30070984 | 159 | */ |
324bda9e | 160 | int cgroup_bpf_inherit(struct cgroup *cgrp) |
30070984 | 161 | { |
324bda9e AS |
162 | /* has to use marco instead of const int, since compiler thinks |
163 | * that array below is variable length | |
164 | */ | |
165 | #define NR ARRAY_SIZE(cgrp->bpf.effective) | |
166 | struct bpf_prog_array __rcu *arrays[NR] = {}; | |
167 | int i; | |
30070984 | 168 | |
324bda9e AS |
169 | for (i = 0; i < NR; i++) |
170 | INIT_LIST_HEAD(&cgrp->bpf.progs[i]); | |
30070984 | 171 | |
324bda9e AS |
172 | for (i = 0; i < NR; i++) |
173 | if (compute_effective_progs(cgrp, i, &arrays[i])) | |
174 | goto cleanup; | |
175 | ||
176 | for (i = 0; i < NR; i++) | |
177 | activate_effective_progs(cgrp, i, arrays[i]); | |
178 | ||
179 | return 0; | |
180 | cleanup: | |
181 | for (i = 0; i < NR; i++) | |
182 | bpf_prog_array_free(arrays[i]); | |
183 | return -ENOMEM; | |
30070984 DM |
184 | } |
185 | ||
85fc4b16 RG |
186 | static int update_effective_progs(struct cgroup *cgrp, |
187 | enum bpf_attach_type type) | |
188 | { | |
189 | struct cgroup_subsys_state *css; | |
190 | int err; | |
191 | ||
192 | /* allocate and recompute effective prog arrays */ | |
193 | css_for_each_descendant_pre(css, &cgrp->self) { | |
194 | struct cgroup *desc = container_of(css, struct cgroup, self); | |
195 | ||
196 | err = compute_effective_progs(desc, type, &desc->bpf.inactive); | |
197 | if (err) | |
198 | goto cleanup; | |
199 | } | |
200 | ||
201 | /* all allocations were successful. Activate all prog arrays */ | |
202 | css_for_each_descendant_pre(css, &cgrp->self) { | |
203 | struct cgroup *desc = container_of(css, struct cgroup, self); | |
204 | ||
205 | activate_effective_progs(desc, type, desc->bpf.inactive); | |
206 | desc->bpf.inactive = NULL; | |
207 | } | |
208 | ||
209 | return 0; | |
210 | ||
211 | cleanup: | |
212 | /* oom while computing effective. Free all computed effective arrays | |
213 | * since they were not activated | |
214 | */ | |
215 | css_for_each_descendant_pre(css, &cgrp->self) { | |
216 | struct cgroup *desc = container_of(css, struct cgroup, self); | |
217 | ||
218 | bpf_prog_array_free(desc->bpf.inactive); | |
219 | desc->bpf.inactive = NULL; | |
220 | } | |
221 | ||
222 | return err; | |
223 | } | |
224 | ||
324bda9e AS |
225 | #define BPF_CGROUP_MAX_PROGS 64 |
226 | ||
30070984 | 227 | /** |
324bda9e | 228 | * __cgroup_bpf_attach() - Attach the program to a cgroup, and |
30070984 DM |
229 | * propagate the change to descendants |
230 | * @cgrp: The cgroup which descendants to traverse | |
324bda9e AS |
231 | * @prog: A program to attach |
232 | * @type: Type of attach operation | |
1832f4ef | 233 | * @flags: Option flags |
30070984 DM |
234 | * |
235 | * Must be called with cgroup_mutex held. | |
236 | */ | |
324bda9e AS |
237 | int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, |
238 | enum bpf_attach_type type, u32 flags) | |
30070984 | 239 | { |
324bda9e AS |
240 | struct list_head *progs = &cgrp->bpf.progs[type]; |
241 | struct bpf_prog *old_prog = NULL; | |
8bad74f9 RG |
242 | struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE], |
243 | *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL}; | |
244 | enum bpf_cgroup_storage_type stype; | |
324bda9e AS |
245 | struct bpf_prog_list *pl; |
246 | bool pl_was_allocated; | |
324bda9e AS |
247 | int err; |
248 | ||
249 | if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) | |
250 | /* invalid combination */ | |
251 | return -EINVAL; | |
252 | ||
253 | if (!hierarchy_allows_attach(cgrp, type, flags)) | |
7f677633 AS |
254 | return -EPERM; |
255 | ||
324bda9e AS |
256 | if (!list_empty(progs) && cgrp->bpf.flags[type] != flags) |
257 | /* Disallow attaching non-overridable on top | |
258 | * of existing overridable in this cgroup. | |
259 | * Disallow attaching multi-prog if overridable or none | |
7f677633 AS |
260 | */ |
261 | return -EPERM; | |
262 | ||
324bda9e AS |
263 | if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) |
264 | return -E2BIG; | |
265 | ||
8bad74f9 RG |
266 | for_each_cgroup_storage_type(stype) { |
267 | storage[stype] = bpf_cgroup_storage_alloc(prog, stype); | |
268 | if (IS_ERR(storage[stype])) { | |
269 | storage[stype] = NULL; | |
270 | for_each_cgroup_storage_type(stype) | |
271 | bpf_cgroup_storage_free(storage[stype]); | |
272 | return -ENOMEM; | |
273 | } | |
274 | } | |
d7bf2c10 | 275 | |
324bda9e | 276 | if (flags & BPF_F_ALLOW_MULTI) { |
d7bf2c10 RG |
277 | list_for_each_entry(pl, progs, node) { |
278 | if (pl->prog == prog) { | |
324bda9e | 279 | /* disallow attaching the same prog twice */ |
8bad74f9 RG |
280 | for_each_cgroup_storage_type(stype) |
281 | bpf_cgroup_storage_free(storage[stype]); | |
324bda9e | 282 | return -EINVAL; |
d7bf2c10 RG |
283 | } |
284 | } | |
324bda9e AS |
285 | |
286 | pl = kmalloc(sizeof(*pl), GFP_KERNEL); | |
d7bf2c10 | 287 | if (!pl) { |
8bad74f9 RG |
288 | for_each_cgroup_storage_type(stype) |
289 | bpf_cgroup_storage_free(storage[stype]); | |
324bda9e | 290 | return -ENOMEM; |
d7bf2c10 RG |
291 | } |
292 | ||
324bda9e AS |
293 | pl_was_allocated = true; |
294 | pl->prog = prog; | |
8bad74f9 RG |
295 | for_each_cgroup_storage_type(stype) |
296 | pl->storage[stype] = storage[stype]; | |
324bda9e AS |
297 | list_add_tail(&pl->node, progs); |
298 | } else { | |
299 | if (list_empty(progs)) { | |
300 | pl = kmalloc(sizeof(*pl), GFP_KERNEL); | |
d7bf2c10 | 301 | if (!pl) { |
8bad74f9 RG |
302 | for_each_cgroup_storage_type(stype) |
303 | bpf_cgroup_storage_free(storage[stype]); | |
324bda9e | 304 | return -ENOMEM; |
d7bf2c10 | 305 | } |
324bda9e AS |
306 | pl_was_allocated = true; |
307 | list_add_tail(&pl->node, progs); | |
308 | } else { | |
309 | pl = list_first_entry(progs, typeof(*pl), node); | |
310 | old_prog = pl->prog; | |
8bad74f9 RG |
311 | for_each_cgroup_storage_type(stype) { |
312 | old_storage[stype] = pl->storage[stype]; | |
313 | bpf_cgroup_storage_unlink(old_storage[stype]); | |
314 | } | |
324bda9e AS |
315 | pl_was_allocated = false; |
316 | } | |
317 | pl->prog = prog; | |
8bad74f9 RG |
318 | for_each_cgroup_storage_type(stype) |
319 | pl->storage[stype] = storage[stype]; | |
7f677633 | 320 | } |
30070984 | 321 | |
324bda9e | 322 | cgrp->bpf.flags[type] = flags; |
7f677633 | 323 | |
85fc4b16 RG |
324 | err = update_effective_progs(cgrp, type); |
325 | if (err) | |
326 | goto cleanup; | |
324bda9e AS |
327 | |
328 | static_branch_inc(&cgroup_bpf_enabled_key); | |
8bad74f9 RG |
329 | for_each_cgroup_storage_type(stype) { |
330 | if (!old_storage[stype]) | |
331 | continue; | |
332 | bpf_cgroup_storage_free(old_storage[stype]); | |
333 | } | |
30070984 DM |
334 | if (old_prog) { |
335 | bpf_prog_put(old_prog); | |
336 | static_branch_dec(&cgroup_bpf_enabled_key); | |
337 | } | |
8bad74f9 RG |
338 | for_each_cgroup_storage_type(stype) |
339 | bpf_cgroup_storage_link(storage[stype], cgrp, type); | |
7f677633 | 340 | return 0; |
324bda9e AS |
341 | |
342 | cleanup: | |
324bda9e AS |
343 | /* and cleanup the prog list */ |
344 | pl->prog = old_prog; | |
8bad74f9 RG |
345 | for_each_cgroup_storage_type(stype) { |
346 | bpf_cgroup_storage_free(pl->storage[stype]); | |
347 | pl->storage[stype] = old_storage[stype]; | |
348 | bpf_cgroup_storage_link(old_storage[stype], cgrp, type); | |
349 | } | |
324bda9e AS |
350 | if (pl_was_allocated) { |
351 | list_del(&pl->node); | |
352 | kfree(pl); | |
353 | } | |
354 | return err; | |
355 | } | |
356 | ||
357 | /** | |
358 | * __cgroup_bpf_detach() - Detach the program from a cgroup, and | |
359 | * propagate the change to descendants | |
360 | * @cgrp: The cgroup which descendants to traverse | |
361 | * @prog: A program to detach or NULL | |
362 | * @type: Type of detach operation | |
363 | * | |
364 | * Must be called with cgroup_mutex held. | |
365 | */ | |
366 | int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, | |
1832f4ef | 367 | enum bpf_attach_type type) |
324bda9e AS |
368 | { |
369 | struct list_head *progs = &cgrp->bpf.progs[type]; | |
8bad74f9 | 370 | enum bpf_cgroup_storage_type stype; |
324bda9e AS |
371 | u32 flags = cgrp->bpf.flags[type]; |
372 | struct bpf_prog *old_prog = NULL; | |
324bda9e AS |
373 | struct bpf_prog_list *pl; |
374 | int err; | |
375 | ||
376 | if (flags & BPF_F_ALLOW_MULTI) { | |
377 | if (!prog) | |
378 | /* to detach MULTI prog the user has to specify valid FD | |
379 | * of the program to be detached | |
380 | */ | |
381 | return -EINVAL; | |
382 | } else { | |
383 | if (list_empty(progs)) | |
384 | /* report error when trying to detach and nothing is attached */ | |
385 | return -ENOENT; | |
386 | } | |
387 | ||
388 | if (flags & BPF_F_ALLOW_MULTI) { | |
389 | /* find the prog and detach it */ | |
390 | list_for_each_entry(pl, progs, node) { | |
391 | if (pl->prog != prog) | |
392 | continue; | |
393 | old_prog = prog; | |
394 | /* mark it deleted, so it's ignored while | |
395 | * recomputing effective | |
396 | */ | |
397 | pl->prog = NULL; | |
398 | break; | |
399 | } | |
400 | if (!old_prog) | |
401 | return -ENOENT; | |
402 | } else { | |
403 | /* to maintain backward compatibility NONE and OVERRIDE cgroups | |
404 | * allow detaching with invalid FD (prog==NULL) | |
405 | */ | |
406 | pl = list_first_entry(progs, typeof(*pl), node); | |
407 | old_prog = pl->prog; | |
408 | pl->prog = NULL; | |
409 | } | |
410 | ||
85fc4b16 RG |
411 | err = update_effective_progs(cgrp, type); |
412 | if (err) | |
413 | goto cleanup; | |
324bda9e AS |
414 | |
415 | /* now can actually delete it from this cgroup list */ | |
416 | list_del(&pl->node); | |
8bad74f9 RG |
417 | for_each_cgroup_storage_type(stype) { |
418 | bpf_cgroup_storage_unlink(pl->storage[stype]); | |
419 | bpf_cgroup_storage_free(pl->storage[stype]); | |
420 | } | |
324bda9e AS |
421 | kfree(pl); |
422 | if (list_empty(progs)) | |
423 | /* last program was detached, reset flags to zero */ | |
424 | cgrp->bpf.flags[type] = 0; | |
425 | ||
426 | bpf_prog_put(old_prog); | |
427 | static_branch_dec(&cgroup_bpf_enabled_key); | |
428 | return 0; | |
429 | ||
430 | cleanup: | |
324bda9e AS |
431 | /* and restore back old_prog */ |
432 | pl->prog = old_prog; | |
433 | return err; | |
30070984 DM |
434 | } |
435 | ||
468e2f64 AS |
436 | /* Must be called with cgroup_mutex held to avoid races. */ |
437 | int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, | |
438 | union bpf_attr __user *uattr) | |
439 | { | |
440 | __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); | |
441 | enum bpf_attach_type type = attr->query.attach_type; | |
442 | struct list_head *progs = &cgrp->bpf.progs[type]; | |
443 | u32 flags = cgrp->bpf.flags[type]; | |
444 | int cnt, ret = 0, i; | |
445 | ||
446 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) | |
447 | cnt = bpf_prog_array_length(cgrp->bpf.effective[type]); | |
448 | else | |
449 | cnt = prog_list_length(progs); | |
450 | ||
451 | if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) | |
452 | return -EFAULT; | |
453 | if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) | |
454 | return -EFAULT; | |
455 | if (attr->query.prog_cnt == 0 || !prog_ids || !cnt) | |
456 | /* return early if user requested only program count + flags */ | |
457 | return 0; | |
458 | if (attr->query.prog_cnt < cnt) { | |
459 | cnt = attr->query.prog_cnt; | |
460 | ret = -ENOSPC; | |
461 | } | |
462 | ||
463 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { | |
464 | return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type], | |
465 | prog_ids, cnt); | |
466 | } else { | |
467 | struct bpf_prog_list *pl; | |
468 | u32 id; | |
469 | ||
470 | i = 0; | |
471 | list_for_each_entry(pl, progs, node) { | |
472 | id = pl->prog->aux->id; | |
473 | if (copy_to_user(prog_ids + i, &id, sizeof(id))) | |
474 | return -EFAULT; | |
475 | if (++i == cnt) | |
476 | break; | |
477 | } | |
478 | } | |
479 | return ret; | |
480 | } | |
481 | ||
fdb5c453 SY |
482 | int cgroup_bpf_prog_attach(const union bpf_attr *attr, |
483 | enum bpf_prog_type ptype, struct bpf_prog *prog) | |
484 | { | |
485 | struct cgroup *cgrp; | |
486 | int ret; | |
487 | ||
488 | cgrp = cgroup_get_from_fd(attr->target_fd); | |
489 | if (IS_ERR(cgrp)) | |
490 | return PTR_ERR(cgrp); | |
491 | ||
492 | ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, | |
493 | attr->attach_flags); | |
494 | cgroup_put(cgrp); | |
495 | return ret; | |
496 | } | |
497 | ||
498 | int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) | |
499 | { | |
500 | struct bpf_prog *prog; | |
501 | struct cgroup *cgrp; | |
502 | int ret; | |
503 | ||
504 | cgrp = cgroup_get_from_fd(attr->target_fd); | |
505 | if (IS_ERR(cgrp)) | |
506 | return PTR_ERR(cgrp); | |
507 | ||
508 | prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); | |
509 | if (IS_ERR(prog)) | |
510 | prog = NULL; | |
511 | ||
512 | ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); | |
513 | if (prog) | |
514 | bpf_prog_put(prog); | |
515 | ||
516 | cgroup_put(cgrp); | |
517 | return ret; | |
518 | } | |
519 | ||
520 | int cgroup_bpf_prog_query(const union bpf_attr *attr, | |
521 | union bpf_attr __user *uattr) | |
522 | { | |
523 | struct cgroup *cgrp; | |
524 | int ret; | |
525 | ||
526 | cgrp = cgroup_get_from_fd(attr->query.target_fd); | |
527 | if (IS_ERR(cgrp)) | |
528 | return PTR_ERR(cgrp); | |
529 | ||
530 | ret = cgroup_bpf_query(cgrp, attr, uattr); | |
531 | ||
532 | cgroup_put(cgrp); | |
533 | return ret; | |
534 | } | |
535 | ||
30070984 | 536 | /** |
b2cd1257 | 537 | * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering |
8f917bba | 538 | * @sk: The socket sending or receiving traffic |
30070984 DM |
539 | * @skb: The skb that is being sent or received |
540 | * @type: The type of program to be exectuted | |
541 | * | |
542 | * If no socket is passed, or the socket is not of type INET or INET6, | |
543 | * this function does nothing and returns 0. | |
544 | * | |
545 | * The program type passed in via @type must be suitable for network | |
546 | * filtering. No further check is performed to assert that. | |
547 | * | |
548 | * This function will return %-EPERM if any if an attached program was found | |
549 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
550 | */ | |
b2cd1257 DA |
551 | int __cgroup_bpf_run_filter_skb(struct sock *sk, |
552 | struct sk_buff *skb, | |
553 | enum bpf_attach_type type) | |
30070984 | 554 | { |
324bda9e AS |
555 | unsigned int offset = skb->data - skb_network_header(skb); |
556 | struct sock *save_sk; | |
b39b5f41 | 557 | void *saved_data_end; |
30070984 | 558 | struct cgroup *cgrp; |
324bda9e | 559 | int ret; |
30070984 DM |
560 | |
561 | if (!sk || !sk_fullsock(sk)) | |
562 | return 0; | |
563 | ||
324bda9e | 564 | if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) |
30070984 DM |
565 | return 0; |
566 | ||
567 | cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
324bda9e AS |
568 | save_sk = skb->sk; |
569 | skb->sk = sk; | |
570 | __skb_push(skb, offset); | |
b39b5f41 SL |
571 | |
572 | /* compute pointers for the bpf prog */ | |
573 | bpf_compute_and_save_data_end(skb, &saved_data_end); | |
574 | ||
324bda9e | 575 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, |
6cab5e90 | 576 | __bpf_prog_run_save_cb); |
b39b5f41 | 577 | bpf_restore_data_end(skb, saved_data_end); |
324bda9e AS |
578 | __skb_pull(skb, offset); |
579 | skb->sk = save_sk; | |
580 | return ret == 1 ? 0 : -EPERM; | |
30070984 | 581 | } |
b2cd1257 | 582 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); |
61023658 DA |
583 | |
584 | /** | |
585 | * __cgroup_bpf_run_filter_sk() - Run a program on a sock | |
586 | * @sk: sock structure to manipulate | |
587 | * @type: The type of program to be exectuted | |
588 | * | |
589 | * socket is passed is expected to be of type INET or INET6. | |
590 | * | |
591 | * The program type passed in via @type must be suitable for sock | |
592 | * filtering. No further check is performed to assert that. | |
593 | * | |
594 | * This function will return %-EPERM if any if an attached program was found | |
595 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
596 | */ | |
597 | int __cgroup_bpf_run_filter_sk(struct sock *sk, | |
598 | enum bpf_attach_type type) | |
599 | { | |
600 | struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
324bda9e | 601 | int ret; |
61023658 | 602 | |
324bda9e AS |
603 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); |
604 | return ret == 1 ? 0 : -EPERM; | |
61023658 DA |
605 | } |
606 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); | |
40304b2a | 607 | |
4fbac77d AI |
608 | /** |
609 | * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and | |
610 | * provided by user sockaddr | |
611 | * @sk: sock struct that will use sockaddr | |
612 | * @uaddr: sockaddr struct provided by user | |
613 | * @type: The type of program to be exectuted | |
1cedee13 | 614 | * @t_ctx: Pointer to attach type specific context |
4fbac77d AI |
615 | * |
616 | * socket is expected to be of type INET or INET6. | |
617 | * | |
618 | * This function will return %-EPERM if an attached program is found and | |
619 | * returned value != 1 during execution. In all other cases, 0 is returned. | |
620 | */ | |
621 | int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, | |
622 | struct sockaddr *uaddr, | |
1cedee13 AI |
623 | enum bpf_attach_type type, |
624 | void *t_ctx) | |
4fbac77d AI |
625 | { |
626 | struct bpf_sock_addr_kern ctx = { | |
627 | .sk = sk, | |
628 | .uaddr = uaddr, | |
1cedee13 | 629 | .t_ctx = t_ctx, |
4fbac77d | 630 | }; |
1cedee13 | 631 | struct sockaddr_storage unspec; |
4fbac77d AI |
632 | struct cgroup *cgrp; |
633 | int ret; | |
634 | ||
635 | /* Check socket family since not all sockets represent network | |
636 | * endpoint (e.g. AF_UNIX). | |
637 | */ | |
638 | if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) | |
639 | return 0; | |
640 | ||
1cedee13 AI |
641 | if (!ctx.uaddr) { |
642 | memset(&unspec, 0, sizeof(unspec)); | |
643 | ctx.uaddr = (struct sockaddr *)&unspec; | |
644 | } | |
645 | ||
4fbac77d AI |
646 | cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); |
647 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); | |
648 | ||
649 | return ret == 1 ? 0 : -EPERM; | |
650 | } | |
651 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); | |
652 | ||
40304b2a LB |
653 | /** |
654 | * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock | |
655 | * @sk: socket to get cgroup from | |
656 | * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains | |
657 | * sk with connection information (IP addresses, etc.) May not contain | |
658 | * cgroup info if it is a req sock. | |
659 | * @type: The type of program to be exectuted | |
660 | * | |
661 | * socket passed is expected to be of type INET or INET6. | |
662 | * | |
663 | * The program type passed in via @type must be suitable for sock_ops | |
664 | * filtering. No further check is performed to assert that. | |
665 | * | |
666 | * This function will return %-EPERM if any if an attached program was found | |
667 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
668 | */ | |
669 | int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, | |
670 | struct bpf_sock_ops_kern *sock_ops, | |
671 | enum bpf_attach_type type) | |
672 | { | |
673 | struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
324bda9e | 674 | int ret; |
40304b2a | 675 | |
324bda9e AS |
676 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops, |
677 | BPF_PROG_RUN); | |
678 | return ret == 1 ? 0 : -EPERM; | |
40304b2a LB |
679 | } |
680 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); | |
ebc614f6 RG |
681 | |
682 | int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, | |
683 | short access, enum bpf_attach_type type) | |
684 | { | |
685 | struct cgroup *cgrp; | |
686 | struct bpf_cgroup_dev_ctx ctx = { | |
687 | .access_type = (access << 16) | dev_type, | |
688 | .major = major, | |
689 | .minor = minor, | |
690 | }; | |
691 | int allow = 1; | |
692 | ||
693 | rcu_read_lock(); | |
694 | cgrp = task_dfl_cgroup(current); | |
695 | allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, | |
696 | BPF_PROG_RUN); | |
697 | rcu_read_unlock(); | |
698 | ||
699 | return !allow; | |
700 | } | |
701 | EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); | |
702 | ||
703 | static const struct bpf_func_proto * | |
5e43f899 | 704 | cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
ebc614f6 RG |
705 | { |
706 | switch (func_id) { | |
707 | case BPF_FUNC_map_lookup_elem: | |
708 | return &bpf_map_lookup_elem_proto; | |
709 | case BPF_FUNC_map_update_elem: | |
710 | return &bpf_map_update_elem_proto; | |
711 | case BPF_FUNC_map_delete_elem: | |
712 | return &bpf_map_delete_elem_proto; | |
713 | case BPF_FUNC_get_current_uid_gid: | |
714 | return &bpf_get_current_uid_gid_proto; | |
cd339431 RG |
715 | case BPF_FUNC_get_local_storage: |
716 | return &bpf_get_local_storage_proto; | |
5bf7a60b YS |
717 | case BPF_FUNC_get_current_cgroup_id: |
718 | return &bpf_get_current_cgroup_id_proto; | |
ebc614f6 RG |
719 | case BPF_FUNC_trace_printk: |
720 | if (capable(CAP_SYS_ADMIN)) | |
721 | return bpf_get_trace_printk_proto(); | |
c8dc7980 | 722 | /* fall through */ |
ebc614f6 RG |
723 | default: |
724 | return NULL; | |
725 | } | |
726 | } | |
727 | ||
728 | static bool cgroup_dev_is_valid_access(int off, int size, | |
729 | enum bpf_access_type type, | |
5e43f899 | 730 | const struct bpf_prog *prog, |
ebc614f6 RG |
731 | struct bpf_insn_access_aux *info) |
732 | { | |
06ef0ccb YS |
733 | const int size_default = sizeof(__u32); |
734 | ||
ebc614f6 RG |
735 | if (type == BPF_WRITE) |
736 | return false; | |
737 | ||
738 | if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) | |
739 | return false; | |
740 | /* The verifier guarantees that size > 0. */ | |
741 | if (off % size != 0) | |
742 | return false; | |
06ef0ccb YS |
743 | |
744 | switch (off) { | |
745 | case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type): | |
746 | bpf_ctx_record_field_size(info, size_default); | |
747 | if (!bpf_ctx_narrow_access_ok(off, size, size_default)) | |
748 | return false; | |
749 | break; | |
750 | default: | |
751 | if (size != size_default) | |
752 | return false; | |
753 | } | |
ebc614f6 RG |
754 | |
755 | return true; | |
756 | } | |
757 | ||
758 | const struct bpf_prog_ops cg_dev_prog_ops = { | |
759 | }; | |
760 | ||
761 | const struct bpf_verifier_ops cg_dev_verifier_ops = { | |
762 | .get_func_proto = cgroup_dev_func_proto, | |
763 | .is_valid_access = cgroup_dev_is_valid_access, | |
764 | }; |