]>
Commit | Line | Data |
---|---|---|
30070984 DM |
1 | /* |
2 | * Functions to manage eBPF programs attached to cgroups | |
3 | * | |
4 | * Copyright (c) 2016 Daniel Mack | |
5 | * | |
6 | * This file is subject to the terms and conditions of version 2 of the GNU | |
7 | * General Public License. See the file COPYING in the main directory of the | |
8 | * Linux distribution for more details. | |
9 | */ | |
10 | ||
11 | #include <linux/kernel.h> | |
12 | #include <linux/atomic.h> | |
13 | #include <linux/cgroup.h> | |
14 | #include <linux/slab.h> | |
15 | #include <linux/bpf.h> | |
16 | #include <linux/bpf-cgroup.h> | |
17 | #include <net/sock.h> | |
18 | ||
/* Static key defined in its "false" state. Within this file it is
 * incremented once per successful __cgroup_bpf_attach() and decremented
 * whenever an attached program is replaced, detached, or released by
 * cgroup_bpf_put().
 */
DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key);
EXPORT_SYMBOL(cgroup_bpf_enabled_key);
21 | ||
22 | /** | |
23 | * cgroup_bpf_put() - put references of all bpf programs | |
24 | * @cgrp: the cgroup to modify | |
25 | */ | |
26 | void cgroup_bpf_put(struct cgroup *cgrp) | |
27 | { | |
8bad74f9 | 28 | enum bpf_cgroup_storage_type stype; |
30070984 DM |
29 | unsigned int type; |
30 | ||
324bda9e AS |
31 | for (type = 0; type < ARRAY_SIZE(cgrp->bpf.progs); type++) { |
32 | struct list_head *progs = &cgrp->bpf.progs[type]; | |
33 | struct bpf_prog_list *pl, *tmp; | |
34 | ||
35 | list_for_each_entry_safe(pl, tmp, progs, node) { | |
36 | list_del(&pl->node); | |
37 | bpf_prog_put(pl->prog); | |
8bad74f9 RG |
38 | for_each_cgroup_storage_type(stype) { |
39 | bpf_cgroup_storage_unlink(pl->storage[stype]); | |
40 | bpf_cgroup_storage_free(pl->storage[stype]); | |
41 | } | |
324bda9e | 42 | kfree(pl); |
30070984 DM |
43 | static_branch_dec(&cgroup_bpf_enabled_key); |
44 | } | |
324bda9e AS |
45 | bpf_prog_array_free(cgrp->bpf.effective[type]); |
46 | } | |
47 | } | |
48 | ||
49 | /* count number of elements in the list. | |
50 | * it's slow but the list cannot be long | |
51 | */ | |
52 | static u32 prog_list_length(struct list_head *head) | |
53 | { | |
54 | struct bpf_prog_list *pl; | |
55 | u32 cnt = 0; | |
56 | ||
57 | list_for_each_entry(pl, head, node) { | |
58 | if (!pl->prog) | |
59 | continue; | |
60 | cnt++; | |
30070984 | 61 | } |
324bda9e AS |
62 | return cnt; |
63 | } | |
64 | ||
65 | /* if parent has non-overridable prog attached, | |
66 | * disallow attaching new programs to the descendent cgroup. | |
67 | * if parent has overridable or multi-prog, allow attaching | |
68 | */ | |
69 | static bool hierarchy_allows_attach(struct cgroup *cgrp, | |
70 | enum bpf_attach_type type, | |
71 | u32 new_flags) | |
72 | { | |
73 | struct cgroup *p; | |
74 | ||
75 | p = cgroup_parent(cgrp); | |
76 | if (!p) | |
77 | return true; | |
78 | do { | |
79 | u32 flags = p->bpf.flags[type]; | |
80 | u32 cnt; | |
81 | ||
82 | if (flags & BPF_F_ALLOW_MULTI) | |
83 | return true; | |
84 | cnt = prog_list_length(&p->bpf.progs[type]); | |
85 | WARN_ON_ONCE(cnt > 1); | |
86 | if (cnt == 1) | |
87 | return !!(flags & BPF_F_ALLOW_OVERRIDE); | |
88 | p = cgroup_parent(p); | |
89 | } while (p); | |
90 | return true; | |
91 | } | |
92 | ||
93 | /* compute a chain of effective programs for a given cgroup: | |
94 | * start from the list of programs in this cgroup and add | |
95 | * all parent programs. | |
96 | * Note that parent's F_ALLOW_OVERRIDE-type program is yielding | |
97 | * to programs in this cgroup | |
98 | */ | |
99 | static int compute_effective_progs(struct cgroup *cgrp, | |
100 | enum bpf_attach_type type, | |
101 | struct bpf_prog_array __rcu **array) | |
102 | { | |
8bad74f9 | 103 | enum bpf_cgroup_storage_type stype; |
3960f4fd | 104 | struct bpf_prog_array *progs; |
324bda9e AS |
105 | struct bpf_prog_list *pl; |
106 | struct cgroup *p = cgrp; | |
107 | int cnt = 0; | |
108 | ||
109 | /* count number of effective programs by walking parents */ | |
110 | do { | |
111 | if (cnt == 0 || (p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) | |
112 | cnt += prog_list_length(&p->bpf.progs[type]); | |
113 | p = cgroup_parent(p); | |
114 | } while (p); | |
115 | ||
116 | progs = bpf_prog_array_alloc(cnt, GFP_KERNEL); | |
117 | if (!progs) | |
118 | return -ENOMEM; | |
119 | ||
120 | /* populate the array with effective progs */ | |
121 | cnt = 0; | |
122 | p = cgrp; | |
123 | do { | |
394e40a2 RG |
124 | if (cnt > 0 && !(p->bpf.flags[type] & BPF_F_ALLOW_MULTI)) |
125 | continue; | |
126 | ||
127 | list_for_each_entry(pl, &p->bpf.progs[type], node) { | |
128 | if (!pl->prog) | |
129 | continue; | |
130 | ||
131 | progs->items[cnt].prog = pl->prog; | |
8bad74f9 RG |
132 | for_each_cgroup_storage_type(stype) |
133 | progs->items[cnt].cgroup_storage[stype] = | |
134 | pl->storage[stype]; | |
394e40a2 RG |
135 | cnt++; |
136 | } | |
137 | } while ((p = cgroup_parent(p))); | |
324bda9e | 138 | |
3960f4fd | 139 | rcu_assign_pointer(*array, progs); |
324bda9e AS |
140 | return 0; |
141 | } | |
142 | ||
143 | static void activate_effective_progs(struct cgroup *cgrp, | |
144 | enum bpf_attach_type type, | |
145 | struct bpf_prog_array __rcu *array) | |
146 | { | |
147 | struct bpf_prog_array __rcu *old_array; | |
148 | ||
149 | old_array = xchg(&cgrp->bpf.effective[type], array); | |
150 | /* free prog array after grace period, since __cgroup_bpf_run_*() | |
151 | * might be still walking the array | |
152 | */ | |
153 | bpf_prog_array_free(old_array); | |
30070984 DM |
154 | } |
155 | ||
156 | /** | |
157 | * cgroup_bpf_inherit() - inherit effective programs from parent | |
158 | * @cgrp: the cgroup to modify | |
30070984 | 159 | */ |
324bda9e | 160 | int cgroup_bpf_inherit(struct cgroup *cgrp) |
30070984 | 161 | { |
324bda9e AS |
162 | /* has to use marco instead of const int, since compiler thinks |
163 | * that array below is variable length | |
164 | */ | |
165 | #define NR ARRAY_SIZE(cgrp->bpf.effective) | |
166 | struct bpf_prog_array __rcu *arrays[NR] = {}; | |
167 | int i; | |
30070984 | 168 | |
324bda9e AS |
169 | for (i = 0; i < NR; i++) |
170 | INIT_LIST_HEAD(&cgrp->bpf.progs[i]); | |
30070984 | 171 | |
324bda9e AS |
172 | for (i = 0; i < NR; i++) |
173 | if (compute_effective_progs(cgrp, i, &arrays[i])) | |
174 | goto cleanup; | |
175 | ||
176 | for (i = 0; i < NR; i++) | |
177 | activate_effective_progs(cgrp, i, arrays[i]); | |
178 | ||
179 | return 0; | |
180 | cleanup: | |
181 | for (i = 0; i < NR; i++) | |
182 | bpf_prog_array_free(arrays[i]); | |
183 | return -ENOMEM; | |
30070984 DM |
184 | } |
185 | ||
85fc4b16 RG |
186 | static int update_effective_progs(struct cgroup *cgrp, |
187 | enum bpf_attach_type type) | |
188 | { | |
189 | struct cgroup_subsys_state *css; | |
190 | int err; | |
191 | ||
192 | /* allocate and recompute effective prog arrays */ | |
193 | css_for_each_descendant_pre(css, &cgrp->self) { | |
194 | struct cgroup *desc = container_of(css, struct cgroup, self); | |
195 | ||
196 | err = compute_effective_progs(desc, type, &desc->bpf.inactive); | |
197 | if (err) | |
198 | goto cleanup; | |
199 | } | |
200 | ||
201 | /* all allocations were successful. Activate all prog arrays */ | |
202 | css_for_each_descendant_pre(css, &cgrp->self) { | |
203 | struct cgroup *desc = container_of(css, struct cgroup, self); | |
204 | ||
205 | activate_effective_progs(desc, type, desc->bpf.inactive); | |
206 | desc->bpf.inactive = NULL; | |
207 | } | |
208 | ||
209 | return 0; | |
210 | ||
211 | cleanup: | |
212 | /* oom while computing effective. Free all computed effective arrays | |
213 | * since they were not activated | |
214 | */ | |
215 | css_for_each_descendant_pre(css, &cgrp->self) { | |
216 | struct cgroup *desc = container_of(css, struct cgroup, self); | |
217 | ||
218 | bpf_prog_array_free(desc->bpf.inactive); | |
219 | desc->bpf.inactive = NULL; | |
220 | } | |
221 | ||
222 | return err; | |
223 | } | |
224 | ||
324bda9e AS |
225 | #define BPF_CGROUP_MAX_PROGS 64 |
226 | ||
30070984 | 227 | /** |
324bda9e | 228 | * __cgroup_bpf_attach() - Attach the program to a cgroup, and |
30070984 DM |
229 | * propagate the change to descendants |
230 | * @cgrp: The cgroup which descendants to traverse | |
324bda9e AS |
231 | * @prog: A program to attach |
232 | * @type: Type of attach operation | |
30070984 DM |
233 | * |
234 | * Must be called with cgroup_mutex held. | |
235 | */ | |
324bda9e AS |
236 | int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, |
237 | enum bpf_attach_type type, u32 flags) | |
30070984 | 238 | { |
324bda9e AS |
239 | struct list_head *progs = &cgrp->bpf.progs[type]; |
240 | struct bpf_prog *old_prog = NULL; | |
8bad74f9 RG |
241 | struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE], |
242 | *old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL}; | |
243 | enum bpf_cgroup_storage_type stype; | |
324bda9e AS |
244 | struct bpf_prog_list *pl; |
245 | bool pl_was_allocated; | |
324bda9e AS |
246 | int err; |
247 | ||
248 | if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) | |
249 | /* invalid combination */ | |
250 | return -EINVAL; | |
251 | ||
252 | if (!hierarchy_allows_attach(cgrp, type, flags)) | |
7f677633 AS |
253 | return -EPERM; |
254 | ||
324bda9e AS |
255 | if (!list_empty(progs) && cgrp->bpf.flags[type] != flags) |
256 | /* Disallow attaching non-overridable on top | |
257 | * of existing overridable in this cgroup. | |
258 | * Disallow attaching multi-prog if overridable or none | |
7f677633 AS |
259 | */ |
260 | return -EPERM; | |
261 | ||
324bda9e AS |
262 | if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS) |
263 | return -E2BIG; | |
264 | ||
8bad74f9 RG |
265 | for_each_cgroup_storage_type(stype) { |
266 | storage[stype] = bpf_cgroup_storage_alloc(prog, stype); | |
267 | if (IS_ERR(storage[stype])) { | |
268 | storage[stype] = NULL; | |
269 | for_each_cgroup_storage_type(stype) | |
270 | bpf_cgroup_storage_free(storage[stype]); | |
271 | return -ENOMEM; | |
272 | } | |
273 | } | |
d7bf2c10 | 274 | |
324bda9e | 275 | if (flags & BPF_F_ALLOW_MULTI) { |
d7bf2c10 RG |
276 | list_for_each_entry(pl, progs, node) { |
277 | if (pl->prog == prog) { | |
324bda9e | 278 | /* disallow attaching the same prog twice */ |
8bad74f9 RG |
279 | for_each_cgroup_storage_type(stype) |
280 | bpf_cgroup_storage_free(storage[stype]); | |
324bda9e | 281 | return -EINVAL; |
d7bf2c10 RG |
282 | } |
283 | } | |
324bda9e AS |
284 | |
285 | pl = kmalloc(sizeof(*pl), GFP_KERNEL); | |
d7bf2c10 | 286 | if (!pl) { |
8bad74f9 RG |
287 | for_each_cgroup_storage_type(stype) |
288 | bpf_cgroup_storage_free(storage[stype]); | |
324bda9e | 289 | return -ENOMEM; |
d7bf2c10 RG |
290 | } |
291 | ||
324bda9e AS |
292 | pl_was_allocated = true; |
293 | pl->prog = prog; | |
8bad74f9 RG |
294 | for_each_cgroup_storage_type(stype) |
295 | pl->storage[stype] = storage[stype]; | |
324bda9e AS |
296 | list_add_tail(&pl->node, progs); |
297 | } else { | |
298 | if (list_empty(progs)) { | |
299 | pl = kmalloc(sizeof(*pl), GFP_KERNEL); | |
d7bf2c10 | 300 | if (!pl) { |
8bad74f9 RG |
301 | for_each_cgroup_storage_type(stype) |
302 | bpf_cgroup_storage_free(storage[stype]); | |
324bda9e | 303 | return -ENOMEM; |
d7bf2c10 | 304 | } |
324bda9e AS |
305 | pl_was_allocated = true; |
306 | list_add_tail(&pl->node, progs); | |
307 | } else { | |
308 | pl = list_first_entry(progs, typeof(*pl), node); | |
309 | old_prog = pl->prog; | |
8bad74f9 RG |
310 | for_each_cgroup_storage_type(stype) { |
311 | old_storage[stype] = pl->storage[stype]; | |
312 | bpf_cgroup_storage_unlink(old_storage[stype]); | |
313 | } | |
324bda9e AS |
314 | pl_was_allocated = false; |
315 | } | |
316 | pl->prog = prog; | |
8bad74f9 RG |
317 | for_each_cgroup_storage_type(stype) |
318 | pl->storage[stype] = storage[stype]; | |
7f677633 | 319 | } |
30070984 | 320 | |
324bda9e | 321 | cgrp->bpf.flags[type] = flags; |
7f677633 | 322 | |
85fc4b16 RG |
323 | err = update_effective_progs(cgrp, type); |
324 | if (err) | |
325 | goto cleanup; | |
324bda9e AS |
326 | |
327 | static_branch_inc(&cgroup_bpf_enabled_key); | |
8bad74f9 RG |
328 | for_each_cgroup_storage_type(stype) { |
329 | if (!old_storage[stype]) | |
330 | continue; | |
331 | bpf_cgroup_storage_free(old_storage[stype]); | |
332 | } | |
30070984 DM |
333 | if (old_prog) { |
334 | bpf_prog_put(old_prog); | |
335 | static_branch_dec(&cgroup_bpf_enabled_key); | |
336 | } | |
8bad74f9 RG |
337 | for_each_cgroup_storage_type(stype) |
338 | bpf_cgroup_storage_link(storage[stype], cgrp, type); | |
7f677633 | 339 | return 0; |
324bda9e AS |
340 | |
341 | cleanup: | |
324bda9e AS |
342 | /* and cleanup the prog list */ |
343 | pl->prog = old_prog; | |
8bad74f9 RG |
344 | for_each_cgroup_storage_type(stype) { |
345 | bpf_cgroup_storage_free(pl->storage[stype]); | |
346 | pl->storage[stype] = old_storage[stype]; | |
347 | bpf_cgroup_storage_link(old_storage[stype], cgrp, type); | |
348 | } | |
324bda9e AS |
349 | if (pl_was_allocated) { |
350 | list_del(&pl->node); | |
351 | kfree(pl); | |
352 | } | |
353 | return err; | |
354 | } | |
355 | ||
356 | /** | |
357 | * __cgroup_bpf_detach() - Detach the program from a cgroup, and | |
358 | * propagate the change to descendants | |
359 | * @cgrp: The cgroup which descendants to traverse | |
360 | * @prog: A program to detach or NULL | |
361 | * @type: Type of detach operation | |
362 | * | |
363 | * Must be called with cgroup_mutex held. | |
364 | */ | |
365 | int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, | |
366 | enum bpf_attach_type type, u32 unused_flags) | |
367 | { | |
368 | struct list_head *progs = &cgrp->bpf.progs[type]; | |
8bad74f9 | 369 | enum bpf_cgroup_storage_type stype; |
324bda9e AS |
370 | u32 flags = cgrp->bpf.flags[type]; |
371 | struct bpf_prog *old_prog = NULL; | |
324bda9e AS |
372 | struct bpf_prog_list *pl; |
373 | int err; | |
374 | ||
375 | if (flags & BPF_F_ALLOW_MULTI) { | |
376 | if (!prog) | |
377 | /* to detach MULTI prog the user has to specify valid FD | |
378 | * of the program to be detached | |
379 | */ | |
380 | return -EINVAL; | |
381 | } else { | |
382 | if (list_empty(progs)) | |
383 | /* report error when trying to detach and nothing is attached */ | |
384 | return -ENOENT; | |
385 | } | |
386 | ||
387 | if (flags & BPF_F_ALLOW_MULTI) { | |
388 | /* find the prog and detach it */ | |
389 | list_for_each_entry(pl, progs, node) { | |
390 | if (pl->prog != prog) | |
391 | continue; | |
392 | old_prog = prog; | |
393 | /* mark it deleted, so it's ignored while | |
394 | * recomputing effective | |
395 | */ | |
396 | pl->prog = NULL; | |
397 | break; | |
398 | } | |
399 | if (!old_prog) | |
400 | return -ENOENT; | |
401 | } else { | |
402 | /* to maintain backward compatibility NONE and OVERRIDE cgroups | |
403 | * allow detaching with invalid FD (prog==NULL) | |
404 | */ | |
405 | pl = list_first_entry(progs, typeof(*pl), node); | |
406 | old_prog = pl->prog; | |
407 | pl->prog = NULL; | |
408 | } | |
409 | ||
85fc4b16 RG |
410 | err = update_effective_progs(cgrp, type); |
411 | if (err) | |
412 | goto cleanup; | |
324bda9e AS |
413 | |
414 | /* now can actually delete it from this cgroup list */ | |
415 | list_del(&pl->node); | |
8bad74f9 RG |
416 | for_each_cgroup_storage_type(stype) { |
417 | bpf_cgroup_storage_unlink(pl->storage[stype]); | |
418 | bpf_cgroup_storage_free(pl->storage[stype]); | |
419 | } | |
324bda9e AS |
420 | kfree(pl); |
421 | if (list_empty(progs)) | |
422 | /* last program was detached, reset flags to zero */ | |
423 | cgrp->bpf.flags[type] = 0; | |
424 | ||
425 | bpf_prog_put(old_prog); | |
426 | static_branch_dec(&cgroup_bpf_enabled_key); | |
427 | return 0; | |
428 | ||
429 | cleanup: | |
324bda9e AS |
430 | /* and restore back old_prog */ |
431 | pl->prog = old_prog; | |
432 | return err; | |
30070984 DM |
433 | } |
434 | ||
468e2f64 AS |
435 | /* Must be called with cgroup_mutex held to avoid races. */ |
436 | int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, | |
437 | union bpf_attr __user *uattr) | |
438 | { | |
439 | __u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids); | |
440 | enum bpf_attach_type type = attr->query.attach_type; | |
441 | struct list_head *progs = &cgrp->bpf.progs[type]; | |
442 | u32 flags = cgrp->bpf.flags[type]; | |
443 | int cnt, ret = 0, i; | |
444 | ||
445 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) | |
446 | cnt = bpf_prog_array_length(cgrp->bpf.effective[type]); | |
447 | else | |
448 | cnt = prog_list_length(progs); | |
449 | ||
450 | if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags))) | |
451 | return -EFAULT; | |
452 | if (copy_to_user(&uattr->query.prog_cnt, &cnt, sizeof(cnt))) | |
453 | return -EFAULT; | |
454 | if (attr->query.prog_cnt == 0 || !prog_ids || !cnt) | |
455 | /* return early if user requested only program count + flags */ | |
456 | return 0; | |
457 | if (attr->query.prog_cnt < cnt) { | |
458 | cnt = attr->query.prog_cnt; | |
459 | ret = -ENOSPC; | |
460 | } | |
461 | ||
462 | if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) { | |
463 | return bpf_prog_array_copy_to_user(cgrp->bpf.effective[type], | |
464 | prog_ids, cnt); | |
465 | } else { | |
466 | struct bpf_prog_list *pl; | |
467 | u32 id; | |
468 | ||
469 | i = 0; | |
470 | list_for_each_entry(pl, progs, node) { | |
471 | id = pl->prog->aux->id; | |
472 | if (copy_to_user(prog_ids + i, &id, sizeof(id))) | |
473 | return -EFAULT; | |
474 | if (++i == cnt) | |
475 | break; | |
476 | } | |
477 | } | |
478 | return ret; | |
479 | } | |
480 | ||
fdb5c453 SY |
481 | int cgroup_bpf_prog_attach(const union bpf_attr *attr, |
482 | enum bpf_prog_type ptype, struct bpf_prog *prog) | |
483 | { | |
484 | struct cgroup *cgrp; | |
485 | int ret; | |
486 | ||
487 | cgrp = cgroup_get_from_fd(attr->target_fd); | |
488 | if (IS_ERR(cgrp)) | |
489 | return PTR_ERR(cgrp); | |
490 | ||
491 | ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type, | |
492 | attr->attach_flags); | |
493 | cgroup_put(cgrp); | |
494 | return ret; | |
495 | } | |
496 | ||
497 | int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) | |
498 | { | |
499 | struct bpf_prog *prog; | |
500 | struct cgroup *cgrp; | |
501 | int ret; | |
502 | ||
503 | cgrp = cgroup_get_from_fd(attr->target_fd); | |
504 | if (IS_ERR(cgrp)) | |
505 | return PTR_ERR(cgrp); | |
506 | ||
507 | prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); | |
508 | if (IS_ERR(prog)) | |
509 | prog = NULL; | |
510 | ||
511 | ret = cgroup_bpf_detach(cgrp, prog, attr->attach_type, 0); | |
512 | if (prog) | |
513 | bpf_prog_put(prog); | |
514 | ||
515 | cgroup_put(cgrp); | |
516 | return ret; | |
517 | } | |
518 | ||
519 | int cgroup_bpf_prog_query(const union bpf_attr *attr, | |
520 | union bpf_attr __user *uattr) | |
521 | { | |
522 | struct cgroup *cgrp; | |
523 | int ret; | |
524 | ||
525 | cgrp = cgroup_get_from_fd(attr->query.target_fd); | |
526 | if (IS_ERR(cgrp)) | |
527 | return PTR_ERR(cgrp); | |
528 | ||
529 | ret = cgroup_bpf_query(cgrp, attr, uattr); | |
530 | ||
531 | cgroup_put(cgrp); | |
532 | return ret; | |
533 | } | |
534 | ||
30070984 | 535 | /** |
b2cd1257 | 536 | * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering |
8f917bba | 537 | * @sk: The socket sending or receiving traffic |
30070984 DM |
538 | * @skb: The skb that is being sent or received |
539 | * @type: The type of program to be exectuted | |
540 | * | |
541 | * If no socket is passed, or the socket is not of type INET or INET6, | |
542 | * this function does nothing and returns 0. | |
543 | * | |
544 | * The program type passed in via @type must be suitable for network | |
545 | * filtering. No further check is performed to assert that. | |
546 | * | |
547 | * This function will return %-EPERM if any if an attached program was found | |
548 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
549 | */ | |
b2cd1257 DA |
550 | int __cgroup_bpf_run_filter_skb(struct sock *sk, |
551 | struct sk_buff *skb, | |
552 | enum bpf_attach_type type) | |
30070984 | 553 | { |
324bda9e AS |
554 | unsigned int offset = skb->data - skb_network_header(skb); |
555 | struct sock *save_sk; | |
b39b5f41 | 556 | void *saved_data_end; |
30070984 | 557 | struct cgroup *cgrp; |
324bda9e | 558 | int ret; |
30070984 DM |
559 | |
560 | if (!sk || !sk_fullsock(sk)) | |
561 | return 0; | |
562 | ||
324bda9e | 563 | if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) |
30070984 DM |
564 | return 0; |
565 | ||
566 | cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
324bda9e AS |
567 | save_sk = skb->sk; |
568 | skb->sk = sk; | |
569 | __skb_push(skb, offset); | |
b39b5f41 SL |
570 | |
571 | /* compute pointers for the bpf prog */ | |
572 | bpf_compute_and_save_data_end(skb, &saved_data_end); | |
573 | ||
324bda9e | 574 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], skb, |
6cab5e90 | 575 | __bpf_prog_run_save_cb); |
b39b5f41 | 576 | bpf_restore_data_end(skb, saved_data_end); |
324bda9e AS |
577 | __skb_pull(skb, offset); |
578 | skb->sk = save_sk; | |
579 | return ret == 1 ? 0 : -EPERM; | |
30070984 | 580 | } |
b2cd1257 | 581 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); |
61023658 DA |
582 | |
583 | /** | |
584 | * __cgroup_bpf_run_filter_sk() - Run a program on a sock | |
585 | * @sk: sock structure to manipulate | |
586 | * @type: The type of program to be exectuted | |
587 | * | |
588 | * socket is passed is expected to be of type INET or INET6. | |
589 | * | |
590 | * The program type passed in via @type must be suitable for sock | |
591 | * filtering. No further check is performed to assert that. | |
592 | * | |
593 | * This function will return %-EPERM if any if an attached program was found | |
594 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
595 | */ | |
596 | int __cgroup_bpf_run_filter_sk(struct sock *sk, | |
597 | enum bpf_attach_type type) | |
598 | { | |
599 | struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
324bda9e | 600 | int ret; |
61023658 | 601 | |
324bda9e AS |
602 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sk, BPF_PROG_RUN); |
603 | return ret == 1 ? 0 : -EPERM; | |
61023658 DA |
604 | } |
605 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); | |
40304b2a | 606 | |
4fbac77d AI |
607 | /** |
608 | * __cgroup_bpf_run_filter_sock_addr() - Run a program on a sock and | |
609 | * provided by user sockaddr | |
610 | * @sk: sock struct that will use sockaddr | |
611 | * @uaddr: sockaddr struct provided by user | |
612 | * @type: The type of program to be exectuted | |
1cedee13 | 613 | * @t_ctx: Pointer to attach type specific context |
4fbac77d AI |
614 | * |
615 | * socket is expected to be of type INET or INET6. | |
616 | * | |
617 | * This function will return %-EPERM if an attached program is found and | |
618 | * returned value != 1 during execution. In all other cases, 0 is returned. | |
619 | */ | |
620 | int __cgroup_bpf_run_filter_sock_addr(struct sock *sk, | |
621 | struct sockaddr *uaddr, | |
1cedee13 AI |
622 | enum bpf_attach_type type, |
623 | void *t_ctx) | |
4fbac77d AI |
624 | { |
625 | struct bpf_sock_addr_kern ctx = { | |
626 | .sk = sk, | |
627 | .uaddr = uaddr, | |
1cedee13 | 628 | .t_ctx = t_ctx, |
4fbac77d | 629 | }; |
1cedee13 | 630 | struct sockaddr_storage unspec; |
4fbac77d AI |
631 | struct cgroup *cgrp; |
632 | int ret; | |
633 | ||
634 | /* Check socket family since not all sockets represent network | |
635 | * endpoint (e.g. AF_UNIX). | |
636 | */ | |
637 | if (sk->sk_family != AF_INET && sk->sk_family != AF_INET6) | |
638 | return 0; | |
639 | ||
1cedee13 AI |
640 | if (!ctx.uaddr) { |
641 | memset(&unspec, 0, sizeof(unspec)); | |
642 | ctx.uaddr = (struct sockaddr *)&unspec; | |
643 | } | |
644 | ||
4fbac77d AI |
645 | cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); |
646 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, BPF_PROG_RUN); | |
647 | ||
648 | return ret == 1 ? 0 : -EPERM; | |
649 | } | |
650 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_addr); | |
651 | ||
40304b2a LB |
652 | /** |
653 | * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock | |
654 | * @sk: socket to get cgroup from | |
655 | * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains | |
656 | * sk with connection information (IP addresses, etc.) May not contain | |
657 | * cgroup info if it is a req sock. | |
658 | * @type: The type of program to be exectuted | |
659 | * | |
660 | * socket passed is expected to be of type INET or INET6. | |
661 | * | |
662 | * The program type passed in via @type must be suitable for sock_ops | |
663 | * filtering. No further check is performed to assert that. | |
664 | * | |
665 | * This function will return %-EPERM if any if an attached program was found | |
666 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
667 | */ | |
668 | int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, | |
669 | struct bpf_sock_ops_kern *sock_ops, | |
670 | enum bpf_attach_type type) | |
671 | { | |
672 | struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
324bda9e | 673 | int ret; |
40304b2a | 674 | |
324bda9e AS |
675 | ret = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], sock_ops, |
676 | BPF_PROG_RUN); | |
677 | return ret == 1 ? 0 : -EPERM; | |
40304b2a LB |
678 | } |
679 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); | |
ebc614f6 RG |
680 | |
681 | int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, | |
682 | short access, enum bpf_attach_type type) | |
683 | { | |
684 | struct cgroup *cgrp; | |
685 | struct bpf_cgroup_dev_ctx ctx = { | |
686 | .access_type = (access << 16) | dev_type, | |
687 | .major = major, | |
688 | .minor = minor, | |
689 | }; | |
690 | int allow = 1; | |
691 | ||
692 | rcu_read_lock(); | |
693 | cgrp = task_dfl_cgroup(current); | |
694 | allow = BPF_PROG_RUN_ARRAY(cgrp->bpf.effective[type], &ctx, | |
695 | BPF_PROG_RUN); | |
696 | rcu_read_unlock(); | |
697 | ||
698 | return !allow; | |
699 | } | |
700 | EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission); | |
701 | ||
702 | static const struct bpf_func_proto * | |
5e43f899 | 703 | cgroup_dev_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) |
ebc614f6 RG |
704 | { |
705 | switch (func_id) { | |
706 | case BPF_FUNC_map_lookup_elem: | |
707 | return &bpf_map_lookup_elem_proto; | |
708 | case BPF_FUNC_map_update_elem: | |
709 | return &bpf_map_update_elem_proto; | |
710 | case BPF_FUNC_map_delete_elem: | |
711 | return &bpf_map_delete_elem_proto; | |
712 | case BPF_FUNC_get_current_uid_gid: | |
713 | return &bpf_get_current_uid_gid_proto; | |
cd339431 RG |
714 | case BPF_FUNC_get_local_storage: |
715 | return &bpf_get_local_storage_proto; | |
5bf7a60b YS |
716 | case BPF_FUNC_get_current_cgroup_id: |
717 | return &bpf_get_current_cgroup_id_proto; | |
ebc614f6 RG |
718 | case BPF_FUNC_trace_printk: |
719 | if (capable(CAP_SYS_ADMIN)) | |
720 | return bpf_get_trace_printk_proto(); | |
c8dc7980 | 721 | /* fall through */ |
ebc614f6 RG |
722 | default: |
723 | return NULL; | |
724 | } | |
725 | } | |
726 | ||
727 | static bool cgroup_dev_is_valid_access(int off, int size, | |
728 | enum bpf_access_type type, | |
5e43f899 | 729 | const struct bpf_prog *prog, |
ebc614f6 RG |
730 | struct bpf_insn_access_aux *info) |
731 | { | |
06ef0ccb YS |
732 | const int size_default = sizeof(__u32); |
733 | ||
ebc614f6 RG |
734 | if (type == BPF_WRITE) |
735 | return false; | |
736 | ||
737 | if (off < 0 || off + size > sizeof(struct bpf_cgroup_dev_ctx)) | |
738 | return false; | |
739 | /* The verifier guarantees that size > 0. */ | |
740 | if (off % size != 0) | |
741 | return false; | |
06ef0ccb YS |
742 | |
743 | switch (off) { | |
744 | case bpf_ctx_range(struct bpf_cgroup_dev_ctx, access_type): | |
745 | bpf_ctx_record_field_size(info, size_default); | |
746 | if (!bpf_ctx_narrow_access_ok(off, size, size_default)) | |
747 | return false; | |
748 | break; | |
749 | default: | |
750 | if (size != size_default) | |
751 | return false; | |
752 | } | |
ebc614f6 RG |
753 | |
754 | return true; | |
755 | } | |
756 | ||
/* Program ops table left empty: no bpf_prog_ops members are set here. */
const struct bpf_prog_ops cg_dev_prog_ops = {
};
759 | ||
/* Verifier callbacks: helper lookup goes through cgroup_dev_func_proto()
 * and context access checks through cgroup_dev_is_valid_access().
 */
const struct bpf_verifier_ops cg_dev_verifier_ops = {
	.get_func_proto		= cgroup_dev_func_proto,
	.is_valid_access	= cgroup_dev_is_valid_access,
};