]>
Commit | Line | Data |
---|---|---|
30070984 DM |
1 | /* |
2 | * Functions to manage eBPF programs attached to cgroups | |
3 | * | |
4 | * Copyright (c) 2016 Daniel Mack | |
5 | * | |
6 | * This file is subject to the terms and conditions of version 2 of the GNU | |
7 | * General Public License. See the file COPYING in the main directory of the | |
8 | * Linux distribution for more details. | |
9 | */ | |
10 | ||
11 | #include <linux/kernel.h> | |
12 | #include <linux/atomic.h> | |
13 | #include <linux/cgroup.h> | |
14 | #include <linux/slab.h> | |
15 | #include <linux/bpf.h> | |
16 | #include <linux/bpf-cgroup.h> | |
17 | #include <net/sock.h> | |
18 | ||
19 | DEFINE_STATIC_KEY_FALSE(cgroup_bpf_enabled_key); /* starts out false; incremented once per program attached in __cgroup_bpf_update() and decremented once per program released there or in cgroup_bpf_put() */ | |
20 | EXPORT_SYMBOL(cgroup_bpf_enabled_key); | |
21 | ||
22 | /** | |
23 | * cgroup_bpf_put() - put references of all bpf programs | |
24 | * @cgrp: the cgroup to modify | |
25 | */ | |
26 | void cgroup_bpf_put(struct cgroup *cgrp) | |
27 | { | |
28 | unsigned int type; | |
29 | ||
30 | for (type = 0; type < ARRAY_SIZE(cgrp->bpf.prog); type++) { | |
31 | struct bpf_prog *prog = cgrp->bpf.prog[type]; | |
32 | ||
33 | if (prog) { | |
34 | bpf_prog_put(prog); | |
35 | static_branch_dec(&cgroup_bpf_enabled_key); | |
36 | } | |
37 | } | |
38 | } | |
39 | ||
40 | /** | |
41 | * cgroup_bpf_inherit() - inherit effective programs from parent | |
42 | * @cgrp: the cgroup to modify | |
43 | * @parent: the parent to inherit from | |
44 | */ | |
45 | void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent) | |
46 | { | |
47 | unsigned int type; | |
48 | ||
49 | for (type = 0; type < ARRAY_SIZE(cgrp->bpf.effective); type++) { | |
50 | struct bpf_prog *e; | |
51 | ||
52 | e = rcu_dereference_protected(parent->bpf.effective[type], | |
53 | lockdep_is_held(&cgroup_mutex)); | |
54 | rcu_assign_pointer(cgrp->bpf.effective[type], e); | |
7f677633 | 55 | cgrp->bpf.disallow_override[type] = parent->bpf.disallow_override[type]; |
30070984 DM |
56 | } |
57 | } | |
58 | ||
59 | /** | |
60 | * __cgroup_bpf_update() - Update the pinned program of a cgroup, and | |
61 | * propagate the change to descendants | |
62 | * @cgrp: The cgroup which descendants to traverse | |
63 | * @parent: The parent of @cgrp, or %NULL if @cgrp is the root | |
64 | * @prog: A new program to pin | |
65 | * @type: Type of pinning operation (ingress/egress) | |
66 | * | |
67 | * Each cgroup has a set of two pointers for bpf programs; one for eBPF | |
68 | * programs it owns, and which is effective for execution. | |
69 | * | |
01ae87ea DM |
70 | * If @prog is not %NULL, this function attaches a new program to the cgroup |
71 | * and releases the one that is currently attached, if any. @prog is then made | |
30070984 DM |
72 | * the effective program of type @type in that cgroup. |
73 | * | |
74 | * If @prog is %NULL, the currently attached program of type @type is released, | |
75 | * and the effective program of the parent cgroup (if any) is inherited to | |
76 | * @cgrp. | |
77 | * | |
78 | * Then, the descendants of @cgrp are walked and the effective program for | |
79 | * each of them is set to the effective program of @cgrp unless the | |
80 | * descendant has its own program attached, in which case the subbranch is | |
81 | * skipped. This ensures that delegated subcgroups with own programs are left | |
82 | * untouched. | |
83 | * | |
84 | * Must be called with cgroup_mutex held. | |
85 | */ | |
7f677633 AS |
86 | int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, |
87 | struct bpf_prog *prog, enum bpf_attach_type type, | |
88 | bool new_overridable) | |
30070984 | 89 | { |
7f677633 | 90 | struct bpf_prog *old_prog, *effective = NULL; |
30070984 | 91 | struct cgroup_subsys_state *pos; |
7f677633 | 92 | bool overridable = true; |
30070984 | 93 | |
7f677633 AS |
94 | if (parent) { |
95 | overridable = !parent->bpf.disallow_override[type]; | |
96 | effective = rcu_dereference_protected(parent->bpf.effective[type], | |
97 | lockdep_is_held(&cgroup_mutex)); | |
98 | } | |
99 | ||
100 | if (prog && effective && !overridable) | |
101 | /* if parent has non-overridable prog attached, disallow | |
102 | * attaching new programs to descendent cgroup | |
103 | */ | |
104 | return -EPERM; | |
105 | ||
106 | if (prog && effective && overridable != new_overridable) | |
107 | /* if parent has overridable prog attached, only | |
108 | * allow overridable programs in descendent cgroup | |
109 | */ | |
110 | return -EPERM; | |
111 | ||
112 | old_prog = cgrp->bpf.prog[type]; | |
113 | ||
114 | if (prog) { | |
115 | overridable = new_overridable; | |
116 | effective = prog; | |
117 | if (old_prog && | |
118 | cgrp->bpf.disallow_override[type] == new_overridable) | |
119 | /* disallow attaching non-overridable on top | |
120 | * of existing overridable in this cgroup | |
121 | * and vice versa | |
122 | */ | |
123 | return -EPERM; | |
124 | } | |
30070984 | 125 | |
7f677633 AS |
126 | if (!prog && !old_prog) |
127 | /* report error when trying to detach and nothing is attached */ | |
128 | return -ENOENT; | |
129 | ||
130 | cgrp->bpf.prog[type] = prog; | |
30070984 DM |
131 | |
132 | css_for_each_descendant_pre(pos, &cgrp->self) { | |
133 | struct cgroup *desc = container_of(pos, struct cgroup, self); | |
134 | ||
135 | /* skip the subtree if the descendant has its own program */ | |
7f677633 | 136 | if (desc->bpf.prog[type] && desc != cgrp) { |
30070984 | 137 | pos = css_rightmost_descendant(pos); |
7f677633 | 138 | } else { |
30070984 DM |
139 | rcu_assign_pointer(desc->bpf.effective[type], |
140 | effective); | |
7f677633 AS |
141 | desc->bpf.disallow_override[type] = !overridable; |
142 | } | |
30070984 DM |
143 | } |
144 | ||
145 | if (prog) | |
146 | static_branch_inc(&cgroup_bpf_enabled_key); | |
147 | ||
148 | if (old_prog) { | |
149 | bpf_prog_put(old_prog); | |
150 | static_branch_dec(&cgroup_bpf_enabled_key); | |
151 | } | |
7f677633 | 152 | return 0; |
30070984 DM |
153 | } |
154 | ||
155 | /** | |
b2cd1257 | 156 | * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering |
8f917bba | 157 | * @sk: The socket sending or receiving traffic |
30070984 DM |
158 | * @skb: The skb that is being sent or received |
159 | * @type: The type of program to be exectuted | |
160 | * | |
161 | * If no socket is passed, or the socket is not of type INET or INET6, | |
162 | * this function does nothing and returns 0. | |
163 | * | |
164 | * The program type passed in via @type must be suitable for network | |
165 | * filtering. No further check is performed to assert that. | |
166 | * | |
167 | * This function will return %-EPERM if any if an attached program was found | |
168 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
169 | */ | |
b2cd1257 DA |
170 | int __cgroup_bpf_run_filter_skb(struct sock *sk, |
171 | struct sk_buff *skb, | |
172 | enum bpf_attach_type type) | |
30070984 DM |
173 | { |
174 | struct bpf_prog *prog; | |
175 | struct cgroup *cgrp; | |
176 | int ret = 0; | |
177 | ||
178 | if (!sk || !sk_fullsock(sk)) | |
179 | return 0; | |
180 | ||
181 | if (sk->sk_family != AF_INET && | |
182 | sk->sk_family != AF_INET6) | |
183 | return 0; | |
184 | ||
185 | cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
186 | ||
187 | rcu_read_lock(); | |
188 | ||
189 | prog = rcu_dereference(cgrp->bpf.effective[type]); | |
190 | if (prog) { | |
191 | unsigned int offset = skb->data - skb_network_header(skb); | |
8f917bba | 192 | struct sock *save_sk = skb->sk; |
30070984 | 193 | |
8f917bba | 194 | skb->sk = sk; |
30070984 DM |
195 | __skb_push(skb, offset); |
196 | ret = bpf_prog_run_save_cb(prog, skb) == 1 ? 0 : -EPERM; | |
197 | __skb_pull(skb, offset); | |
8f917bba | 198 | skb->sk = save_sk; |
30070984 DM |
199 | } |
200 | ||
201 | rcu_read_unlock(); | |
202 | ||
203 | return ret; | |
204 | } | |
b2cd1257 | 205 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); |
61023658 DA |
206 | |
207 | /** | |
208 | * __cgroup_bpf_run_filter_sk() - Run a program on a sock | |
209 | * @sk: sock structure to manipulate | |
210 | * @type: The type of program to be exectuted | |
211 | * | |
212 | * socket is passed is expected to be of type INET or INET6. | |
213 | * | |
214 | * The program type passed in via @type must be suitable for sock | |
215 | * filtering. No further check is performed to assert that. | |
216 | * | |
217 | * This function will return %-EPERM if any if an attached program was found | |
218 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
219 | */ | |
220 | int __cgroup_bpf_run_filter_sk(struct sock *sk, | |
221 | enum bpf_attach_type type) | |
222 | { | |
223 | struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
224 | struct bpf_prog *prog; | |
225 | int ret = 0; | |
226 | ||
227 | ||
228 | rcu_read_lock(); | |
229 | ||
230 | prog = rcu_dereference(cgrp->bpf.effective[type]); | |
231 | if (prog) | |
232 | ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM; | |
233 | ||
234 | rcu_read_unlock(); | |
235 | ||
236 | return ret; | |
237 | } | |
238 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); | |
40304b2a LB |
239 | |
240 | /** | |
241 | * __cgroup_bpf_run_filter_sock_ops() - Run a program on a sock | |
242 | * @sk: socket to get cgroup from | |
243 | * @sock_ops: bpf_sock_ops_kern struct to pass to program. Contains | |
244 | * sk with connection information (IP addresses, etc.) May not contain | |
245 | * cgroup info if it is a req sock. | |
246 | * @type: The type of program to be exectuted | |
247 | * | |
248 | * socket passed is expected to be of type INET or INET6. | |
249 | * | |
250 | * The program type passed in via @type must be suitable for sock_ops | |
251 | * filtering. No further check is performed to assert that. | |
252 | * | |
253 | * This function will return %-EPERM if any if an attached program was found | |
254 | * and if it returned != 1 during execution. In all other cases, 0 is returned. | |
255 | */ | |
256 | int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, | |
257 | struct bpf_sock_ops_kern *sock_ops, | |
258 | enum bpf_attach_type type) | |
259 | { | |
260 | struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); | |
261 | struct bpf_prog *prog; | |
262 | int ret = 0; | |
263 | ||
264 | ||
265 | rcu_read_lock(); | |
266 | ||
267 | prog = rcu_dereference(cgrp->bpf.effective[type]); | |
268 | if (prog) | |
269 | ret = BPF_PROG_RUN(prog, sock_ops) == 1 ? 0 : -EPERM; | |
270 | ||
271 | rcu_read_unlock(); | |
272 | ||
273 | return ret; | |
274 | } | |
275 | EXPORT_SYMBOL(__cgroup_bpf_run_filter_sock_ops); |