]>
Commit | Line | Data |
---|---|---|
99c55f7d AS |
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of version 2 of the GNU General Public
 * License as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/bpf.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/anon_inodes.h>
#include <linux/file.h>
99c55f7d AS |
17 | |
/* list of all registered map implementations; populated via
 * bpf_register_map_type() — presumably at boot only (no lock taken here),
 * see the "boot time registration" comment on that function
 */
static LIST_HEAD(bpf_map_types);
19 | ||
20 | static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) | |
21 | { | |
22 | struct bpf_map_type_list *tl; | |
23 | struct bpf_map *map; | |
24 | ||
25 | list_for_each_entry(tl, &bpf_map_types, list_node) { | |
26 | if (tl->type == attr->map_type) { | |
27 | map = tl->ops->map_alloc(attr); | |
28 | if (IS_ERR(map)) | |
29 | return map; | |
30 | map->ops = tl->ops; | |
31 | map->map_type = attr->map_type; | |
32 | return map; | |
33 | } | |
34 | } | |
35 | return ERR_PTR(-EINVAL); | |
36 | } | |
37 | ||
/* boot time registration of different map implementations */
void bpf_register_map_type(struct bpf_map_type_list *tl)
{
	/* no locking: per the comment above, registration happens at boot,
	 * before concurrent lookups via find_and_alloc_map() are possible
	 */
	list_add(&tl->list_node, &bpf_map_types);
}
43 | ||
/* called from workqueue */
static void bpf_map_free_deferred(struct work_struct *work)
{
	struct bpf_map *map = container_of(work, struct bpf_map, work);

	/* implementation dependent freeing; runs in process context because
	 * map_free() might sleep (see bpf_map_put())
	 */
	map->ops->map_free(map);
}
52 | ||
/* decrement map refcnt and schedule it for freeing via workqueue
 * (underlying map implementation ops->map_free() might sleep)
 */
void bpf_map_put(struct bpf_map *map)
{
	if (atomic_dec_and_test(&map->refcnt)) {
		/* last reference dropped: defer the free to a workqueue so
		 * this function stays safe to call from atomic context
		 */
		INIT_WORK(&map->work, bpf_map_free_deferred);
		schedule_work(&map->work);
	}
}
63 | ||
/* ->release hook for the anon inode backing a map fd: drops the reference
 * that the fd was holding on the map
 */
static int bpf_map_release(struct inode *inode, struct file *filp)
{
	struct bpf_map *map = filp->private_data;

	bpf_map_put(map);
	return 0;
}
71 | ||
/* file_operations for map fds handed out by anon_inode_getfd(); only
 * ->release is needed — all map access goes through the bpf(2) syscall,
 * not read/write on the fd
 */
static const struct file_operations bpf_map_fops = {
	.release = bpf_map_release,
};
75 | ||
/* helper macro to check that unused fields 'union bpf_attr' are zero.
 * Scans every byte of the union after CMD##_LAST_FIELD; evaluates to true
 * (i.e. reject the command) if any of those trailing bytes is non-zero.
 * Relies on a local 'attr' pointer being in scope at the expansion site.
 */
#define CHECK_ATTR(CMD) \
	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
		   sizeof(attr->CMD##_LAST_FIELD), 0, \
		   sizeof(*attr) - \
		   offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
		   sizeof(attr->CMD##_LAST_FIELD)) != NULL
83 | ||
84 | #define BPF_MAP_CREATE_LAST_FIELD max_entries | |
85 | /* called via syscall */ | |
86 | static int map_create(union bpf_attr *attr) | |
87 | { | |
88 | struct bpf_map *map; | |
89 | int err; | |
90 | ||
91 | err = CHECK_ATTR(BPF_MAP_CREATE); | |
92 | if (err) | |
93 | return -EINVAL; | |
94 | ||
95 | /* find map type and init map: hashtable vs rbtree vs bloom vs ... */ | |
96 | map = find_and_alloc_map(attr); | |
97 | if (IS_ERR(map)) | |
98 | return PTR_ERR(map); | |
99 | ||
100 | atomic_set(&map->refcnt, 1); | |
101 | ||
102 | err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR | O_CLOEXEC); | |
103 | ||
104 | if (err < 0) | |
105 | /* failed to allocate fd */ | |
106 | goto free_map; | |
107 | ||
108 | return err; | |
109 | ||
110 | free_map: | |
111 | map->ops->map_free(map); | |
112 | return err; | |
113 | } | |
114 | ||
db20fd2b AS |
/* if error is returned, fd is released.
 * On success caller should complete fd access with matching fdput()
 */
struct bpf_map *bpf_map_get(struct fd f)
{
	struct bpf_map *map;

	if (!f.file)
		return ERR_PTR(-EBADF);

	/* reject fds that were not created by map_create() */
	if (f.file->f_op != &bpf_map_fops) {
		fdput(f);
		return ERR_PTR(-EINVAL);
	}

	/* note: no refcnt increment here — the caller's fd reference keeps
	 * the map alive until the matching fdput()
	 */
	map = f.file->private_data;

	return map;
}
134 | ||
/* helper to convert user pointers passed inside __aligned_u64 fields */
static void __user *u64_to_ptr(__u64 val)
{
	/* two-step cast via unsigned long avoids truncation warnings on
	 * 32-bit builds where pointers are narrower than u64
	 */
	return (void __user *) (unsigned long) val;
}
140 | ||
141 | /* last field in 'union bpf_attr' used by this command */ | |
142 | #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value | |
143 | ||
144 | static int map_lookup_elem(union bpf_attr *attr) | |
145 | { | |
146 | void __user *ukey = u64_to_ptr(attr->key); | |
147 | void __user *uvalue = u64_to_ptr(attr->value); | |
148 | int ufd = attr->map_fd; | |
149 | struct fd f = fdget(ufd); | |
150 | struct bpf_map *map; | |
151 | void *key, *value; | |
152 | int err; | |
153 | ||
154 | if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM)) | |
155 | return -EINVAL; | |
156 | ||
157 | map = bpf_map_get(f); | |
158 | if (IS_ERR(map)) | |
159 | return PTR_ERR(map); | |
160 | ||
161 | err = -ENOMEM; | |
162 | key = kmalloc(map->key_size, GFP_USER); | |
163 | if (!key) | |
164 | goto err_put; | |
165 | ||
166 | err = -EFAULT; | |
167 | if (copy_from_user(key, ukey, map->key_size) != 0) | |
168 | goto free_key; | |
169 | ||
170 | err = -ESRCH; | |
171 | rcu_read_lock(); | |
172 | value = map->ops->map_lookup_elem(map, key); | |
173 | if (!value) | |
174 | goto err_unlock; | |
175 | ||
176 | err = -EFAULT; | |
177 | if (copy_to_user(uvalue, value, map->value_size) != 0) | |
178 | goto err_unlock; | |
179 | ||
180 | err = 0; | |
181 | ||
182 | err_unlock: | |
183 | rcu_read_unlock(); | |
184 | free_key: | |
185 | kfree(key); | |
186 | err_put: | |
187 | fdput(f); | |
188 | return err; | |
189 | } | |
190 | ||
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value

/* called via syscall: insert or overwrite the element stored under the
 * user-supplied key with the user-supplied value
 */
static int map_update_elem(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *uvalue = u64_to_ptr(attr->value);
	int ufd = attr->map_fd;
	struct fd f = fdget(ufd);
	struct bpf_map *map;
	void *key, *value;
	int err;

	/* reject if any attr field past 'value' is non-zero */
	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
		return -EINVAL;

	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	value = kmalloc(map->value_size, GFP_USER);
	if (!value)
		goto free_key;

	/* both user copies happen before taking the RCU lock, since
	 * copy_from_user() may fault and sleep
	 */
	err = -EFAULT;
	if (copy_from_user(value, uvalue, map->value_size) != 0)
		goto free_value;

	/* eBPF program that use maps are running under rcu_read_lock(),
	 * therefore all map accessors rely on this fact, so do the same here
	 */
	rcu_read_lock();
	err = map->ops->map_update_elem(map, key, value);
	rcu_read_unlock();

free_value:
	kfree(value);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
243 | ||
244 | #define BPF_MAP_DELETE_ELEM_LAST_FIELD key | |
245 | ||
246 | static int map_delete_elem(union bpf_attr *attr) | |
247 | { | |
248 | void __user *ukey = u64_to_ptr(attr->key); | |
249 | int ufd = attr->map_fd; | |
250 | struct fd f = fdget(ufd); | |
251 | struct bpf_map *map; | |
252 | void *key; | |
253 | int err; | |
254 | ||
255 | if (CHECK_ATTR(BPF_MAP_DELETE_ELEM)) | |
256 | return -EINVAL; | |
257 | ||
258 | map = bpf_map_get(f); | |
259 | if (IS_ERR(map)) | |
260 | return PTR_ERR(map); | |
261 | ||
262 | err = -ENOMEM; | |
263 | key = kmalloc(map->key_size, GFP_USER); | |
264 | if (!key) | |
265 | goto err_put; | |
266 | ||
267 | err = -EFAULT; | |
268 | if (copy_from_user(key, ukey, map->key_size) != 0) | |
269 | goto free_key; | |
270 | ||
271 | rcu_read_lock(); | |
272 | err = map->ops->map_delete_elem(map, key); | |
273 | rcu_read_unlock(); | |
274 | ||
275 | free_key: | |
276 | kfree(key); | |
277 | err_put: | |
278 | fdput(f); | |
279 | return err; | |
280 | } | |
281 | ||
/* last field in 'union bpf_attr' used by this command */
#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key

/* called via syscall: write the key that follows attr->key into
 * attr->next_key, letting user space iterate over all map elements
 */
static int map_get_next_key(union bpf_attr *attr)
{
	void __user *ukey = u64_to_ptr(attr->key);
	void __user *unext_key = u64_to_ptr(attr->next_key);
	int ufd = attr->map_fd;
	struct fd f = fdget(ufd);
	struct bpf_map *map;
	void *key, *next_key;
	int err;

	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
		return -EINVAL;

	map = bpf_map_get(f);
	if (IS_ERR(map))
		return PTR_ERR(map);

	err = -ENOMEM;
	key = kmalloc(map->key_size, GFP_USER);
	if (!key)
		goto err_put;

	err = -EFAULT;
	if (copy_from_user(key, ukey, map->key_size) != 0)
		goto free_key;

	err = -ENOMEM;
	next_key = kmalloc(map->key_size, GFP_USER);
	if (!next_key)
		goto free_key;

	/* accessor runs under rcu_read_lock(); the result lands in a kernel
	 * buffer and is copied to user space only after the lock is dropped
	 */
	rcu_read_lock();
	err = map->ops->map_get_next_key(map, key, next_key);
	rcu_read_unlock();
	if (err)
		goto free_next_key;

	err = -EFAULT;
	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
		goto free_next_key;

	err = 0;

free_next_key:
	kfree(next_key);
free_key:
	kfree(key);
err_put:
	fdput(f);
	return err;
}
336 | ||
99c55f7d AS |
/* single entry point for all eBPF commands: 'cmd' selects the operation,
 * 'uattr'/'size' describe a command-specific union bpf_attr in user memory
 */
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
	union bpf_attr attr = {};
	int err;

	/* the syscall is limited to root temporarily. This restriction will be
	 * lifted when security audit is clean. Note that eBPF+tracing must have
	 * this restriction, since it may pass kernel data to user space
	 */
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* cheap sanity check on the user pointer before walking it below */
	if (!access_ok(VERIFY_READ, uattr, 1))
		return -EFAULT;

	if (size > PAGE_SIZE)	/* silly large */
		return -E2BIG;

	/* If we're handed a bigger struct than we know of,
	 * ensure all the unknown bits are 0 - i.e. new
	 * user-space does not rely on any kernel feature
	 * extensions we dont know about yet.
	 */
	if (size > sizeof(attr)) {
		unsigned char __user *addr;
		unsigned char __user *end;
		unsigned char val;

		addr = (void __user *)uattr + sizeof(attr);
		end = (void __user *)uattr + size;

		/* byte-by-byte scan of the unknown tail; any non-zero byte
		 * means user space expects a feature we don't implement
		 */
		for (; addr < end; addr++) {
			err = get_user(val, addr);
			if (err)
				return err;
			if (val)
				return -E2BIG;
		}
		/* tail is all zero — safe to truncate to the size we know */
		size = sizeof(attr);
	}

	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
	if (copy_from_user(&attr, uattr, size) != 0)
		return -EFAULT;

	switch (cmd) {
	case BPF_MAP_CREATE:
		err = map_create(&attr);
		break;
	case BPF_MAP_LOOKUP_ELEM:
		err = map_lookup_elem(&attr);
		break;
	case BPF_MAP_UPDATE_ELEM:
		err = map_update_elem(&attr);
		break;
	case BPF_MAP_DELETE_ELEM:
		err = map_delete_elem(&attr);
		break;
	case BPF_MAP_GET_NEXT_KEY:
		err = map_get_next_key(&attr);
		break;
	default:
		err = -EINVAL;
		break;
	}

	return err;
}