]>
Commit | Line | Data |
---|---|---|
b2197755 DB |
1 | /* |
2 | * Minimal file system backend for holding eBPF maps and programs, | |
3 | * used by bpf(2) object pinning. | |
4 | * | |
5 | * Authors: | |
6 | * | |
7 | * Daniel Borkmann <daniel@iogearbox.net> | |
8 | * | |
9 | * This program is free software; you can redistribute it and/or | |
10 | * modify it under the terms of the GNU General Public License | |
11 | * version 2 as published by the Free Software Foundation. | |
12 | */ | |
13 | ||
a536a6e1 | 14 | #include <linux/init.h> |
b2197755 DB |
15 | #include <linux/magic.h> |
16 | #include <linux/major.h> | |
17 | #include <linux/mount.h> | |
18 | #include <linux/namei.h> | |
19 | #include <linux/fs.h> | |
20 | #include <linux/kdev_t.h> | |
a3af5f80 | 21 | #include <linux/parser.h> |
b2197755 DB |
22 | #include <linux/filter.h> |
23 | #include <linux/bpf.h> | |
24 | ||
/*
 * Kind of object attached to an inode of this filesystem.  The value is
 * also used as the minor number of the dev_t handed to mknod when pinning
 * (see bpf_obj_do_pin() / bpf_mkobj()).
 */
enum bpf_type {
	BPF_TYPE_UNSPEC	= 0,	/* no recognised object */
	BPF_TYPE_PROG	= 1,	/* eBPF program */
	BPF_TYPE_MAP	= 2,	/* eBPF map */
};
30 | ||
31 | static void *bpf_any_get(void *raw, enum bpf_type type) | |
32 | { | |
33 | switch (type) { | |
34 | case BPF_TYPE_PROG: | |
92117d84 | 35 | raw = bpf_prog_inc(raw); |
b2197755 DB |
36 | break; |
37 | case BPF_TYPE_MAP: | |
92117d84 | 38 | raw = bpf_map_inc(raw, true); |
b2197755 DB |
39 | break; |
40 | default: | |
41 | WARN_ON_ONCE(1); | |
42 | break; | |
43 | } | |
44 | ||
45 | return raw; | |
46 | } | |
47 | ||
48 | static void bpf_any_put(void *raw, enum bpf_type type) | |
49 | { | |
50 | switch (type) { | |
51 | case BPF_TYPE_PROG: | |
52 | bpf_prog_put(raw); | |
53 | break; | |
54 | case BPF_TYPE_MAP: | |
c9da161c | 55 | bpf_map_put_with_uref(raw); |
b2197755 DB |
56 | break; |
57 | default: | |
58 | WARN_ON_ONCE(1); | |
59 | break; | |
60 | } | |
61 | } | |
62 | ||
63 | static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) | |
64 | { | |
65 | void *raw; | |
66 | ||
67 | *type = BPF_TYPE_MAP; | |
c9da161c | 68 | raw = bpf_map_get_with_uref(ufd); |
b2197755 DB |
69 | if (IS_ERR(raw)) { |
70 | *type = BPF_TYPE_PROG; | |
71 | raw = bpf_prog_get(ufd); | |
72 | } | |
73 | ||
74 | return raw; | |
75 | } | |
76 | ||
77 | static const struct inode_operations bpf_dir_iops; | |
78 | ||
79 | static const struct inode_operations bpf_prog_iops = { }; | |
80 | static const struct inode_operations bpf_map_iops = { }; | |
81 | ||
82 | static struct inode *bpf_get_inode(struct super_block *sb, | |
83 | const struct inode *dir, | |
84 | umode_t mode) | |
85 | { | |
86 | struct inode *inode; | |
87 | ||
88 | switch (mode & S_IFMT) { | |
89 | case S_IFDIR: | |
90 | case S_IFREG: | |
0f98621b | 91 | case S_IFLNK: |
b2197755 DB |
92 | break; |
93 | default: | |
94 | return ERR_PTR(-EINVAL); | |
95 | } | |
96 | ||
97 | inode = new_inode(sb); | |
98 | if (!inode) | |
99 | return ERR_PTR(-ENOSPC); | |
100 | ||
101 | inode->i_ino = get_next_ino(); | |
078cd827 | 102 | inode->i_atime = current_time(inode); |
b2197755 DB |
103 | inode->i_mtime = inode->i_atime; |
104 | inode->i_ctime = inode->i_atime; | |
105 | ||
106 | inode_init_owner(inode, dir, mode); | |
107 | ||
108 | return inode; | |
109 | } | |
110 | ||
111 | static int bpf_inode_type(const struct inode *inode, enum bpf_type *type) | |
112 | { | |
113 | *type = BPF_TYPE_UNSPEC; | |
114 | if (inode->i_op == &bpf_prog_iops) | |
115 | *type = BPF_TYPE_PROG; | |
116 | else if (inode->i_op == &bpf_map_iops) | |
117 | *type = BPF_TYPE_MAP; | |
118 | else | |
119 | return -EACCES; | |
120 | ||
121 | return 0; | |
122 | } | |
123 | ||
0f98621b DB |
124 | static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode, |
125 | struct inode *dir) | |
126 | { | |
127 | d_instantiate(dentry, inode); | |
128 | dget(dentry); | |
129 | ||
130 | dir->i_mtime = current_time(dir); | |
131 | dir->i_ctime = dir->i_mtime; | |
132 | } | |
133 | ||
b2197755 DB |
134 | static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) |
135 | { | |
136 | struct inode *inode; | |
137 | ||
b2197755 DB |
138 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR); |
139 | if (IS_ERR(inode)) | |
140 | return PTR_ERR(inode); | |
141 | ||
142 | inode->i_op = &bpf_dir_iops; | |
143 | inode->i_fop = &simple_dir_operations; | |
144 | ||
145 | inc_nlink(inode); | |
146 | inc_nlink(dir); | |
147 | ||
0f98621b | 148 | bpf_dentry_finalize(dentry, inode, dir); |
b2197755 DB |
149 | return 0; |
150 | } | |
151 | ||
152 | static int bpf_mkobj_ops(struct inode *dir, struct dentry *dentry, | |
153 | umode_t mode, const struct inode_operations *iops) | |
154 | { | |
155 | struct inode *inode; | |
156 | ||
b2197755 DB |
157 | inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFREG); |
158 | if (IS_ERR(inode)) | |
159 | return PTR_ERR(inode); | |
160 | ||
161 | inode->i_op = iops; | |
162 | inode->i_private = dentry->d_fsdata; | |
163 | ||
0f98621b | 164 | bpf_dentry_finalize(dentry, inode, dir); |
b2197755 DB |
165 | return 0; |
166 | } | |
167 | ||
168 | static int bpf_mkobj(struct inode *dir, struct dentry *dentry, umode_t mode, | |
169 | dev_t devt) | |
170 | { | |
171 | enum bpf_type type = MINOR(devt); | |
172 | ||
173 | if (MAJOR(devt) != UNNAMED_MAJOR || !S_ISREG(mode) || | |
174 | dentry->d_fsdata == NULL) | |
175 | return -EPERM; | |
176 | ||
177 | switch (type) { | |
178 | case BPF_TYPE_PROG: | |
179 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_prog_iops); | |
180 | case BPF_TYPE_MAP: | |
181 | return bpf_mkobj_ops(dir, dentry, mode, &bpf_map_iops); | |
182 | default: | |
183 | return -EPERM; | |
184 | } | |
185 | } | |
186 | ||
0c93b7d8 AV |
187 | static struct dentry * |
188 | bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags) | |
bb35a6ef | 189 | { |
0c93b7d8 AV |
190 | if (strchr(dentry->d_name.name, '.')) |
191 | return ERR_PTR(-EPERM); | |
0f98621b | 192 | |
0c93b7d8 | 193 | return simple_lookup(dir, dentry, flags); |
bb35a6ef DB |
194 | } |
195 | ||
0f98621b DB |
196 | static int bpf_symlink(struct inode *dir, struct dentry *dentry, |
197 | const char *target) | |
198 | { | |
199 | char *link = kstrdup(target, GFP_USER | __GFP_NOWARN); | |
200 | struct inode *inode; | |
201 | ||
202 | if (!link) | |
203 | return -ENOMEM; | |
204 | ||
205 | inode = bpf_get_inode(dir->i_sb, dir, S_IRWXUGO | S_IFLNK); | |
206 | if (IS_ERR(inode)) { | |
207 | kfree(link); | |
208 | return PTR_ERR(inode); | |
209 | } | |
210 | ||
211 | inode->i_op = &simple_symlink_inode_operations; | |
212 | inode->i_link = link; | |
213 | ||
214 | bpf_dentry_finalize(dentry, inode, dir); | |
215 | return 0; | |
216 | } | |
217 | ||
b2197755 | 218 | static const struct inode_operations bpf_dir_iops = { |
0c93b7d8 | 219 | .lookup = bpf_lookup, |
b2197755 DB |
220 | .mknod = bpf_mkobj, |
221 | .mkdir = bpf_mkdir, | |
0f98621b | 222 | .symlink = bpf_symlink, |
b2197755 | 223 | .rmdir = simple_rmdir, |
0c93b7d8 AV |
224 | .rename = simple_rename, |
225 | .link = simple_link, | |
b2197755 DB |
226 | .unlink = simple_unlink, |
227 | }; | |
228 | ||
229 | static int bpf_obj_do_pin(const struct filename *pathname, void *raw, | |
230 | enum bpf_type type) | |
231 | { | |
232 | struct dentry *dentry; | |
233 | struct inode *dir; | |
234 | struct path path; | |
235 | umode_t mode; | |
236 | dev_t devt; | |
237 | int ret; | |
238 | ||
239 | dentry = kern_path_create(AT_FDCWD, pathname->name, &path, 0); | |
240 | if (IS_ERR(dentry)) | |
241 | return PTR_ERR(dentry); | |
242 | ||
243 | mode = S_IFREG | ((S_IRUSR | S_IWUSR) & ~current_umask()); | |
244 | devt = MKDEV(UNNAMED_MAJOR, type); | |
245 | ||
246 | ret = security_path_mknod(&path, dentry, mode, devt); | |
247 | if (ret) | |
248 | goto out; | |
249 | ||
250 | dir = d_inode(path.dentry); | |
251 | if (dir->i_op != &bpf_dir_iops) { | |
252 | ret = -EPERM; | |
253 | goto out; | |
254 | } | |
255 | ||
256 | dentry->d_fsdata = raw; | |
257 | ret = vfs_mknod(dir, dentry, mode, devt); | |
258 | dentry->d_fsdata = NULL; | |
259 | out: | |
260 | done_path_create(&path, dentry); | |
261 | return ret; | |
262 | } | |
263 | ||
264 | int bpf_obj_pin_user(u32 ufd, const char __user *pathname) | |
265 | { | |
266 | struct filename *pname; | |
267 | enum bpf_type type; | |
268 | void *raw; | |
269 | int ret; | |
270 | ||
271 | pname = getname(pathname); | |
272 | if (IS_ERR(pname)) | |
273 | return PTR_ERR(pname); | |
274 | ||
275 | raw = bpf_fd_probe_obj(ufd, &type); | |
276 | if (IS_ERR(raw)) { | |
277 | ret = PTR_ERR(raw); | |
278 | goto out; | |
279 | } | |
280 | ||
281 | ret = bpf_obj_do_pin(pname, raw, type); | |
282 | if (ret != 0) | |
283 | bpf_any_put(raw, type); | |
284 | out: | |
285 | putname(pname); | |
286 | return ret; | |
287 | } | |
288 | ||
289 | static void *bpf_obj_do_get(const struct filename *pathname, | |
290 | enum bpf_type *type) | |
291 | { | |
292 | struct inode *inode; | |
293 | struct path path; | |
294 | void *raw; | |
295 | int ret; | |
296 | ||
297 | ret = kern_path(pathname->name, LOOKUP_FOLLOW, &path); | |
298 | if (ret) | |
299 | return ERR_PTR(ret); | |
300 | ||
301 | inode = d_backing_inode(path.dentry); | |
302 | ret = inode_permission(inode, MAY_WRITE); | |
303 | if (ret) | |
304 | goto out; | |
305 | ||
306 | ret = bpf_inode_type(inode, type); | |
307 | if (ret) | |
308 | goto out; | |
309 | ||
310 | raw = bpf_any_get(inode->i_private, *type); | |
92117d84 AS |
311 | if (!IS_ERR(raw)) |
312 | touch_atime(&path); | |
b2197755 DB |
313 | |
314 | path_put(&path); | |
315 | return raw; | |
316 | out: | |
317 | path_put(&path); | |
318 | return ERR_PTR(ret); | |
319 | } | |
320 | ||
321 | int bpf_obj_get_user(const char __user *pathname) | |
322 | { | |
323 | enum bpf_type type = BPF_TYPE_UNSPEC; | |
324 | struct filename *pname; | |
325 | int ret = -ENOENT; | |
326 | void *raw; | |
327 | ||
328 | pname = getname(pathname); | |
329 | if (IS_ERR(pname)) | |
330 | return PTR_ERR(pname); | |
331 | ||
332 | raw = bpf_obj_do_get(pname, &type); | |
333 | if (IS_ERR(raw)) { | |
334 | ret = PTR_ERR(raw); | |
335 | goto out; | |
336 | } | |
337 | ||
338 | if (type == BPF_TYPE_PROG) | |
339 | ret = bpf_prog_new_fd(raw); | |
340 | else if (type == BPF_TYPE_MAP) | |
341 | ret = bpf_map_new_fd(raw); | |
342 | else | |
343 | goto out; | |
344 | ||
345 | if (ret < 0) | |
346 | bpf_any_put(raw, type); | |
347 | out: | |
348 | putname(pname); | |
349 | return ret; | |
350 | } | |
351 | ||
352 | static void bpf_evict_inode(struct inode *inode) | |
353 | { | |
354 | enum bpf_type type; | |
355 | ||
356 | truncate_inode_pages_final(&inode->i_data); | |
357 | clear_inode(inode); | |
358 | ||
0f98621b DB |
359 | if (S_ISLNK(inode->i_mode)) |
360 | kfree(inode->i_link); | |
b2197755 DB |
361 | if (!bpf_inode_type(inode, &type)) |
362 | bpf_any_put(inode->i_private, type); | |
363 | } | |
364 | ||
365 | static const struct super_operations bpf_super_ops = { | |
366 | .statfs = simple_statfs, | |
367 | .drop_inode = generic_delete_inode, | |
a3af5f80 | 368 | .show_options = generic_show_options, |
b2197755 DB |
369 | .evict_inode = bpf_evict_inode, |
370 | }; | |
371 | ||
a3af5f80 DB |
372 | enum { |
373 | OPT_MODE, | |
374 | OPT_ERR, | |
375 | }; | |
376 | ||
377 | static const match_table_t bpf_mount_tokens = { | |
378 | { OPT_MODE, "mode=%o" }, | |
379 | { OPT_ERR, NULL }, | |
380 | }; | |
381 | ||
382 | struct bpf_mount_opts { | |
383 | umode_t mode; | |
384 | }; | |
385 | ||
386 | static int bpf_parse_options(char *data, struct bpf_mount_opts *opts) | |
387 | { | |
388 | substring_t args[MAX_OPT_ARGS]; | |
389 | int option, token; | |
390 | char *ptr; | |
391 | ||
392 | opts->mode = S_IRWXUGO; | |
393 | ||
394 | while ((ptr = strsep(&data, ",")) != NULL) { | |
395 | if (!*ptr) | |
396 | continue; | |
397 | ||
398 | token = match_token(ptr, bpf_mount_tokens, args); | |
399 | switch (token) { | |
400 | case OPT_MODE: | |
401 | if (match_octal(&args[0], &option)) | |
402 | return -EINVAL; | |
403 | opts->mode = option & S_IALLUGO; | |
404 | break; | |
405 | /* We might like to report bad mount options here, but | |
406 | * traditionally we've ignored all mount options, so we'd | |
407 | * better continue to ignore non-existing options for bpf. | |
408 | */ | |
409 | } | |
410 | } | |
411 | ||
412 | return 0; | |
413 | } | |
414 | ||
b2197755 DB |
415 | static int bpf_fill_super(struct super_block *sb, void *data, int silent) |
416 | { | |
417 | static struct tree_descr bpf_rfiles[] = { { "" } }; | |
a3af5f80 | 418 | struct bpf_mount_opts opts; |
b2197755 DB |
419 | struct inode *inode; |
420 | int ret; | |
421 | ||
a3af5f80 DB |
422 | save_mount_options(sb, data); |
423 | ||
424 | ret = bpf_parse_options(data, &opts); | |
425 | if (ret) | |
426 | return ret; | |
427 | ||
b2197755 DB |
428 | ret = simple_fill_super(sb, BPF_FS_MAGIC, bpf_rfiles); |
429 | if (ret) | |
430 | return ret; | |
431 | ||
432 | sb->s_op = &bpf_super_ops; | |
433 | ||
434 | inode = sb->s_root->d_inode; | |
435 | inode->i_op = &bpf_dir_iops; | |
436 | inode->i_mode &= ~S_IALLUGO; | |
a3af5f80 | 437 | inode->i_mode |= S_ISVTX | opts.mode; |
b2197755 DB |
438 | |
439 | return 0; | |
440 | } | |
441 | ||
442 | static struct dentry *bpf_mount(struct file_system_type *type, int flags, | |
443 | const char *dev_name, void *data) | |
444 | { | |
e27f4a94 | 445 | return mount_nodev(type, flags, data, bpf_fill_super); |
b2197755 DB |
446 | } |
447 | ||
448 | static struct file_system_type bpf_fs_type = { | |
449 | .owner = THIS_MODULE, | |
450 | .name = "bpf", | |
451 | .mount = bpf_mount, | |
452 | .kill_sb = kill_litter_super, | |
b2197755 DB |
453 | }; |
454 | ||
b2197755 DB |
455 | static int __init bpf_init(void) |
456 | { | |
457 | int ret; | |
458 | ||
459 | ret = sysfs_create_mount_point(fs_kobj, "bpf"); | |
460 | if (ret) | |
461 | return ret; | |
462 | ||
463 | ret = register_filesystem(&bpf_fs_type); | |
464 | if (ret) | |
465 | sysfs_remove_mount_point(fs_kobj, "bpf"); | |
466 | ||
467 | return ret; | |
468 | } | |
469 | fs_initcall(bpf_init); |