]>
Commit | Line | Data |
---|---|---|
7c6b6602 | 1 | /* |
7387863d DDAG |
2 | * FUSE: Filesystem in Userspace |
3 | * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu> | |
4 | * | |
5 | * This program can be distributed under the terms of the GNU GPLv2. | |
6 | * See the file COPYING. | |
7 | */ | |
7c6b6602 | 8 | |
7387863d | 9 | /* |
7c6b6602 DDAG |
10 | * |
11 | * This file system mirrors the existing file system hierarchy of the | |
12 | * system, starting at the root file system. This is implemented by | |
13 | * just "passing through" all requests to the corresponding user-space | |
14 | * libc functions. In contrast to passthrough.c and passthrough_fh.c, | |
15 | * this implementation uses the low-level API. Its performance should | |
16 | * be the least bad among the three, but many operations are not | |
17 | * implemented. In particular, it is not possible to remove files (or | |
18 | * directories) because the code necessary to defer actual removal | |
19 | * until the file is not opened anymore would make the example much | |
20 | * more complicated. | |
21 | * | |
22 | * When writeback caching is enabled (-o writeback mount option), it | |
23 | * is only possible to write to files for which the mounting user has | |
24 | * read permissions. This is because the writeback cache requires the | |
25 | * kernel to be able to issue read requests for all files (which the | |
26 | * passthrough filesystem cannot satisfy if it can't read the file in | |
27 | * the underlying filesystem). | |
28 | * | |
29 | * Compile with: | |
30 | * | |
7387863d DDAG |
31 | * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o |
32 | * passthrough_ll | |
7c6b6602 DDAG |
33 | * |
34 | * ## Source code ## | |
35 | * \include passthrough_ll.c | |
36 | */ | |
37 | ||
09863ebc | 38 | #include "qemu/osdep.h" |
50fb955a | 39 | #include "qemu/timer.h" |
f6f3573c | 40 | #include "fuse_virtio.h" |
d240314a | 41 | #include "fuse_log.h" |
09863ebc | 42 | #include "fuse_lowlevel.h" |
7c6b6602 | 43 | #include <assert.h> |
2405f3c0 | 44 | #include <cap-ng.h> |
7387863d | 45 | #include <dirent.h> |
7c6b6602 | 46 | #include <errno.h> |
36f38469 | 47 | #include <glib.h> |
7c6b6602 | 48 | #include <inttypes.h> |
7387863d | 49 | #include <limits.h> |
7c6b6602 | 50 | #include <pthread.h> |
7387863d DDAG |
51 | #include <stdbool.h> |
52 | #include <stddef.h> | |
53 | #include <stdio.h> | |
54 | #include <stdlib.h> | |
55 | #include <string.h> | |
7c6b6602 | 56 | #include <sys/file.h> |
5baa3b8e | 57 | #include <sys/mount.h> |
8e1d4ef2 | 58 | #include <sys/prctl.h> |
01a6dc95 | 59 | #include <sys/resource.h> |
929cfb7a | 60 | #include <sys/syscall.h> |
8e1d4ef2 SH |
61 | #include <sys/types.h> |
62 | #include <sys/wait.h> | |
7c6b6602 | 63 | #include <sys/xattr.h> |
f185621d | 64 | #include <syslog.h> |
7387863d | 65 | #include <unistd.h> |
7c6b6602 | 66 | |
6084633d | 67 | #include "qemu/cutils.h" |
7c6b6602 | 68 | #include "passthrough_helpers.h" |
3f99cf57 | 69 | #include "passthrough_seccomp.h" |
7c6b6602 | 70 | |
0e81414c VG |
/*
 * Per-owner POSIX lock bookkeeping for an inode: one record is kept for
 * each lock owner.
 */
struct lo_inode_plock {
    uint64_t lock_owner; /* owner this record belongs to */
    int fd;              /* fd for OFD locks */
};
76 | ||
25c13572 SH |
/*
 * One slot of a struct lo_map.  While in_use is true the union holds the
 * mapped object (which member is valid depends on the map the slot lives
 * in); while it is false the union holds the index of the next free slot,
 * or -1 at the end of the free list.
 */
struct lo_map_elem {
    union {
        struct lo_inode *inode;
        struct lo_dirp *dirp;
        int fd;
        ssize_t freelist;
    };
    bool in_use;
};
86 | ||
/*
 * Maps FUSE fh or ino values to internal objects.  The key is simply the
 * index into elems; unused slots are chained through their freelist
 * member, starting at the freelist field below (-1 means no free slot).
 */
struct lo_map {
    struct lo_map_elem *elems; /* slot array, NULL while empty */
    size_t nelems;             /* number of slots in elems */
    ssize_t freelist;          /* index of first free slot, or -1 */
};
93 | ||
bfc50a6e MS |
/* Identity of a file on the host: inode number plus device number. */
struct lo_key {
    ino_t ino;
    dev_t dev;
};
98 | ||
7c6b6602 | 99 | struct lo_inode { |
7387863d | 100 | int fd; |
c241aa94 SH |
101 | |
102 | /* | |
103 | * Atomic reference count for this object. The nlookup field holds a | |
104 | * reference and release it when nlookup reaches 0. | |
105 | */ | |
106 | gint refcount; | |
107 | ||
bfc50a6e | 108 | struct lo_key key; |
1222f015 SH |
109 | |
110 | /* | |
111 | * This counter keeps the inode alive during the FUSE session. | |
112 | * Incremented when the FUSE inode number is sent in a reply | |
113 | * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is | |
114 | * released by requests like FUSE_FORGET, FUSE_RMDIR, FUSE_RENAME, etc. | |
115 | * | |
116 | * Note that this value is untrusted because the client can manipulate | |
117 | * it arbitrarily using FUSE_FORGET requests. | |
118 | * | |
119 | * Protected by lo->mutex. | |
120 | */ | |
121 | uint64_t nlookup; | |
122 | ||
92fb57b8 | 123 | fuse_ino_t fuse_ino; |
0e81414c VG |
124 | pthread_mutex_t plock_mutex; |
125 | GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */ | |
c241aa94 | 126 | |
bdfd6678 | 127 | mode_t filetype; |
eba8b096 HR |
128 | |
129 | /* | |
130 | * So we can detect crossmount roots | |
131 | * (As such, this only needs to be valid for directories. Note | |
132 | * that files can have multiple parents due to hard links, and so | |
133 | * their parent_dev may fluctuate.) | |
134 | */ | |
135 | dev_t parent_dev; | |
7c6b6602 DDAG |
136 | }; |
137 | ||
929cfb7a VG |
/* Effective credentials saved while temporarily acting as the caller. */
struct lo_cred {
    uid_t euid;
    gid_t egid;
};
142 | ||
/* Values of lo_data::cache, selected with the cache=... option. */
enum {
    CACHE_NONE = 0,
    CACHE_AUTO = 1,
    CACHE_ALWAYS = 2,
};
148 | ||
06844584 SH |
/* Values of lo_data::sandbox, selected with the sandbox=... option. */
enum {
    SANDBOX_NAMESPACE = 0,
    SANDBOX_CHROOT = 1,
};
153 | ||
6084633d DDAG |
/*
 * One rule of the extended-attribute name map.
 * NOTE(review): the meaning of key/prepend/flags is defined by the xattrmap
 * parser, which is outside this section - confirm there.
 */
typedef struct xattr_map_entry {
    char *key;
    char *prepend;
    unsigned int flags;
} XattrMapEntry;
159 | ||
7c6b6602 | 160 | struct lo_data { |
7387863d | 161 | pthread_mutex_t mutex; |
06844584 | 162 | int sandbox; |
7387863d DDAG |
163 | int debug; |
164 | int writeback; | |
165 | int flock; | |
0e81414c | 166 | int posix_lock; |
7387863d | 167 | int xattr; |
6084633d | 168 | char *xattrmap; |
eb68a33b | 169 | char *source; |
3005c099 | 170 | char *modcaps; |
7387863d DDAG |
171 | double timeout; |
172 | int cache; | |
173 | int timeout_set; | |
59aef494 MS |
174 | int readdirplus_set; |
175 | int readdirplus_clear; | |
e12a0eda | 176 | int allow_direct_io; |
bfc50a6e MS |
177 | struct lo_inode root; |
178 | GHashTable *inodes; /* protected by lo->mutex */ | |
92fb57b8 | 179 | struct lo_map ino_map; /* protected by lo->mutex */ |
b39bce12 | 180 | struct lo_map dirp_map; /* protected by lo->mutex */ |
73b4d19d | 181 | struct lo_map fd_map; /* protected by lo->mutex */ |
6084633d DDAG |
182 | XattrMapEntry *xattr_map_list; |
183 | size_t xattr_map_nentries; | |
9f59d175 SH |
184 | |
185 | /* An O_PATH file descriptor to /proc/self/fd/ */ | |
186 | int proc_self_fd; | |
7c6b6602 DDAG |
187 | }; |
188 | ||
189 | static const struct fuse_opt lo_opts[] = { | |
06844584 SH |
190 | { "sandbox=namespace", |
191 | offsetof(struct lo_data, sandbox), | |
192 | SANDBOX_NAMESPACE }, | |
193 | { "sandbox=chroot", | |
194 | offsetof(struct lo_data, sandbox), | |
195 | SANDBOX_CHROOT }, | |
7387863d DDAG |
196 | { "writeback", offsetof(struct lo_data, writeback), 1 }, |
197 | { "no_writeback", offsetof(struct lo_data, writeback), 0 }, | |
198 | { "source=%s", offsetof(struct lo_data, source), 0 }, | |
199 | { "flock", offsetof(struct lo_data, flock), 1 }, | |
200 | { "no_flock", offsetof(struct lo_data, flock), 0 }, | |
0e81414c VG |
201 | { "posix_lock", offsetof(struct lo_data, posix_lock), 1 }, |
202 | { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 }, | |
7387863d DDAG |
203 | { "xattr", offsetof(struct lo_data, xattr), 1 }, |
204 | { "no_xattr", offsetof(struct lo_data, xattr), 0 }, | |
6084633d | 205 | { "xattrmap=%s", offsetof(struct lo_data, xattrmap), 0 }, |
3005c099 | 206 | { "modcaps=%s", offsetof(struct lo_data, modcaps), 0 }, |
7387863d DDAG |
207 | { "timeout=%lf", offsetof(struct lo_data, timeout), 0 }, |
208 | { "timeout=", offsetof(struct lo_data, timeout_set), 1 }, | |
230e777b MS |
209 | { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE }, |
210 | { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO }, | |
7387863d | 211 | { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS }, |
59aef494 MS |
212 | { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 }, |
213 | { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 }, | |
e12a0eda JZ |
214 | { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 }, |
215 | { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 }, | |
7387863d | 216 | FUSE_OPT_END |
7c6b6602 | 217 | }; |
/* Logging state; the code that consumes it is outside this section. */
static bool use_syslog = false;
static int current_log_level;

static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
                                 uint64_t n);

/*
 * Saved libcap-ng state shared by all threads.  load_capng() restores a
 * thread's capng state from `saved` while holding `mutex`.
 */
static struct {
    pthread_mutex_t mutex;
    void *saved;
} cap;

/* True once the current thread has loaded its capng state from cap.saved */
static __thread bool cap_loaded = false;

static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
231 | ||
25dae28c SH |
/* Return 1 if `name` is exactly "." or "..", otherwise 0. */
static int is_dot_or_dotdot(const char *name)
{
    if (name[0] != '.') {
        return 0;
    }
    if (name[1] == '\0') {
        return 1;
    }
    return name[1] == '.' && name[2] == '\0';
}
237 | ||
/*
 * Return 1 if `path` is a single path component (contains no '/') that is
 * neither "." nor "..", otherwise 0.
 */
static int is_safe_path_component(const char *path)
{
    int has_separator = strchr(path, '/') != NULL;

    if (has_separator || is_dot_or_dotdot(path)) {
        return 0;
    }
    return 1;
}
5fe319a7 | 247 | |
7c6b6602 DDAG |
248 | static struct lo_data *lo_data(fuse_req_t req) |
249 | { | |
7387863d | 250 | return (struct lo_data *)fuse_req_userdata(req); |
7c6b6602 DDAG |
251 | } |
252 | ||
2405f3c0 DDAG |
253 | /* |
254 | * Load capng's state from our saved state if the current thread | |
255 | * hadn't previously been loaded. | |
256 | * returns 0 on success | |
257 | */ | |
258 | static int load_capng(void) | |
259 | { | |
260 | if (!cap_loaded) { | |
261 | pthread_mutex_lock(&cap.mutex); | |
262 | capng_restore_state(&cap.saved); | |
263 | /* | |
264 | * restore_state free's the saved copy | |
265 | * so make another. | |
266 | */ | |
267 | cap.saved = capng_save_state(); | |
268 | if (!cap.saved) { | |
68639111 | 269 | pthread_mutex_unlock(&cap.mutex); |
2405f3c0 DDAG |
270 | fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); |
271 | return -EINVAL; | |
272 | } | |
273 | pthread_mutex_unlock(&cap.mutex); | |
274 | ||
275 | /* | |
276 | * We want to use the loaded state for our pid, | |
277 | * not the original | |
278 | */ | |
279 | capng_setpid(syscall(SYS_gettid)); | |
280 | cap_loaded = true; | |
281 | } | |
282 | return 0; | |
283 | } | |
284 | ||
ee884652 VG |
285 | /* |
286 | * Helpers for dropping and regaining effective capabilities. Returns 0 | |
287 | * on success, error otherwise | |
288 | */ | |
289 | static int drop_effective_cap(const char *cap_name, bool *cap_dropped) | |
290 | { | |
291 | int cap, ret; | |
292 | ||
293 | cap = capng_name_to_capability(cap_name); | |
294 | if (cap < 0) { | |
295 | ret = errno; | |
296 | fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", | |
297 | cap_name, strerror(errno)); | |
298 | goto out; | |
299 | } | |
300 | ||
301 | if (load_capng()) { | |
302 | ret = errno; | |
303 | fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); | |
304 | goto out; | |
305 | } | |
306 | ||
307 | /* We dont have this capability in effective set already. */ | |
308 | if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { | |
309 | ret = 0; | |
310 | goto out; | |
311 | } | |
312 | ||
313 | if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { | |
314 | ret = errno; | |
315 | fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n"); | |
316 | goto out; | |
317 | } | |
318 | ||
319 | if (capng_apply(CAPNG_SELECT_CAPS)) { | |
320 | ret = errno; | |
321 | fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n"); | |
322 | goto out; | |
323 | } | |
324 | ||
325 | ret = 0; | |
326 | if (cap_dropped) { | |
327 | *cap_dropped = true; | |
328 | } | |
329 | ||
330 | out: | |
331 | return ret; | |
332 | } | |
333 | ||
334 | static int gain_effective_cap(const char *cap_name) | |
335 | { | |
336 | int cap; | |
337 | int ret = 0; | |
338 | ||
339 | cap = capng_name_to_capability(cap_name); | |
340 | if (cap < 0) { | |
341 | ret = errno; | |
342 | fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", | |
343 | cap_name, strerror(errno)); | |
344 | goto out; | |
345 | } | |
346 | ||
347 | if (load_capng()) { | |
348 | ret = errno; | |
349 | fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); | |
350 | goto out; | |
351 | } | |
352 | ||
353 | if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { | |
354 | ret = errno; | |
355 | fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n"); | |
356 | goto out; | |
357 | } | |
358 | ||
359 | if (capng_apply(CAPNG_SELECT_CAPS)) { | |
360 | ret = errno; | |
361 | fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n"); | |
362 | goto out; | |
363 | } | |
364 | ret = 0; | |
365 | ||
366 | out: | |
367 | return ret; | |
368 | } | |
369 | ||
92fb57b8 | 370 | static void lo_map_init(struct lo_map *map) |
25c13572 SH |
371 | { |
372 | map->elems = NULL; | |
373 | map->nelems = 0; | |
374 | map->freelist = -1; | |
375 | } | |
376 | ||
92fb57b8 | 377 | static void lo_map_destroy(struct lo_map *map) |
25c13572 SH |
378 | { |
379 | free(map->elems); | |
380 | } | |
381 | ||
382 | static int lo_map_grow(struct lo_map *map, size_t new_nelems) | |
383 | { | |
384 | struct lo_map_elem *new_elems; | |
385 | size_t i; | |
386 | ||
387 | if (new_nelems <= map->nelems) { | |
388 | return 1; | |
389 | } | |
390 | ||
391 | new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems); | |
392 | if (!new_elems) { | |
393 | return 0; | |
394 | } | |
395 | ||
396 | for (i = map->nelems; i < new_nelems; i++) { | |
397 | new_elems[i].freelist = i + 1; | |
398 | new_elems[i].in_use = false; | |
399 | } | |
400 | new_elems[new_nelems - 1].freelist = -1; | |
401 | ||
402 | map->elems = new_elems; | |
403 | map->freelist = map->nelems; | |
404 | map->nelems = new_nelems; | |
405 | return 1; | |
406 | } | |
407 | ||
92fb57b8 | 408 | static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map) |
25c13572 SH |
409 | { |
410 | struct lo_map_elem *elem; | |
411 | ||
412 | if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) { | |
413 | return NULL; | |
414 | } | |
415 | ||
416 | elem = &map->elems[map->freelist]; | |
417 | map->freelist = elem->freelist; | |
418 | ||
419 | elem->in_use = true; | |
420 | ||
421 | return elem; | |
422 | } | |
423 | ||
92fb57b8 | 424 | static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key) |
25c13572 SH |
425 | { |
426 | ssize_t *prev; | |
427 | ||
428 | if (!lo_map_grow(map, key + 1)) { | |
429 | return NULL; | |
430 | } | |
431 | ||
432 | for (prev = &map->freelist; *prev != -1; | |
433 | prev = &map->elems[*prev].freelist) { | |
434 | if (*prev == key) { | |
435 | struct lo_map_elem *elem = &map->elems[key]; | |
436 | ||
437 | *prev = elem->freelist; | |
438 | elem->in_use = true; | |
439 | return elem; | |
440 | } | |
441 | } | |
442 | return NULL; | |
443 | } | |
444 | ||
92fb57b8 | 445 | static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key) |
25c13572 SH |
446 | { |
447 | if (key >= map->nelems) { | |
448 | return NULL; | |
449 | } | |
450 | if (!map->elems[key].in_use) { | |
451 | return NULL; | |
452 | } | |
453 | return &map->elems[key]; | |
454 | } | |
455 | ||
92fb57b8 | 456 | static void lo_map_remove(struct lo_map *map, size_t key) |
25c13572 SH |
457 | { |
458 | struct lo_map_elem *elem; | |
459 | ||
460 | if (key >= map->nelems) { | |
461 | return; | |
462 | } | |
463 | ||
464 | elem = &map->elems[key]; | |
465 | if (!elem->in_use) { | |
466 | return; | |
467 | } | |
468 | ||
469 | elem->in_use = false; | |
470 | ||
471 | elem->freelist = map->freelist; | |
472 | map->freelist = key; | |
473 | } | |
474 | ||
73b4d19d SH |
475 | /* Assumes lo->mutex is held */ |
476 | static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd) | |
477 | { | |
478 | struct lo_map_elem *elem; | |
479 | ||
480 | elem = lo_map_alloc_elem(&lo_data(req)->fd_map); | |
481 | if (!elem) { | |
482 | return -1; | |
483 | } | |
484 | ||
485 | elem->fd = fd; | |
486 | return elem - lo_data(req)->fd_map.elems; | |
487 | } | |
488 | ||
b39bce12 SH |
489 | /* Assumes lo->mutex is held */ |
490 | static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) | |
491 | { | |
492 | struct lo_map_elem *elem; | |
493 | ||
494 | elem = lo_map_alloc_elem(&lo_data(req)->dirp_map); | |
495 | if (!elem) { | |
496 | return -1; | |
497 | } | |
498 | ||
499 | elem->dirp = dirp; | |
500 | return elem - lo_data(req)->dirp_map.elems; | |
501 | } | |
502 | ||
92fb57b8 SH |
503 | /* Assumes lo->mutex is held */ |
504 | static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) | |
505 | { | |
506 | struct lo_map_elem *elem; | |
507 | ||
508 | elem = lo_map_alloc_elem(&lo_data(req)->ino_map); | |
509 | if (!elem) { | |
510 | return -1; | |
511 | } | |
512 | ||
513 | elem->inode = inode; | |
514 | return elem - lo_data(req)->ino_map.elems; | |
515 | } | |
516 | ||
c241aa94 SH |
517 | static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) |
518 | { | |
519 | struct lo_inode *inode = *inodep; | |
520 | ||
521 | if (!inode) { | |
522 | return; | |
523 | } | |
524 | ||
525 | *inodep = NULL; | |
526 | ||
527 | if (g_atomic_int_dec_and_test(&inode->refcount)) { | |
528 | close(inode->fd); | |
529 | free(inode); | |
530 | } | |
531 | } | |
532 | ||
533 | /* Caller must release refcount using lo_inode_put() */ | |
7c6b6602 DDAG |
534 | static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) |
535 | { | |
92fb57b8 SH |
536 | struct lo_data *lo = lo_data(req); |
537 | struct lo_map_elem *elem; | |
538 | ||
539 | pthread_mutex_lock(&lo->mutex); | |
540 | elem = lo_map_get(&lo->ino_map, ino); | |
c241aa94 SH |
541 | if (elem) { |
542 | g_atomic_int_inc(&elem->inode->refcount); | |
543 | } | |
92fb57b8 SH |
544 | pthread_mutex_unlock(&lo->mutex); |
545 | ||
546 | if (!elem) { | |
547 | return NULL; | |
7387863d | 548 | } |
92fb57b8 SH |
549 | |
550 | return elem->inode; | |
7c6b6602 DDAG |
551 | } |
552 | ||
c241aa94 SH |
553 | /* |
554 | * TODO Remove this helper and force callers to hold an inode refcount until | |
555 | * they are done with the fd. This will be done in a later patch to make | |
556 | * review easier. | |
557 | */ | |
7c6b6602 DDAG |
558 | static int lo_fd(fuse_req_t req, fuse_ino_t ino) |
559 | { | |
92fb57b8 | 560 | struct lo_inode *inode = lo_inode(req, ino); |
c241aa94 SH |
561 | int fd; |
562 | ||
563 | if (!inode) { | |
564 | return -1; | |
565 | } | |
566 | ||
567 | fd = inode->fd; | |
568 | lo_inode_put(lo_data(req), &inode); | |
569 | return fd; | |
7c6b6602 DDAG |
570 | } |
571 | ||
7387863d | 572 | static void lo_init(void *userdata, struct fuse_conn_info *conn) |
7c6b6602 | 573 | { |
7387863d DDAG |
574 | struct lo_data *lo = (struct lo_data *)userdata; |
575 | ||
576 | if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) { | |
577 | conn->want |= FUSE_CAP_EXPORT_SUPPORT; | |
578 | } | |
579 | ||
580 | if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) { | |
d240314a | 581 | fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n"); |
7387863d DDAG |
582 | conn->want |= FUSE_CAP_WRITEBACK_CACHE; |
583 | } | |
e468d4af PT |
584 | if (conn->capable & FUSE_CAP_FLOCK_LOCKS) { |
585 | if (lo->flock) { | |
586 | fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n"); | |
587 | conn->want |= FUSE_CAP_FLOCK_LOCKS; | |
588 | } else { | |
589 | fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n"); | |
590 | conn->want &= ~FUSE_CAP_FLOCK_LOCKS; | |
591 | } | |
7387863d | 592 | } |
0e81414c VG |
593 | |
594 | if (conn->capable & FUSE_CAP_POSIX_LOCKS) { | |
595 | if (lo->posix_lock) { | |
596 | fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n"); | |
597 | conn->want |= FUSE_CAP_POSIX_LOCKS; | |
598 | } else { | |
599 | fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n"); | |
600 | conn->want &= ~FUSE_CAP_POSIX_LOCKS; | |
601 | } | |
602 | } | |
603 | ||
230e777b | 604 | if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) || |
59aef494 | 605 | lo->readdirplus_clear) { |
ddcbabcb MS |
606 | fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n"); |
607 | conn->want &= ~FUSE_CAP_READDIRPLUS; | |
608 | } | |
7c6b6602 DDAG |
609 | } |
610 | ||
611 | static void lo_getattr(fuse_req_t req, fuse_ino_t ino, | |
7387863d | 612 | struct fuse_file_info *fi) |
7c6b6602 | 613 | { |
7387863d DDAG |
614 | int res; |
615 | struct stat buf; | |
616 | struct lo_data *lo = lo_data(req); | |
7c6b6602 | 617 | |
7387863d | 618 | (void)fi; |
7c6b6602 | 619 | |
7387863d DDAG |
620 | res = |
621 | fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); | |
622 | if (res == -1) { | |
623 | return (void)fuse_reply_err(req, errno); | |
624 | } | |
7c6b6602 | 625 | |
7387863d | 626 | fuse_reply_attr(req, &buf, lo->timeout); |
7c6b6602 DDAG |
627 | } |
628 | ||
73b4d19d SH |
629 | static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) |
630 | { | |
631 | struct lo_data *lo = lo_data(req); | |
632 | struct lo_map_elem *elem; | |
633 | ||
634 | pthread_mutex_lock(&lo->mutex); | |
635 | elem = lo_map_get(&lo->fd_map, fi->fh); | |
636 | pthread_mutex_unlock(&lo->mutex); | |
637 | ||
638 | if (!elem) { | |
639 | return -1; | |
640 | } | |
641 | ||
642 | return elem->fd; | |
643 | } | |
644 | ||
7c6b6602 | 645 | static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, |
7387863d | 646 | int valid, struct fuse_file_info *fi) |
7c6b6602 | 647 | { |
7387863d DDAG |
648 | int saverr; |
649 | char procname[64]; | |
5fe319a7 | 650 | struct lo_data *lo = lo_data(req); |
92fb57b8 SH |
651 | struct lo_inode *inode; |
652 | int ifd; | |
7387863d | 653 | int res; |
2acf4f8f | 654 | int fd = -1; |
7387863d | 655 | |
92fb57b8 SH |
656 | inode = lo_inode(req, ino); |
657 | if (!inode) { | |
658 | fuse_reply_err(req, EBADF); | |
659 | return; | |
660 | } | |
661 | ||
662 | ifd = inode->fd; | |
663 | ||
73b4d19d SH |
664 | /* If fi->fh is invalid we'll report EBADF later */ |
665 | if (fi) { | |
666 | fd = lo_fi_fd(req, fi); | |
667 | } | |
668 | ||
7387863d DDAG |
669 | if (valid & FUSE_SET_ATTR_MODE) { |
670 | if (fi) { | |
73b4d19d | 671 | res = fchmod(fd, attr->st_mode); |
7387863d | 672 | } else { |
9f59d175 SH |
673 | sprintf(procname, "%i", ifd); |
674 | res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0); | |
7387863d DDAG |
675 | } |
676 | if (res == -1) { | |
677 | goto out_err; | |
678 | } | |
679 | } | |
680 | if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { | |
681 | uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1; | |
682 | gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t)-1; | |
683 | ||
684 | res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); | |
685 | if (res == -1) { | |
686 | goto out_err; | |
687 | } | |
688 | } | |
689 | if (valid & FUSE_SET_ATTR_SIZE) { | |
9f59d175 SH |
690 | int truncfd; |
691 | ||
7387863d | 692 | if (fi) { |
9f59d175 | 693 | truncfd = fd; |
7387863d | 694 | } else { |
9f59d175 SH |
695 | sprintf(procname, "%i", ifd); |
696 | truncfd = openat(lo->proc_self_fd, procname, O_RDWR); | |
697 | if (truncfd < 0) { | |
698 | goto out_err; | |
699 | } | |
700 | } | |
701 | ||
702 | res = ftruncate(truncfd, attr->st_size); | |
703 | if (!fi) { | |
704 | saverr = errno; | |
705 | close(truncfd); | |
706 | errno = saverr; | |
7387863d DDAG |
707 | } |
708 | if (res == -1) { | |
709 | goto out_err; | |
710 | } | |
711 | } | |
712 | if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { | |
713 | struct timespec tv[2]; | |
714 | ||
715 | tv[0].tv_sec = 0; | |
716 | tv[1].tv_sec = 0; | |
717 | tv[0].tv_nsec = UTIME_OMIT; | |
718 | tv[1].tv_nsec = UTIME_OMIT; | |
719 | ||
720 | if (valid & FUSE_SET_ATTR_ATIME_NOW) { | |
721 | tv[0].tv_nsec = UTIME_NOW; | |
722 | } else if (valid & FUSE_SET_ATTR_ATIME) { | |
723 | tv[0] = attr->st_atim; | |
724 | } | |
725 | ||
726 | if (valid & FUSE_SET_ATTR_MTIME_NOW) { | |
727 | tv[1].tv_nsec = UTIME_NOW; | |
728 | } else if (valid & FUSE_SET_ATTR_MTIME) { | |
729 | tv[1] = attr->st_mtim; | |
730 | } | |
731 | ||
732 | if (fi) { | |
73b4d19d | 733 | res = futimens(fd, tv); |
7387863d | 734 | } else { |
93bb3d8d MS |
735 | sprintf(procname, "%i", inode->fd); |
736 | res = utimensat(lo->proc_self_fd, procname, tv, 0); | |
7387863d DDAG |
737 | } |
738 | if (res == -1) { | |
739 | goto out_err; | |
740 | } | |
741 | } | |
c241aa94 | 742 | lo_inode_put(lo, &inode); |
7387863d DDAG |
743 | |
744 | return lo_getattr(req, ino, fi); | |
7c6b6602 DDAG |
745 | |
746 | out_err: | |
7387863d | 747 | saverr = errno; |
c241aa94 | 748 | lo_inode_put(lo, &inode); |
7387863d | 749 | fuse_reply_err(req, saverr); |
7c6b6602 DDAG |
750 | } |
751 | ||
752 | static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st) | |
753 | { | |
7387863d | 754 | struct lo_inode *p; |
bfc50a6e MS |
755 | struct lo_key key = { |
756 | .ino = st->st_ino, | |
757 | .dev = st->st_dev, | |
758 | }; | |
7387863d DDAG |
759 | |
760 | pthread_mutex_lock(&lo->mutex); | |
bfc50a6e MS |
761 | p = g_hash_table_lookup(lo->inodes, &key); |
762 | if (p) { | |
1222f015 SH |
763 | assert(p->nlookup > 0); |
764 | p->nlookup++; | |
c241aa94 | 765 | g_atomic_int_inc(&p->refcount); |
7387863d DDAG |
766 | } |
767 | pthread_mutex_unlock(&lo->mutex); | |
bfc50a6e MS |
768 | |
769 | return p; | |
7c6b6602 DDAG |
770 | } |
771 | ||
0e81414c VG |
772 | /* value_destroy_func for posix_locks GHashTable */ |
773 | static void posix_locks_value_destroy(gpointer data) | |
774 | { | |
775 | struct lo_inode_plock *plock = data; | |
776 | ||
777 | /* | |
778 | * We had used open() for locks and had only one fd. So | |
779 | * closing this fd should release all OFD locks. | |
780 | */ | |
781 | close(plock->fd); | |
782 | free(plock); | |
783 | } | |
784 | ||
c241aa94 SH |
785 | /* |
786 | * Increments nlookup and caller must release refcount using | |
787 | * lo_inode_put(&parent). | |
788 | */ | |
7c6b6602 | 789 | static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name, |
7387863d | 790 | struct fuse_entry_param *e) |
7c6b6602 | 791 | { |
7387863d DDAG |
792 | int newfd; |
793 | int res; | |
794 | int saverr; | |
795 | struct lo_data *lo = lo_data(req); | |
c241aa94 SH |
796 | struct lo_inode *inode = NULL; |
797 | struct lo_inode *dir = lo_inode(req, parent); | |
7387863d | 798 | |
9de4fab5 MS |
799 | /* |
800 | * name_to_handle_at() and open_by_handle_at() can reach here with fuse | |
801 | * mount point in guest, but we don't have its inode info in the | |
802 | * ino_map. | |
803 | */ | |
804 | if (!dir) { | |
805 | return ENOENT; | |
806 | } | |
807 | ||
7387863d DDAG |
808 | memset(e, 0, sizeof(*e)); |
809 | e->attr_timeout = lo->timeout; | |
810 | e->entry_timeout = lo->timeout; | |
811 | ||
854684bc SH |
812 | /* Do not allow escaping root directory */ |
813 | if (dir == &lo->root && strcmp(name, "..") == 0) { | |
814 | name = "."; | |
815 | } | |
816 | ||
9de4fab5 | 817 | newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); |
7387863d DDAG |
818 | if (newfd == -1) { |
819 | goto out_err; | |
820 | } | |
821 | ||
822 | res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); | |
823 | if (res == -1) { | |
824 | goto out_err; | |
825 | } | |
826 | ||
9de4fab5 | 827 | inode = lo_find(lo, &e->attr); |
7387863d DDAG |
828 | if (inode) { |
829 | close(newfd); | |
7387863d | 830 | } else { |
7387863d DDAG |
831 | inode = calloc(1, sizeof(struct lo_inode)); |
832 | if (!inode) { | |
833 | goto out_err; | |
834 | } | |
835 | ||
bdfd6678 MT |
836 | /* cache only filetype */ |
837 | inode->filetype = (e->attr.st_mode & S_IFMT); | |
c241aa94 SH |
838 | |
839 | /* | |
840 | * One for the caller and one for nlookup (released in | |
841 | * unref_inode_lolocked()) | |
842 | */ | |
843 | g_atomic_int_set(&inode->refcount, 2); | |
844 | ||
1222f015 | 845 | inode->nlookup = 1; |
7387863d | 846 | inode->fd = newfd; |
bfc50a6e MS |
847 | inode->key.ino = e->attr.st_ino; |
848 | inode->key.dev = e->attr.st_dev; | |
0e81414c VG |
849 | pthread_mutex_init(&inode->plock_mutex, NULL); |
850 | inode->posix_locks = g_hash_table_new_full( | |
851 | g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); | |
7387863d DDAG |
852 | |
853 | pthread_mutex_lock(&lo->mutex); | |
92fb57b8 | 854 | inode->fuse_ino = lo_add_inode_mapping(req, inode); |
bfc50a6e | 855 | g_hash_table_insert(lo->inodes, &inode->key, inode); |
7387863d DDAG |
856 | pthread_mutex_unlock(&lo->mutex); |
857 | } | |
eba8b096 | 858 | inode->parent_dev = dir->key.dev; |
92fb57b8 | 859 | e->ino = inode->fuse_ino; |
c241aa94 SH |
860 | lo_inode_put(lo, &inode); |
861 | lo_inode_put(lo, &dir); | |
7387863d | 862 | |
d240314a EG |
863 | fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, |
864 | name, (unsigned long long)e->ino); | |
7387863d DDAG |
865 | |
866 | return 0; | |
7c6b6602 DDAG |
867 | |
868 | out_err: | |
7387863d DDAG |
869 | saverr = errno; |
870 | if (newfd != -1) { | |
871 | close(newfd); | |
872 | } | |
c241aa94 SH |
873 | lo_inode_put(lo, &inode); |
874 | lo_inode_put(lo, &dir); | |
7387863d | 875 | return saverr; |
7c6b6602 DDAG |
876 | } |
877 | ||
878 | static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) | |
879 | { | |
7387863d DDAG |
880 | struct fuse_entry_param e; |
881 | int err; | |
882 | ||
d240314a EG |
883 | fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent, |
884 | name); | |
7387863d | 885 | |
25dae28c SH |
886 | /* |
887 | * Don't use is_safe_path_component(), allow "." and ".." for NFS export | |
888 | * support. | |
889 | */ | |
890 | if (strchr(name, '/')) { | |
891 | fuse_reply_err(req, EINVAL); | |
892 | return; | |
893 | } | |
894 | ||
7387863d DDAG |
895 | err = lo_do_lookup(req, parent, name, &e); |
896 | if (err) { | |
897 | fuse_reply_err(req, err); | |
898 | } else { | |
899 | fuse_reply_entry(req, &e); | |
900 | } | |
7c6b6602 DDAG |
901 | } |
902 | ||
929cfb7a VG |
/*
 * Pick the syscall numbers used to switch credentials.
 *
 * On some archs, setres*id is limited to 2^16 but they provide setres*id32
 * variants that allow 2^32.  Others just let setres*id do 2^32 anyway.  The
 * 32-bit variant is preferred whenever it is defined.
 */
#if defined(SYS_setresgid32)
#define OURSYS_setresgid SYS_setresgid32
#else
#define OURSYS_setresgid SYS_setresgid
#endif

#if defined(SYS_setresuid32)
#define OURSYS_setresuid SYS_setresuid32
#else
#define OURSYS_setresuid SYS_setresuid
#endif
919 | ||
920 | /* | |
921 | * Change to uid/gid of caller so that file is created with | |
922 | * ownership of caller. | |
923 | * TODO: What about selinux context? | |
924 | */ | |
925 | static int lo_change_cred(fuse_req_t req, struct lo_cred *old) | |
926 | { | |
927 | int res; | |
928 | ||
929 | old->euid = geteuid(); | |
930 | old->egid = getegid(); | |
931 | ||
932 | res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1); | |
933 | if (res == -1) { | |
934 | return errno; | |
935 | } | |
936 | ||
937 | res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1); | |
938 | if (res == -1) { | |
939 | int errno_save = errno; | |
940 | ||
941 | syscall(OURSYS_setresgid, -1, old->egid, -1); | |
942 | return errno_save; | |
943 | } | |
944 | ||
945 | return 0; | |
946 | } | |
947 | ||
948 | /* Regain Privileges */ | |
949 | static void lo_restore_cred(struct lo_cred *old) | |
950 | { | |
951 | int res; | |
952 | ||
953 | res = syscall(OURSYS_setresuid, -1, old->euid, -1); | |
954 | if (res == -1) { | |
955 | fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid); | |
956 | exit(1); | |
957 | } | |
958 | ||
959 | res = syscall(OURSYS_setresgid, -1, old->egid, -1); | |
960 | if (res == -1) { | |
961 | fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid); | |
962 | exit(1); | |
963 | } | |
964 | } | |
965 | ||
7c6b6602 | 966 | static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, |
7387863d DDAG |
967 | const char *name, mode_t mode, dev_t rdev, |
968 | const char *link) | |
7c6b6602 | 969 | { |
7387863d DDAG |
970 | int res; |
971 | int saverr; | |
c241aa94 | 972 | struct lo_data *lo = lo_data(req); |
92fb57b8 | 973 | struct lo_inode *dir; |
7387863d | 974 | struct fuse_entry_param e; |
929cfb7a | 975 | struct lo_cred old = {}; |
7c6b6602 | 976 | |
25dae28c SH |
977 | if (!is_safe_path_component(name)) { |
978 | fuse_reply_err(req, EINVAL); | |
979 | return; | |
980 | } | |
981 | ||
92fb57b8 SH |
982 | dir = lo_inode(req, parent); |
983 | if (!dir) { | |
984 | fuse_reply_err(req, EBADF); | |
985 | return; | |
986 | } | |
987 | ||
929cfb7a VG |
988 | saverr = lo_change_cred(req, &old); |
989 | if (saverr) { | |
990 | goto out; | |
991 | } | |
992 | ||
7387863d | 993 | res = mknod_wrapper(dir->fd, name, link, mode, rdev); |
7c6b6602 | 994 | |
7387863d | 995 | saverr = errno; |
929cfb7a VG |
996 | |
997 | lo_restore_cred(&old); | |
998 | ||
7387863d DDAG |
999 | if (res == -1) { |
1000 | goto out; | |
1001 | } | |
7c6b6602 | 1002 | |
7387863d DDAG |
1003 | saverr = lo_do_lookup(req, parent, name, &e); |
1004 | if (saverr) { | |
1005 | goto out; | |
1006 | } | |
7c6b6602 | 1007 | |
d240314a EG |
1008 | fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, |
1009 | name, (unsigned long long)e.ino); | |
7c6b6602 | 1010 | |
7387863d | 1011 | fuse_reply_entry(req, &e); |
c241aa94 | 1012 | lo_inode_put(lo, &dir); |
7387863d | 1013 | return; |
7c6b6602 DDAG |
1014 | |
1015 | out: | |
c241aa94 | 1016 | lo_inode_put(lo, &dir); |
7387863d | 1017 | fuse_reply_err(req, saverr); |
7c6b6602 DDAG |
1018 | } |
1019 | ||
7387863d DDAG |
/*
 * FUSE mknod handler: create node @name in directory @parent with the given
 * @mode and @rdev.  All work, including the reply, is done by
 * lo_mknod_symlink(); no symlink target is passed.
 */
static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name,
                     mode_t mode, dev_t rdev)
{
    lo_mknod_symlink(req, parent, name, mode, rdev, NULL);
}
1025 | ||
/*
 * FUSE mkdir handler: create directory @name in @parent.  Delegates to
 * lo_mknod_symlink() with S_IFDIR or'ed into @mode, rdev 0 and no symlink
 * target.
 */
static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name,
                     mode_t mode)
{
    lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL);
}
1031 | ||
7387863d DDAG |
/*
 * FUSE symlink handler: create symlink @name in @parent pointing at @link.
 * Delegates to lo_mknod_symlink() with mode S_IFLNK and rdev 0.
 */
static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
                       const char *name)
{
    lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
}
1037 | ||
7c6b6602 | 1038 | static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, |
7387863d | 1039 | const char *name) |
7c6b6602 | 1040 | { |
7387863d DDAG |
1041 | int res; |
1042 | struct lo_data *lo = lo_data(req); | |
c241aa94 | 1043 | struct lo_inode *parent_inode; |
92fb57b8 | 1044 | struct lo_inode *inode; |
7387863d | 1045 | struct fuse_entry_param e; |
93bb3d8d | 1046 | char procname[64]; |
7387863d DDAG |
1047 | int saverr; |
1048 | ||
25dae28c SH |
1049 | if (!is_safe_path_component(name)) { |
1050 | fuse_reply_err(req, EINVAL); | |
1051 | return; | |
1052 | } | |
1053 | ||
c241aa94 | 1054 | parent_inode = lo_inode(req, parent); |
92fb57b8 | 1055 | inode = lo_inode(req, ino); |
c241aa94 SH |
1056 | if (!parent_inode || !inode) { |
1057 | errno = EBADF; | |
1058 | goto out_err; | |
92fb57b8 SH |
1059 | } |
1060 | ||
7387863d DDAG |
1061 | memset(&e, 0, sizeof(struct fuse_entry_param)); |
1062 | e.attr_timeout = lo->timeout; | |
1063 | e.entry_timeout = lo->timeout; | |
1064 | ||
93bb3d8d MS |
1065 | sprintf(procname, "%i", inode->fd); |
1066 | res = linkat(lo->proc_self_fd, procname, parent_inode->fd, name, | |
1067 | AT_SYMLINK_FOLLOW); | |
7387863d DDAG |
1068 | if (res == -1) { |
1069 | goto out_err; | |
1070 | } | |
1071 | ||
1072 | res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); | |
1073 | if (res == -1) { | |
1074 | goto out_err; | |
1075 | } | |
1076 | ||
1077 | pthread_mutex_lock(&lo->mutex); | |
1222f015 | 1078 | inode->nlookup++; |
7387863d | 1079 | pthread_mutex_unlock(&lo->mutex); |
92fb57b8 | 1080 | e.ino = inode->fuse_ino; |
7387863d | 1081 | |
d240314a EG |
1082 | fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, |
1083 | name, (unsigned long long)e.ino); | |
7387863d | 1084 | |
eba8b096 HR |
1085 | /* |
1086 | * No need to update inode->parent_dev, because | |
1087 | * (1) We cannot, the inode now has more than one parent, | |
1088 | * (2) Directories cannot have more than one parent, so link() | |
1089 | * does not work for them; but parent_dev only needs to be | |
1090 | * valid for directories. | |
1091 | */ | |
1092 | ||
7387863d | 1093 | fuse_reply_entry(req, &e); |
c241aa94 SH |
1094 | lo_inode_put(lo, &parent_inode); |
1095 | lo_inode_put(lo, &inode); | |
7387863d | 1096 | return; |
7c6b6602 DDAG |
1097 | |
1098 | out_err: | |
7387863d | 1099 | saverr = errno; |
c241aa94 SH |
1100 | lo_inode_put(lo, &parent_inode); |
1101 | lo_inode_put(lo, &inode); | |
7387863d | 1102 | fuse_reply_err(req, saverr); |
7c6b6602 DDAG |
1103 | } |
1104 | ||
c241aa94 | 1105 | /* Increments nlookup and caller must release refcount using lo_inode_put() */ |
9257e514 MS |
1106 | static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, |
1107 | const char *name) | |
1108 | { | |
1109 | int res; | |
1110 | struct stat attr; | |
1111 | ||
1112 | res = fstatat(lo_fd(req, parent), name, &attr, | |
1113 | AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); | |
1114 | if (res == -1) { | |
1115 | return NULL; | |
1116 | } | |
1117 | ||
1118 | return lo_find(lo_data(req), &attr); | |
1119 | } | |
1120 | ||
7c6b6602 DDAG |
1121 | static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) |
1122 | { | |
7387863d | 1123 | int res; |
9257e514 MS |
1124 | struct lo_inode *inode; |
1125 | struct lo_data *lo = lo_data(req); | |
1126 | ||
25dae28c SH |
1127 | if (!is_safe_path_component(name)) { |
1128 | fuse_reply_err(req, EINVAL); | |
1129 | return; | |
1130 | } | |
7c6b6602 | 1131 | |
9257e514 MS |
1132 | inode = lookup_name(req, parent, name); |
1133 | if (!inode) { | |
1134 | fuse_reply_err(req, EIO); | |
1135 | return; | |
1136 | } | |
1137 | ||
7387863d | 1138 | res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); |
7c6b6602 | 1139 | |
7387863d | 1140 | fuse_reply_err(req, res == -1 ? errno : 0); |
9257e514 | 1141 | unref_inode_lolocked(lo, inode, 1); |
c241aa94 | 1142 | lo_inode_put(lo, &inode); |
7c6b6602 DDAG |
1143 | } |
1144 | ||
1145 | static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, | |
7387863d DDAG |
1146 | fuse_ino_t newparent, const char *newname, |
1147 | unsigned int flags) | |
7c6b6602 | 1148 | { |
7387863d | 1149 | int res; |
c241aa94 SH |
1150 | struct lo_inode *parent_inode; |
1151 | struct lo_inode *newparent_inode; | |
1152 | struct lo_inode *oldinode = NULL; | |
1153 | struct lo_inode *newinode = NULL; | |
9257e514 | 1154 | struct lo_data *lo = lo_data(req); |
7c6b6602 | 1155 | |
25dae28c SH |
1156 | if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { |
1157 | fuse_reply_err(req, EINVAL); | |
1158 | return; | |
1159 | } | |
1160 | ||
c241aa94 SH |
1161 | parent_inode = lo_inode(req, parent); |
1162 | newparent_inode = lo_inode(req, newparent); | |
1163 | if (!parent_inode || !newparent_inode) { | |
1164 | fuse_reply_err(req, EBADF); | |
1165 | goto out; | |
1166 | } | |
1167 | ||
9257e514 MS |
1168 | oldinode = lookup_name(req, parent, name); |
1169 | newinode = lookup_name(req, newparent, newname); | |
1170 | ||
1171 | if (!oldinode) { | |
1172 | fuse_reply_err(req, EIO); | |
1173 | goto out; | |
1174 | } | |
1175 | ||
7387863d | 1176 | if (flags) { |
f0ab7d6f | 1177 | #ifndef SYS_renameat2 |
7387863d | 1178 | fuse_reply_err(req, EINVAL); |
f0ab7d6f | 1179 | #else |
c241aa94 SH |
1180 | res = syscall(SYS_renameat2, parent_inode->fd, name, |
1181 | newparent_inode->fd, newname, flags); | |
f0ab7d6f MS |
1182 | if (res == -1 && errno == ENOSYS) { |
1183 | fuse_reply_err(req, EINVAL); | |
1184 | } else { | |
1185 | fuse_reply_err(req, res == -1 ? errno : 0); | |
1186 | } | |
1187 | #endif | |
9257e514 | 1188 | goto out; |
7387863d | 1189 | } |
7c6b6602 | 1190 | |
c241aa94 | 1191 | res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); |
7c6b6602 | 1192 | |
7387863d | 1193 | fuse_reply_err(req, res == -1 ? errno : 0); |
9257e514 MS |
1194 | out: |
1195 | unref_inode_lolocked(lo, oldinode, 1); | |
1196 | unref_inode_lolocked(lo, newinode, 1); | |
c241aa94 SH |
1197 | lo_inode_put(lo, &oldinode); |
1198 | lo_inode_put(lo, &newinode); | |
1199 | lo_inode_put(lo, &parent_inode); | |
1200 | lo_inode_put(lo, &newparent_inode); | |
7c6b6602 DDAG |
1201 | } |
1202 | ||
1203 | static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) | |
1204 | { | |
7387863d | 1205 | int res; |
9257e514 MS |
1206 | struct lo_inode *inode; |
1207 | struct lo_data *lo = lo_data(req); | |
7c6b6602 | 1208 | |
25dae28c SH |
1209 | if (!is_safe_path_component(name)) { |
1210 | fuse_reply_err(req, EINVAL); | |
1211 | return; | |
1212 | } | |
1213 | ||
9257e514 MS |
1214 | inode = lookup_name(req, parent, name); |
1215 | if (!inode) { | |
1216 | fuse_reply_err(req, EIO); | |
1217 | return; | |
1218 | } | |
1219 | ||
7387863d | 1220 | res = unlinkat(lo_fd(req, parent), name, 0); |
7c6b6602 | 1221 | |
7387863d | 1222 | fuse_reply_err(req, res == -1 ? errno : 0); |
9257e514 | 1223 | unref_inode_lolocked(lo, inode, 1); |
c241aa94 | 1224 | lo_inode_put(lo, &inode); |
7c6b6602 DDAG |
1225 | } |
1226 | ||
fe4c1579 DDAG |
1227 | /* To be called with lo->mutex held */ |
1228 | static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) | |
7c6b6602 | 1229 | { |
7387863d DDAG |
1230 | if (!inode) { |
1231 | return; | |
1232 | } | |
1233 | ||
1222f015 SH |
1234 | assert(inode->nlookup >= n); |
1235 | inode->nlookup -= n; | |
1236 | if (!inode->nlookup) { | |
92fb57b8 | 1237 | lo_map_remove(&lo->ino_map, inode->fuse_ino); |
bfc50a6e | 1238 | g_hash_table_remove(lo->inodes, &inode->key); |
0e81414c VG |
1239 | if (g_hash_table_size(inode->posix_locks)) { |
1240 | fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); | |
1241 | } | |
1242 | g_hash_table_destroy(inode->posix_locks); | |
1243 | pthread_mutex_destroy(&inode->plock_mutex); | |
c241aa94 SH |
1244 | |
1245 | /* Drop our refcount from lo_do_lookup() */ | |
1246 | lo_inode_put(lo, &inode); | |
7387863d | 1247 | } |
7c6b6602 DDAG |
1248 | } |
1249 | ||
fe4c1579 DDAG |
1250 | static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, |
1251 | uint64_t n) | |
1252 | { | |
1253 | if (!inode) { | |
1254 | return; | |
1255 | } | |
1256 | ||
1257 | pthread_mutex_lock(&lo->mutex); | |
1258 | unref_inode(lo, inode, n); | |
1259 | pthread_mutex_unlock(&lo->mutex); | |
1260 | } | |
1261 | ||
7c6b6602 DDAG |
1262 | static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) |
1263 | { | |
7387863d | 1264 | struct lo_data *lo = lo_data(req); |
92fb57b8 SH |
1265 | struct lo_inode *inode; |
1266 | ||
1267 | inode = lo_inode(req, ino); | |
1268 | if (!inode) { | |
1269 | return; | |
1270 | } | |
7c6b6602 | 1271 | |
d240314a | 1272 | fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", |
1222f015 | 1273 | (unsigned long long)ino, (unsigned long long)inode->nlookup, |
d240314a | 1274 | (unsigned long long)nlookup); |
7c6b6602 | 1275 | |
95d27157 | 1276 | unref_inode_lolocked(lo, inode, nlookup); |
c241aa94 | 1277 | lo_inode_put(lo, &inode); |
7c6b6602 DDAG |
1278 | } |
1279 | ||
/*
 * FUSE forget handler: drop @nlookup lookups from inode @ino via
 * lo_forget_one(), then complete the request with fuse_reply_none().
 */
static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
{
    lo_forget_one(req, ino, nlookup);
    fuse_reply_none(req);
}
1285 | ||
1286 | static void lo_forget_multi(fuse_req_t req, size_t count, | |
7387863d | 1287 | struct fuse_forget_data *forgets) |
7c6b6602 | 1288 | { |
7387863d | 1289 | int i; |
7c6b6602 | 1290 | |
7387863d DDAG |
1291 | for (i = 0; i < count; i++) { |
1292 | lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); | |
1293 | } | |
1294 | fuse_reply_none(req); | |
7c6b6602 DDAG |
1295 | } |
1296 | ||
1297 | static void lo_readlink(fuse_req_t req, fuse_ino_t ino) | |
1298 | { | |
7387863d DDAG |
1299 | char buf[PATH_MAX + 1]; |
1300 | int res; | |
7c6b6602 | 1301 | |
7387863d DDAG |
1302 | res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); |
1303 | if (res == -1) { | |
1304 | return (void)fuse_reply_err(req, errno); | |
1305 | } | |
7c6b6602 | 1306 | |
7387863d DDAG |
1307 | if (res == sizeof(buf)) { |
1308 | return (void)fuse_reply_err(req, ENAMETOOLONG); | |
1309 | } | |
7c6b6602 | 1310 | |
7387863d | 1311 | buf[res] = '\0'; |
7c6b6602 | 1312 | |
7387863d | 1313 | fuse_reply_readlink(req, buf); |
7c6b6602 DDAG |
1314 | } |
1315 | ||
/*
 * State of one open directory handle, created by lo_opendir() and released
 * through lo_dirp_put().
 */
struct lo_dirp {
    /* Reference count; set to 1 in lo_opendir(), see lo_dirp()/lo_dirp_put() */
    gint refcount;
    /* Directory stream obtained from fdopendir() */
    DIR *dp;
    /* Entry read by lo_do_readdir() but not yet returned, or NULL */
    struct dirent *entry;
    /* Directory offset that lo_do_readdir() expects the next read to use */
    off_t offset;
};
1322 | ||
acefdde7 SH |
1323 | static void lo_dirp_put(struct lo_dirp **dp) |
1324 | { | |
1325 | struct lo_dirp *d = *dp; | |
1326 | ||
1327 | if (!d) { | |
1328 | return; | |
1329 | } | |
1330 | *dp = NULL; | |
1331 | ||
1332 | if (g_atomic_int_dec_and_test(&d->refcount)) { | |
1333 | closedir(d->dp); | |
1334 | free(d); | |
1335 | } | |
1336 | } | |
1337 | ||
1338 | /* Call lo_dirp_put() on the return value when no longer needed */ | |
b39bce12 | 1339 | static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) |
7c6b6602 | 1340 | { |
b39bce12 SH |
1341 | struct lo_data *lo = lo_data(req); |
1342 | struct lo_map_elem *elem; | |
1343 | ||
1344 | pthread_mutex_lock(&lo->mutex); | |
1345 | elem = lo_map_get(&lo->dirp_map, fi->fh); | |
acefdde7 SH |
1346 | if (elem) { |
1347 | g_atomic_int_inc(&elem->dirp->refcount); | |
1348 | } | |
b39bce12 SH |
1349 | pthread_mutex_unlock(&lo->mutex); |
1350 | if (!elem) { | |
1351 | return NULL; | |
1352 | } | |
1353 | ||
1354 | return elem->dirp; | |
7c6b6602 DDAG |
1355 | } |
1356 | ||
7387863d DDAG |
1357 | static void lo_opendir(fuse_req_t req, fuse_ino_t ino, |
1358 | struct fuse_file_info *fi) | |
7c6b6602 | 1359 | { |
7387863d DDAG |
1360 | int error = ENOMEM; |
1361 | struct lo_data *lo = lo_data(req); | |
1362 | struct lo_dirp *d; | |
1363 | int fd; | |
b39bce12 | 1364 | ssize_t fh; |
7387863d DDAG |
1365 | |
1366 | d = calloc(1, sizeof(struct lo_dirp)); | |
1367 | if (d == NULL) { | |
1368 | goto out_err; | |
1369 | } | |
1370 | ||
1371 | fd = openat(lo_fd(req, ino), ".", O_RDONLY); | |
1372 | if (fd == -1) { | |
1373 | goto out_errno; | |
1374 | } | |
1375 | ||
1376 | d->dp = fdopendir(fd); | |
1377 | if (d->dp == NULL) { | |
1378 | goto out_errno; | |
1379 | } | |
1380 | ||
1381 | d->offset = 0; | |
1382 | d->entry = NULL; | |
1383 | ||
acefdde7 | 1384 | g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */ |
b39bce12 SH |
1385 | pthread_mutex_lock(&lo->mutex); |
1386 | fh = lo_add_dirp_mapping(req, d); | |
1387 | pthread_mutex_unlock(&lo->mutex); | |
1388 | if (fh == -1) { | |
1389 | goto out_err; | |
1390 | } | |
1391 | ||
1392 | fi->fh = fh; | |
7387863d | 1393 | if (lo->cache == CACHE_ALWAYS) { |
9b610b09 | 1394 | fi->cache_readdir = 1; |
7387863d DDAG |
1395 | } |
1396 | fuse_reply_open(req, fi); | |
1397 | return; | |
7c6b6602 DDAG |
1398 | |
1399 | out_errno: | |
7387863d | 1400 | error = errno; |
7c6b6602 | 1401 | out_err: |
7387863d | 1402 | if (d) { |
b39bce12 SH |
1403 | if (d->dp) { |
1404 | closedir(d->dp); | |
e1cd92d9 | 1405 | } else if (fd != -1) { |
7387863d DDAG |
1406 | close(fd); |
1407 | } | |
1408 | free(d); | |
1409 | } | |
1410 | fuse_reply_err(req, error); | |
7c6b6602 DDAG |
1411 | } |
1412 | ||
/*
 * Shared implementation of lo_readdir() and lo_readdirplus().
 *
 * Fills a buffer of at most @size bytes with directory entries of the open
 * directory referenced by @fi, starting at @offset, and replies with it.
 * With @plus non-zero every entry other than "." and ".." is also looked up
 * via lo_do_lookup() and added with fuse_add_direntry_plus(); otherwise
 * fuse_add_direntry() is used with attributes built from the dirent alone.
 *
 * An entry that does not fit into the remaining space stays cached in
 * d->entry so that the next call can return it.
 *
 * Replies EBADF if @ino or the directory handle is unknown, ENOMEM if the
 * buffer cannot be allocated, and the failing errno/lookup error if nothing
 * has been stored yet; otherwise the entries collected so far are returned.
 */
static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
                          off_t offset, struct fuse_file_info *fi, int plus)
{
    struct lo_data *lo = lo_data(req);
    struct lo_dirp *d = NULL;
    struct lo_inode *dinode;
    char *buf = NULL;
    char *p;
    size_t rem = size;
    int err = EBADF;

    dinode = lo_inode(req, ino);
    if (!dinode) {
        goto error;
    }

    d = lo_dirp(req, fi);
    if (!d) {
        goto error;
    }

    err = ENOMEM;
    buf = calloc(1, size);
    if (!buf) {
        goto error;
    }
    p = buf;

    /* Reposition the stream if the caller is not continuing where we left */
    if (offset != d->offset) {
        seekdir(d->dp, offset);
        d->entry = NULL;
        d->offset = offset;
    }
    while (1) {
        size_t entsize;
        off_t nextoff;
        const char *name;

        /* Only read a new entry if none is left over from a previous pass */
        if (!d->entry) {
            /* readdir() returns NULL for both EOF and error; errno tells */
            errno = 0;
            d->entry = readdir(d->dp);
            if (!d->entry) {
                if (errno) { /* Error */
                    err = errno;
                    goto error;
                } else { /* End of stream */
                    break;
                }
            }
        }
        nextoff = d->entry->d_off;
        name = d->entry->d_name;

        /* Non-zero only when lo_do_lookup() below took a lookup reference */
        fuse_ino_t entry_ino = 0;
        /* Minimal attributes: inode number and the type bits from d_type */
        struct fuse_entry_param e = (struct fuse_entry_param){
            .attr.st_ino = d->entry->d_ino,
            .attr.st_mode = d->entry->d_type << 12,
        };

        /* Hide root's parent directory */
        if (dinode == &lo->root && strcmp(name, "..") == 0) {
            e.attr.st_ino = lo->root.key.ino;
            e.attr.st_mode = DT_DIR << 12;
        }

        if (plus) {
            if (!is_dot_or_dotdot(name)) {
                err = lo_do_lookup(req, ino, name, &e);
                if (err) {
                    goto error;
                }
                entry_ino = e.ino;
            }

            entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff);
        } else {
            entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff);
        }
        /* Entry did not fit: undo its lookup and keep it for the next call */
        if (entsize > rem) {
            if (entry_ino != 0) {
                lo_forget_one(req, entry_ino, 1);
            }
            break;
        }

        p += entsize;
        rem -= entsize;

        /* Entry consumed; advance the expected offset */
        d->entry = NULL;
        d->offset = nextoff;
    }

    err = 0;
error:
    lo_dirp_put(&d);
    lo_inode_put(lo, &dinode);

    /*
     * If there's an error, we can only signal it if we haven't stored
     * any entries yet - otherwise we'd end up with wrong lookup
     * counts for the entries that are already in the buffer. So we
     * return what we've collected until that point.
     */
    if (err && rem == size) {
        fuse_reply_err(req, err);
    } else {
        fuse_reply_buf(req, buf, size - rem);
    }
    free(buf);
}
1523 | ||
/*
 * FUSE readdir handler: forwards to lo_do_readdir() with plus == 0, i.e.
 * entries are added with fuse_add_direntry() and no lookups are performed.
 */
static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
                       off_t offset, struct fuse_file_info *fi)
{
    lo_do_readdir(req, ino, size, offset, fi, 0);
}
1529 | ||
/*
 * FUSE readdirplus handler: forwards to lo_do_readdir() with plus == 1, i.e.
 * entries other than "." and ".." are looked up and returned with full
 * entry parameters.
 */
static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size,
                           off_t offset, struct fuse_file_info *fi)
{
    lo_do_readdir(req, ino, size, offset, fi, 1);
}
1535 | ||
7387863d DDAG |
1536 | static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, |
1537 | struct fuse_file_info *fi) | |
7c6b6602 | 1538 | { |
b39bce12 | 1539 | struct lo_data *lo = lo_data(req); |
acefdde7 | 1540 | struct lo_map_elem *elem; |
b39bce12 SH |
1541 | struct lo_dirp *d; |
1542 | ||
7387863d | 1543 | (void)ino; |
b39bce12 | 1544 | |
acefdde7 SH |
1545 | pthread_mutex_lock(&lo->mutex); |
1546 | elem = lo_map_get(&lo->dirp_map, fi->fh); | |
1547 | if (!elem) { | |
1548 | pthread_mutex_unlock(&lo->mutex); | |
b39bce12 SH |
1549 | fuse_reply_err(req, EBADF); |
1550 | return; | |
1551 | } | |
1552 | ||
acefdde7 | 1553 | d = elem->dirp; |
b39bce12 SH |
1554 | lo_map_remove(&lo->dirp_map, fi->fh); |
1555 | pthread_mutex_unlock(&lo->mutex); | |
1556 | ||
acefdde7 SH |
1557 | lo_dirp_put(&d); /* paired with lo_opendir() */ |
1558 | ||
7387863d | 1559 | fuse_reply_err(req, 0); |
7c6b6602 DDAG |
1560 | } |
1561 | ||
e12a0eda JZ |
1562 | static void update_open_flags(int writeback, int allow_direct_io, |
1563 | struct fuse_file_info *fi) | |
8e4e41e3 MT |
1564 | { |
1565 | /* | |
1566 | * With writeback cache, kernel may send read requests even | |
1567 | * when userspace opened write-only | |
1568 | */ | |
1569 | if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { | |
1570 | fi->flags &= ~O_ACCMODE; | |
1571 | fi->flags |= O_RDWR; | |
1572 | } | |
1573 | ||
1574 | /* | |
1575 | * With writeback cache, O_APPEND is handled by the kernel. | |
1576 | * This breaks atomicity (since the file may change in the | |
1577 | * underlying filesystem, so that the kernel's idea of the | |
1578 | * end of the file isn't accurate anymore). In this example, | |
1579 | * we just accept that. A more rigorous filesystem may want | |
1580 | * to return an error here | |
1581 | */ | |
1582 | if (writeback && (fi->flags & O_APPEND)) { | |
1583 | fi->flags &= ~O_APPEND; | |
1584 | } | |
1585 | ||
1586 | /* | |
1587 | * O_DIRECT in guest should not necessarily mean bypassing page | |
e12a0eda JZ |
1588 | * cache on host as well. Therefore, we discard it by default |
1589 | * ('-o no_allow_direct_io'). If somebody needs that behavior, | |
1590 | * the '-o allow_direct_io' option should be set. | |
8e4e41e3 | 1591 | */ |
e12a0eda JZ |
1592 | if (!allow_direct_io) { |
1593 | fi->flags &= ~O_DIRECT; | |
1594 | } | |
8e4e41e3 MT |
1595 | } |
1596 | ||
7c6b6602 | 1597 | static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, |
7387863d | 1598 | mode_t mode, struct fuse_file_info *fi) |
7c6b6602 | 1599 | { |
7387863d DDAG |
1600 | int fd; |
1601 | struct lo_data *lo = lo_data(req); | |
c241aa94 | 1602 | struct lo_inode *parent_inode; |
7387863d DDAG |
1603 | struct fuse_entry_param e; |
1604 | int err; | |
929cfb7a | 1605 | struct lo_cred old = {}; |
7387863d | 1606 | |
d240314a EG |
1607 | fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent, |
1608 | name); | |
7387863d | 1609 | |
25dae28c SH |
1610 | if (!is_safe_path_component(name)) { |
1611 | fuse_reply_err(req, EINVAL); | |
1612 | return; | |
1613 | } | |
1614 | ||
c241aa94 SH |
1615 | parent_inode = lo_inode(req, parent); |
1616 | if (!parent_inode) { | |
1617 | fuse_reply_err(req, EBADF); | |
1618 | return; | |
1619 | } | |
1620 | ||
929cfb7a VG |
1621 | err = lo_change_cred(req, &old); |
1622 | if (err) { | |
1623 | goto out; | |
1624 | } | |
1625 | ||
e12a0eda | 1626 | update_open_flags(lo->writeback, lo->allow_direct_io, fi); |
65da4539 | 1627 | |
c241aa94 | 1628 | fd = openat(parent_inode->fd, name, (fi->flags | O_CREAT) & ~O_NOFOLLOW, |
7387863d | 1629 | mode); |
929cfb7a VG |
1630 | err = fd == -1 ? errno : 0; |
1631 | lo_restore_cred(&old); | |
7387863d | 1632 | |
929cfb7a | 1633 | if (!err) { |
73b4d19d SH |
1634 | ssize_t fh; |
1635 | ||
1636 | pthread_mutex_lock(&lo->mutex); | |
1637 | fh = lo_add_fd_mapping(req, fd); | |
1638 | pthread_mutex_unlock(&lo->mutex); | |
1639 | if (fh == -1) { | |
1640 | close(fd); | |
c241aa94 SH |
1641 | err = ENOMEM; |
1642 | goto out; | |
73b4d19d SH |
1643 | } |
1644 | ||
1645 | fi->fh = fh; | |
929cfb7a VG |
1646 | err = lo_do_lookup(req, parent, name, &e); |
1647 | } | |
230e777b | 1648 | if (lo->cache == CACHE_NONE) { |
7387863d DDAG |
1649 | fi->direct_io = 1; |
1650 | } else if (lo->cache == CACHE_ALWAYS) { | |
1651 | fi->keep_cache = 1; | |
1652 | } | |
1653 | ||
929cfb7a | 1654 | out: |
c241aa94 SH |
1655 | lo_inode_put(lo, &parent_inode); |
1656 | ||
7387863d DDAG |
1657 | if (err) { |
1658 | fuse_reply_err(req, err); | |
1659 | } else { | |
1660 | fuse_reply_create(req, &e, fi); | |
1661 | } | |
7c6b6602 DDAG |
1662 | } |
1663 | ||
0e81414c VG |
1664 | /* Should be called with inode->plock_mutex held */ |
1665 | static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, | |
1666 | struct lo_inode *inode, | |
1667 | uint64_t lock_owner, | |
1668 | pid_t pid, int *err) | |
1669 | { | |
1670 | struct lo_inode_plock *plock; | |
1671 | char procname[64]; | |
1672 | int fd; | |
1673 | ||
1674 | plock = | |
1675 | g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner)); | |
1676 | ||
1677 | if (plock) { | |
1678 | return plock; | |
1679 | } | |
1680 | ||
1681 | plock = malloc(sizeof(struct lo_inode_plock)); | |
1682 | if (!plock) { | |
1683 | *err = ENOMEM; | |
1684 | return NULL; | |
1685 | } | |
1686 | ||
1687 | /* Open another instance of file which can be used for ofd locks. */ | |
1688 | sprintf(procname, "%i", inode->fd); | |
1689 | ||
1690 | /* TODO: What if file is not writable? */ | |
1691 | fd = openat(lo->proc_self_fd, procname, O_RDWR); | |
1692 | if (fd == -1) { | |
1693 | *err = errno; | |
1694 | free(plock); | |
1695 | return NULL; | |
1696 | } | |
1697 | ||
1698 | plock->lock_owner = lock_owner; | |
1699 | plock->fd = fd; | |
1700 | g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner), | |
1701 | plock); | |
1702 | return plock; | |
1703 | } | |
1704 | ||
1705 | static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, | |
1706 | struct flock *lock) | |
1707 | { | |
1708 | struct lo_data *lo = lo_data(req); | |
1709 | struct lo_inode *inode; | |
1710 | struct lo_inode_plock *plock; | |
1711 | int ret, saverr = 0; | |
1712 | ||
1713 | fuse_log(FUSE_LOG_DEBUG, | |
1714 | "lo_getlk(ino=%" PRIu64 ", flags=%d)" | |
1715 | " owner=0x%lx, l_type=%d l_start=0x%lx" | |
1716 | " l_len=0x%lx\n", | |
1717 | ino, fi->flags, fi->lock_owner, lock->l_type, lock->l_start, | |
1718 | lock->l_len); | |
1719 | ||
1720 | inode = lo_inode(req, ino); | |
1721 | if (!inode) { | |
1722 | fuse_reply_err(req, EBADF); | |
1723 | return; | |
1724 | } | |
1725 | ||
1726 | pthread_mutex_lock(&inode->plock_mutex); | |
1727 | plock = | |
1728 | lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); | |
1729 | if (!plock) { | |
c241aa94 SH |
1730 | saverr = ret; |
1731 | goto out; | |
0e81414c VG |
1732 | } |
1733 | ||
1734 | ret = fcntl(plock->fd, F_OFD_GETLK, lock); | |
1735 | if (ret == -1) { | |
1736 | saverr = errno; | |
1737 | } | |
c241aa94 SH |
1738 | |
1739 | out: | |
0e81414c | 1740 | pthread_mutex_unlock(&inode->plock_mutex); |
c241aa94 | 1741 | lo_inode_put(lo, &inode); |
0e81414c VG |
1742 | |
1743 | if (saverr) { | |
1744 | fuse_reply_err(req, saverr); | |
1745 | } else { | |
1746 | fuse_reply_lock(req, lock); | |
1747 | } | |
1748 | } | |
1749 | ||
1750 | static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, | |
1751 | struct flock *lock, int sleep) | |
1752 | { | |
1753 | struct lo_data *lo = lo_data(req); | |
1754 | struct lo_inode *inode; | |
1755 | struct lo_inode_plock *plock; | |
1756 | int ret, saverr = 0; | |
1757 | ||
1758 | fuse_log(FUSE_LOG_DEBUG, | |
1759 | "lo_setlk(ino=%" PRIu64 ", flags=%d)" | |
1760 | " cmd=%d pid=%d owner=0x%lx sleep=%d l_whence=%d" | |
1761 | " l_start=0x%lx l_len=0x%lx\n", | |
1762 | ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, | |
1763 | lock->l_whence, lock->l_start, lock->l_len); | |
1764 | ||
1765 | if (sleep) { | |
1766 | fuse_reply_err(req, EOPNOTSUPP); | |
1767 | return; | |
1768 | } | |
1769 | ||
1770 | inode = lo_inode(req, ino); | |
1771 | if (!inode) { | |
1772 | fuse_reply_err(req, EBADF); | |
1773 | return; | |
1774 | } | |
1775 | ||
1776 | pthread_mutex_lock(&inode->plock_mutex); | |
1777 | plock = | |
1778 | lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); | |
1779 | ||
1780 | if (!plock) { | |
c241aa94 SH |
1781 | saverr = ret; |
1782 | goto out; | |
0e81414c VG |
1783 | } |
1784 | ||
1785 | /* TODO: Is it alright to modify flock? */ | |
1786 | lock->l_pid = 0; | |
1787 | ret = fcntl(plock->fd, F_OFD_SETLK, lock); | |
1788 | if (ret == -1) { | |
1789 | saverr = errno; | |
1790 | } | |
c241aa94 SH |
1791 | |
1792 | out: | |
0e81414c | 1793 | pthread_mutex_unlock(&inode->plock_mutex); |
c241aa94 SH |
1794 | lo_inode_put(lo, &inode); |
1795 | ||
0e81414c VG |
1796 | fuse_reply_err(req, saverr); |
1797 | } | |
1798 | ||
7c6b6602 | 1799 | static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, |
7387863d | 1800 | struct fuse_file_info *fi) |
7c6b6602 | 1801 | { |
7387863d | 1802 | int res; |
b39bce12 SH |
1803 | struct lo_dirp *d; |
1804 | int fd; | |
1805 | ||
7387863d | 1806 | (void)ino; |
b39bce12 SH |
1807 | |
1808 | d = lo_dirp(req, fi); | |
1809 | if (!d) { | |
1810 | fuse_reply_err(req, EBADF); | |
1811 | return; | |
1812 | } | |
1813 | ||
1814 | fd = dirfd(d->dp); | |
7387863d DDAG |
1815 | if (datasync) { |
1816 | res = fdatasync(fd); | |
1817 | } else { | |
1818 | res = fsync(fd); | |
1819 | } | |
acefdde7 SH |
1820 | |
1821 | lo_dirp_put(&d); | |
1822 | ||
7387863d | 1823 | fuse_reply_err(req, res == -1 ? errno : 0); |
7c6b6602 DDAG |
1824 | } |
1825 | ||
1826 | static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | |
1827 | { | |
7387863d | 1828 | int fd; |
73b4d19d | 1829 | ssize_t fh; |
7387863d DDAG |
1830 | char buf[64]; |
1831 | struct lo_data *lo = lo_data(req); | |
1832 | ||
d240314a EG |
1833 | fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino, |
1834 | fi->flags); | |
7387863d | 1835 | |
e12a0eda | 1836 | update_open_flags(lo->writeback, lo->allow_direct_io, fi); |
65da4539 | 1837 | |
9f59d175 SH |
1838 | sprintf(buf, "%i", lo_fd(req, ino)); |
1839 | fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW); | |
7387863d DDAG |
1840 | if (fd == -1) { |
1841 | return (void)fuse_reply_err(req, errno); | |
1842 | } | |
1843 | ||
73b4d19d SH |
1844 | pthread_mutex_lock(&lo->mutex); |
1845 | fh = lo_add_fd_mapping(req, fd); | |
1846 | pthread_mutex_unlock(&lo->mutex); | |
1847 | if (fh == -1) { | |
1848 | close(fd); | |
1849 | fuse_reply_err(req, ENOMEM); | |
1850 | return; | |
1851 | } | |
1852 | ||
1853 | fi->fh = fh; | |
230e777b | 1854 | if (lo->cache == CACHE_NONE) { |
7387863d DDAG |
1855 | fi->direct_io = 1; |
1856 | } else if (lo->cache == CACHE_ALWAYS) { | |
1857 | fi->keep_cache = 1; | |
1858 | } | |
1859 | fuse_reply_open(req, fi); | |
7c6b6602 DDAG |
1860 | } |
1861 | ||
7387863d DDAG |
1862 | static void lo_release(fuse_req_t req, fuse_ino_t ino, |
1863 | struct fuse_file_info *fi) | |
7c6b6602 | 1864 | { |
73b4d19d | 1865 | struct lo_data *lo = lo_data(req); |
baed65c0 SH |
1866 | struct lo_map_elem *elem; |
1867 | int fd = -1; | |
73b4d19d | 1868 | |
7387863d | 1869 | (void)ino; |
7c6b6602 | 1870 | |
73b4d19d | 1871 | pthread_mutex_lock(&lo->mutex); |
baed65c0 SH |
1872 | elem = lo_map_get(&lo->fd_map, fi->fh); |
1873 | if (elem) { | |
1874 | fd = elem->fd; | |
1875 | elem = NULL; | |
1876 | lo_map_remove(&lo->fd_map, fi->fh); | |
1877 | } | |
73b4d19d SH |
1878 | pthread_mutex_unlock(&lo->mutex); |
1879 | ||
1880 | close(fd); | |
7387863d | 1881 | fuse_reply_err(req, 0); |
7c6b6602 DDAG |
1882 | } |
1883 | ||
1884 | static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | |
1885 | { | |
7387863d DDAG |
1886 | int res; |
1887 | (void)ino; | |
0e81414c VG |
1888 | struct lo_inode *inode; |
1889 | ||
1890 | inode = lo_inode(req, ino); | |
1891 | if (!inode) { | |
1892 | fuse_reply_err(req, EBADF); | |
1893 | return; | |
1894 | } | |
1895 | ||
1896 | /* An fd is going away. Cleanup associated posix locks */ | |
1897 | pthread_mutex_lock(&inode->plock_mutex); | |
1898 | g_hash_table_remove(inode->posix_locks, GUINT_TO_POINTER(fi->lock_owner)); | |
1899 | pthread_mutex_unlock(&inode->plock_mutex); | |
1900 | ||
73b4d19d | 1901 | res = close(dup(lo_fi_fd(req, fi))); |
c241aa94 | 1902 | lo_inode_put(lo_data(req), &inode); |
7387863d | 1903 | fuse_reply_err(req, res == -1 ? errno : 0); |
7c6b6602 DDAG |
1904 | } |
1905 | ||
1906 | static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, | |
7387863d | 1907 | struct fuse_file_info *fi) |
7c6b6602 | 1908 | { |
7387863d | 1909 | int res; |
1b209805 VG |
1910 | int fd; |
1911 | char *buf; | |
1912 | ||
1913 | fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, | |
1914 | (void *)fi); | |
1915 | ||
1916 | if (!fi) { | |
9f59d175 SH |
1917 | struct lo_data *lo = lo_data(req); |
1918 | ||
1919 | res = asprintf(&buf, "%i", lo_fd(req, ino)); | |
1b209805 VG |
1920 | if (res == -1) { |
1921 | return (void)fuse_reply_err(req, errno); | |
1922 | } | |
1923 | ||
9f59d175 | 1924 | fd = openat(lo->proc_self_fd, buf, O_RDWR); |
1b209805 VG |
1925 | free(buf); |
1926 | if (fd == -1) { | |
1927 | return (void)fuse_reply_err(req, errno); | |
1928 | } | |
1929 | } else { | |
73b4d19d | 1930 | fd = lo_fi_fd(req, fi); |
1b209805 VG |
1931 | } |
1932 | ||
7387863d | 1933 | if (datasync) { |
1b209805 | 1934 | res = fdatasync(fd); |
7387863d | 1935 | } else { |
1b209805 VG |
1936 | res = fsync(fd); |
1937 | } | |
1938 | if (!fi) { | |
1939 | close(fd); | |
7387863d DDAG |
1940 | } |
1941 | fuse_reply_err(req, res == -1 ? errno : 0); | |
7c6b6602 DDAG |
1942 | } |
1943 | ||
7387863d DDAG |
1944 | static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, |
1945 | struct fuse_file_info *fi) | |
7c6b6602 | 1946 | { |
7387863d | 1947 | struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); |
7c6b6602 | 1948 | |
d240314a EG |
1949 | fuse_log(FUSE_LOG_DEBUG, |
1950 | "lo_read(ino=%" PRIu64 ", size=%zd, " | |
1951 | "off=%lu)\n", | |
1952 | ino, size, (unsigned long)offset); | |
7c6b6602 | 1953 | |
7387863d | 1954 | buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; |
73b4d19d | 1955 | buf.buf[0].fd = lo_fi_fd(req, fi); |
7387863d | 1956 | buf.buf[0].pos = offset; |
7c6b6602 | 1957 | |
8c3fe75e | 1958 | fuse_reply_data(req, &buf); |
7c6b6602 DDAG |
1959 | } |
1960 | ||
1961 | static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, | |
7387863d DDAG |
1962 | struct fuse_bufvec *in_buf, off_t off, |
1963 | struct fuse_file_info *fi) | |
7c6b6602 | 1964 | { |
7387863d DDAG |
1965 | (void)ino; |
1966 | ssize_t res; | |
1967 | struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); | |
ee884652 | 1968 | bool cap_fsetid_dropped = false; |
7387863d DDAG |
1969 | |
1970 | out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; | |
73b4d19d | 1971 | out_buf.buf[0].fd = lo_fi_fd(req, fi); |
7387863d DDAG |
1972 | out_buf.buf[0].pos = off; |
1973 | ||
d240314a EG |
1974 | fuse_log(FUSE_LOG_DEBUG, |
1975 | "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino, | |
1976 | out_buf.buf[0].size, (unsigned long)off); | |
7387863d | 1977 | |
ee884652 VG |
1978 | /* |
1979 | * If kill_priv is set, drop CAP_FSETID which should lead to kernel | |
1980 | * clearing setuid/setgid on file. | |
1981 | */ | |
1982 | if (fi->kill_priv) { | |
1983 | res = drop_effective_cap("FSETID", &cap_fsetid_dropped); | |
1984 | if (res != 0) { | |
1985 | fuse_reply_err(req, res); | |
1986 | return; | |
1987 | } | |
1988 | } | |
1989 | ||
8c3fe75e | 1990 | res = fuse_buf_copy(&out_buf, in_buf); |
7387863d DDAG |
1991 | if (res < 0) { |
1992 | fuse_reply_err(req, -res); | |
1993 | } else { | |
1994 | fuse_reply_write(req, (size_t)res); | |
1995 | } | |
ee884652 VG |
1996 | |
1997 | if (cap_fsetid_dropped) { | |
1998 | res = gain_effective_cap("FSETID"); | |
1999 | if (res) { | |
2000 | fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); | |
2001 | } | |
2002 | } | |
7c6b6602 DDAG |
2003 | } |
2004 | ||
2005 | static void lo_statfs(fuse_req_t req, fuse_ino_t ino) | |
2006 | { | |
7387863d DDAG |
2007 | int res; |
2008 | struct statvfs stbuf; | |
2009 | ||
2010 | res = fstatvfs(lo_fd(req, ino), &stbuf); | |
2011 | if (res == -1) { | |
2012 | fuse_reply_err(req, errno); | |
2013 | } else { | |
2014 | fuse_reply_statfs(req, &stbuf); | |
2015 | } | |
7c6b6602 DDAG |
2016 | } |
2017 | ||
7387863d DDAG |
2018 | static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, |
2019 | off_t length, struct fuse_file_info *fi) | |
7c6b6602 | 2020 | { |
7387863d DDAG |
2021 | int err = EOPNOTSUPP; |
2022 | (void)ino; | |
7c6b6602 | 2023 | |
9776457c | 2024 | #ifdef CONFIG_FALLOCATE |
73b4d19d | 2025 | err = fallocate(lo_fi_fd(req, fi), mode, offset, length); |
7387863d DDAG |
2026 | if (err < 0) { |
2027 | err = errno; | |
2028 | } | |
7c6b6602 | 2029 | |
9776457c | 2030 | #elif defined(CONFIG_POSIX_FALLOCATE) |
7387863d DDAG |
2031 | if (mode) { |
2032 | fuse_reply_err(req, EOPNOTSUPP); | |
2033 | return; | |
2034 | } | |
7c6b6602 | 2035 | |
73b4d19d | 2036 | err = posix_fallocate(lo_fi_fd(req, fi), offset, length); |
7c6b6602 DDAG |
2037 | #endif |
2038 | ||
7387863d | 2039 | fuse_reply_err(req, err); |
7c6b6602 DDAG |
2040 | } |
2041 | ||
2042 | static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, | |
7387863d | 2043 | int op) |
7c6b6602 | 2044 | { |
7387863d DDAG |
2045 | int res; |
2046 | (void)ino; | |
7c6b6602 | 2047 | |
73b4d19d | 2048 | res = flock(lo_fi_fd(req, fi), op); |
7c6b6602 | 2049 | |
7387863d | 2050 | fuse_reply_err(req, res == -1 ? errno : 0); |
7c6b6602 DDAG |
2051 | } |
2052 | ||
6084633d DDAG |
/*
 * xattr map rule flags.
 *
 * Rule types: what happens to an attribute name that matches the rule.
 */

/* Stop processing and leave the name unmodified; an empty key matches all. */
#define XATTR_MAP_FLAG_OK      (1 << 0)

/* The attribute is unwanted: EPERM on write, hidden on read. */
#define XATTR_MAP_FLAG_BAD     (1 << 1)

/*
 * Names starting with 'key' get 'prepend' put in front of them; an empty
 * 'key' prefixes every attribute.  'key' is expressed from the set/remove
 * point of view and the mapping is automatically reversed on read.
 */
#define XATTR_MAP_FLAG_PREFIX  (1 << 2)

/*
 * Rule scopes: which operations a rule is consulted for.
 */

/* Rule applies to get/set/remove */
#define XATTR_MAP_FLAG_CLIENT  (1 << 16)

/* Rule applies to list */
#define XATTR_MAP_FLAG_SERVER  (1 << 17)

/* Rule applies to every operation */
#define XATTR_MAP_FLAG_ALL     (XATTR_MAP_FLAG_SERVER | XATTR_MAP_FLAG_CLIENT)
2079 | ||
2080 | static void add_xattrmap_entry(struct lo_data *lo, | |
2081 | const XattrMapEntry *new_entry) | |
2082 | { | |
2083 | XattrMapEntry *res = g_realloc_n(lo->xattr_map_list, | |
2084 | lo->xattr_map_nentries + 1, | |
2085 | sizeof(XattrMapEntry)); | |
2086 | res[lo->xattr_map_nentries++] = *new_entry; | |
2087 | ||
2088 | lo->xattr_map_list = res; | |
2089 | } | |
2090 | ||
2091 | static void free_xattrmap(struct lo_data *lo) | |
2092 | { | |
2093 | XattrMapEntry *map = lo->xattr_map_list; | |
2094 | size_t i; | |
2095 | ||
2096 | if (!map) { | |
2097 | return; | |
2098 | } | |
2099 | ||
2100 | for (i = 0; i < lo->xattr_map_nentries; i++) { | |
2101 | g_free(map[i].key); | |
2102 | g_free(map[i].prepend); | |
2103 | }; | |
2104 | ||
2105 | g_free(map); | |
2106 | lo->xattr_map_list = NULL; | |
2107 | lo->xattr_map_nentries = -1; | |
2108 | } | |
2109 | ||
1d84a021 DDAG |
2110 | /* |
2111 | * Handle the 'map' type, which is sugar for a set of commands | |
2112 | * for the common case of prefixing a subset or everything, | |
2113 | * and allowing anything not prefixed through. | |
2114 | * It must be the last entry in the stream, although there | |
2115 | * can be other entries before it. | |
2116 | * The form is: | |
2117 | * :map:key:prefix: | |
2118 | * | |
2119 | * key maybe empty in which case all entries are prefixed. | |
2120 | */ | |
2121 | static void parse_xattrmap_map(struct lo_data *lo, | |
2122 | const char *rule, char sep) | |
2123 | { | |
2124 | const char *tmp; | |
2125 | char *key; | |
2126 | char *prefix; | |
2127 | XattrMapEntry tmp_entry; | |
2128 | ||
2129 | if (*rule != sep) { | |
2130 | fuse_log(FUSE_LOG_ERR, | |
2131 | "%s: Expecting '%c' after 'map' keyword, found '%c'\n", | |
2132 | __func__, sep, *rule); | |
2133 | exit(1); | |
2134 | } | |
2135 | ||
2136 | rule++; | |
2137 | ||
2138 | /* At start of 'key' field */ | |
2139 | tmp = strchr(rule, sep); | |
2140 | if (!tmp) { | |
2141 | fuse_log(FUSE_LOG_ERR, | |
2142 | "%s: Missing '%c' at end of key field in map rule\n", | |
2143 | __func__, sep); | |
2144 | exit(1); | |
2145 | } | |
2146 | ||
2147 | key = g_strndup(rule, tmp - rule); | |
2148 | rule = tmp + 1; | |
2149 | ||
2150 | /* At start of prefix field */ | |
2151 | tmp = strchr(rule, sep); | |
2152 | if (!tmp) { | |
2153 | fuse_log(FUSE_LOG_ERR, | |
2154 | "%s: Missing '%c' at end of prefix field in map rule\n", | |
2155 | __func__, sep); | |
2156 | exit(1); | |
2157 | } | |
2158 | ||
2159 | prefix = g_strndup(rule, tmp - rule); | |
2160 | rule = tmp + 1; | |
2161 | ||
2162 | /* | |
2163 | * This should be the end of the string, we don't allow | |
2164 | * any more commands after 'map'. | |
2165 | */ | |
2166 | if (*rule) { | |
2167 | fuse_log(FUSE_LOG_ERR, | |
2168 | "%s: Expecting end of command after map, found '%c'\n", | |
2169 | __func__, *rule); | |
2170 | exit(1); | |
2171 | } | |
2172 | ||
2173 | /* 1st: Prefix matches/everything */ | |
2174 | tmp_entry.flags = XATTR_MAP_FLAG_PREFIX | XATTR_MAP_FLAG_ALL; | |
2175 | tmp_entry.key = g_strdup(key); | |
2176 | tmp_entry.prepend = g_strdup(prefix); | |
2177 | add_xattrmap_entry(lo, &tmp_entry); | |
2178 | ||
2179 | if (!*key) { | |
2180 | /* Prefix all case */ | |
2181 | ||
2182 | /* 2nd: Hide any non-prefixed entries on the host */ | |
2183 | tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_ALL; | |
2184 | tmp_entry.key = g_strdup(""); | |
2185 | tmp_entry.prepend = g_strdup(""); | |
2186 | add_xattrmap_entry(lo, &tmp_entry); | |
2187 | } else { | |
2188 | /* Prefix matching case */ | |
2189 | ||
2190 | /* 2nd: Hide non-prefixed but matching entries on the host */ | |
2191 | tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_SERVER; | |
2192 | tmp_entry.key = g_strdup(""); /* Not used */ | |
2193 | tmp_entry.prepend = g_strdup(key); | |
2194 | add_xattrmap_entry(lo, &tmp_entry); | |
2195 | ||
2196 | /* 3rd: Stop the client accessing prefixed attributes directly */ | |
2197 | tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_CLIENT; | |
2198 | tmp_entry.key = g_strdup(prefix); | |
2199 | tmp_entry.prepend = g_strdup(""); /* Not used */ | |
2200 | add_xattrmap_entry(lo, &tmp_entry); | |
2201 | ||
2202 | /* 4th: Everything else is OK */ | |
2203 | tmp_entry.flags = XATTR_MAP_FLAG_OK | XATTR_MAP_FLAG_ALL; | |
2204 | tmp_entry.key = g_strdup(""); | |
2205 | tmp_entry.prepend = g_strdup(""); | |
2206 | add_xattrmap_entry(lo, &tmp_entry); | |
2207 | } | |
2208 | ||
2209 | g_free(key); | |
2210 | g_free(prefix); | |
2211 | } | |
2212 | ||
6084633d DDAG |
2213 | static void parse_xattrmap(struct lo_data *lo) |
2214 | { | |
2215 | const char *map = lo->xattrmap; | |
2216 | const char *tmp; | |
2217 | ||
2218 | lo->xattr_map_nentries = 0; | |
2219 | while (*map) { | |
2220 | XattrMapEntry tmp_entry; | |
2221 | char sep; | |
2222 | ||
2223 | if (isspace(*map)) { | |
2224 | map++; | |
2225 | continue; | |
2226 | } | |
2227 | /* The separator is the first non-space of the rule */ | |
2228 | sep = *map++; | |
2229 | if (!sep) { | |
2230 | break; | |
2231 | } | |
2232 | ||
2233 | tmp_entry.flags = 0; | |
2234 | /* Start of 'type' */ | |
2235 | if (strstart(map, "prefix", &map)) { | |
2236 | tmp_entry.flags |= XATTR_MAP_FLAG_PREFIX; | |
2237 | } else if (strstart(map, "ok", &map)) { | |
2238 | tmp_entry.flags |= XATTR_MAP_FLAG_OK; | |
2239 | } else if (strstart(map, "bad", &map)) { | |
2240 | tmp_entry.flags |= XATTR_MAP_FLAG_BAD; | |
1d84a021 DDAG |
2241 | } else if (strstart(map, "map", &map)) { |
2242 | /* | |
2243 | * map is sugar that adds a number of rules, and must be | |
2244 | * the last entry. | |
2245 | */ | |
2246 | parse_xattrmap_map(lo, map, sep); | |
2247 | return; | |
6084633d DDAG |
2248 | } else { |
2249 | fuse_log(FUSE_LOG_ERR, | |
2250 | "%s: Unexpected type;" | |
1d84a021 | 2251 | "Expecting 'prefix', 'ok', 'bad' or 'map' in rule %zu\n", |
6084633d DDAG |
2252 | __func__, lo->xattr_map_nentries); |
2253 | exit(1); | |
2254 | } | |
2255 | ||
2256 | if (*map++ != sep) { | |
2257 | fuse_log(FUSE_LOG_ERR, | |
2258 | "%s: Missing '%c' at end of type field of rule %zu\n", | |
2259 | __func__, sep, lo->xattr_map_nentries); | |
2260 | exit(1); | |
2261 | } | |
2262 | ||
2263 | /* Start of 'scope' */ | |
2264 | if (strstart(map, "client", &map)) { | |
2265 | tmp_entry.flags |= XATTR_MAP_FLAG_CLIENT; | |
2266 | } else if (strstart(map, "server", &map)) { | |
2267 | tmp_entry.flags |= XATTR_MAP_FLAG_SERVER; | |
2268 | } else if (strstart(map, "all", &map)) { | |
2269 | tmp_entry.flags |= XATTR_MAP_FLAG_ALL; | |
2270 | } else { | |
2271 | fuse_log(FUSE_LOG_ERR, | |
2272 | "%s: Unexpected scope;" | |
2273 | " Expecting 'client', 'server', or 'all', in rule %zu\n", | |
2274 | __func__, lo->xattr_map_nentries); | |
2275 | exit(1); | |
2276 | } | |
2277 | ||
2278 | if (*map++ != sep) { | |
2279 | fuse_log(FUSE_LOG_ERR, | |
2280 | "%s: Expecting '%c' found '%c'" | |
2281 | " after scope in rule %zu\n", | |
2282 | __func__, sep, *map, lo->xattr_map_nentries); | |
2283 | exit(1); | |
2284 | } | |
2285 | ||
2286 | /* At start of 'key' field */ | |
2287 | tmp = strchr(map, sep); | |
2288 | if (!tmp) { | |
2289 | fuse_log(FUSE_LOG_ERR, | |
2290 | "%s: Missing '%c' at end of key field of rule %zu", | |
2291 | __func__, sep, lo->xattr_map_nentries); | |
2292 | exit(1); | |
2293 | } | |
2294 | tmp_entry.key = g_strndup(map, tmp - map); | |
2295 | map = tmp + 1; | |
2296 | ||
2297 | /* At start of 'prepend' field */ | |
2298 | tmp = strchr(map, sep); | |
2299 | if (!tmp) { | |
2300 | fuse_log(FUSE_LOG_ERR, | |
2301 | "%s: Missing '%c' at end of prepend field of rule %zu", | |
2302 | __func__, sep, lo->xattr_map_nentries); | |
2303 | exit(1); | |
2304 | } | |
2305 | tmp_entry.prepend = g_strndup(map, tmp - map); | |
2306 | map = tmp + 1; | |
2307 | ||
2308 | add_xattrmap_entry(lo, &tmp_entry); | |
2309 | /* End of rule - go around again for another rule */ | |
2310 | } | |
2311 | ||
2312 | if (!lo->xattr_map_nentries) { | |
2313 | fuse_log(FUSE_LOG_ERR, "Empty xattr map\n"); | |
2314 | exit(1); | |
2315 | } | |
2316 | } | |
2317 | ||
4f088dbf DDAG |
2318 | /* |
2319 | * For use with getxattr/setxattr/removexattr, where the client | |
2320 | * gives us a name and we may need to choose a different one. | |
2321 | * Allocates a buffer for the result placing it in *out_name. | |
2322 | * If there's no change then *out_name is not set. | |
2323 | * Returns 0 on success | |
2324 | * Can return -EPERM to indicate we block a given attribute | |
2325 | * (in which case out_name is not allocated) | |
2326 | * Can return -ENOMEM to indicate out_name couldn't be allocated. | |
2327 | */ | |
2328 | static int xattr_map_client(const struct lo_data *lo, const char *client_name, | |
2329 | char **out_name) | |
2330 | { | |
2331 | size_t i; | |
2332 | for (i = 0; i < lo->xattr_map_nentries; i++) { | |
2333 | const XattrMapEntry *cur_entry = lo->xattr_map_list + i; | |
2334 | ||
2335 | if ((cur_entry->flags & XATTR_MAP_FLAG_CLIENT) && | |
2336 | (strstart(client_name, cur_entry->key, NULL))) { | |
2337 | if (cur_entry->flags & XATTR_MAP_FLAG_BAD) { | |
2338 | return -EPERM; | |
2339 | } | |
2340 | if (cur_entry->flags & XATTR_MAP_FLAG_OK) { | |
2341 | /* Unmodified name */ | |
2342 | return 0; | |
2343 | } | |
2344 | if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) { | |
2345 | *out_name = g_try_malloc(strlen(client_name) + | |
2346 | strlen(cur_entry->prepend) + 1); | |
2347 | if (!*out_name) { | |
2348 | return -ENOMEM; | |
2349 | } | |
2350 | sprintf(*out_name, "%s%s", cur_entry->prepend, client_name); | |
2351 | return 0; | |
2352 | } | |
2353 | } | |
2354 | } | |
2355 | ||
2356 | return -EPERM; | |
2357 | } | |
2358 | ||
6409cf19 DDAG |
2359 | /* |
2360 | * For use with listxattr where the server fs gives us a name and we may need | |
2361 | * to sanitize this for the client. | |
2362 | * Returns a pointer to the result in *out_name | |
2363 | * This is always the original string or the current string with some prefix | |
2364 | * removed; no reallocation is done. | |
2365 | * Returns 0 on success | |
2366 | * Can return -ENODATA to indicate the name should be dropped from the list. | |
2367 | */ | |
2368 | static int xattr_map_server(const struct lo_data *lo, const char *server_name, | |
2369 | const char **out_name) | |
2370 | { | |
2371 | size_t i; | |
2372 | const char *end; | |
2373 | ||
2374 | for (i = 0; i < lo->xattr_map_nentries; i++) { | |
2375 | const XattrMapEntry *cur_entry = lo->xattr_map_list + i; | |
2376 | ||
2377 | if ((cur_entry->flags & XATTR_MAP_FLAG_SERVER) && | |
2378 | (strstart(server_name, cur_entry->prepend, &end))) { | |
2379 | if (cur_entry->flags & XATTR_MAP_FLAG_BAD) { | |
2380 | return -ENODATA; | |
2381 | } | |
2382 | if (cur_entry->flags & XATTR_MAP_FLAG_OK) { | |
2383 | *out_name = server_name; | |
2384 | return 0; | |
2385 | } | |
2386 | if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) { | |
2387 | /* Remove prefix */ | |
2388 | *out_name = end; | |
2389 | return 0; | |
2390 | } | |
2391 | } | |
2392 | } | |
2393 | ||
2394 | return -ENODATA; | |
2395 | } | |
2396 | ||
4f088dbf | 2397 | static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, |
7387863d | 2398 | size_t size) |
7c6b6602 | 2399 | { |
9f59d175 | 2400 | struct lo_data *lo = lo_data(req); |
7387863d DDAG |
2401 | char *value = NULL; |
2402 | char procname[64]; | |
4f088dbf DDAG |
2403 | const char *name; |
2404 | char *mapped_name; | |
92fb57b8 | 2405 | struct lo_inode *inode; |
7387863d DDAG |
2406 | ssize_t ret; |
2407 | int saverr; | |
9f59d175 | 2408 | int fd = -1; |
7387863d | 2409 | |
4f088dbf DDAG |
2410 | mapped_name = NULL; |
2411 | name = in_name; | |
2412 | if (lo->xattrmap) { | |
2413 | ret = xattr_map_client(lo, in_name, &mapped_name); | |
2414 | if (ret < 0) { | |
2415 | if (ret == -EPERM) { | |
2416 | ret = -ENODATA; | |
2417 | } | |
2418 | fuse_reply_err(req, -ret); | |
2419 | return; | |
2420 | } | |
2421 | if (mapped_name) { | |
2422 | name = mapped_name; | |
2423 | } | |
2424 | } | |
2425 | ||
92fb57b8 SH |
2426 | inode = lo_inode(req, ino); |
2427 | if (!inode) { | |
2428 | fuse_reply_err(req, EBADF); | |
4f088dbf | 2429 | g_free(mapped_name); |
92fb57b8 SH |
2430 | return; |
2431 | } | |
2432 | ||
7387863d DDAG |
2433 | saverr = ENOSYS; |
2434 | if (!lo_data(req)->xattr) { | |
2435 | goto out; | |
2436 | } | |
2437 | ||
d240314a EG |
2438 | fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", |
2439 | ino, name, size); | |
7387863d | 2440 | |
16e15a73 MT |
2441 | if (size) { |
2442 | value = malloc(size); | |
2443 | if (!value) { | |
2444 | goto out_err; | |
2445 | } | |
2446 | } | |
2447 | ||
9f59d175 | 2448 | sprintf(procname, "%i", inode->fd); |
bdfd6678 MT |
2449 | /* |
2450 | * It is not safe to open() non-regular/non-dir files in file server | |
2451 | * unless O_PATH is used, so use that method for regular files/dir | |
2452 | * only (as it seems giving less performance overhead). | |
2453 | * Otherwise, call fchdir() to avoid open(). | |
2454 | */ | |
2455 | if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { | |
2456 | fd = openat(lo->proc_self_fd, procname, O_RDONLY); | |
2457 | if (fd < 0) { | |
2458 | goto out_err; | |
2459 | } | |
2460 | ret = fgetxattr(fd, name, value, size); | |
2461 | } else { | |
2462 | /* fchdir should not fail here */ | |
2463 | assert(fchdir(lo->proc_self_fd) == 0); | |
2464 | ret = getxattr(procname, name, value, size); | |
2465 | assert(fchdir(lo->root.fd) == 0); | |
9f59d175 | 2466 | } |
7387863d | 2467 | |
16e15a73 MT |
2468 | if (ret == -1) { |
2469 | goto out_err; | |
2470 | } | |
7387863d | 2471 | if (size) { |
7387863d DDAG |
2472 | saverr = 0; |
2473 | if (ret == 0) { | |
2474 | goto out; | |
2475 | } | |
7387863d DDAG |
2476 | fuse_reply_buf(req, value, ret); |
2477 | } else { | |
7387863d DDAG |
2478 | fuse_reply_xattr(req, ret); |
2479 | } | |
7c6b6602 | 2480 | out_free: |
7387863d | 2481 | free(value); |
9f59d175 SH |
2482 | |
2483 | if (fd >= 0) { | |
2484 | close(fd); | |
2485 | } | |
c241aa94 SH |
2486 | |
2487 | lo_inode_put(lo, &inode); | |
7387863d | 2488 | return; |
7c6b6602 DDAG |
2489 | |
2490 | out_err: | |
7387863d | 2491 | saverr = errno; |
7c6b6602 | 2492 | out: |
7387863d | 2493 | fuse_reply_err(req, saverr); |
4f088dbf | 2494 | g_free(mapped_name); |
7387863d | 2495 | goto out_free; |
7c6b6602 DDAG |
2496 | } |
2497 | ||
2498 | static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) | |
2499 | { | |
9f59d175 | 2500 | struct lo_data *lo = lo_data(req); |
7387863d DDAG |
2501 | char *value = NULL; |
2502 | char procname[64]; | |
92fb57b8 | 2503 | struct lo_inode *inode; |
7387863d DDAG |
2504 | ssize_t ret; |
2505 | int saverr; | |
9f59d175 | 2506 | int fd = -1; |
7387863d | 2507 | |
92fb57b8 SH |
2508 | inode = lo_inode(req, ino); |
2509 | if (!inode) { | |
2510 | fuse_reply_err(req, EBADF); | |
2511 | return; | |
2512 | } | |
2513 | ||
7387863d DDAG |
2514 | saverr = ENOSYS; |
2515 | if (!lo_data(req)->xattr) { | |
2516 | goto out; | |
2517 | } | |
2518 | ||
d240314a EG |
2519 | fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, |
2520 | size); | |
7387863d | 2521 | |
16e15a73 MT |
2522 | if (size) { |
2523 | value = malloc(size); | |
2524 | if (!value) { | |
2525 | goto out_err; | |
2526 | } | |
2527 | } | |
2528 | ||
9f59d175 | 2529 | sprintf(procname, "%i", inode->fd); |
bdfd6678 MT |
2530 | if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { |
2531 | fd = openat(lo->proc_self_fd, procname, O_RDONLY); | |
2532 | if (fd < 0) { | |
2533 | goto out_err; | |
2534 | } | |
2535 | ret = flistxattr(fd, value, size); | |
2536 | } else { | |
2537 | /* fchdir should not fail here */ | |
2538 | assert(fchdir(lo->proc_self_fd) == 0); | |
2539 | ret = listxattr(procname, value, size); | |
2540 | assert(fchdir(lo->root.fd) == 0); | |
9f59d175 | 2541 | } |
7387863d | 2542 | |
16e15a73 MT |
2543 | if (ret == -1) { |
2544 | goto out_err; | |
2545 | } | |
7387863d | 2546 | if (size) { |
7387863d DDAG |
2547 | saverr = 0; |
2548 | if (ret == 0) { | |
2549 | goto out; | |
2550 | } | |
6409cf19 DDAG |
2551 | |
2552 | if (lo->xattr_map_list) { | |
2553 | /* | |
2554 | * Map the names back, some attributes might be dropped, | |
2555 | * some shortened, but not increased, so we shouldn't | |
2556 | * run out of room. | |
2557 | */ | |
2558 | size_t out_index, in_index; | |
2559 | out_index = 0; | |
2560 | in_index = 0; | |
2561 | while (in_index < ret) { | |
2562 | const char *map_out; | |
2563 | char *in_ptr = value + in_index; | |
2564 | /* Length of current attribute name */ | |
2565 | size_t in_len = strlen(value + in_index) + 1; | |
2566 | ||
2567 | int mapret = xattr_map_server(lo, in_ptr, &map_out); | |
2568 | if (mapret != -ENODATA && mapret != 0) { | |
2569 | /* Shouldn't happen */ | |
2570 | saverr = -mapret; | |
2571 | goto out; | |
2572 | } | |
2573 | if (mapret == 0) { | |
2574 | /* Either unchanged, or truncated */ | |
2575 | size_t out_len; | |
2576 | if (map_out != in_ptr) { | |
2577 | /* +1 copies the NIL */ | |
2578 | out_len = strlen(map_out) + 1; | |
2579 | } else { | |
2580 | /* No change */ | |
2581 | out_len = in_len; | |
2582 | } | |
2583 | /* | |
2584 | * Move result along, may still be needed for an unchanged | |
2585 | * entry if a previous entry was changed. | |
2586 | */ | |
2587 | memmove(value + out_index, map_out, out_len); | |
2588 | ||
2589 | out_index += out_len; | |
2590 | } | |
2591 | in_index += in_len; | |
2592 | } | |
2593 | ret = out_index; | |
2594 | if (ret == 0) { | |
2595 | goto out; | |
2596 | } | |
2597 | } | |
7387863d DDAG |
2598 | fuse_reply_buf(req, value, ret); |
2599 | } else { | |
6409cf19 DDAG |
2600 | /* |
2601 | * xattrmap only ever shortens the result, | |
2602 | * so we don't need to do anything clever with the | |
2603 | * allocation length here. | |
2604 | */ | |
7387863d DDAG |
2605 | fuse_reply_xattr(req, ret); |
2606 | } | |
7c6b6602 | 2607 | out_free: |
7387863d | 2608 | free(value); |
9f59d175 SH |
2609 | |
2610 | if (fd >= 0) { | |
2611 | close(fd); | |
2612 | } | |
c241aa94 SH |
2613 | |
2614 | lo_inode_put(lo, &inode); | |
7387863d | 2615 | return; |
7c6b6602 DDAG |
2616 | |
2617 | out_err: | |
7387863d | 2618 | saverr = errno; |
7c6b6602 | 2619 | out: |
7387863d DDAG |
2620 | fuse_reply_err(req, saverr); |
2621 | goto out_free; | |
7c6b6602 DDAG |
2622 | } |
2623 | ||
4f088dbf | 2624 | static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, |
7387863d | 2625 | const char *value, size_t size, int flags) |
7c6b6602 | 2626 | { |
7387863d | 2627 | char procname[64]; |
4f088dbf DDAG |
2628 | const char *name; |
2629 | char *mapped_name; | |
9f59d175 | 2630 | struct lo_data *lo = lo_data(req); |
92fb57b8 | 2631 | struct lo_inode *inode; |
7387863d DDAG |
2632 | ssize_t ret; |
2633 | int saverr; | |
9f59d175 | 2634 | int fd = -1; |
7c6b6602 | 2635 | |
4f088dbf DDAG |
2636 | mapped_name = NULL; |
2637 | name = in_name; | |
2638 | if (lo->xattrmap) { | |
2639 | ret = xattr_map_client(lo, in_name, &mapped_name); | |
2640 | if (ret < 0) { | |
2641 | fuse_reply_err(req, -ret); | |
2642 | return; | |
2643 | } | |
2644 | if (mapped_name) { | |
2645 | name = mapped_name; | |
2646 | } | |
2647 | } | |
2648 | ||
92fb57b8 SH |
2649 | inode = lo_inode(req, ino); |
2650 | if (!inode) { | |
2651 | fuse_reply_err(req, EBADF); | |
4f088dbf | 2652 | g_free(mapped_name); |
92fb57b8 SH |
2653 | return; |
2654 | } | |
2655 | ||
7387863d DDAG |
2656 | saverr = ENOSYS; |
2657 | if (!lo_data(req)->xattr) { | |
2658 | goto out; | |
2659 | } | |
7c6b6602 | 2660 | |
d240314a EG |
2661 | fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 |
2662 | ", name=%s value=%s size=%zd)\n", ino, name, value, size); | |
7c6b6602 | 2663 | |
9f59d175 | 2664 | sprintf(procname, "%i", inode->fd); |
bdfd6678 MT |
2665 | if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { |
2666 | fd = openat(lo->proc_self_fd, procname, O_RDONLY); | |
2667 | if (fd < 0) { | |
2668 | saverr = errno; | |
2669 | goto out; | |
2670 | } | |
2671 | ret = fsetxattr(fd, name, value, size, flags); | |
2672 | } else { | |
2673 | /* fchdir should not fail here */ | |
2674 | assert(fchdir(lo->proc_self_fd) == 0); | |
2675 | ret = setxattr(procname, name, value, size, flags); | |
2676 | assert(fchdir(lo->root.fd) == 0); | |
9f59d175 | 2677 | } |
7c6b6602 | 2678 | |
7387863d | 2679 | saverr = ret == -1 ? errno : 0; |
7c6b6602 DDAG |
2680 | |
2681 | out: | |
9f59d175 SH |
2682 | if (fd >= 0) { |
2683 | close(fd); | |
2684 | } | |
c241aa94 SH |
2685 | |
2686 | lo_inode_put(lo, &inode); | |
4f088dbf | 2687 | g_free(mapped_name); |
7387863d | 2688 | fuse_reply_err(req, saverr); |
7c6b6602 DDAG |
2689 | } |
2690 | ||
4f088dbf | 2691 | static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *in_name) |
7c6b6602 | 2692 | { |
7387863d | 2693 | char procname[64]; |
4f088dbf DDAG |
2694 | const char *name; |
2695 | char *mapped_name; | |
9f59d175 | 2696 | struct lo_data *lo = lo_data(req); |
92fb57b8 | 2697 | struct lo_inode *inode; |
7387863d DDAG |
2698 | ssize_t ret; |
2699 | int saverr; | |
9f59d175 | 2700 | int fd = -1; |
7c6b6602 | 2701 | |
4f088dbf DDAG |
2702 | mapped_name = NULL; |
2703 | name = in_name; | |
2704 | if (lo->xattrmap) { | |
2705 | ret = xattr_map_client(lo, in_name, &mapped_name); | |
2706 | if (ret < 0) { | |
2707 | fuse_reply_err(req, -ret); | |
2708 | return; | |
2709 | } | |
2710 | if (mapped_name) { | |
2711 | name = mapped_name; | |
2712 | } | |
2713 | } | |
2714 | ||
92fb57b8 SH |
2715 | inode = lo_inode(req, ino); |
2716 | if (!inode) { | |
2717 | fuse_reply_err(req, EBADF); | |
4f088dbf | 2718 | g_free(mapped_name); |
92fb57b8 SH |
2719 | return; |
2720 | } | |
2721 | ||
7387863d DDAG |
2722 | saverr = ENOSYS; |
2723 | if (!lo_data(req)->xattr) { | |
2724 | goto out; | |
2725 | } | |
7c6b6602 | 2726 | |
d240314a EG |
2727 | fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, |
2728 | name); | |
7c6b6602 | 2729 | |
9f59d175 | 2730 | sprintf(procname, "%i", inode->fd); |
bdfd6678 MT |
2731 | if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { |
2732 | fd = openat(lo->proc_self_fd, procname, O_RDONLY); | |
2733 | if (fd < 0) { | |
2734 | saverr = errno; | |
2735 | goto out; | |
2736 | } | |
2737 | ret = fremovexattr(fd, name); | |
2738 | } else { | |
2739 | /* fchdir should not fail here */ | |
2740 | assert(fchdir(lo->proc_self_fd) == 0); | |
2741 | ret = removexattr(procname, name); | |
2742 | assert(fchdir(lo->root.fd) == 0); | |
9f59d175 | 2743 | } |
7c6b6602 | 2744 | |
7387863d | 2745 | saverr = ret == -1 ? errno : 0; |
7c6b6602 DDAG |
2746 | |
2747 | out: | |
9f59d175 SH |
2748 | if (fd >= 0) { |
2749 | close(fd); | |
2750 | } | |
c241aa94 SH |
2751 | |
2752 | lo_inode_put(lo, &inode); | |
4f088dbf | 2753 | g_free(mapped_name); |
7387863d | 2754 | fuse_reply_err(req, saverr); |
7c6b6602 DDAG |
2755 | } |
2756 | ||
2757 | #ifdef HAVE_COPY_FILE_RANGE | |
2758 | static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in, | |
7387863d DDAG |
2759 | struct fuse_file_info *fi_in, fuse_ino_t ino_out, |
2760 | off_t off_out, struct fuse_file_info *fi_out, | |
2761 | size_t len, int flags) | |
7c6b6602 | 2762 | { |
73b4d19d | 2763 | int in_fd, out_fd; |
7387863d DDAG |
2764 | ssize_t res; |
2765 | ||
73b4d19d SH |
2766 | in_fd = lo_fi_fd(req, fi_in); |
2767 | out_fd = lo_fi_fd(req, fi_out); | |
2768 | ||
2769 | fuse_log(FUSE_LOG_DEBUG, | |
2770 | "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, " | |
2771 | "off=%lu, ino=%" PRIu64 "/fd=%d, " | |
2772 | "off=%lu, size=%zd, flags=0x%x)\n", | |
2773 | ino_in, in_fd, off_in, ino_out, out_fd, off_out, len, flags); | |
7387863d | 2774 | |
73b4d19d | 2775 | res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags); |
7387863d | 2776 | if (res < 0) { |
a931b686 | 2777 | fuse_reply_err(req, errno); |
7387863d DDAG |
2778 | } else { |
2779 | fuse_reply_write(req, res); | |
2780 | } | |
7c6b6602 DDAG |
2781 | } |
2782 | #endif | |
2783 | ||
2784 | static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, | |
7387863d | 2785 | struct fuse_file_info *fi) |
7c6b6602 | 2786 | { |
7387863d DDAG |
2787 | off_t res; |
2788 | ||
2789 | (void)ino; | |
73b4d19d | 2790 | res = lseek(lo_fi_fd(req, fi), off, whence); |
7387863d DDAG |
2791 | if (res != -1) { |
2792 | fuse_reply_lseek(req, res); | |
2793 | } else { | |
2794 | fuse_reply_err(req, errno); | |
2795 | } | |
7c6b6602 DDAG |
2796 | } |
2797 | ||
771b01eb DDAG |
2798 | static void lo_destroy(void *userdata) |
2799 | { | |
2800 | struct lo_data *lo = (struct lo_data *)userdata; | |
28f7a3b0 | 2801 | |
fe4c1579 | 2802 | pthread_mutex_lock(&lo->mutex); |
28f7a3b0 SH |
2803 | while (true) { |
2804 | GHashTableIter iter; | |
2805 | gpointer key, value; | |
2806 | ||
2807 | g_hash_table_iter_init(&iter, lo->inodes); | |
2808 | if (!g_hash_table_iter_next(&iter, &key, &value)) { | |
2809 | break; | |
2810 | } | |
2811 | ||
2812 | struct lo_inode *inode = value; | |
fe4c1579 | 2813 | unref_inode(lo, inode, inode->nlookup); |
28f7a3b0 | 2814 | } |
fe4c1579 | 2815 | pthread_mutex_unlock(&lo->mutex); |
771b01eb DDAG |
2816 | } |
2817 | ||
7c6b6602 | 2818 | static struct fuse_lowlevel_ops lo_oper = { |
7387863d DDAG |
2819 | .init = lo_init, |
2820 | .lookup = lo_lookup, | |
2821 | .mkdir = lo_mkdir, | |
2822 | .mknod = lo_mknod, | |
2823 | .symlink = lo_symlink, | |
2824 | .link = lo_link, | |
2825 | .unlink = lo_unlink, | |
2826 | .rmdir = lo_rmdir, | |
2827 | .rename = lo_rename, | |
2828 | .forget = lo_forget, | |
2829 | .forget_multi = lo_forget_multi, | |
2830 | .getattr = lo_getattr, | |
2831 | .setattr = lo_setattr, | |
2832 | .readlink = lo_readlink, | |
2833 | .opendir = lo_opendir, | |
2834 | .readdir = lo_readdir, | |
2835 | .readdirplus = lo_readdirplus, | |
2836 | .releasedir = lo_releasedir, | |
2837 | .fsyncdir = lo_fsyncdir, | |
2838 | .create = lo_create, | |
0e81414c VG |
2839 | .getlk = lo_getlk, |
2840 | .setlk = lo_setlk, | |
7387863d DDAG |
2841 | .open = lo_open, |
2842 | .release = lo_release, | |
2843 | .flush = lo_flush, | |
2844 | .fsync = lo_fsync, | |
2845 | .read = lo_read, | |
2846 | .write_buf = lo_write_buf, | |
2847 | .statfs = lo_statfs, | |
2848 | .fallocate = lo_fallocate, | |
2849 | .flock = lo_flock, | |
2850 | .getxattr = lo_getxattr, | |
2851 | .listxattr = lo_listxattr, | |
2852 | .setxattr = lo_setxattr, | |
2853 | .removexattr = lo_removexattr, | |
7c6b6602 | 2854 | #ifdef HAVE_COPY_FILE_RANGE |
7387863d | 2855 | .copy_file_range = lo_copy_file_range, |
7c6b6602 | 2856 | #endif |
7387863d | 2857 | .lseek = lo_lseek, |
771b01eb | 2858 | .destroy = lo_destroy, |
7c6b6602 DDAG |
2859 | }; |
2860 | ||
45018fbb SH |
/*
 * Write the vhost-user.json backend program capabilities to stdout as a
 * small JSON object announcing the "fs" backend type.
 */
static void print_capabilities(void)
{
    fputs("{\n"
          " \"type\": \"fs\"\n"
          "}\n",
          stdout);
}
2868 | ||
66502bbc SH |
2869 | /* |
2870 | * Drop all Linux capabilities because the wait parent process only needs to | |
2871 | * sit in waitpid(2) and terminate. | |
2872 | */ | |
2873 | static void setup_wait_parent_capabilities(void) | |
2874 | { | |
2875 | capng_setpid(syscall(SYS_gettid)); | |
2876 | capng_clear(CAPNG_SELECT_BOTH); | |
2877 | capng_apply(CAPNG_SELECT_BOTH); | |
2878 | } | |
2879 | ||
d74830d1 | 2880 | /* |
8e1d4ef2 | 2881 | * Move to a new mount, net, and pid namespaces to isolate this process. |
d74830d1 | 2882 | */ |
8e1d4ef2 | 2883 | static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) |
d74830d1 | 2884 | { |
8e1d4ef2 SH |
2885 | pid_t child; |
2886 | ||
2887 | /* | |
2888 | * Create a new pid namespace for *child* processes. We'll have to | |
2889 | * fork in order to enter the new pid namespace. A new mount namespace | |
2890 | * is also needed so that we can remount /proc for the new pid | |
2891 | * namespace. | |
2892 | * | |
2893 | * Our UNIX domain sockets have been created. Now we can move to | |
2894 | * an empty network namespace to prevent TCP/IP and other network | |
2895 | * activity in case this process is compromised. | |
2896 | */ | |
2897 | if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) { | |
2898 | fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n"); | |
2899 | exit(1); | |
2900 | } | |
2901 | ||
2902 | child = fork(); | |
2903 | if (child < 0) { | |
2904 | fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n"); | |
2905 | exit(1); | |
2906 | } | |
2907 | if (child > 0) { | |
2908 | pid_t waited; | |
2909 | int wstatus; | |
2910 | ||
66502bbc SH |
2911 | setup_wait_parent_capabilities(); |
2912 | ||
8e1d4ef2 SH |
2913 | /* The parent waits for the child */ |
2914 | do { | |
2915 | waited = waitpid(child, &wstatus, 0); | |
2916 | } while (waited < 0 && errno == EINTR && !se->exited); | |
2917 | ||
2918 | /* We were terminated by a signal, see fuse_signals.c */ | |
2919 | if (se->exited) { | |
2920 | exit(0); | |
2921 | } | |
2922 | ||
2923 | if (WIFEXITED(wstatus)) { | |
2924 | exit(WEXITSTATUS(wstatus)); | |
2925 | } | |
2926 | ||
2927 | exit(1); | |
2928 | } | |
2929 | ||
2930 | /* Send us SIGTERM when the parent thread terminates, see prctl(2) */ | |
2931 | prctl(PR_SET_PDEATHSIG, SIGTERM); | |
2932 | ||
2933 | /* | |
2934 | * If the mounts have shared propagation then we want to opt out so our | |
2935 | * mount changes don't affect the parent mount namespace. | |
2936 | */ | |
2937 | if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { | |
2938 | fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n"); | |
2939 | exit(1); | |
2940 | } | |
2941 | ||
2942 | /* The child must remount /proc to use the new pid namespace */ | |
2943 | if (mount("proc", "/proc", "proc", | |
2944 | MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) { | |
2945 | fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n"); | |
2946 | exit(1); | |
2947 | } | |
2948 | ||
ebf10195 SH |
2949 | /* |
2950 | * We only need /proc/self/fd. Prevent ".." from accessing parent | |
2951 | * directories of /proc/self/fd by bind-mounting it over /proc. Since / was | |
2952 | * previously remounted with MS_REC | MS_SLAVE this mount change only | |
2953 | * affects our process. | |
2954 | */ | |
2955 | if (mount("/proc/self/fd", "/proc", NULL, MS_BIND, NULL) < 0) { | |
2956 | fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, MS_BIND): %m\n"); | |
397ae982 MS |
2957 | exit(1); |
2958 | } | |
2959 | ||
ebf10195 SH |
2960 | /* Get the /proc (actually /proc/self/fd, see above) file descriptor */ |
2961 | lo->proc_self_fd = open("/proc", O_PATH); | |
8e1d4ef2 | 2962 | if (lo->proc_self_fd == -1) { |
ebf10195 | 2963 | fuse_log(FUSE_LOG_ERR, "open(/proc, O_PATH): %m\n"); |
d74830d1 SH |
2964 | exit(1); |
2965 | } | |
2966 | } | |
2967 | ||
2405f3c0 DDAG |
2968 | /* |
2969 | * Capture the capability state, we'll need to restore this for individual | |
2970 | * threads later; see load_capng. | |
2971 | */ | |
2972 | static void setup_capng(void) | |
2973 | { | |
2974 | /* Note this accesses /proc so has to happen before the sandbox */ | |
2975 | if (capng_get_caps_process()) { | |
2976 | fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n"); | |
2977 | exit(1); | |
2978 | } | |
2979 | pthread_mutex_init(&cap.mutex, NULL); | |
2980 | pthread_mutex_lock(&cap.mutex); | |
2981 | cap.saved = capng_save_state(); | |
2982 | if (!cap.saved) { | |
2983 | fuse_log(FUSE_LOG_ERR, "capng_save_state\n"); | |
2984 | exit(1); | |
2985 | } | |
2986 | pthread_mutex_unlock(&cap.mutex); | |
2987 | } | |
2988 | ||
2989 | static void cleanup_capng(void) | |
2990 | { | |
2991 | free(cap.saved); | |
2992 | cap.saved = NULL; | |
2993 | pthread_mutex_destroy(&cap.mutex); | |
2994 | } | |
2995 | ||
2996 | ||
8e1d4ef2 SH |
2997 | /* |
2998 | * Make the source directory our root so symlinks cannot escape and no other | |
2999 | * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. | |
3000 | */ | |
3001 | static void setup_mounts(const char *source) | |
5baa3b8e SH |
3002 | { |
3003 | int oldroot; | |
3004 | int newroot; | |
3005 | ||
ace0829c | 3006 | if (mount(source, source, NULL, MS_BIND | MS_REC, NULL) < 0) { |
8e1d4ef2 SH |
3007 | fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); |
3008 | exit(1); | |
3009 | } | |
3010 | ||
3011 | /* This magic is based on lxc's lxc_pivot_root() */ | |
5baa3b8e SH |
3012 | oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); |
3013 | if (oldroot < 0) { | |
3014 | fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); | |
3015 | exit(1); | |
3016 | } | |
3017 | ||
3018 | newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC); | |
3019 | if (newroot < 0) { | |
3020 | fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source); | |
3021 | exit(1); | |
3022 | } | |
3023 | ||
3024 | if (fchdir(newroot) < 0) { | |
3025 | fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); | |
3026 | exit(1); | |
3027 | } | |
3028 | ||
3029 | if (syscall(__NR_pivot_root, ".", ".") < 0) { | |
3030 | fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n"); | |
3031 | exit(1); | |
3032 | } | |
3033 | ||
3034 | if (fchdir(oldroot) < 0) { | |
3035 | fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n"); | |
3036 | exit(1); | |
3037 | } | |
3038 | ||
3039 | if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) { | |
3040 | fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n"); | |
3041 | exit(1); | |
3042 | } | |
3043 | ||
3044 | if (umount2(".", MNT_DETACH) < 0) { | |
3045 | fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n"); | |
3046 | exit(1); | |
3047 | } | |
3048 | ||
3049 | if (fchdir(newroot) < 0) { | |
3050 | fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); | |
3051 | exit(1); | |
3052 | } | |
3053 | ||
3054 | close(newroot); | |
3055 | close(oldroot); | |
3056 | } | |
3057 | ||
a59feb48 SH |
3058 | /* |
3059 | * Only keep whitelisted capabilities that are needed for file system operation | |
3005c099 | 3060 | * The (possibly NULL) modcaps_in string passed in is free'd before exit. |
a59feb48 | 3061 | */ |
3005c099 | 3062 | static void setup_capabilities(char *modcaps_in) |
a59feb48 | 3063 | { |
3005c099 | 3064 | char *modcaps = modcaps_in; |
a59feb48 SH |
3065 | pthread_mutex_lock(&cap.mutex); |
3066 | capng_restore_state(&cap.saved); | |
3067 | ||
3068 | /* | |
3069 | * Whitelist file system-related capabilities that are needed for a file | |
3070 | * server to act like root. Drop everything else like networking and | |
3071 | * sysadmin capabilities. | |
3072 | * | |
3073 | * Exclusions: | |
3074 | * 1. CAP_LINUX_IMMUTABLE is not included because it's only used via ioctl | |
3075 | * and we don't support that. | |
3076 | * 2. CAP_MAC_OVERRIDE is not included because it only seems to be | |
3077 | * used by the Smack LSM. Omit it until there is demand for it. | |
3078 | */ | |
3079 | capng_setpid(syscall(SYS_gettid)); | |
3080 | capng_clear(CAPNG_SELECT_BOTH); | |
55b22a60 | 3081 | if (capng_updatev(CAPNG_ADD, CAPNG_PERMITTED | CAPNG_EFFECTIVE, |
a59feb48 SH |
3082 | CAP_CHOWN, |
3083 | CAP_DAC_OVERRIDE, | |
a59feb48 SH |
3084 | CAP_FOWNER, |
3085 | CAP_FSETID, | |
3086 | CAP_SETGID, | |
3087 | CAP_SETUID, | |
3088 | CAP_MKNOD, | |
b1288dfa | 3089 | CAP_SETFCAP, |
55b22a60 DDAG |
3090 | -1)) { |
3091 | fuse_log(FUSE_LOG_ERR, "%s: capng_updatev failed\n", __func__); | |
3092 | exit(1); | |
3093 | } | |
b1288dfa | 3094 | |
3005c099 DDAG |
3095 | /* |
3096 | * The modcaps option is a colon separated list of caps, | |
3097 | * each preceded by either + or -. | |
3098 | */ | |
3099 | while (modcaps) { | |
3100 | capng_act_t action; | |
3101 | int cap; | |
3102 | ||
3103 | char *next = strchr(modcaps, ':'); | |
3104 | if (next) { | |
3105 | *next = '\0'; | |
3106 | next++; | |
3107 | } | |
3108 | ||
3109 | switch (modcaps[0]) { | |
3110 | case '+': | |
3111 | action = CAPNG_ADD; | |
3112 | break; | |
3113 | ||
3114 | case '-': | |
3115 | action = CAPNG_DROP; | |
3116 | break; | |
3117 | ||
3118 | default: | |
3119 | fuse_log(FUSE_LOG_ERR, | |
3120 | "%s: Expecting '+'/'-' in modcaps but found '%c'\n", | |
3121 | __func__, modcaps[0]); | |
3122 | exit(1); | |
3123 | } | |
3124 | cap = capng_name_to_capability(modcaps + 1); | |
3125 | if (cap < 0) { | |
3126 | fuse_log(FUSE_LOG_ERR, "%s: Unknown capability '%s'\n", __func__, | |
3127 | modcaps); | |
3128 | exit(1); | |
3129 | } | |
3130 | if (capng_update(action, CAPNG_PERMITTED | CAPNG_EFFECTIVE, cap)) { | |
3131 | fuse_log(FUSE_LOG_ERR, "%s: capng_update failed for '%s'\n", | |
3132 | __func__, modcaps); | |
3133 | exit(1); | |
3134 | } | |
3135 | ||
3136 | modcaps = next; | |
3137 | } | |
3138 | g_free(modcaps_in); | |
3139 | ||
55b22a60 DDAG |
3140 | if (capng_apply(CAPNG_SELECT_BOTH)) { |
3141 | fuse_log(FUSE_LOG_ERR, "%s: capng_apply failed\n", __func__); | |
3142 | exit(1); | |
3143 | } | |
a59feb48 SH |
3144 | |
3145 | cap.saved = capng_save_state(); | |
55b22a60 DDAG |
3146 | if (!cap.saved) { |
3147 | fuse_log(FUSE_LOG_ERR, "%s: capng_save_state failed\n", __func__); | |
3148 | exit(1); | |
3149 | } | |
a59feb48 SH |
3150 | pthread_mutex_unlock(&cap.mutex); |
3151 | } | |
3152 | ||
06844584 SH |
3153 | /* |
3154 | * Use chroot as a weaker sandbox for environments where the process is | |
3155 | * launched without CAP_SYS_ADMIN. | |
3156 | */ | |
3157 | static void setup_chroot(struct lo_data *lo) | |
3158 | { | |
3159 | lo->proc_self_fd = open("/proc/self/fd", O_PATH); | |
3160 | if (lo->proc_self_fd == -1) { | |
3161 | fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/fd\", O_PATH): %m\n"); | |
3162 | exit(1); | |
3163 | } | |
3164 | ||
3165 | /* | |
3166 | * Make the shared directory the file system root so that FUSE_OPEN | |
3167 | * (lo_open()) cannot escape the shared directory by opening a symlink. | |
3168 | * | |
3169 | * The chroot(2) syscall is later disabled by seccomp and the | |
3170 | * CAP_SYS_CHROOT capability is dropped so that tampering with the chroot | |
3171 | * is not possible. | |
3172 | * | |
3173 | * However, it's still possible to escape the chroot via lo->proc_self_fd | |
3174 | * but that requires first gaining control of the process. | |
3175 | */ | |
3176 | if (chroot(lo->source) != 0) { | |
3177 | fuse_log(FUSE_LOG_ERR, "chroot(\"%s\"): %m\n", lo->source); | |
3178 | exit(1); | |
3179 | } | |
3180 | ||
3181 | /* Move into the chroot */ | |
3182 | if (chdir("/") != 0) { | |
3183 | fuse_log(FUSE_LOG_ERR, "chdir(\"/\"): %m\n"); | |
3184 | exit(1); | |
3185 | } | |
3186 | } | |
3187 | ||
5baa3b8e SH |
3188 | /* |
3189 | * Lock down this process to prevent access to other processes or files outside | |
3190 | * source directory. This reduces the impact of arbitrary code execution bugs. | |
3191 | */ | |
f185621d SH |
3192 | static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, |
3193 | bool enable_syslog) | |
5baa3b8e | 3194 | { |
06844584 SH |
3195 | if (lo->sandbox == SANDBOX_NAMESPACE) { |
3196 | setup_namespaces(lo, se); | |
3197 | setup_mounts(lo->source); | |
3198 | } else { | |
3199 | setup_chroot(lo); | |
3200 | } | |
3201 | ||
f185621d | 3202 | setup_seccomp(enable_syslog); |
3005c099 | 3203 | setup_capabilities(g_strdup(lo->modcaps)); |
5baa3b8e SH |
3204 | } |
3205 | ||
6dbb7168 SH |
3206 | /* Set the maximum number of open file descriptors */ |
3207 | static void setup_nofile_rlimit(unsigned long rlimit_nofile) | |
01a6dc95 | 3208 | { |
6dbb7168 SH |
3209 | struct rlimit rlim = { |
3210 | .rlim_cur = rlimit_nofile, | |
3211 | .rlim_max = rlimit_nofile, | |
3212 | }; | |
01a6dc95 | 3213 | |
6dbb7168 | 3214 | if (rlimit_nofile == 0) { |
01a6dc95 SH |
3215 | return; /* nothing to do */ |
3216 | } | |
3217 | ||
01a6dc95 SH |
3218 | if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { |
3219 | /* Ignore SELinux denials */ | |
3220 | if (errno == EPERM) { | |
3221 | return; | |
3222 | } | |
3223 | ||
3224 | fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n"); | |
3225 | exit(1); | |
3226 | } | |
3227 | } | |
3228 | ||
f185621d SH |
3229 | static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) |
3230 | { | |
36f38469 MM |
3231 | g_autofree char *localfmt = NULL; |
3232 | ||
d240314a EG |
3233 | if (current_log_level < level) { |
3234 | return; | |
3235 | } | |
3236 | ||
36f38469 | 3237 | if (current_log_level == FUSE_LOG_DEBUG) { |
50fb955a MM |
3238 | if (!use_syslog) { |
3239 | localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s", | |
3240 | get_clock(), syscall(__NR_gettid), fmt); | |
3241 | } else { | |
3242 | localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), | |
3243 | fmt); | |
3244 | } | |
36f38469 MM |
3245 | fmt = localfmt; |
3246 | } | |
3247 | ||
f185621d SH |
3248 | if (use_syslog) { |
3249 | int priority = LOG_ERR; | |
3250 | switch (level) { | |
3251 | case FUSE_LOG_EMERG: | |
3252 | priority = LOG_EMERG; | |
3253 | break; | |
3254 | case FUSE_LOG_ALERT: | |
3255 | priority = LOG_ALERT; | |
3256 | break; | |
3257 | case FUSE_LOG_CRIT: | |
3258 | priority = LOG_CRIT; | |
3259 | break; | |
3260 | case FUSE_LOG_ERR: | |
3261 | priority = LOG_ERR; | |
3262 | break; | |
3263 | case FUSE_LOG_WARNING: | |
3264 | priority = LOG_WARNING; | |
3265 | break; | |
3266 | case FUSE_LOG_NOTICE: | |
3267 | priority = LOG_NOTICE; | |
3268 | break; | |
3269 | case FUSE_LOG_INFO: | |
3270 | priority = LOG_INFO; | |
3271 | break; | |
3272 | case FUSE_LOG_DEBUG: | |
3273 | priority = LOG_DEBUG; | |
3274 | break; | |
3275 | } | |
3276 | vsyslog(priority, fmt, ap); | |
3277 | } else { | |
3278 | vfprintf(stderr, fmt, ap); | |
3279 | } | |
3280 | } | |
3281 | ||
3ca8a2b1 MS |
3282 | static void setup_root(struct lo_data *lo, struct lo_inode *root) |
3283 | { | |
3284 | int fd, res; | |
3285 | struct stat stat; | |
3286 | ||
3287 | fd = open("/", O_PATH); | |
3288 | if (fd == -1) { | |
3289 | fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source); | |
3290 | exit(1); | |
3291 | } | |
3292 | ||
3293 | res = fstatat(fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); | |
3294 | if (res == -1) { | |
3295 | fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); | |
3296 | exit(1); | |
3297 | } | |
3298 | ||
bdfd6678 | 3299 | root->filetype = S_IFDIR; |
3ca8a2b1 | 3300 | root->fd = fd; |
bfc50a6e MS |
3301 | root->key.ino = stat.st_ino; |
3302 | root->key.dev = stat.st_dev; | |
1222f015 | 3303 | root->nlookup = 2; |
c241aa94 | 3304 | g_atomic_int_set(&root->refcount, 2); |
3ca8a2b1 MS |
3305 | } |
3306 | ||
bfc50a6e MS |
3307 | static guint lo_key_hash(gconstpointer key) |
3308 | { | |
3309 | const struct lo_key *lkey = key; | |
3310 | ||
3311 | return (guint)lkey->ino + (guint)lkey->dev; | |
3312 | } | |
3313 | ||
3314 | static gboolean lo_key_equal(gconstpointer a, gconstpointer b) | |
3315 | { | |
3316 | const struct lo_key *la = a; | |
3317 | const struct lo_key *lb = b; | |
3318 | ||
3319 | return la->ino == lb->ino && la->dev == lb->dev; | |
3320 | } | |
3321 | ||
18a69cbb LB |
3322 | static void fuse_lo_data_cleanup(struct lo_data *lo) |
3323 | { | |
3324 | if (lo->inodes) { | |
3325 | g_hash_table_destroy(lo->inodes); | |
3326 | } | |
3327 | lo_map_destroy(&lo->fd_map); | |
3328 | lo_map_destroy(&lo->dirp_map); | |
3329 | lo_map_destroy(&lo->ino_map); | |
3330 | ||
3331 | if (lo->proc_self_fd >= 0) { | |
3332 | close(lo->proc_self_fd); | |
3333 | } | |
3334 | ||
3335 | if (lo->root.fd >= 0) { | |
3336 | close(lo->root.fd); | |
3337 | } | |
3338 | ||
6084633d DDAG |
3339 | free(lo->xattrmap); |
3340 | free_xattrmap(lo); | |
18a69cbb LB |
3341 | free(lo->source); |
3342 | } | |
3343 | ||
7c6b6602 DDAG |
3344 | int main(int argc, char *argv[]) |
3345 | { | |
7387863d DDAG |
3346 | struct fuse_args args = FUSE_ARGS_INIT(argc, argv); |
3347 | struct fuse_session *se; | |
3348 | struct fuse_cmdline_opts opts; | |
9f59d175 | 3349 | struct lo_data lo = { |
06844584 | 3350 | .sandbox = SANDBOX_NAMESPACE, |
9f59d175 SH |
3351 | .debug = 0, |
3352 | .writeback = 0, | |
88fc1079 | 3353 | .posix_lock = 0, |
e12a0eda | 3354 | .allow_direct_io = 0, |
9f59d175 SH |
3355 | .proc_self_fd = -1, |
3356 | }; | |
92fb57b8 | 3357 | struct lo_map_elem *root_elem; |
7387863d DDAG |
3358 | int ret = -1; |
3359 | ||
3360 | /* Don't mask creation mode, kernel already did that */ | |
3361 | umask(0); | |
3362 | ||
ff3995e2 DDAG |
3363 | qemu_init_exec_dir(argv[0]); |
3364 | ||
7387863d | 3365 | pthread_mutex_init(&lo.mutex, NULL); |
bfc50a6e | 3366 | lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); |
7387863d | 3367 | lo.root.fd = -1; |
92fb57b8 | 3368 | lo.root.fuse_ino = FUSE_ROOT_ID; |
230e777b | 3369 | lo.cache = CACHE_AUTO; |
7387863d | 3370 | |
92fb57b8 SH |
3371 | /* |
3372 | * Set up the ino map like this: | |
3373 | * [0] Reserved (will not be used) | |
3374 | * [1] Root inode | |
3375 | */ | |
3376 | lo_map_init(&lo.ino_map); | |
3377 | lo_map_reserve(&lo.ino_map, 0)->in_use = false; | |
3378 | root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); | |
3379 | root_elem->inode = &lo.root; | |
3380 | ||
b39bce12 | 3381 | lo_map_init(&lo.dirp_map); |
73b4d19d | 3382 | lo_map_init(&lo.fd_map); |
b39bce12 | 3383 | |
7387863d | 3384 | if (fuse_parse_cmdline(&args, &opts) != 0) { |
c6de8046 | 3385 | goto err_out1; |
7387863d | 3386 | } |
f185621d SH |
3387 | fuse_set_log_func(log_func); |
3388 | use_syslog = opts.syslog; | |
3389 | if (use_syslog) { | |
3390 | openlog("virtiofsd", LOG_PID, LOG_DAEMON); | |
3391 | } | |
c6de8046 | 3392 | |
7387863d | 3393 | if (opts.show_help) { |
67aab022 | 3394 | printf("usage: %s [options]\n\n", argv[0]); |
7387863d | 3395 | fuse_cmdline_help(); |
4ff075f7 | 3396 | printf(" -o source=PATH shared directory tree\n"); |
7387863d DDAG |
3397 | fuse_lowlevel_help(); |
3398 | ret = 0; | |
3399 | goto err_out1; | |
3400 | } else if (opts.show_version) { | |
3401 | fuse_lowlevel_version(); | |
3402 | ret = 0; | |
3403 | goto err_out1; | |
45018fbb SH |
3404 | } else if (opts.print_capabilities) { |
3405 | print_capabilities(); | |
3406 | ret = 0; | |
3407 | goto err_out1; | |
7387863d DDAG |
3408 | } |
3409 | ||
7387863d | 3410 | if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { |
c6de8046 | 3411 | goto err_out1; |
7387863d DDAG |
3412 | } |
3413 | ||
d240314a EG |
3414 | if (opts.log_level != 0) { |
3415 | current_log_level = opts.log_level; | |
800ad114 MT |
3416 | } else { |
3417 | /* default log level is INFO */ | |
3418 | current_log_level = FUSE_LOG_INFO; | |
d240314a | 3419 | } |
7387863d | 3420 | lo.debug = opts.debug; |
d240314a EG |
3421 | if (lo.debug) { |
3422 | current_log_level = FUSE_LOG_DEBUG; | |
3423 | } | |
7387863d DDAG |
3424 | if (lo.source) { |
3425 | struct stat stat; | |
3426 | int res; | |
3427 | ||
3428 | res = lstat(lo.source, &stat); | |
3429 | if (res == -1) { | |
3430 | fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", | |
3431 | lo.source); | |
3432 | exit(1); | |
3433 | } | |
3434 | if (!S_ISDIR(stat.st_mode)) { | |
3435 | fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); | |
3436 | exit(1); | |
3437 | } | |
7387863d | 3438 | } else { |
eb68a33b | 3439 | lo.source = strdup("/"); |
7387863d | 3440 | } |
6084633d DDAG |
3441 | |
3442 | if (lo.xattrmap) { | |
3443 | parse_xattrmap(&lo); | |
3444 | } | |
3445 | ||
7387863d DDAG |
3446 | if (!lo.timeout_set) { |
3447 | switch (lo.cache) { | |
230e777b | 3448 | case CACHE_NONE: |
7387863d DDAG |
3449 | lo.timeout = 0.0; |
3450 | break; | |
3451 | ||
230e777b | 3452 | case CACHE_AUTO: |
7387863d DDAG |
3453 | lo.timeout = 1.0; |
3454 | break; | |
3455 | ||
3456 | case CACHE_ALWAYS: | |
3457 | lo.timeout = 86400.0; | |
3458 | break; | |
3459 | } | |
3460 | } else if (lo.timeout < 0) { | |
3461 | fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout); | |
3462 | exit(1); | |
3463 | } | |
3464 | ||
7387863d DDAG |
3465 | se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); |
3466 | if (se == NULL) { | |
3467 | goto err_out1; | |
3468 | } | |
3469 | ||
3470 | if (fuse_set_signal_handlers(se) != 0) { | |
3471 | goto err_out2; | |
3472 | } | |
3473 | ||
67aab022 | 3474 | if (fuse_session_mount(se) != 0) { |
7387863d DDAG |
3475 | goto err_out3; |
3476 | } | |
3477 | ||
3478 | fuse_daemonize(opts.foreground); | |
3479 | ||
6dbb7168 | 3480 | setup_nofile_rlimit(opts.rlimit_nofile); |
01a6dc95 | 3481 | |
2405f3c0 DDAG |
3482 | /* Must be before sandbox since it wants /proc */ |
3483 | setup_capng(); | |
3484 | ||
f185621d | 3485 | setup_sandbox(&lo, se, opts.syslog); |
5baa3b8e | 3486 | |
3ca8a2b1 | 3487 | setup_root(&lo, &lo.root); |
7387863d | 3488 | /* Block until ctrl+c or fusermount -u */ |
f6f3573c | 3489 | ret = virtio_loop(se); |
7387863d DDAG |
3490 | |
3491 | fuse_session_unmount(se); | |
2405f3c0 | 3492 | cleanup_capng(); |
7c6b6602 | 3493 | err_out3: |
7387863d | 3494 | fuse_remove_signal_handlers(se); |
7c6b6602 | 3495 | err_out2: |
7387863d | 3496 | fuse_session_destroy(se); |
7c6b6602 | 3497 | err_out1: |
7387863d | 3498 | fuse_opt_free_args(&args); |
7c6b6602 | 3499 | |
18a69cbb | 3500 | fuse_lo_data_cleanup(&lo); |
eb68a33b | 3501 | |
7387863d | 3502 | return ret ? 1 : 0; |
7c6b6602 | 3503 | } |