]>
Commit | Line | Data |
---|---|---|
7c6b6602 | 1 | /* |
7387863d DDAG |
2 | * FUSE: Filesystem in Userspace |
3 | * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu> | |
4 | * | |
5 | * This program can be distributed under the terms of the GNU GPLv2. | |
6 | * See the file COPYING. | |
7 | */ | |
7c6b6602 | 8 | |
7387863d | 9 | /* |
7c6b6602 DDAG |
10 | * |
11 | * This file system mirrors the existing file system hierarchy of the | |
12 | * system, starting at the root file system. This is implemented by | |
13 | * just "passing through" all requests to the corresponding user-space | |
14 | * libc functions. In contrast to passthrough.c and passthrough_fh.c, | |
15 | * this implementation uses the low-level API. Its performance should | |
16 | * be the least bad among the three, but many operations are not | |
17 | * implemented. In particular, it is not possible to remove files (or | |
18 | * directories) because the code necessary to defer actual removal | |
19 | * until the file is not opened anymore would make the example much | |
20 | * more complicated. | |
21 | * | |
22 | * When writeback caching is enabled (-o writeback mount option), it | |
23 | * is only possible to write to files for which the mounting user has | |
24 | * read permissions. This is because the writeback cache requires the | |
25 | * kernel to be able to issue read requests for all files (which the | |
26 | * passthrough filesystem cannot satisfy if it can't read the file in | |
27 | * the underlying filesystem). | |
28 | * | |
29 | * Compile with: | |
30 | * | |
7387863d DDAG |
31 | * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o |
32 | * passthrough_ll | |
7c6b6602 DDAG |
33 | * |
34 | * ## Source code ## | |
35 | * \include passthrough_ll.c | |
36 | */ | |
37 | ||
09863ebc | 38 | #include "qemu/osdep.h" |
50fb955a | 39 | #include "qemu/timer.h" |
6d118c43 | 40 | #include "qemu-version.h" |
49f95221 | 41 | #include "qemu/help-texts.h" |
f6f3573c | 42 | #include "fuse_virtio.h" |
d240314a | 43 | #include "fuse_log.h" |
09863ebc | 44 | #include "fuse_lowlevel.h" |
9d82f6a3 | 45 | #include "standard-headers/linux/fuse.h" |
2405f3c0 | 46 | #include <cap-ng.h> |
7387863d | 47 | #include <dirent.h> |
7c6b6602 DDAG |
48 | #include <pthread.h> |
49 | #include <sys/file.h> | |
5baa3b8e | 50 | #include <sys/mount.h> |
8e1d4ef2 | 51 | #include <sys/prctl.h> |
01a6dc95 | 52 | #include <sys/resource.h> |
929cfb7a | 53 | #include <sys/syscall.h> |
8e1d4ef2 | 54 | #include <sys/wait.h> |
7c6b6602 | 55 | #include <sys/xattr.h> |
f185621d | 56 | #include <syslog.h> |
449e8171 | 57 | #include <grp.h> |
7c6b6602 | 58 | |
6084633d | 59 | #include "qemu/cutils.h" |
7c6b6602 | 60 | #include "passthrough_helpers.h" |
3f99cf57 | 61 | #include "passthrough_seccomp.h" |
7c6b6602 | 62 | |
0e81414c VG |
/*
 * Keep track of inode posix locks for each owner.
 *
 * One entry pairs a lock owner with the file descriptor on which that
 * owner's locks are taken.
 */
struct lo_inode_plock {
    uint64_t lock_owner; /* identifier of the owner this entry belongs to */
    int fd; /* fd for OFD locks */
};
68 | ||
25c13572 SH |
/*
 * One slot of a struct lo_map.
 *
 * While the slot is in use, the union holds the mapped object (which member
 * is valid depends on which map the slot belongs to). While the slot is
 * free, the union holds the index of the next free slot.
 */
struct lo_map_elem {
    union {
        struct lo_inode *inode;
        struct lo_dirp *dirp;
        int fd;
        ssize_t freelist; /* index of next free slot, -1 ends the list */
    };
    bool in_use; /* true while the slot holds a mapped object */
};
78 | ||
/*
 * Maps FUSE fh or ino values to internal objects.
 *
 * The key is the index of an element in the elems array; unused elements
 * are chained together through their freelist member.
 */
struct lo_map {
    struct lo_map_elem *elems; /* array of nelems slots (NULL when empty) */
    size_t nelems;             /* number of slots in elems */
    ssize_t freelist;          /* index of first free slot, -1 if none */
};
85 | ||
bfc50a6e MS |
/*
 * Identity of a file in the underlying file system: inode number, device
 * and mount ID.
 * NOTE(review): presumably the lookup key for lo_data.inodes -- confirm
 * against lo_find().
 */
struct lo_key {
    ino_t ino;
    dev_t dev;
    uint64_t mnt_id;
};
91 | ||
/* In-memory state kept for every file the client currently knows about. */
struct lo_inode {
    /* File descriptor for the file; closed when refcount drops to zero. */
    int fd;

    /*
     * Atomic reference count for this object. The nlookup field holds a
     * reference and releases it when nlookup reaches 0.
     */
    gint refcount;

    struct lo_key key;

    /*
     * This counter keeps the inode alive during the FUSE session.
     * Incremented when the FUSE inode number is sent in a reply
     * (FUSE_LOOKUP, FUSE_READDIRPLUS, etc). Decremented when an inode is
     * released by a FUSE_FORGET request.
     *
     * Note that this value is untrusted because the client can manipulate
     * it arbitrarily using FUSE_FORGET requests.
     *
     * Protected by lo->mutex.
     */
    uint64_t nlookup;

    /* NOTE(review): presumably this inode's key in lo_data.ino_map - confirm */
    fuse_ino_t fuse_ino;
    pthread_mutex_t plock_mutex;
    GHashTable *posix_locks; /* protected by lo_inode->plock_mutex */

    /* File type bits; tested with S_ISREG()/S_ISDIR() before opening. */
    mode_t filetype;
};
122 | ||
929cfb7a VG |
/*
 * A set of process credentials: effective uid, effective gid and umask.
 * NOTE(review): presumably used to save and restore credentials around
 * operations done on behalf of the client -- confirm against the users.
 */
struct lo_cred {
    uid_t euid;
    gid_t egid;
    mode_t umask;
};
128 | ||
/* Values of lo_data.cache, selected by the cache=none|auto|always options. */
enum {
    CACHE_NONE,
    CACHE_AUTO,
    CACHE_ALWAYS,
};
134 | ||
06844584 SH |
/* Values of lo_data.sandbox, selected by the sandbox=namespace|chroot options. */
enum {
    SANDBOX_NAMESPACE,
    SANDBOX_CHROOT,
};
139 | ||
6084633d DDAG |
/*
 * One entry of lo_data.xattr_map_list.
 * NOTE(review): the exact meaning of key/prepend/flags is defined by the
 * xattrmap parser and mapping helpers, which are outside this view.
 */
typedef struct xattr_map_entry {
    char *key;
    char *prepend;
    unsigned int flags;
} XattrMapEntry;
145 | ||
/*
 * Global state of the passthrough file system. It is reachable from every
 * request through fuse_req_userdata(). The plain int/char* option fields
 * are filled in from the command line via the lo_opts table.
 */
struct lo_data {
    pthread_mutex_t mutex;
    int sandbox; /* SANDBOX_NAMESPACE or SANDBOX_CHROOT */
    int debug;
    int writeback;
    int flock;
    int posix_lock;
    int xattr;
    char *xattrmap;
    /* Remapped name of security.capability; NULL if it is not remapped */
    char *xattr_security_capability;
    char *source;
    char *modcaps;
    double timeout; /* passed to fuse_reply_attr() as the attribute timeout */
    int cache; /* CACHE_NONE, CACHE_AUTO or CACHE_ALWAYS */
    int timeout_set; /* set when a timeout= option was given */
    int readdirplus_set;
    int readdirplus_clear;
    int allow_direct_io;
    int announce_submounts;
    bool use_statx;
    struct lo_inode root;
    GHashTable *inodes; /* protected by lo->mutex */
    struct lo_map ino_map; /* protected by lo->mutex */
    struct lo_map dirp_map; /* protected by lo->mutex */
    struct lo_map fd_map; /* protected by lo->mutex */
    XattrMapEntry *xattr_map_list;
    size_t xattr_map_nentries;

    /* An O_PATH file descriptor to /proc/self/fd/ */
    int proc_self_fd;
    /* An O_PATH file descriptor to /proc/self/task/ */
    int proc_self_task;
    /* user_* is what the user asked for; the other is the negotiated state */
    int user_killpriv_v2, killpriv_v2;
    /* If set, virtiofsd is responsible for setting umask during creation */
    bool change_umask;
    /* user_* is what the user asked for; the other is the negotiated state */
    int user_posix_acl, posix_acl;
    /* Keeps track if /proc/<pid>/attr/fscreate should be used or not */
    bool use_fscreate;
    int user_security_label;
};
186 | ||
/*
 * Command line option table. Each entry names an option template, the
 * struct lo_data field it targets and the value associated with the option;
 * templates containing a conversion (%s, %lf) store the parsed argument in
 * the field instead.
 *
 * "timeout=" appears twice on purpose: once to parse the value and once to
 * record in timeout_set that the option was given at all.
 */
static const struct fuse_opt lo_opts[] = {
    { "sandbox=namespace",
      offsetof(struct lo_data, sandbox),
      SANDBOX_NAMESPACE },
    { "sandbox=chroot",
      offsetof(struct lo_data, sandbox),
      SANDBOX_CHROOT },
    { "writeback", offsetof(struct lo_data, writeback), 1 },
    { "no_writeback", offsetof(struct lo_data, writeback), 0 },
    { "source=%s", offsetof(struct lo_data, source), 0 },
    { "flock", offsetof(struct lo_data, flock), 1 },
    { "no_flock", offsetof(struct lo_data, flock), 0 },
    { "posix_lock", offsetof(struct lo_data, posix_lock), 1 },
    { "no_posix_lock", offsetof(struct lo_data, posix_lock), 0 },
    { "xattr", offsetof(struct lo_data, xattr), 1 },
    { "no_xattr", offsetof(struct lo_data, xattr), 0 },
    { "xattrmap=%s", offsetof(struct lo_data, xattrmap), 0 },
    { "modcaps=%s", offsetof(struct lo_data, modcaps), 0 },
    { "timeout=%lf", offsetof(struct lo_data, timeout), 0 },
    { "timeout=", offsetof(struct lo_data, timeout_set), 1 },
    { "cache=none", offsetof(struct lo_data, cache), CACHE_NONE },
    { "cache=auto", offsetof(struct lo_data, cache), CACHE_AUTO },
    { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
    { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
    { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
    { "allow_direct_io", offsetof(struct lo_data, allow_direct_io), 1 },
    { "no_allow_direct_io", offsetof(struct lo_data, allow_direct_io), 0 },
    { "announce_submounts", offsetof(struct lo_data, announce_submounts), 1 },
    { "killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 1 },
    { "no_killpriv_v2", offsetof(struct lo_data, user_killpriv_v2), 0 },
    { "posix_acl", offsetof(struct lo_data, user_posix_acl), 1 },
    { "no_posix_acl", offsetof(struct lo_data, user_posix_acl), 0 },
    { "security_label", offsetof(struct lo_data, user_security_label), 1 },
    { "no_security_label", offsetof(struct lo_data, user_security_label), 0 },
    FUSE_OPT_END
};
/* Logging configuration shared by the whole file. */
static bool use_syslog = false;
static int current_log_level;
static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
                                 uint64_t n);

/*
 * Saved libcap-ng state. Each thread restores its own capng state from
 * this copy in load_capng(); the mutex serialises access to the copy.
 */
static struct {
    pthread_mutex_t mutex;
    void *saved;
} cap;
/* That we loaded cap-ng in the current thread from the saved */
static __thread bool cap_loaded = 0;

static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st,
                                uint64_t mnt_id);
static int xattr_map_client(const struct lo_data *lo, const char *client_name,
                            char **out_name);
5fe319a7 | 239 | |
0c3f81e1 VG |
/*
 * Change the working directory to `fd` and assert that this succeeded.
 * The fchdir() call sits outside the assert() expression, so it is still
 * executed when assertions are compiled out.
 */
#define FCHDIR_NOFAIL(fd) do {                         \
        int fchdir_res = fchdir(fd);                   \
        assert(fchdir_res == 0);                       \
    } while (0)
244 | ||
/* Is `name` exactly "." or ".."? */
static bool is_dot_or_dotdot(const char *name)
{
    if (name[0] != '.') {
        return false;
    }
    if (name[1] == '\0') {
        return true;    /* "." */
    }
    return name[1] == '.' && name[2] == '\0';    /* ".." */
}
250 | ||
/*
 * Is `path` a single path component that is not "." or ".."?
 *
 * A name containing '/' spans more than one component and is rejected.
 */
static bool is_safe_path_component(const char *path)
{
    return strchr(path, '/') == NULL && !is_dot_or_dotdot(path);
}
5fe319a7 | 260 | |
28d1ad0e GK |
/* Is `name` the empty string? `name` must not be NULL. */
static bool is_empty(const char *name)
{
    return *name == '\0';
}
265 | ||
7c6b6602 DDAG |
266 | static struct lo_data *lo_data(fuse_req_t req) |
267 | { | |
7387863d | 268 | return (struct lo_data *)fuse_req_userdata(req); |
7c6b6602 DDAG |
269 | } |
270 | ||
cb282e55 VG |
271 | /* |
272 | * Tries to figure out if /proc/<pid>/attr/fscreate is usable or not. With | |
273 | * selinux=0, read from fscreate returns -EINVAL. | |
274 | * | |
275 | * TODO: Link with libselinux and use is_selinux_enabled() instead down | |
276 | * the line. It probably will be more reliable indicator. | |
277 | */ | |
278 | static bool is_fscreate_usable(struct lo_data *lo) | |
279 | { | |
280 | char procname[64]; | |
281 | int fscreate_fd; | |
282 | size_t bytes_read; | |
283 | ||
284 | sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid)); | |
285 | fscreate_fd = openat(lo->proc_self_task, procname, O_RDWR); | |
286 | if (fscreate_fd == -1) { | |
287 | return false; | |
288 | } | |
289 | ||
290 | bytes_read = read(fscreate_fd, procname, 64); | |
291 | close(fscreate_fd); | |
292 | if (bytes_read == -1) { | |
293 | return false; | |
294 | } | |
295 | return true; | |
296 | } | |
297 | ||
298 | /* Helpers to set/reset fscreate */ | |
cb282e55 VG |
299 | static int open_set_proc_fscreate(struct lo_data *lo, const void *ctx, |
300 | size_t ctxlen, int *fd) | |
301 | { | |
302 | char procname[64]; | |
303 | int fscreate_fd, err = 0; | |
304 | size_t written; | |
305 | ||
306 | sprintf(procname, "%ld/attr/fscreate", syscall(SYS_gettid)); | |
307 | fscreate_fd = openat(lo->proc_self_task, procname, O_WRONLY); | |
308 | err = fscreate_fd == -1 ? errno : 0; | |
309 | if (err) { | |
310 | return err; | |
311 | } | |
312 | ||
313 | written = write(fscreate_fd, ctx, ctxlen); | |
314 | err = written == -1 ? errno : 0; | |
315 | if (err) { | |
316 | goto out; | |
317 | } | |
318 | ||
319 | *fd = fscreate_fd; | |
320 | return 0; | |
321 | out: | |
322 | close(fscreate_fd); | |
323 | return err; | |
324 | } | |
325 | ||
cb282e55 VG |
326 | static void close_reset_proc_fscreate(int fd) |
327 | { | |
328 | if ((write(fd, NULL, 0)) == -1) { | |
329 | fuse_log(FUSE_LOG_WARNING, "Failed to reset fscreate. err=%d\n", errno); | |
330 | } | |
331 | close(fd); | |
332 | return; | |
333 | } | |
334 | ||
2405f3c0 DDAG |
335 | /* |
336 | * Load capng's state from our saved state if the current thread | |
337 | * hadn't previously been loaded. | |
338 | * returns 0 on success | |
339 | */ | |
340 | static int load_capng(void) | |
341 | { | |
342 | if (!cap_loaded) { | |
343 | pthread_mutex_lock(&cap.mutex); | |
344 | capng_restore_state(&cap.saved); | |
345 | /* | |
346 | * restore_state free's the saved copy | |
347 | * so make another. | |
348 | */ | |
349 | cap.saved = capng_save_state(); | |
350 | if (!cap.saved) { | |
68639111 | 351 | pthread_mutex_unlock(&cap.mutex); |
2405f3c0 DDAG |
352 | fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n"); |
353 | return -EINVAL; | |
354 | } | |
355 | pthread_mutex_unlock(&cap.mutex); | |
356 | ||
357 | /* | |
358 | * We want to use the loaded state for our pid, | |
359 | * not the original | |
360 | */ | |
361 | capng_setpid(syscall(SYS_gettid)); | |
362 | cap_loaded = true; | |
363 | } | |
364 | return 0; | |
365 | } | |
366 | ||
ee884652 VG |
367 | /* |
368 | * Helpers for dropping and regaining effective capabilities. Returns 0 | |
369 | * on success, error otherwise | |
370 | */ | |
371 | static int drop_effective_cap(const char *cap_name, bool *cap_dropped) | |
372 | { | |
373 | int cap, ret; | |
374 | ||
375 | cap = capng_name_to_capability(cap_name); | |
376 | if (cap < 0) { | |
377 | ret = errno; | |
378 | fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", | |
379 | cap_name, strerror(errno)); | |
380 | goto out; | |
381 | } | |
382 | ||
383 | if (load_capng()) { | |
384 | ret = errno; | |
385 | fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); | |
386 | goto out; | |
387 | } | |
388 | ||
389 | /* We dont have this capability in effective set already. */ | |
390 | if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) { | |
391 | ret = 0; | |
392 | goto out; | |
393 | } | |
394 | ||
395 | if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) { | |
396 | ret = errno; | |
397 | fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n"); | |
398 | goto out; | |
399 | } | |
400 | ||
401 | if (capng_apply(CAPNG_SELECT_CAPS)) { | |
402 | ret = errno; | |
403 | fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n"); | |
404 | goto out; | |
405 | } | |
406 | ||
407 | ret = 0; | |
408 | if (cap_dropped) { | |
409 | *cap_dropped = true; | |
410 | } | |
411 | ||
412 | out: | |
413 | return ret; | |
414 | } | |
415 | ||
416 | static int gain_effective_cap(const char *cap_name) | |
417 | { | |
418 | int cap; | |
419 | int ret = 0; | |
420 | ||
421 | cap = capng_name_to_capability(cap_name); | |
422 | if (cap < 0) { | |
423 | ret = errno; | |
424 | fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n", | |
425 | cap_name, strerror(errno)); | |
426 | goto out; | |
427 | } | |
428 | ||
429 | if (load_capng()) { | |
430 | ret = errno; | |
431 | fuse_log(FUSE_LOG_ERR, "load_capng() failed\n"); | |
432 | goto out; | |
433 | } | |
434 | ||
435 | if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) { | |
436 | ret = errno; | |
437 | fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n"); | |
438 | goto out; | |
439 | } | |
440 | ||
441 | if (capng_apply(CAPNG_SELECT_CAPS)) { | |
442 | ret = errno; | |
443 | fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n"); | |
444 | goto out; | |
445 | } | |
446 | ret = 0; | |
447 | ||
448 | out: | |
449 | return ret; | |
450 | } | |
451 | ||
e586edcb DDAG |
452 | /* |
453 | * The host kernel normally drops security.capability xattr's on | |
454 | * any write, however if we're remapping xattr names we need to drop | |
455 | * whatever the clients security.capability is actually stored as. | |
456 | */ | |
457 | static int drop_security_capability(const struct lo_data *lo, int fd) | |
458 | { | |
459 | if (!lo->xattr_security_capability) { | |
460 | /* We didn't remap the name, let the host kernel do it */ | |
461 | return 0; | |
462 | } | |
463 | if (!fremovexattr(fd, lo->xattr_security_capability)) { | |
464 | /* All good */ | |
465 | return 0; | |
466 | } | |
467 | ||
468 | switch (errno) { | |
469 | case ENODATA: | |
470 | /* Attribute didn't exist, that's fine */ | |
471 | return 0; | |
472 | ||
473 | case ENOTSUP: | |
474 | /* FS didn't support attribute anyway, also fine */ | |
475 | return 0; | |
476 | ||
477 | default: | |
478 | /* Hmm other error */ | |
479 | return errno; | |
480 | } | |
481 | } | |
482 | ||
92fb57b8 | 483 | static void lo_map_init(struct lo_map *map) |
25c13572 SH |
484 | { |
485 | map->elems = NULL; | |
486 | map->nelems = 0; | |
487 | map->freelist = -1; | |
488 | } | |
489 | ||
92fb57b8 | 490 | static void lo_map_destroy(struct lo_map *map) |
25c13572 | 491 | { |
e85d6d1e | 492 | g_free(map->elems); |
25c13572 SH |
493 | } |
494 | ||
495 | static int lo_map_grow(struct lo_map *map, size_t new_nelems) | |
496 | { | |
497 | struct lo_map_elem *new_elems; | |
498 | size_t i; | |
499 | ||
500 | if (new_nelems <= map->nelems) { | |
501 | return 1; | |
502 | } | |
503 | ||
e85d6d1e | 504 | new_elems = g_try_realloc_n(map->elems, new_nelems, sizeof(map->elems[0])); |
25c13572 SH |
505 | if (!new_elems) { |
506 | return 0; | |
507 | } | |
508 | ||
509 | for (i = map->nelems; i < new_nelems; i++) { | |
510 | new_elems[i].freelist = i + 1; | |
511 | new_elems[i].in_use = false; | |
512 | } | |
513 | new_elems[new_nelems - 1].freelist = -1; | |
514 | ||
515 | map->elems = new_elems; | |
516 | map->freelist = map->nelems; | |
517 | map->nelems = new_nelems; | |
518 | return 1; | |
519 | } | |
520 | ||
92fb57b8 | 521 | static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map) |
25c13572 SH |
522 | { |
523 | struct lo_map_elem *elem; | |
524 | ||
525 | if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) { | |
526 | return NULL; | |
527 | } | |
528 | ||
529 | elem = &map->elems[map->freelist]; | |
530 | map->freelist = elem->freelist; | |
531 | ||
532 | elem->in_use = true; | |
533 | ||
534 | return elem; | |
535 | } | |
536 | ||
92fb57b8 | 537 | static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key) |
25c13572 SH |
538 | { |
539 | ssize_t *prev; | |
540 | ||
541 | if (!lo_map_grow(map, key + 1)) { | |
542 | return NULL; | |
543 | } | |
544 | ||
545 | for (prev = &map->freelist; *prev != -1; | |
546 | prev = &map->elems[*prev].freelist) { | |
547 | if (*prev == key) { | |
548 | struct lo_map_elem *elem = &map->elems[key]; | |
549 | ||
550 | *prev = elem->freelist; | |
551 | elem->in_use = true; | |
552 | return elem; | |
553 | } | |
554 | } | |
555 | return NULL; | |
556 | } | |
557 | ||
92fb57b8 | 558 | static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key) |
25c13572 SH |
559 | { |
560 | if (key >= map->nelems) { | |
561 | return NULL; | |
562 | } | |
563 | if (!map->elems[key].in_use) { | |
564 | return NULL; | |
565 | } | |
566 | return &map->elems[key]; | |
567 | } | |
568 | ||
92fb57b8 | 569 | static void lo_map_remove(struct lo_map *map, size_t key) |
25c13572 SH |
570 | { |
571 | struct lo_map_elem *elem; | |
572 | ||
573 | if (key >= map->nelems) { | |
574 | return; | |
575 | } | |
576 | ||
577 | elem = &map->elems[key]; | |
578 | if (!elem->in_use) { | |
579 | return; | |
580 | } | |
581 | ||
582 | elem->in_use = false; | |
583 | ||
584 | elem->freelist = map->freelist; | |
585 | map->freelist = key; | |
586 | } | |
587 | ||
73b4d19d | 588 | /* Assumes lo->mutex is held */ |
8afaaee9 | 589 | static ssize_t lo_add_fd_mapping(struct lo_data *lo, int fd) |
73b4d19d SH |
590 | { |
591 | struct lo_map_elem *elem; | |
592 | ||
8afaaee9 | 593 | elem = lo_map_alloc_elem(&lo->fd_map); |
73b4d19d SH |
594 | if (!elem) { |
595 | return -1; | |
596 | } | |
597 | ||
598 | elem->fd = fd; | |
8afaaee9 | 599 | return elem - lo->fd_map.elems; |
73b4d19d SH |
600 | } |
601 | ||
b39bce12 SH |
602 | /* Assumes lo->mutex is held */ |
603 | static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp) | |
604 | { | |
605 | struct lo_map_elem *elem; | |
606 | ||
607 | elem = lo_map_alloc_elem(&lo_data(req)->dirp_map); | |
608 | if (!elem) { | |
609 | return -1; | |
610 | } | |
611 | ||
612 | elem->dirp = dirp; | |
613 | return elem - lo_data(req)->dirp_map.elems; | |
614 | } | |
615 | ||
92fb57b8 SH |
616 | /* Assumes lo->mutex is held */ |
617 | static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode) | |
618 | { | |
619 | struct lo_map_elem *elem; | |
620 | ||
621 | elem = lo_map_alloc_elem(&lo_data(req)->ino_map); | |
622 | if (!elem) { | |
623 | return -1; | |
624 | } | |
625 | ||
626 | elem->inode = inode; | |
627 | return elem - lo_data(req)->ino_map.elems; | |
628 | } | |
629 | ||
c241aa94 SH |
630 | static void lo_inode_put(struct lo_data *lo, struct lo_inode **inodep) |
631 | { | |
632 | struct lo_inode *inode = *inodep; | |
633 | ||
634 | if (!inode) { | |
635 | return; | |
636 | } | |
637 | ||
638 | *inodep = NULL; | |
639 | ||
640 | if (g_atomic_int_dec_and_test(&inode->refcount)) { | |
641 | close(inode->fd); | |
642 | free(inode); | |
643 | } | |
644 | } | |
645 | ||
646 | /* Caller must release refcount using lo_inode_put() */ | |
7c6b6602 DDAG |
647 | static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino) |
648 | { | |
92fb57b8 SH |
649 | struct lo_data *lo = lo_data(req); |
650 | struct lo_map_elem *elem; | |
651 | ||
652 | pthread_mutex_lock(&lo->mutex); | |
653 | elem = lo_map_get(&lo->ino_map, ino); | |
c241aa94 SH |
654 | if (elem) { |
655 | g_atomic_int_inc(&elem->inode->refcount); | |
656 | } | |
92fb57b8 SH |
657 | pthread_mutex_unlock(&lo->mutex); |
658 | ||
659 | if (!elem) { | |
660 | return NULL; | |
7387863d | 661 | } |
92fb57b8 SH |
662 | |
663 | return elem->inode; | |
7c6b6602 DDAG |
664 | } |
665 | ||
c241aa94 SH |
666 | /* |
667 | * TODO Remove this helper and force callers to hold an inode refcount until | |
668 | * they are done with the fd. This will be done in a later patch to make | |
669 | * review easier. | |
670 | */ | |
7c6b6602 DDAG |
671 | static int lo_fd(fuse_req_t req, fuse_ino_t ino) |
672 | { | |
92fb57b8 | 673 | struct lo_inode *inode = lo_inode(req, ino); |
c241aa94 SH |
674 | int fd; |
675 | ||
676 | if (!inode) { | |
677 | return -1; | |
678 | } | |
679 | ||
680 | fd = inode->fd; | |
681 | lo_inode_put(lo_data(req), &inode); | |
682 | return fd; | |
7c6b6602 DDAG |
683 | } |
684 | ||
a3fdbbc7 SH |
685 | /* |
686 | * Open a file descriptor for an inode. Returns -EBADF if the inode is not a | |
687 | * regular file or a directory. | |
688 | * | |
689 | * Use this helper function instead of raw openat(2) to prevent security issues | |
690 | * when a malicious client opens special files such as block device nodes. | |
691 | * Symlink inodes are also rejected since symlinks must already have been | |
692 | * traversed on the client side. | |
693 | */ | |
694 | static int lo_inode_open(struct lo_data *lo, struct lo_inode *inode, | |
695 | int open_flags) | |
696 | { | |
697 | g_autofree char *fd_str = g_strdup_printf("%d", inode->fd); | |
698 | int fd; | |
699 | ||
700 | if (!S_ISREG(inode->filetype) && !S_ISDIR(inode->filetype)) { | |
701 | return -EBADF; | |
702 | } | |
703 | ||
704 | /* | |
705 | * The file is a symlink so O_NOFOLLOW must be ignored. We checked earlier | |
706 | * that the inode is not a special file but if an external process races | |
707 | * with us then symlinks are traversed here. It is not possible to escape | |
708 | * the shared directory since it is mounted as "/" though. | |
709 | */ | |
710 | fd = openat(lo->proc_self_fd, fd_str, open_flags & ~O_NOFOLLOW); | |
711 | if (fd < 0) { | |
712 | return -errno; | |
713 | } | |
714 | return fd; | |
715 | } | |
716 | ||
/*
 * Negotiate connection capabilities when the FUSE session is initialised.
 *
 * @userdata: the struct lo_data of this file system
 * @conn:     connection info; conn->capable is read and conn->want is
 *            adjusted according to the user's options
 *
 * Besides conn->want, this updates the negotiated state kept in lo_data
 * (announce_submounts, killpriv_v2, change_umask, posix_acl).
 */
static void lo_init(void *userdata, struct fuse_conn_info *conn)
{
    struct lo_data *lo = (struct lo_data *)userdata;

    /* Requested whenever the client offers it; there is no user option */
    if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) {
        conn->want |= FUSE_CAP_EXPORT_SUPPORT;
    }

    if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
        fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n");
        conn->want |= FUSE_CAP_WRITEBACK_CACHE;
    }
    /* flock locks: follow the user's choice if the client supports them */
    if (conn->capable & FUSE_CAP_FLOCK_LOCKS) {
        if (lo->flock) {
            fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
            conn->want |= FUSE_CAP_FLOCK_LOCKS;
        } else {
            fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling flock locks\n");
            conn->want &= ~FUSE_CAP_FLOCK_LOCKS;
        }
    }

    /* posix locks: follow the user's choice if the client supports them */
    if (conn->capable & FUSE_CAP_POSIX_LOCKS) {
        if (lo->posix_lock) {
            fuse_log(FUSE_LOG_DEBUG, "lo_init: activating posix locks\n");
            conn->want |= FUSE_CAP_POSIX_LOCKS;
        } else {
            fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix locks\n");
            conn->want &= ~FUSE_CAP_POSIX_LOCKS;
        }
    }

    /*
     * readdirplus is turned off when the user said no_readdirplus, or when
     * caching is disabled and the user did not explicitly ask for it.
     */
    if ((lo->cache == CACHE_NONE && !lo->readdirplus_set) ||
        lo->readdirplus_clear) {
        fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
        conn->want &= ~FUSE_CAP_READDIRPLUS;
    }

    if (!(conn->capable & FUSE_CAP_SUBMOUNTS) && lo->announce_submounts) {
        fuse_log(FUSE_LOG_WARNING, "lo_init: Cannot announce submounts, client "
                 "does not support it\n");
        lo->announce_submounts = false;
    }

    if (lo->user_killpriv_v2 == 1) {
        /*
         * User explicitly asked for this option. Enable it unconditionally.
         * If connection does not have this capability, it should fail
         * in fuse_lowlevel.c
         */
        fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling killpriv_v2\n");
        conn->want |= FUSE_CAP_HANDLE_KILLPRIV_V2;
        lo->killpriv_v2 = 1;
    } else {
        /*
         * Either user specified to disable killpriv_v2, or did not
         * specify anything. Disable killpriv_v2 in both the cases.
         */
        fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling killpriv_v2\n");
        conn->want &= ~FUSE_CAP_HANDLE_KILLPRIV_V2;
        lo->killpriv_v2 = 0;
    }

    if (lo->user_posix_acl == 1) {
        /*
         * User explicitly asked for this option. Enable it unconditionally.
         * If connection does not have this capability, print error message
         * now. It will fail later in fuse_lowlevel.c
         */
        if (!(conn->capable & FUSE_CAP_POSIX_ACL) ||
            !(conn->capable & FUSE_CAP_DONT_MASK) ||
            !(conn->capable & FUSE_CAP_SETXATTR_EXT)) {
            fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable posix acl."
                     " kernel does not support FUSE_POSIX_ACL, FUSE_DONT_MASK"
                     " or FUSE_SETXATTR_EXT capability.\n");
        } else {
            fuse_log(FUSE_LOG_DEBUG, "lo_init: enabling posix acl\n");
        }

        conn->want |= FUSE_CAP_POSIX_ACL | FUSE_CAP_DONT_MASK |
                      FUSE_CAP_SETXATTR_EXT;
        lo->change_umask = true;
        lo->posix_acl = true;
    } else {
        /* User either did not specify anything or wants it disabled */
        fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling posix_acl\n");
        conn->want &= ~FUSE_CAP_POSIX_ACL;
    }

    if (lo->user_security_label == 1) {
        /* Requested even if unsupported; only an error is logged here */
        if (!(conn->capable & FUSE_CAP_SECURITY_CTX)) {
            fuse_log(FUSE_LOG_ERR, "lo_init: Can not enable security label."
                     " kernel does not support FUSE_SECURITY_CTX capability.\n");
        }
        conn->want |= FUSE_CAP_SECURITY_CTX;
    } else {
        fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling security label\n");
        conn->want &= ~FUSE_CAP_SECURITY_CTX;
    }
}
817 | ||
818 | static void lo_getattr(fuse_req_t req, fuse_ino_t ino, | |
7387863d | 819 | struct fuse_file_info *fi) |
7c6b6602 | 820 | { |
7387863d DDAG |
821 | int res; |
822 | struct stat buf; | |
823 | struct lo_data *lo = lo_data(req); | |
7c6b6602 | 824 | |
7387863d | 825 | (void)fi; |
7c6b6602 | 826 | |
33dc9914 AW |
827 | res = |
828 | fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); | |
7387863d DDAG |
829 | if (res == -1) { |
830 | return (void)fuse_reply_err(req, errno); | |
831 | } | |
7c6b6602 | 832 | |
33dc9914 | 833 | fuse_reply_attr(req, &buf, lo->timeout); |
7c6b6602 DDAG |
834 | } |
835 | ||
73b4d19d SH |
836 | static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi) |
837 | { | |
838 | struct lo_data *lo = lo_data(req); | |
839 | struct lo_map_elem *elem; | |
840 | ||
841 | pthread_mutex_lock(&lo->mutex); | |
842 | elem = lo_map_get(&lo->fd_map, fi->fh); | |
843 | pthread_mutex_unlock(&lo->mutex); | |
844 | ||
845 | if (!elem) { | |
846 | return -1; | |
847 | } | |
848 | ||
849 | return elem->fd; | |
850 | } | |
851 | ||
/*
 * Handle a setattr request: apply the attribute changes selected by the
 * @valid bitmask (mode, owner/group, size, atime/mtime) to inode @ino and
 * then answer through lo_getattr().
 *
 * When @fi is non-NULL, the file descriptor looked up from fi->fh is used
 * for the mode, size and timestamp changes. Otherwise the inode's own fd is
 * addressed by its decimal number relative to lo->proc_self_fd (mode and
 * timestamps) or reopened with lo_inode_open() (size).
 *
 * Processing stops at the first failing step and that error is sent with
 * fuse_reply_err(); changes already made by earlier steps are not undone.
 */
static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
                       int valid, struct fuse_file_info *fi)
{
    int saverr;
    char procname[64];
    struct lo_data *lo = lo_data(req);
    struct lo_inode *inode;
    int ifd;
    int res;
    int fd = -1;

    inode = lo_inode(req, ino);
    if (!inode) {
        fuse_reply_err(req, EBADF);
        return;
    }

    ifd = inode->fd;

    /* If fi->fh is invalid we'll report EBADF later */
    if (fi) {
        fd = lo_fi_fd(req, fi);
    }

    /* Permission bits */
    if (valid & FUSE_SET_ATTR_MODE) {
        if (fi) {
            res = fchmod(fd, attr->st_mode);
        } else {
            /* Name the inode fd by number, relative to lo->proc_self_fd */
            sprintf(procname, "%i", ifd);
            res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0);
        }
        if (res == -1) {
            saverr = errno;
            goto out_err;
        }
    }
    /* Ownership; (uid_t)-1 / (gid_t)-1 is passed for the ID not selected */
    if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) {
        uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1;
        gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t)-1;

        saverr = drop_security_capability(lo, ifd);
        if (saverr) {
            goto out_err;
        }

        res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
        if (res == -1) {
            saverr = errno;
            goto out_err;
        }
    }
    /* File size */
    if (valid & FUSE_SET_ATTR_SIZE) {
        int truncfd;
        bool kill_suidgid;
        bool cap_fsetid_dropped = false;

        kill_suidgid = lo->killpriv_v2 && (valid & FUSE_SET_ATTR_KILL_SUIDGID);
        if (fi) {
            truncfd = fd;
        } else {
            /* truncfd is owned by this branch and closed on every path below */
            truncfd = lo_inode_open(lo, inode, O_RDWR);
            if (truncfd < 0) {
                /* A negative return is used as a negated error number */
                saverr = -truncfd;
                goto out_err;
            }
        }

        saverr = drop_security_capability(lo, truncfd);
        if (saverr) {
            if (!fi) {
                close(truncfd);
            }
            goto out_err;
        }

        if (kill_suidgid) {
            res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
            if (res != 0) {
                saverr = res;
                if (!fi) {
                    close(truncfd);
                }
                goto out_err;
            }
        }

        res = ftruncate(truncfd, attr->st_size);
        /* Capture errno before the calls below get a chance to change it */
        saverr = res == -1 ? errno : 0;

        if (cap_fsetid_dropped) {
            if (gain_effective_cap("FSETID")) {
                fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
            }
        }
        if (!fi) {
            close(truncfd);
        }
        if (res == -1) {
            goto out_err;
        }
    }
    /* Timestamps: tv[0] is atime, tv[1] is mtime; default is "leave alone" */
    if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) {
        struct timespec tv[2];

        tv[0].tv_sec = 0;
        tv[1].tv_sec = 0;
        tv[0].tv_nsec = UTIME_OMIT;
        tv[1].tv_nsec = UTIME_OMIT;

        if (valid & FUSE_SET_ATTR_ATIME_NOW) {
            tv[0].tv_nsec = UTIME_NOW;
        } else if (valid & FUSE_SET_ATTR_ATIME) {
            tv[0] = attr->st_atim;
        }

        if (valid & FUSE_SET_ATTR_MTIME_NOW) {
            tv[1].tv_nsec = UTIME_NOW;
        } else if (valid & FUSE_SET_ATTR_MTIME) {
            tv[1] = attr->st_mtim;
        }

        if (fi) {
            res = futimens(fd, tv);
        } else {
            sprintf(procname, "%i", inode->fd);
            res = utimensat(lo->proc_self_fd, procname, tv, 0);
        }
        if (res == -1) {
            saverr = errno;
            goto out_err;
        }
    }
    /* Release the reference taken by lo_inode() before replying */
    lo_inode_put(lo, &inode);

    return lo_getattr(req, ino, fi);

out_err:
    lo_inode_put(lo, &inode);
    fuse_reply_err(req, saverr);
}
992 | ||
d672fce6 HR |
993 | static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st, |
994 | uint64_t mnt_id) | |
7c6b6602 | 995 | { |
7387863d | 996 | struct lo_inode *p; |
bfc50a6e MS |
997 | struct lo_key key = { |
998 | .ino = st->st_ino, | |
999 | .dev = st->st_dev, | |
d672fce6 | 1000 | .mnt_id = mnt_id, |
bfc50a6e | 1001 | }; |
7387863d DDAG |
1002 | |
1003 | pthread_mutex_lock(&lo->mutex); | |
bfc50a6e MS |
1004 | p = g_hash_table_lookup(lo->inodes, &key); |
1005 | if (p) { | |
1222f015 SH |
1006 | assert(p->nlookup > 0); |
1007 | p->nlookup++; | |
c241aa94 | 1008 | g_atomic_int_inc(&p->refcount); |
7387863d DDAG |
1009 | } |
1010 | pthread_mutex_unlock(&lo->mutex); | |
bfc50a6e MS |
1011 | |
1012 | return p; | |
7c6b6602 DDAG |
1013 | } |
1014 | ||
0e81414c VG |
1015 | /* value_destroy_func for posix_locks GHashTable */ |
1016 | static void posix_locks_value_destroy(gpointer data) | |
1017 | { | |
1018 | struct lo_inode_plock *plock = data; | |
1019 | ||
1020 | /* | |
1021 | * We had used open() for locks and had only one fd. So | |
1022 | * closing this fd should release all OFD locks. | |
1023 | */ | |
1024 | close(plock->fd); | |
1025 | free(plock); | |
1026 | } | |
1027 | ||
/*
 * Stat @pathname relative to @dirfd into @statbuf and report the mount ID
 * through @mnt_id.
 *
 * When built with CONFIG_STATX and CONFIG_STATX_MNT_ID and lo->use_statx is
 * set, statx() is tried first and its result is copied field by field into
 * @statbuf; *mnt_id is the statx mount ID if the kernel filled it in, else 0.
 * If statx() fails with ENOSYS, lo->use_statx is cleared and fstatat() is
 * used instead, in which case *mnt_id is always 0.
 *
 * Returns 0 on success and -1 on failure.
 */
static int do_statx(struct lo_data *lo, int dirfd, const char *pathname,
                    struct stat *statbuf, int flags, uint64_t *mnt_id)
{
#if defined(CONFIG_STATX) && defined(CONFIG_STATX_MNT_ID)
    if (lo->use_statx) {
        struct statx stx;

        if (statx(dirfd, pathname, flags, STATX_BASIC_STATS | STATX_MNT_ID,
                  &stx) == 0) {
            memset(statbuf, 0, sizeof(*statbuf));
            statbuf->st_dev = makedev(stx.stx_dev_major, stx.stx_dev_minor);
            statbuf->st_ino = stx.stx_ino;
            statbuf->st_mode = stx.stx_mode;
            statbuf->st_nlink = stx.stx_nlink;
            statbuf->st_uid = stx.stx_uid;
            statbuf->st_gid = stx.stx_gid;
            statbuf->st_rdev = makedev(stx.stx_rdev_major, stx.stx_rdev_minor);
            statbuf->st_size = stx.stx_size;
            statbuf->st_blksize = stx.stx_blksize;
            statbuf->st_blocks = stx.stx_blocks;
            statbuf->st_atim.tv_sec = stx.stx_atime.tv_sec;
            statbuf->st_atim.tv_nsec = stx.stx_atime.tv_nsec;
            statbuf->st_mtim.tv_sec = stx.stx_mtime.tv_sec;
            statbuf->st_mtim.tv_nsec = stx.stx_mtime.tv_nsec;
            statbuf->st_ctim.tv_sec = stx.stx_ctime.tv_sec;
            statbuf->st_ctim.tv_nsec = stx.stx_ctime.tv_nsec;

            *mnt_id = (stx.stx_mask & STATX_MNT_ID) ? stx.stx_mnt_id : 0;
            return 0;
        }

        if (errno != ENOSYS) {
            return -1;
        }

        /* statx() is unavailable: remember that and fall back below */
        lo->use_statx = false;
    }
#endif

    if (fstatat(dirfd, pathname, statbuf, flags) == -1) {
        return -1;
    }
    *mnt_id = 0;

    return 0;
}
1081 | ||
/*
 * Look up @name inside directory @parent and fill in @e for the reply.
 *
 * Increments nlookup on the inode on success. unref_inode_lolocked() must be
 * called eventually to decrement nlookup again. If inodep is non-NULL, the
 * inode pointer is stored and the caller must call lo_inode_put().
 *
 * Returns 0 on success or an errno value on failure (ENOENT when @parent is
 * not a known inode). On failure *inodep, if given, is left as NULL.
 */
static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
                        struct fuse_entry_param *e,
                        struct lo_inode **inodep)
{
    int newfd;
    int res;
    int saverr;
    uint64_t mnt_id;
    struct lo_data *lo = lo_data(req);
    struct lo_inode *inode = NULL;
    struct lo_inode *dir = lo_inode(req, parent);

    if (inodep) {
        *inodep = NULL; /* in case there is an error */
    }

    /*
     * name_to_handle_at() and open_by_handle_at() can reach here with fuse
     * mount point in guest, but we don't have its inode info in the
     * ino_map.
     */
    if (!dir) {
        return ENOENT;
    }

    memset(e, 0, sizeof(*e));
    e->attr_timeout = lo->timeout;
    e->entry_timeout = lo->timeout;

    /* Do not allow escaping root directory */
    if (dir == &lo->root && strcmp(name, "..") == 0) {
        name = ".";
    }

    newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW);
    if (newfd == -1) {
        goto out_err;
    }

    res = do_statx(lo, newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW,
                   &mnt_id);
    if (res == -1) {
        goto out_err;
    }

    /* A directory on a different device or mount than its parent is flagged */
    if (S_ISDIR(e->attr.st_mode) && lo->announce_submounts &&
        (e->attr.st_dev != dir->key.dev || mnt_id != dir->key.mnt_id)) {
        e->attr_flags |= FUSE_ATTR_SUBMOUNT;
    }

    inode = lo_find(lo, &e->attr, mnt_id);
    if (inode) {
        /* Already known: the existing inode keeps its own fd */
        close(newfd);
    } else {
        inode = calloc(1, sizeof(struct lo_inode));
        if (!inode) {
            goto out_err;
        }

        /* cache only filetype */
        inode->filetype = (e->attr.st_mode & S_IFMT);

        /*
         * One for the caller and one for nlookup (released in
         * unref_inode_lolocked())
         */
        g_atomic_int_set(&inode->refcount, 2);

        inode->nlookup = 1;
        inode->fd = newfd;
        inode->key.ino = e->attr.st_ino;
        inode->key.dev = e->attr.st_dev;
        inode->key.mnt_id = mnt_id;
        if (lo->posix_lock) {
            pthread_mutex_init(&inode->plock_mutex, NULL);
            inode->posix_locks = g_hash_table_new_full(
                g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy);
        }
        /* Publish the new inode in both lookup structures under lo->mutex */
        pthread_mutex_lock(&lo->mutex);
        inode->fuse_ino = lo_add_inode_mapping(req, inode);
        g_hash_table_insert(lo->inodes, &inode->key, inode);
        pthread_mutex_unlock(&lo->mutex);
    }
    e->ino = inode->fuse_ino;

    /* Transfer ownership of inode pointer to caller or drop it */
    if (inodep) {
        *inodep = inode;
    } else {
        lo_inode_put(lo, &inode);
    }

    lo_inode_put(lo, &dir);

    fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
             name, (unsigned long long)e->ino);

    return 0;

out_err:
    /* Capture errno first; the cleanup calls below may overwrite it */
    saverr = errno;
    if (newfd != -1) {
        close(newfd);
    }
    lo_inode_put(lo, &inode);
    lo_inode_put(lo, &dir);
    return saverr;
}
1195 | ||
1196 | static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) | |
1197 | { | |
7387863d DDAG |
1198 | struct fuse_entry_param e; |
1199 | int err; | |
1200 | ||
d240314a EG |
1201 | fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent, |
1202 | name); | |
7387863d | 1203 | |
28d1ad0e GK |
1204 | if (is_empty(name)) { |
1205 | fuse_reply_err(req, ENOENT); | |
1206 | return; | |
1207 | } | |
1208 | ||
25dae28c SH |
1209 | /* |
1210 | * Don't use is_safe_path_component(), allow "." and ".." for NFS export | |
1211 | * support. | |
1212 | */ | |
1213 | if (strchr(name, '/')) { | |
1214 | fuse_reply_err(req, EINVAL); | |
1215 | return; | |
1216 | } | |
1217 | ||
22d2ece7 | 1218 | err = lo_do_lookup(req, parent, name, &e, NULL); |
7387863d DDAG |
1219 | if (err) { |
1220 | fuse_reply_err(req, err); | |
1221 | } else { | |
1222 | fuse_reply_entry(req, &e); | |
1223 | } | |
7c6b6602 DDAG |
1224 | } |
1225 | ||
/*
 * On some archs, setres*id is limited to 2^16 but they
 * provide setres*id32 variants that allow 2^32.
 * Others just let setres*id do 2^32 anyway.
 *
 * OURSYS_setresgid and OURSYS_setresuid resolve to the *32 syscall number
 * when the platform defines one, and to the plain syscall number otherwise.
 */
#ifdef SYS_setresgid32
#define OURSYS_setresgid SYS_setresgid32
#else
#define OURSYS_setresgid SYS_setresgid
#endif

#ifdef SYS_setresuid32
#define OURSYS_setresuid SYS_setresuid32
#else
#define OURSYS_setresuid SYS_setresuid
#endif
1242 | ||
449e8171 VG |
1243 | static void drop_supplementary_groups(void) |
1244 | { | |
1245 | int ret; | |
1246 | ||
1247 | ret = getgroups(0, NULL); | |
1248 | if (ret == -1) { | |
1249 | fuse_log(FUSE_LOG_ERR, "getgroups() failed with error=%d:%s\n", | |
1250 | errno, strerror(errno)); | |
1251 | exit(1); | |
1252 | } | |
1253 | ||
1254 | if (!ret) { | |
1255 | return; | |
1256 | } | |
1257 | ||
1258 | /* Drop all supplementary groups. We should not need it */ | |
1259 | ret = setgroups(0, NULL); | |
1260 | if (ret == -1) { | |
1261 | fuse_log(FUSE_LOG_ERR, "setgroups() failed with error=%d:%s\n", | |
1262 | errno, strerror(errno)); | |
1263 | exit(1); | |
1264 | } | |
1265 | } | |
1266 | ||
929cfb7a VG |
1267 | /* |
1268 | * Change to uid/gid of caller so that file is created with | |
1269 | * ownership of caller. | |
1270 | * TODO: What about selinux context? | |
1271 | */ | |
227e5d7f VG |
1272 | static int lo_change_cred(fuse_req_t req, struct lo_cred *old, |
1273 | bool change_umask) | |
929cfb7a VG |
1274 | { |
1275 | int res; | |
1276 | ||
1277 | old->euid = geteuid(); | |
1278 | old->egid = getegid(); | |
1279 | ||
1280 | res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1); | |
1281 | if (res == -1) { | |
1282 | return errno; | |
1283 | } | |
1284 | ||
1285 | res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1); | |
1286 | if (res == -1) { | |
1287 | int errno_save = errno; | |
1288 | ||
1289 | syscall(OURSYS_setresgid, -1, old->egid, -1); | |
1290 | return errno_save; | |
1291 | } | |
1292 | ||
227e5d7f VG |
1293 | if (change_umask) { |
1294 | old->umask = umask(req->ctx.umask); | |
1295 | } | |
929cfb7a VG |
1296 | return 0; |
1297 | } | |
1298 | ||
1299 | /* Regain Privileges */ | |
227e5d7f | 1300 | static void lo_restore_cred(struct lo_cred *old, bool restore_umask) |
929cfb7a VG |
1301 | { |
1302 | int res; | |
1303 | ||
1304 | res = syscall(OURSYS_setresuid, -1, old->euid, -1); | |
1305 | if (res == -1) { | |
1306 | fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid); | |
1307 | exit(1); | |
1308 | } | |
1309 | ||
1310 | res = syscall(OURSYS_setresgid, -1, old->egid, -1); | |
1311 | if (res == -1) { | |
1312 | fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid); | |
1313 | exit(1); | |
1314 | } | |
227e5d7f VG |
1315 | |
1316 | if (restore_umask) | |
1317 | umask(old->umask); | |
929cfb7a VG |
1318 | } |
1319 | ||
f1aa1774 VG |
1320 | /* |
1321 | * A helper to change cred and drop capability. Returns 0 on success and | |
1322 | * errno on error | |
1323 | */ | |
1324 | static int lo_drop_cap_change_cred(fuse_req_t req, struct lo_cred *old, | |
1325 | bool change_umask, const char *cap_name, | |
1326 | bool *cap_dropped) | |
1327 | { | |
1328 | int ret; | |
1329 | bool __cap_dropped; | |
1330 | ||
1331 | assert(cap_name); | |
1332 | ||
1333 | ret = drop_effective_cap(cap_name, &__cap_dropped); | |
1334 | if (ret) { | |
1335 | return ret; | |
1336 | } | |
1337 | ||
1338 | ret = lo_change_cred(req, old, change_umask); | |
1339 | if (ret) { | |
1340 | if (__cap_dropped) { | |
1341 | if (gain_effective_cap(cap_name)) { | |
1342 | fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_%s\n", cap_name); | |
1343 | } | |
1344 | } | |
1345 | } | |
1346 | ||
1347 | if (cap_dropped) { | |
1348 | *cap_dropped = __cap_dropped; | |
1349 | } | |
1350 | return ret; | |
1351 | } | |
1352 | ||
1353 | static void lo_restore_cred_gain_cap(struct lo_cred *old, bool restore_umask, | |
1354 | const char *cap_name) | |
1355 | { | |
1356 | assert(cap_name); | |
1357 | ||
1358 | lo_restore_cred(old, restore_umask); | |
1359 | ||
1360 | if (gain_effective_cap(cap_name)) { | |
1361 | fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_%s\n", cap_name); | |
1362 | } | |
1363 | } | |
1364 | ||
0c3f81e1 VG |
1365 | static int do_mknod_symlink_secctx(fuse_req_t req, struct lo_inode *dir, |
1366 | const char *name, const char *secctx_name) | |
1367 | { | |
1368 | int path_fd, err; | |
1369 | char procname[64]; | |
1370 | struct lo_data *lo = lo_data(req); | |
1371 | ||
1372 | if (!req->secctx.ctxlen) { | |
1373 | return 0; | |
1374 | } | |
1375 | ||
1376 | /* Open newly created element with O_PATH */ | |
1377 | path_fd = openat(dir->fd, name, O_PATH | O_NOFOLLOW); | |
1378 | err = path_fd == -1 ? errno : 0; | |
1379 | if (err) { | |
1380 | return err; | |
1381 | } | |
1382 | sprintf(procname, "%i", path_fd); | |
1383 | FCHDIR_NOFAIL(lo->proc_self_fd); | |
1384 | /* Set security context. This is not atomic w.r.t file creation */ | |
1385 | err = setxattr(procname, secctx_name, req->secctx.ctx, req->secctx.ctxlen, | |
1386 | 0); | |
1387 | if (err) { | |
1388 | err = errno; | |
1389 | } | |
1390 | FCHDIR_NOFAIL(lo->root.fd); | |
1391 | close(path_fd); | |
1392 | return err; | |
1393 | } | |
1394 | ||
1395 | static int do_mknod_symlink(fuse_req_t req, struct lo_inode *dir, | |
1396 | const char *name, mode_t mode, dev_t rdev, | |
1397 | const char *link) | |
1398 | { | |
1399 | int err, fscreate_fd = -1; | |
1400 | const char *secctx_name = req->secctx.name; | |
1401 | struct lo_cred old = {}; | |
1402 | struct lo_data *lo = lo_data(req); | |
1403 | char *mapped_name = NULL; | |
1404 | bool secctx_enabled = req->secctx.ctxlen; | |
1405 | bool do_fscreate = false; | |
1406 | ||
1407 | if (secctx_enabled && lo->xattrmap) { | |
1408 | err = xattr_map_client(lo, req->secctx.name, &mapped_name); | |
1409 | if (err < 0) { | |
1410 | return -err; | |
1411 | } | |
1412 | secctx_name = mapped_name; | |
1413 | } | |
1414 | ||
1415 | /* | |
1416 | * If security xattr has not been remapped and selinux is enabled on | |
1417 | * host, set fscreate and no need to do a setxattr() after file creation | |
1418 | */ | |
1419 | if (secctx_enabled && !mapped_name && lo->use_fscreate) { | |
1420 | do_fscreate = true; | |
1421 | err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen, | |
1422 | &fscreate_fd); | |
1423 | if (err) { | |
1424 | goto out; | |
1425 | } | |
1426 | } | |
1427 | ||
1428 | err = lo_change_cred(req, &old, lo->change_umask && !S_ISLNK(mode)); | |
1429 | if (err) { | |
1430 | goto out; | |
1431 | } | |
1432 | ||
1433 | err = mknod_wrapper(dir->fd, name, link, mode, rdev); | |
1434 | err = err == -1 ? errno : 0; | |
1435 | lo_restore_cred(&old, lo->change_umask && !S_ISLNK(mode)); | |
1436 | if (err) { | |
1437 | goto out; | |
1438 | } | |
1439 | ||
1440 | if (!do_fscreate) { | |
1441 | err = do_mknod_symlink_secctx(req, dir, name, secctx_name); | |
1442 | if (err) { | |
1443 | unlinkat(dir->fd, name, S_ISDIR(mode) ? AT_REMOVEDIR : 0); | |
1444 | } | |
1445 | } | |
1446 | out: | |
1447 | if (fscreate_fd != -1) { | |
1448 | close_reset_proc_fscreate(fscreate_fd); | |
1449 | } | |
1450 | g_free(mapped_name); | |
1451 | return err; | |
1452 | } | |
1453 | ||
7c6b6602 | 1454 | static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent, |
7387863d DDAG |
1455 | const char *name, mode_t mode, dev_t rdev, |
1456 | const char *link) | |
7c6b6602 | 1457 | { |
7387863d | 1458 | int saverr; |
c241aa94 | 1459 | struct lo_data *lo = lo_data(req); |
92fb57b8 | 1460 | struct lo_inode *dir; |
7387863d | 1461 | struct fuse_entry_param e; |
7c6b6602 | 1462 | |
28d1ad0e GK |
1463 | if (is_empty(name)) { |
1464 | fuse_reply_err(req, ENOENT); | |
1465 | return; | |
1466 | } | |
1467 | ||
25dae28c SH |
1468 | if (!is_safe_path_component(name)) { |
1469 | fuse_reply_err(req, EINVAL); | |
1470 | return; | |
1471 | } | |
1472 | ||
92fb57b8 SH |
1473 | dir = lo_inode(req, parent); |
1474 | if (!dir) { | |
1475 | fuse_reply_err(req, EBADF); | |
1476 | return; | |
1477 | } | |
1478 | ||
0c3f81e1 | 1479 | saverr = do_mknod_symlink(req, dir, name, mode, rdev, link); |
929cfb7a VG |
1480 | if (saverr) { |
1481 | goto out; | |
1482 | } | |
1483 | ||
22d2ece7 | 1484 | saverr = lo_do_lookup(req, parent, name, &e, NULL); |
7387863d DDAG |
1485 | if (saverr) { |
1486 | goto out; | |
1487 | } | |
7c6b6602 | 1488 | |
d240314a EG |
1489 | fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, |
1490 | name, (unsigned long long)e.ino); | |
7c6b6602 | 1491 | |
7387863d | 1492 | fuse_reply_entry(req, &e); |
c241aa94 | 1493 | lo_inode_put(lo, &dir); |
7387863d | 1494 | return; |
7c6b6602 DDAG |
1495 | |
1496 | out: | |
c241aa94 | 1497 | lo_inode_put(lo, &dir); |
7387863d | 1498 | fuse_reply_err(req, saverr); |
7c6b6602 DDAG |
1499 | } |
1500 | ||
/*
 * FUSE mknod handler: create @name under @parent with the given @mode and
 * @rdev by delegating to lo_mknod_symlink() with no link target.
 */
static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name,
                     mode_t mode, dev_t rdev)
{
    lo_mknod_symlink(req, parent, name, mode, rdev, NULL);
}
1506 | ||
/*
 * FUSE mkdir handler: delegates to lo_mknod_symlink() with S_IFDIR added to
 * @mode, an rdev of 0 and no link target.
 */
static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name,
                     mode_t mode)
{
    lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL);
}
1512 | ||
/*
 * FUSE symlink handler: delegates to lo_mknod_symlink() with mode S_IFLNK,
 * an rdev of 0 and @link as the link target.
 */
static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
                       const char *name)
{
    lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
}
1518 | ||
7c6b6602 | 1519 | static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, |
7387863d | 1520 | const char *name) |
7c6b6602 | 1521 | { |
7387863d DDAG |
1522 | int res; |
1523 | struct lo_data *lo = lo_data(req); | |
c241aa94 | 1524 | struct lo_inode *parent_inode; |
92fb57b8 | 1525 | struct lo_inode *inode; |
7387863d | 1526 | struct fuse_entry_param e; |
93bb3d8d | 1527 | char procname[64]; |
7387863d DDAG |
1528 | int saverr; |
1529 | ||
28d1ad0e GK |
1530 | if (is_empty(name)) { |
1531 | fuse_reply_err(req, ENOENT); | |
1532 | return; | |
1533 | } | |
1534 | ||
25dae28c SH |
1535 | if (!is_safe_path_component(name)) { |
1536 | fuse_reply_err(req, EINVAL); | |
1537 | return; | |
1538 | } | |
1539 | ||
c241aa94 | 1540 | parent_inode = lo_inode(req, parent); |
92fb57b8 | 1541 | inode = lo_inode(req, ino); |
c241aa94 SH |
1542 | if (!parent_inode || !inode) { |
1543 | errno = EBADF; | |
1544 | goto out_err; | |
92fb57b8 SH |
1545 | } |
1546 | ||
7387863d DDAG |
1547 | memset(&e, 0, sizeof(struct fuse_entry_param)); |
1548 | e.attr_timeout = lo->timeout; | |
1549 | e.entry_timeout = lo->timeout; | |
1550 | ||
93bb3d8d MS |
1551 | sprintf(procname, "%i", inode->fd); |
1552 | res = linkat(lo->proc_self_fd, procname, parent_inode->fd, name, | |
1553 | AT_SYMLINK_FOLLOW); | |
7387863d DDAG |
1554 | if (res == -1) { |
1555 | goto out_err; | |
1556 | } | |
1557 | ||
33dc9914 | 1558 | res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); |
7387863d DDAG |
1559 | if (res == -1) { |
1560 | goto out_err; | |
1561 | } | |
1562 | ||
1563 | pthread_mutex_lock(&lo->mutex); | |
1222f015 | 1564 | inode->nlookup++; |
7387863d | 1565 | pthread_mutex_unlock(&lo->mutex); |
92fb57b8 | 1566 | e.ino = inode->fuse_ino; |
7387863d | 1567 | |
d240314a EG |
1568 | fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent, |
1569 | name, (unsigned long long)e.ino); | |
7387863d DDAG |
1570 | |
1571 | fuse_reply_entry(req, &e); | |
c241aa94 SH |
1572 | lo_inode_put(lo, &parent_inode); |
1573 | lo_inode_put(lo, &inode); | |
7387863d | 1574 | return; |
7c6b6602 DDAG |
1575 | |
1576 | out_err: | |
7387863d | 1577 | saverr = errno; |
c241aa94 SH |
1578 | lo_inode_put(lo, &parent_inode); |
1579 | lo_inode_put(lo, &inode); | |
7387863d | 1580 | fuse_reply_err(req, saverr); |
7c6b6602 DDAG |
1581 | } |
1582 | ||
c241aa94 | 1583 | /* Increments nlookup and caller must release refcount using lo_inode_put() */ |
9257e514 MS |
1584 | static struct lo_inode *lookup_name(fuse_req_t req, fuse_ino_t parent, |
1585 | const char *name) | |
1586 | { | |
1587 | int res; | |
d672fce6 | 1588 | uint64_t mnt_id; |
9257e514 | 1589 | struct stat attr; |
d672fce6 HR |
1590 | struct lo_data *lo = lo_data(req); |
1591 | struct lo_inode *dir = lo_inode(req, parent); | |
08dce386 | 1592 | |
d672fce6 HR |
1593 | if (!dir) { |
1594 | return NULL; | |
1595 | } | |
1596 | ||
20afcc23 | 1597 | res = do_statx(lo, dir->fd, name, &attr, AT_SYMLINK_NOFOLLOW, &mnt_id); |
d672fce6 | 1598 | lo_inode_put(lo, &dir); |
9257e514 MS |
1599 | if (res == -1) { |
1600 | return NULL; | |
1601 | } | |
1602 | ||
d672fce6 | 1603 | return lo_find(lo, &attr, mnt_id); |
9257e514 MS |
1604 | } |
1605 | ||
7c6b6602 DDAG |
1606 | static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) |
1607 | { | |
7387863d | 1608 | int res; |
9257e514 MS |
1609 | struct lo_inode *inode; |
1610 | struct lo_data *lo = lo_data(req); | |
1611 | ||
28d1ad0e GK |
1612 | if (is_empty(name)) { |
1613 | fuse_reply_err(req, ENOENT); | |
1614 | return; | |
1615 | } | |
1616 | ||
25dae28c SH |
1617 | if (!is_safe_path_component(name)) { |
1618 | fuse_reply_err(req, EINVAL); | |
1619 | return; | |
1620 | } | |
7c6b6602 | 1621 | |
9257e514 MS |
1622 | inode = lookup_name(req, parent, name); |
1623 | if (!inode) { | |
1624 | fuse_reply_err(req, EIO); | |
1625 | return; | |
1626 | } | |
1627 | ||
7387863d | 1628 | res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR); |
7c6b6602 | 1629 | |
7387863d | 1630 | fuse_reply_err(req, res == -1 ? errno : 0); |
9257e514 | 1631 | unref_inode_lolocked(lo, inode, 1); |
c241aa94 | 1632 | lo_inode_put(lo, &inode); |
7c6b6602 DDAG |
1633 | } |
1634 | ||
1635 | static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name, | |
7387863d DDAG |
1636 | fuse_ino_t newparent, const char *newname, |
1637 | unsigned int flags) | |
7c6b6602 | 1638 | { |
7387863d | 1639 | int res; |
c241aa94 SH |
1640 | struct lo_inode *parent_inode; |
1641 | struct lo_inode *newparent_inode; | |
1642 | struct lo_inode *oldinode = NULL; | |
1643 | struct lo_inode *newinode = NULL; | |
9257e514 | 1644 | struct lo_data *lo = lo_data(req); |
7c6b6602 | 1645 | |
28d1ad0e GK |
1646 | if (is_empty(name) || is_empty(newname)) { |
1647 | fuse_reply_err(req, ENOENT); | |
1648 | return; | |
1649 | } | |
1650 | ||
25dae28c SH |
1651 | if (!is_safe_path_component(name) || !is_safe_path_component(newname)) { |
1652 | fuse_reply_err(req, EINVAL); | |
1653 | return; | |
1654 | } | |
1655 | ||
c241aa94 SH |
1656 | parent_inode = lo_inode(req, parent); |
1657 | newparent_inode = lo_inode(req, newparent); | |
1658 | if (!parent_inode || !newparent_inode) { | |
1659 | fuse_reply_err(req, EBADF); | |
1660 | goto out; | |
1661 | } | |
1662 | ||
9257e514 MS |
1663 | oldinode = lookup_name(req, parent, name); |
1664 | newinode = lookup_name(req, newparent, newname); | |
1665 | ||
1666 | if (!oldinode) { | |
1667 | fuse_reply_err(req, EIO); | |
1668 | goto out; | |
1669 | } | |
1670 | ||
7387863d | 1671 | if (flags) { |
f0ab7d6f | 1672 | #ifndef SYS_renameat2 |
7387863d | 1673 | fuse_reply_err(req, EINVAL); |
f0ab7d6f | 1674 | #else |
c241aa94 SH |
1675 | res = syscall(SYS_renameat2, parent_inode->fd, name, |
1676 | newparent_inode->fd, newname, flags); | |
f0ab7d6f MS |
1677 | if (res == -1 && errno == ENOSYS) { |
1678 | fuse_reply_err(req, EINVAL); | |
1679 | } else { | |
1680 | fuse_reply_err(req, res == -1 ? errno : 0); | |
1681 | } | |
1682 | #endif | |
9257e514 | 1683 | goto out; |
7387863d | 1684 | } |
7c6b6602 | 1685 | |
c241aa94 | 1686 | res = renameat(parent_inode->fd, name, newparent_inode->fd, newname); |
7c6b6602 | 1687 | |
7387863d | 1688 | fuse_reply_err(req, res == -1 ? errno : 0); |
9257e514 MS |
1689 | out: |
1690 | unref_inode_lolocked(lo, oldinode, 1); | |
1691 | unref_inode_lolocked(lo, newinode, 1); | |
c241aa94 SH |
1692 | lo_inode_put(lo, &oldinode); |
1693 | lo_inode_put(lo, &newinode); | |
1694 | lo_inode_put(lo, &parent_inode); | |
1695 | lo_inode_put(lo, &newparent_inode); | |
7c6b6602 DDAG |
1696 | } |
1697 | ||
1698 | static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) | |
1699 | { | |
7387863d | 1700 | int res; |
9257e514 MS |
1701 | struct lo_inode *inode; |
1702 | struct lo_data *lo = lo_data(req); | |
7c6b6602 | 1703 | |
28d1ad0e GK |
1704 | if (is_empty(name)) { |
1705 | fuse_reply_err(req, ENOENT); | |
1706 | return; | |
1707 | } | |
1708 | ||
25dae28c SH |
1709 | if (!is_safe_path_component(name)) { |
1710 | fuse_reply_err(req, EINVAL); | |
1711 | return; | |
1712 | } | |
1713 | ||
9257e514 MS |
1714 | inode = lookup_name(req, parent, name); |
1715 | if (!inode) { | |
1716 | fuse_reply_err(req, EIO); | |
1717 | return; | |
1718 | } | |
1719 | ||
7387863d | 1720 | res = unlinkat(lo_fd(req, parent), name, 0); |
7c6b6602 | 1721 | |
7387863d | 1722 | fuse_reply_err(req, res == -1 ? errno : 0); |
9257e514 | 1723 | unref_inode_lolocked(lo, inode, 1); |
c241aa94 | 1724 | lo_inode_put(lo, &inode); |
7c6b6602 DDAG |
1725 | } |
1726 | ||
fe4c1579 DDAG |
1727 | /* To be called with lo->mutex held */ |
1728 | static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n) | |
7c6b6602 | 1729 | { |
7387863d DDAG |
1730 | if (!inode) { |
1731 | return; | |
1732 | } | |
1733 | ||
1222f015 SH |
1734 | assert(inode->nlookup >= n); |
1735 | inode->nlookup -= n; | |
1736 | if (!inode->nlookup) { | |
92fb57b8 | 1737 | lo_map_remove(&lo->ino_map, inode->fuse_ino); |
bfc50a6e | 1738 | g_hash_table_remove(lo->inodes, &inode->key); |
e7e8aa8a VG |
1739 | if (lo->posix_lock) { |
1740 | if (g_hash_table_size(inode->posix_locks)) { | |
1741 | fuse_log(FUSE_LOG_WARNING, "Hash table is not empty\n"); | |
1742 | } | |
1743 | g_hash_table_destroy(inode->posix_locks); | |
1744 | pthread_mutex_destroy(&inode->plock_mutex); | |
0e81414c | 1745 | } |
c241aa94 SH |
1746 | /* Drop our refcount from lo_do_lookup() */ |
1747 | lo_inode_put(lo, &inode); | |
7387863d | 1748 | } |
7c6b6602 DDAG |
1749 | } |
1750 | ||
fe4c1579 DDAG |
1751 | static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode, |
1752 | uint64_t n) | |
1753 | { | |
1754 | if (!inode) { | |
1755 | return; | |
1756 | } | |
1757 | ||
1758 | pthread_mutex_lock(&lo->mutex); | |
1759 | unref_inode(lo, inode, n); | |
1760 | pthread_mutex_unlock(&lo->mutex); | |
1761 | } | |
1762 | ||
7c6b6602 DDAG |
1763 | static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) |
1764 | { | |
7387863d | 1765 | struct lo_data *lo = lo_data(req); |
92fb57b8 SH |
1766 | struct lo_inode *inode; |
1767 | ||
1768 | inode = lo_inode(req, ino); | |
1769 | if (!inode) { | |
1770 | return; | |
1771 | } | |
7c6b6602 | 1772 | |
d240314a | 1773 | fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n", |
1222f015 | 1774 | (unsigned long long)ino, (unsigned long long)inode->nlookup, |
d240314a | 1775 | (unsigned long long)nlookup); |
7c6b6602 | 1776 | |
95d27157 | 1777 | unref_inode_lolocked(lo, inode, nlookup); |
c241aa94 | 1778 | lo_inode_put(lo, &inode); |
7c6b6602 DDAG |
1779 | } |
1780 | ||
/*
 * FUSE forget handler for a single inode: drop @nlookup lookups from @ino
 * via lo_forget_one() and then acknowledge with fuse_reply_none().
 */
static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
{
    lo_forget_one(req, ino, nlookup);
    fuse_reply_none(req);
}
1786 | ||
1787 | static void lo_forget_multi(fuse_req_t req, size_t count, | |
7387863d | 1788 | struct fuse_forget_data *forgets) |
7c6b6602 | 1789 | { |
7387863d | 1790 | int i; |
7c6b6602 | 1791 | |
7387863d DDAG |
1792 | for (i = 0; i < count; i++) { |
1793 | lo_forget_one(req, forgets[i].ino, forgets[i].nlookup); | |
1794 | } | |
1795 | fuse_reply_none(req); | |
7c6b6602 DDAG |
1796 | } |
1797 | ||
1798 | static void lo_readlink(fuse_req_t req, fuse_ino_t ino) | |
1799 | { | |
7387863d DDAG |
1800 | char buf[PATH_MAX + 1]; |
1801 | int res; | |
7c6b6602 | 1802 | |
7387863d DDAG |
1803 | res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf)); |
1804 | if (res == -1) { | |
1805 | return (void)fuse_reply_err(req, errno); | |
1806 | } | |
7c6b6602 | 1807 | |
7387863d DDAG |
1808 | if (res == sizeof(buf)) { |
1809 | return (void)fuse_reply_err(req, ENAMETOOLONG); | |
1810 | } | |
7c6b6602 | 1811 | |
7387863d | 1812 | buf[res] = '\0'; |
7c6b6602 | 1813 | |
7387863d | 1814 | fuse_reply_readlink(req, buf); |
7c6b6602 DDAG |
1815 | } |
1816 | ||
1817 | struct lo_dirp { | |
acefdde7 | 1818 | gint refcount; |
7387863d DDAG |
1819 | DIR *dp; |
1820 | struct dirent *entry; | |
1821 | off_t offset; | |
7c6b6602 DDAG |
1822 | }; |
1823 | ||
acefdde7 SH |
1824 | static void lo_dirp_put(struct lo_dirp **dp) |
1825 | { | |
1826 | struct lo_dirp *d = *dp; | |
1827 | ||
1828 | if (!d) { | |
1829 | return; | |
1830 | } | |
1831 | *dp = NULL; | |
1832 | ||
1833 | if (g_atomic_int_dec_and_test(&d->refcount)) { | |
1834 | closedir(d->dp); | |
1835 | free(d); | |
1836 | } | |
1837 | } | |
1838 | ||
1839 | /* Call lo_dirp_put() on the return value when no longer needed */ | |
b39bce12 | 1840 | static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi) |
7c6b6602 | 1841 | { |
b39bce12 SH |
1842 | struct lo_data *lo = lo_data(req); |
1843 | struct lo_map_elem *elem; | |
1844 | ||
1845 | pthread_mutex_lock(&lo->mutex); | |
1846 | elem = lo_map_get(&lo->dirp_map, fi->fh); | |
acefdde7 SH |
1847 | if (elem) { |
1848 | g_atomic_int_inc(&elem->dirp->refcount); | |
1849 | } | |
b39bce12 SH |
1850 | pthread_mutex_unlock(&lo->mutex); |
1851 | if (!elem) { | |
1852 | return NULL; | |
1853 | } | |
1854 | ||
1855 | return elem->dirp; | |
7c6b6602 DDAG |
1856 | } |
1857 | ||
7387863d DDAG |
1858 | static void lo_opendir(fuse_req_t req, fuse_ino_t ino, |
1859 | struct fuse_file_info *fi) | |
7c6b6602 | 1860 | { |
7387863d DDAG |
1861 | int error = ENOMEM; |
1862 | struct lo_data *lo = lo_data(req); | |
1863 | struct lo_dirp *d; | |
1864 | int fd; | |
b39bce12 | 1865 | ssize_t fh; |
7387863d DDAG |
1866 | |
1867 | d = calloc(1, sizeof(struct lo_dirp)); | |
1868 | if (d == NULL) { | |
1869 | goto out_err; | |
1870 | } | |
1871 | ||
1872 | fd = openat(lo_fd(req, ino), ".", O_RDONLY); | |
1873 | if (fd == -1) { | |
1874 | goto out_errno; | |
1875 | } | |
1876 | ||
1877 | d->dp = fdopendir(fd); | |
1878 | if (d->dp == NULL) { | |
1879 | goto out_errno; | |
1880 | } | |
1881 | ||
1882 | d->offset = 0; | |
1883 | d->entry = NULL; | |
1884 | ||
acefdde7 | 1885 | g_atomic_int_set(&d->refcount, 1); /* paired with lo_releasedir() */ |
b39bce12 SH |
1886 | pthread_mutex_lock(&lo->mutex); |
1887 | fh = lo_add_dirp_mapping(req, d); | |
1888 | pthread_mutex_unlock(&lo->mutex); | |
1889 | if (fh == -1) { | |
1890 | goto out_err; | |
1891 | } | |
1892 | ||
1893 | fi->fh = fh; | |
7387863d | 1894 | if (lo->cache == CACHE_ALWAYS) { |
9b610b09 | 1895 | fi->cache_readdir = 1; |
7387863d DDAG |
1896 | } |
1897 | fuse_reply_open(req, fi); | |
1898 | return; | |
7c6b6602 DDAG |
1899 | |
1900 | out_errno: | |
7387863d | 1901 | error = errno; |
7c6b6602 | 1902 | out_err: |
7387863d | 1903 | if (d) { |
b39bce12 SH |
1904 | if (d->dp) { |
1905 | closedir(d->dp); | |
e1cd92d9 | 1906 | } else if (fd != -1) { |
7387863d DDAG |
1907 | close(fd); |
1908 | } | |
1909 | free(d); | |
1910 | } | |
1911 | fuse_reply_err(req, error); | |
7c6b6602 DDAG |
1912 | } |
1913 | ||
7c6b6602 | 1914 | static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, |
7387863d | 1915 | off_t offset, struct fuse_file_info *fi, int plus) |
7c6b6602 | 1916 | { |
752272da | 1917 | struct lo_data *lo = lo_data(req); |
acefdde7 | 1918 | struct lo_dirp *d = NULL; |
752272da | 1919 | struct lo_inode *dinode; |
c9a276f5 | 1920 | g_autofree char *buf = NULL; |
7387863d DDAG |
1921 | char *p; |
1922 | size_t rem = size; | |
752272da | 1923 | int err = EBADF; |
7387863d | 1924 | |
752272da SH |
1925 | dinode = lo_inode(req, ino); |
1926 | if (!dinode) { | |
1927 | goto error; | |
1928 | } | |
7387863d | 1929 | |
b39bce12 SH |
1930 | d = lo_dirp(req, fi); |
1931 | if (!d) { | |
1932 | goto error; | |
1933 | } | |
1934 | ||
752272da | 1935 | err = ENOMEM; |
c9a276f5 | 1936 | buf = g_try_malloc0(size); |
7387863d | 1937 | if (!buf) { |
7387863d DDAG |
1938 | goto error; |
1939 | } | |
1940 | p = buf; | |
1941 | ||
1942 | if (offset != d->offset) { | |
1943 | seekdir(d->dp, offset); | |
1944 | d->entry = NULL; | |
1945 | d->offset = offset; | |
1946 | } | |
1947 | while (1) { | |
1948 | size_t entsize; | |
1949 | off_t nextoff; | |
1950 | const char *name; | |
1951 | ||
1952 | if (!d->entry) { | |
1953 | errno = 0; | |
1954 | d->entry = readdir(d->dp); | |
1955 | if (!d->entry) { | |
1956 | if (errno) { /* Error */ | |
1957 | err = errno; | |
1958 | goto error; | |
1959 | } else { /* End of stream */ | |
1960 | break; | |
1961 | } | |
1962 | } | |
1963 | } | |
1964 | nextoff = d->entry->d_off; | |
1965 | name = d->entry->d_name; | |
752272da | 1966 | |
7387863d | 1967 | fuse_ino_t entry_ino = 0; |
752272da SH |
1968 | struct fuse_entry_param e = (struct fuse_entry_param){ |
1969 | .attr.st_ino = d->entry->d_ino, | |
1970 | .attr.st_mode = d->entry->d_type << 12, | |
1971 | }; | |
1972 | ||
1973 | /* Hide root's parent directory */ | |
1974 | if (dinode == &lo->root && strcmp(name, "..") == 0) { | |
bfc50a6e | 1975 | e.attr.st_ino = lo->root.key.ino; |
752272da SH |
1976 | e.attr.st_mode = DT_DIR << 12; |
1977 | } | |
1978 | ||
7387863d | 1979 | if (plus) { |
752272da | 1980 | if (!is_dot_or_dotdot(name)) { |
22d2ece7 | 1981 | err = lo_do_lookup(req, ino, name, &e, NULL); |
7387863d DDAG |
1982 | if (err) { |
1983 | goto error; | |
1984 | } | |
1985 | entry_ino = e.ino; | |
1986 | } | |
1987 | ||
1988 | entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff); | |
1989 | } else { | |
752272da | 1990 | entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff); |
7387863d DDAG |
1991 | } |
1992 | if (entsize > rem) { | |
1993 | if (entry_ino != 0) { | |
1994 | lo_forget_one(req, entry_ino, 1); | |
1995 | } | |
1996 | break; | |
1997 | } | |
1998 | ||
1999 | p += entsize; | |
2000 | rem -= entsize; | |
2001 | ||
2002 | d->entry = NULL; | |
2003 | d->offset = nextoff; | |
2004 | } | |
7c6b6602 DDAG |
2005 | |
2006 | err = 0; | |
2007 | error: | |
acefdde7 | 2008 | lo_dirp_put(&d); |
c241aa94 | 2009 | lo_inode_put(lo, &dinode); |
acefdde7 | 2010 | |
7387863d DDAG |
2011 | /* |
2012 | * If there's an error, we can only signal it if we haven't stored | |
2013 | * any entries yet - otherwise we'd end up with wrong lookup | |
2014 | * counts for the entries that are already in the buffer. So we | |
2015 | * return what we've collected until that point. | |
2016 | */ | |
2017 | if (err && rem == size) { | |
2018 | fuse_reply_err(req, err); | |
2019 | } else { | |
2020 | fuse_reply_buf(req, buf, size - rem); | |
2021 | } | |
7c6b6602 DDAG |
2022 | } |
2023 | ||
2024 | static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, | |
7387863d | 2025 | off_t offset, struct fuse_file_info *fi) |
7c6b6602 | 2026 | { |
7387863d | 2027 | lo_do_readdir(req, ino, size, offset, fi, 0); |
7c6b6602 DDAG |
2028 | } |
2029 | ||
2030 | static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, | |
7387863d | 2031 | off_t offset, struct fuse_file_info *fi) |
7c6b6602 | 2032 | { |
7387863d | 2033 | lo_do_readdir(req, ino, size, offset, fi, 1); |
7c6b6602 DDAG |
2034 | } |
2035 | ||
7387863d DDAG |
2036 | static void lo_releasedir(fuse_req_t req, fuse_ino_t ino, |
2037 | struct fuse_file_info *fi) | |
7c6b6602 | 2038 | { |
b39bce12 | 2039 | struct lo_data *lo = lo_data(req); |
acefdde7 | 2040 | struct lo_map_elem *elem; |
b39bce12 SH |
2041 | struct lo_dirp *d; |
2042 | ||
7387863d | 2043 | (void)ino; |
b39bce12 | 2044 | |
acefdde7 SH |
2045 | pthread_mutex_lock(&lo->mutex); |
2046 | elem = lo_map_get(&lo->dirp_map, fi->fh); | |
2047 | if (!elem) { | |
2048 | pthread_mutex_unlock(&lo->mutex); | |
b39bce12 SH |
2049 | fuse_reply_err(req, EBADF); |
2050 | return; | |
2051 | } | |
2052 | ||
acefdde7 | 2053 | d = elem->dirp; |
b39bce12 SH |
2054 | lo_map_remove(&lo->dirp_map, fi->fh); |
2055 | pthread_mutex_unlock(&lo->mutex); | |
2056 | ||
acefdde7 SH |
2057 | lo_dirp_put(&d); /* paired with lo_opendir() */ |
2058 | ||
7387863d | 2059 | fuse_reply_err(req, 0); |
7c6b6602 DDAG |
2060 | } |
2061 | ||
e12a0eda JZ |
2062 | static void update_open_flags(int writeback, int allow_direct_io, |
2063 | struct fuse_file_info *fi) | |
8e4e41e3 MT |
2064 | { |
2065 | /* | |
2066 | * With writeback cache, kernel may send read requests even | |
2067 | * when userspace opened write-only | |
2068 | */ | |
2069 | if (writeback && (fi->flags & O_ACCMODE) == O_WRONLY) { | |
2070 | fi->flags &= ~O_ACCMODE; | |
2071 | fi->flags |= O_RDWR; | |
2072 | } | |
2073 | ||
2074 | /* | |
2075 | * With writeback cache, O_APPEND is handled by the kernel. | |
2076 | * This breaks atomicity (since the file may change in the | |
2077 | * underlying filesystem, so that the kernel's idea of the | |
2078 | * end of the file isn't accurate anymore). In this example, | |
2079 | * we just accept that. A more rigorous filesystem may want | |
2080 | * to return an error here | |
2081 | */ | |
2082 | if (writeback && (fi->flags & O_APPEND)) { | |
2083 | fi->flags &= ~O_APPEND; | |
2084 | } | |
2085 | ||
2086 | /* | |
2087 | * O_DIRECT in guest should not necessarily mean bypassing page | |
e12a0eda JZ |
2088 | * cache on host as well. Therefore, we discard it by default |
2089 | * ('-o no_allow_direct_io'). If somebody needs that behavior, | |
2090 | * the '-o allow_direct_io' option should be set. | |
8e4e41e3 | 2091 | */ |
e12a0eda JZ |
2092 | if (!allow_direct_io) { |
2093 | fi->flags &= ~O_DIRECT; | |
2094 | } | |
8e4e41e3 MT |
2095 | } |
2096 | ||
a3fdbbc7 SH |
2097 | /* |
2098 | * Open a regular file, set up an fd mapping, and fill out the struct | |
2099 | * fuse_file_info for it. If existing_fd is not negative, use that fd instead | |
2100 | * opening a new one. Takes ownership of existing_fd. | |
2101 | * | |
2102 | * Returns 0 on success or a positive errno. | |
2103 | */ | |
8afaaee9 | 2104 | static int lo_do_open(struct lo_data *lo, struct lo_inode *inode, |
a3fdbbc7 | 2105 | int existing_fd, struct fuse_file_info *fi) |
8afaaee9 | 2106 | { |
8afaaee9 | 2107 | ssize_t fh; |
a3fdbbc7 | 2108 | int fd = existing_fd; |
d64907ac VG |
2109 | int err; |
2110 | bool cap_fsetid_dropped = false; | |
2111 | bool kill_suidgid = lo->killpriv_v2 && fi->kill_priv; | |
8afaaee9 SH |
2112 | |
2113 | update_open_flags(lo->writeback, lo->allow_direct_io, fi); | |
2114 | ||
a3fdbbc7 | 2115 | if (fd < 0) { |
d64907ac VG |
2116 | if (kill_suidgid) { |
2117 | err = drop_effective_cap("FSETID", &cap_fsetid_dropped); | |
2118 | if (err) { | |
2119 | return err; | |
2120 | } | |
2121 | } | |
2122 | ||
a3fdbbc7 | 2123 | fd = lo_inode_open(lo, inode, fi->flags); |
d64907ac VG |
2124 | |
2125 | if (cap_fsetid_dropped) { | |
2126 | if (gain_effective_cap("FSETID")) { | |
2127 | fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); | |
2128 | } | |
2129 | } | |
a3fdbbc7 SH |
2130 | if (fd < 0) { |
2131 | return -fd; | |
2132 | } | |
e586edcb DDAG |
2133 | if (fi->flags & (O_TRUNC)) { |
2134 | int err = drop_security_capability(lo, fd); | |
2135 | if (err) { | |
2136 | close(fd); | |
2137 | return err; | |
2138 | } | |
2139 | } | |
8afaaee9 SH |
2140 | } |
2141 | ||
2142 | pthread_mutex_lock(&lo->mutex); | |
2143 | fh = lo_add_fd_mapping(lo, fd); | |
2144 | pthread_mutex_unlock(&lo->mutex); | |
2145 | if (fh == -1) { | |
2146 | close(fd); | |
2147 | return ENOMEM; | |
2148 | } | |
2149 | ||
2150 | fi->fh = fh; | |
2151 | if (lo->cache == CACHE_NONE) { | |
2152 | fi->direct_io = 1; | |
2153 | } else if (lo->cache == CACHE_ALWAYS) { | |
2154 | fi->keep_cache = 1; | |
2155 | } | |
2156 | return 0; | |
2157 | } | |
2158 | ||
0c3f81e1 VG |
2159 | static int do_create_nosecctx(fuse_req_t req, struct lo_inode *parent_inode, |
2160 | const char *name, mode_t mode, | |
a675c9a6 VG |
2161 | struct fuse_file_info *fi, int *open_fd, |
2162 | bool tmpfile) | |
81489726 | 2163 | { |
0c3f81e1 | 2164 | int err, fd; |
81489726 VG |
2165 | struct lo_cred old = {}; |
2166 | struct lo_data *lo = lo_data(req); | |
0c3f81e1 VG |
2167 | int flags; |
2168 | ||
a675c9a6 VG |
2169 | if (tmpfile) { |
2170 | flags = fi->flags | O_TMPFILE; | |
2171 | /* | |
2172 | * Don't use O_EXCL as we want to link file later. Also reset O_CREAT | |
2173 | * otherwise openat() returns -EINVAL. | |
2174 | */ | |
2175 | flags &= ~(O_CREAT | O_EXCL); | |
2176 | ||
2177 | /* O_TMPFILE needs either O_RDWR or O_WRONLY */ | |
2178 | if ((flags & O_ACCMODE) == O_RDONLY) { | |
2179 | flags |= O_RDWR; | |
2180 | } | |
2181 | } else { | |
2182 | flags = fi->flags | O_CREAT | O_EXCL; | |
2183 | } | |
81489726 VG |
2184 | |
2185 | err = lo_change_cred(req, &old, lo->change_umask); | |
2186 | if (err) { | |
2187 | return err; | |
2188 | } | |
2189 | ||
2190 | /* Try to create a new file but don't open existing files */ | |
0c3f81e1 VG |
2191 | fd = openat(parent_inode->fd, name, flags, mode); |
2192 | err = fd == -1 ? errno : 0; | |
2193 | lo_restore_cred(&old, lo->change_umask); | |
2194 | if (!err) { | |
81489726 VG |
2195 | *open_fd = fd; |
2196 | } | |
0c3f81e1 VG |
2197 | return err; |
2198 | } | |
2199 | ||
2200 | static int do_create_secctx_fscreate(fuse_req_t req, | |
2201 | struct lo_inode *parent_inode, | |
2202 | const char *name, mode_t mode, | |
2203 | struct fuse_file_info *fi, int *open_fd) | |
2204 | { | |
2205 | int err = 0, fd = -1, fscreate_fd = -1; | |
2206 | struct lo_data *lo = lo_data(req); | |
2207 | ||
2208 | err = open_set_proc_fscreate(lo, req->secctx.ctx, req->secctx.ctxlen, | |
2209 | &fscreate_fd); | |
2210 | if (err) { | |
2211 | return err; | |
2212 | } | |
2213 | ||
a675c9a6 | 2214 | err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false); |
0c3f81e1 VG |
2215 | |
2216 | close_reset_proc_fscreate(fscreate_fd); | |
2217 | if (!err) { | |
2218 | *open_fd = fd; | |
2219 | } | |
2220 | return err; | |
2221 | } | |
2222 | ||
a675c9a6 VG |
2223 | static int do_create_secctx_tmpfile(fuse_req_t req, |
2224 | struct lo_inode *parent_inode, | |
2225 | const char *name, mode_t mode, | |
2226 | struct fuse_file_info *fi, | |
2227 | const char *secctx_name, int *open_fd) | |
2228 | { | |
2229 | int err, fd = -1; | |
2230 | struct lo_data *lo = lo_data(req); | |
2231 | char procname[64]; | |
2232 | ||
2233 | err = do_create_nosecctx(req, parent_inode, ".", mode, fi, &fd, true); | |
2234 | if (err) { | |
2235 | return err; | |
2236 | } | |
2237 | ||
2238 | err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0); | |
2239 | if (err) { | |
2240 | err = errno; | |
2241 | goto out; | |
2242 | } | |
2243 | ||
2244 | /* Security context set on file. Link it in place */ | |
2245 | sprintf(procname, "%d", fd); | |
2246 | FCHDIR_NOFAIL(lo->proc_self_fd); | |
2247 | err = linkat(AT_FDCWD, procname, parent_inode->fd, name, | |
2248 | AT_SYMLINK_FOLLOW); | |
2249 | err = err == -1 ? errno : 0; | |
2250 | FCHDIR_NOFAIL(lo->root.fd); | |
2251 | ||
2252 | out: | |
2253 | if (!err) { | |
2254 | *open_fd = fd; | |
2255 | } else if (fd != -1) { | |
2256 | close(fd); | |
2257 | } | |
2258 | return err; | |
2259 | } | |
2260 | ||
0c3f81e1 VG |
2261 | static int do_create_secctx_noatomic(fuse_req_t req, |
2262 | struct lo_inode *parent_inode, | |
2263 | const char *name, mode_t mode, | |
2264 | struct fuse_file_info *fi, | |
2265 | const char *secctx_name, int *open_fd) | |
2266 | { | |
2267 | int err = 0, fd = -1; | |
2268 | ||
a675c9a6 | 2269 | err = do_create_nosecctx(req, parent_inode, name, mode, fi, &fd, false); |
0c3f81e1 VG |
2270 | if (err) { |
2271 | goto out; | |
2272 | } | |
2273 | ||
2274 | /* Set security context. This is not atomic w.r.t file creation */ | |
2275 | err = fsetxattr(fd, secctx_name, req->secctx.ctx, req->secctx.ctxlen, 0); | |
2276 | err = err == -1 ? errno : 0; | |
2277 | out: | |
2278 | if (!err) { | |
2279 | *open_fd = fd; | |
2280 | } else { | |
2281 | if (fd != -1) { | |
2282 | close(fd); | |
2283 | unlinkat(parent_inode->fd, name, 0); | |
2284 | } | |
2285 | } | |
2286 | return err; | |
2287 | } | |
2288 | ||
2289 | static int do_lo_create(fuse_req_t req, struct lo_inode *parent_inode, | |
2290 | const char *name, mode_t mode, | |
2291 | struct fuse_file_info *fi, int *open_fd) | |
2292 | { | |
2293 | struct lo_data *lo = lo_data(req); | |
2294 | char *mapped_name = NULL; | |
2295 | int err; | |
2296 | const char *ctxname = req->secctx.name; | |
2297 | bool secctx_enabled = req->secctx.ctxlen; | |
2298 | ||
2299 | if (secctx_enabled && lo->xattrmap) { | |
2300 | err = xattr_map_client(lo, req->secctx.name, &mapped_name); | |
2301 | if (err < 0) { | |
2302 | return -err; | |
2303 | } | |
2304 | ||
2305 | ctxname = mapped_name; | |
2306 | } | |
2307 | ||
2308 | if (secctx_enabled) { | |
2309 | /* | |
2310 | * If security.selinux has not been remapped and selinux is enabled, | |
a675c9a6 VG |
2311 | * use fscreate to set context before file creation. If not, use |
2312 | * tmpfile method for regular files. Otherwise fallback to | |
118d4ed0 | 2313 | * non-atomic method of file creation and xattr setting. |
0c3f81e1 VG |
2314 | */ |
2315 | if (!mapped_name && lo->use_fscreate) { | |
2316 | err = do_create_secctx_fscreate(req, parent_inode, name, mode, fi, | |
2317 | open_fd); | |
2318 | goto out; | |
a675c9a6 VG |
2319 | } else if (S_ISREG(mode)) { |
2320 | err = do_create_secctx_tmpfile(req, parent_inode, name, mode, fi, | |
2321 | ctxname, open_fd); | |
2322 | /* | |
2323 | * If filesystem does not support O_TMPFILE, fallback to non-atomic | |
2324 | * method. | |
2325 | */ | |
2326 | if (!err || err != EOPNOTSUPP) { | |
2327 | goto out; | |
2328 | } | |
0c3f81e1 VG |
2329 | } |
2330 | ||
2331 | err = do_create_secctx_noatomic(req, parent_inode, name, mode, fi, | |
2332 | ctxname, open_fd); | |
2333 | } else { | |
a675c9a6 VG |
2334 | err = do_create_nosecctx(req, parent_inode, name, mode, fi, open_fd, |
2335 | false); | |
0c3f81e1 VG |
2336 | } |
2337 | ||
2338 | out: | |
2339 | g_free(mapped_name); | |
81489726 VG |
2340 | return err; |
2341 | } | |
2342 | ||
7c6b6602 | 2343 | static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name, |
7387863d | 2344 | mode_t mode, struct fuse_file_info *fi) |
7c6b6602 | 2345 | { |
a3fdbbc7 | 2346 | int fd = -1; |
7387863d | 2347 | struct lo_data *lo = lo_data(req); |
c241aa94 | 2348 | struct lo_inode *parent_inode; |
a3fdbbc7 | 2349 | struct lo_inode *inode = NULL; |
7387863d DDAG |
2350 | struct fuse_entry_param e; |
2351 | int err; | |
2352 | ||
d64907ac VG |
2353 | fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)" |
2354 | " kill_priv=%d\n", parent, name, fi->kill_priv); | |
7387863d | 2355 | |
25dae28c SH |
2356 | if (!is_safe_path_component(name)) { |
2357 | fuse_reply_err(req, EINVAL); | |
2358 | return; | |
2359 | } | |
2360 | ||
c241aa94 SH |
2361 | parent_inode = lo_inode(req, parent); |
2362 | if (!parent_inode) { | |
2363 | fuse_reply_err(req, EBADF); | |
2364 | return; | |
2365 | } | |
2366 | ||
e12a0eda | 2367 | update_open_flags(lo->writeback, lo->allow_direct_io, fi); |
65da4539 | 2368 | |
81489726 | 2369 | err = do_lo_create(req, parent_inode, name, mode, fi, &fd); |
73b4d19d | 2370 | |
a3fdbbc7 SH |
2371 | /* Ignore the error if file exists and O_EXCL was not given */ |
2372 | if (err && (err != EEXIST || (fi->flags & O_EXCL))) { | |
2373 | goto out; | |
2374 | } | |
73b4d19d | 2375 | |
a3fdbbc7 SH |
2376 | err = lo_do_lookup(req, parent, name, &e, &inode); |
2377 | if (err) { | |
2378 | goto out; | |
929cfb7a | 2379 | } |
a3fdbbc7 SH |
2380 | |
2381 | err = lo_do_open(lo, inode, fd, fi); | |
2382 | fd = -1; /* lo_do_open() takes ownership of fd */ | |
2383 | if (err) { | |
2384 | /* Undo lo_do_lookup() nlookup ref */ | |
2385 | unref_inode_lolocked(lo, inode, 1); | |
7387863d DDAG |
2386 | } |
2387 | ||
929cfb7a | 2388 | out: |
a3fdbbc7 | 2389 | lo_inode_put(lo, &inode); |
c241aa94 SH |
2390 | lo_inode_put(lo, &parent_inode); |
2391 | ||
7387863d | 2392 | if (err) { |
a3fdbbc7 SH |
2393 | if (fd >= 0) { |
2394 | close(fd); | |
2395 | } | |
2396 | ||
7387863d DDAG |
2397 | fuse_reply_err(req, err); |
2398 | } else { | |
2399 | fuse_reply_create(req, &e, fi); | |
2400 | } | |
7c6b6602 DDAG |
2401 | } |
2402 | ||
0e81414c VG |
2403 | /* Should be called with inode->plock_mutex held */ |
2404 | static struct lo_inode_plock *lookup_create_plock_ctx(struct lo_data *lo, | |
2405 | struct lo_inode *inode, | |
2406 | uint64_t lock_owner, | |
2407 | pid_t pid, int *err) | |
2408 | { | |
2409 | struct lo_inode_plock *plock; | |
0e81414c VG |
2410 | int fd; |
2411 | ||
2412 | plock = | |
2413 | g_hash_table_lookup(inode->posix_locks, GUINT_TO_POINTER(lock_owner)); | |
2414 | ||
2415 | if (plock) { | |
2416 | return plock; | |
2417 | } | |
2418 | ||
2419 | plock = malloc(sizeof(struct lo_inode_plock)); | |
2420 | if (!plock) { | |
2421 | *err = ENOMEM; | |
2422 | return NULL; | |
2423 | } | |
2424 | ||
2425 | /* Open another instance of file which can be used for ofd locks. */ | |
0e81414c | 2426 | /* TODO: What if file is not writable? */ |
a3fdbbc7 SH |
2427 | fd = lo_inode_open(lo, inode, O_RDWR); |
2428 | if (fd < 0) { | |
2429 | *err = -fd; | |
0e81414c VG |
2430 | free(plock); |
2431 | return NULL; | |
2432 | } | |
2433 | ||
2434 | plock->lock_owner = lock_owner; | |
2435 | plock->fd = fd; | |
2436 | g_hash_table_insert(inode->posix_locks, GUINT_TO_POINTER(plock->lock_owner), | |
2437 | plock); | |
2438 | return plock; | |
2439 | } | |
2440 | ||
2441 | static void lo_getlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, | |
2442 | struct flock *lock) | |
2443 | { | |
2444 | struct lo_data *lo = lo_data(req); | |
2445 | struct lo_inode *inode; | |
2446 | struct lo_inode_plock *plock; | |
2447 | int ret, saverr = 0; | |
2448 | ||
2449 | fuse_log(FUSE_LOG_DEBUG, | |
2450 | "lo_getlk(ino=%" PRIu64 ", flags=%d)" | |
d02a3c5a DDAG |
2451 | " owner=0x%" PRIx64 ", l_type=%d l_start=0x%" PRIx64 |
2452 | " l_len=0x%" PRIx64 "\n", | |
2453 | ino, fi->flags, fi->lock_owner, lock->l_type, | |
2454 | (uint64_t)lock->l_start, (uint64_t)lock->l_len); | |
0e81414c | 2455 | |
e7e8aa8a VG |
2456 | if (!lo->posix_lock) { |
2457 | fuse_reply_err(req, ENOSYS); | |
2458 | return; | |
2459 | } | |
2460 | ||
0e81414c VG |
2461 | inode = lo_inode(req, ino); |
2462 | if (!inode) { | |
2463 | fuse_reply_err(req, EBADF); | |
2464 | return; | |
2465 | } | |
2466 | ||
2467 | pthread_mutex_lock(&inode->plock_mutex); | |
2468 | plock = | |
2469 | lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); | |
2470 | if (!plock) { | |
c241aa94 SH |
2471 | saverr = ret; |
2472 | goto out; | |
0e81414c VG |
2473 | } |
2474 | ||
2475 | ret = fcntl(plock->fd, F_OFD_GETLK, lock); | |
2476 | if (ret == -1) { | |
2477 | saverr = errno; | |
2478 | } | |
c241aa94 SH |
2479 | |
2480 | out: | |
0e81414c | 2481 | pthread_mutex_unlock(&inode->plock_mutex); |
c241aa94 | 2482 | lo_inode_put(lo, &inode); |
0e81414c VG |
2483 | |
2484 | if (saverr) { | |
2485 | fuse_reply_err(req, saverr); | |
2486 | } else { | |
2487 | fuse_reply_lock(req, lock); | |
2488 | } | |
2489 | } | |
2490 | ||
2491 | static void lo_setlk(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, | |
2492 | struct flock *lock, int sleep) | |
2493 | { | |
2494 | struct lo_data *lo = lo_data(req); | |
2495 | struct lo_inode *inode; | |
2496 | struct lo_inode_plock *plock; | |
2497 | int ret, saverr = 0; | |
2498 | ||
2499 | fuse_log(FUSE_LOG_DEBUG, | |
2500 | "lo_setlk(ino=%" PRIu64 ", flags=%d)" | |
d02a3c5a DDAG |
2501 | " cmd=%d pid=%d owner=0x%" PRIx64 " sleep=%d l_whence=%d" |
2502 | " l_start=0x%" PRIx64 " l_len=0x%" PRIx64 "\n", | |
0e81414c | 2503 | ino, fi->flags, lock->l_type, lock->l_pid, fi->lock_owner, sleep, |
d02a3c5a | 2504 | lock->l_whence, (uint64_t)lock->l_start, (uint64_t)lock->l_len); |
0e81414c | 2505 | |
e7e8aa8a VG |
2506 | if (!lo->posix_lock) { |
2507 | fuse_reply_err(req, ENOSYS); | |
2508 | return; | |
2509 | } | |
2510 | ||
0e81414c VG |
2511 | if (sleep) { |
2512 | fuse_reply_err(req, EOPNOTSUPP); | |
2513 | return; | |
2514 | } | |
2515 | ||
2516 | inode = lo_inode(req, ino); | |
2517 | if (!inode) { | |
2518 | fuse_reply_err(req, EBADF); | |
2519 | return; | |
2520 | } | |
2521 | ||
2522 | pthread_mutex_lock(&inode->plock_mutex); | |
2523 | plock = | |
2524 | lookup_create_plock_ctx(lo, inode, fi->lock_owner, lock->l_pid, &ret); | |
2525 | ||
2526 | if (!plock) { | |
c241aa94 SH |
2527 | saverr = ret; |
2528 | goto out; | |
0e81414c VG |
2529 | } |
2530 | ||
2531 | /* TODO: Is it alright to modify flock? */ | |
2532 | lock->l_pid = 0; | |
2533 | ret = fcntl(plock->fd, F_OFD_SETLK, lock); | |
2534 | if (ret == -1) { | |
2535 | saverr = errno; | |
2536 | } | |
c241aa94 SH |
2537 | |
2538 | out: | |
0e81414c | 2539 | pthread_mutex_unlock(&inode->plock_mutex); |
c241aa94 SH |
2540 | lo_inode_put(lo, &inode); |
2541 | ||
0e81414c VG |
2542 | fuse_reply_err(req, saverr); |
2543 | } | |
2544 | ||
7c6b6602 | 2545 | static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, |
7387863d | 2546 | struct fuse_file_info *fi) |
7c6b6602 | 2547 | { |
7387863d | 2548 | int res; |
b39bce12 SH |
2549 | struct lo_dirp *d; |
2550 | int fd; | |
2551 | ||
7387863d | 2552 | (void)ino; |
b39bce12 SH |
2553 | |
2554 | d = lo_dirp(req, fi); | |
2555 | if (!d) { | |
2556 | fuse_reply_err(req, EBADF); | |
2557 | return; | |
2558 | } | |
2559 | ||
2560 | fd = dirfd(d->dp); | |
7387863d DDAG |
2561 | if (datasync) { |
2562 | res = fdatasync(fd); | |
2563 | } else { | |
2564 | res = fsync(fd); | |
2565 | } | |
acefdde7 SH |
2566 | |
2567 | lo_dirp_put(&d); | |
2568 | ||
7387863d | 2569 | fuse_reply_err(req, res == -1 ? errno : 0); |
7c6b6602 DDAG |
2570 | } |
2571 | ||
2572 | static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | |
2573 | { | |
7387863d | 2574 | struct lo_data *lo = lo_data(req); |
8afaaee9 SH |
2575 | struct lo_inode *inode = lo_inode(req, ino); |
2576 | int err; | |
7387863d | 2577 | |
d64907ac VG |
2578 | fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d, kill_priv=%d)" |
2579 | "\n", ino, fi->flags, fi->kill_priv); | |
7387863d | 2580 | |
8afaaee9 SH |
2581 | if (!inode) { |
2582 | fuse_reply_err(req, EBADF); | |
73b4d19d SH |
2583 | return; |
2584 | } | |
2585 | ||
a3fdbbc7 | 2586 | err = lo_do_open(lo, inode, -1, fi); |
8afaaee9 SH |
2587 | lo_inode_put(lo, &inode); |
2588 | if (err) { | |
2589 | fuse_reply_err(req, err); | |
2590 | } else { | |
2591 | fuse_reply_open(req, fi); | |
7387863d | 2592 | } |
7c6b6602 DDAG |
2593 | } |
2594 | ||
7387863d DDAG |
2595 | static void lo_release(fuse_req_t req, fuse_ino_t ino, |
2596 | struct fuse_file_info *fi) | |
7c6b6602 | 2597 | { |
73b4d19d | 2598 | struct lo_data *lo = lo_data(req); |
baed65c0 SH |
2599 | struct lo_map_elem *elem; |
2600 | int fd = -1; | |
73b4d19d | 2601 | |
7387863d | 2602 | (void)ino; |
7c6b6602 | 2603 | |
73b4d19d | 2604 | pthread_mutex_lock(&lo->mutex); |
baed65c0 SH |
2605 | elem = lo_map_get(&lo->fd_map, fi->fh); |
2606 | if (elem) { | |
2607 | fd = elem->fd; | |
2608 | elem = NULL; | |
2609 | lo_map_remove(&lo->fd_map, fi->fh); | |
2610 | } | |
73b4d19d SH |
2611 | pthread_mutex_unlock(&lo->mutex); |
2612 | ||
2613 | close(fd); | |
7387863d | 2614 | fuse_reply_err(req, 0); |
7c6b6602 DDAG |
2615 | } |
2616 | ||
2617 | static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | |
2618 | { | |
7387863d DDAG |
2619 | int res; |
2620 | (void)ino; | |
0e81414c | 2621 | struct lo_inode *inode; |
e7e8aa8a | 2622 | struct lo_data *lo = lo_data(req); |
0e81414c VG |
2623 | |
2624 | inode = lo_inode(req, ino); | |
2625 | if (!inode) { | |
2626 | fuse_reply_err(req, EBADF); | |
2627 | return; | |
2628 | } | |
2629 | ||
31a4990f VG |
2630 | if (!S_ISREG(inode->filetype)) { |
2631 | lo_inode_put(lo, &inode); | |
2632 | fuse_reply_err(req, EBADF); | |
2633 | return; | |
2634 | } | |
2635 | ||
0e81414c | 2636 | /* An fd is going away. Cleanup associated posix locks */ |
e7e8aa8a VG |
2637 | if (lo->posix_lock) { |
2638 | pthread_mutex_lock(&inode->plock_mutex); | |
2639 | g_hash_table_remove(inode->posix_locks, | |
2640 | GUINT_TO_POINTER(fi->lock_owner)); | |
2641 | pthread_mutex_unlock(&inode->plock_mutex); | |
2642 | } | |
73b4d19d | 2643 | res = close(dup(lo_fi_fd(req, fi))); |
e7e8aa8a | 2644 | lo_inode_put(lo, &inode); |
7387863d | 2645 | fuse_reply_err(req, res == -1 ? errno : 0); |
7c6b6602 DDAG |
2646 | } |
2647 | ||
2648 | static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, | |
7387863d | 2649 | struct fuse_file_info *fi) |
7c6b6602 | 2650 | { |
a3fdbbc7 SH |
2651 | struct lo_inode *inode = lo_inode(req, ino); |
2652 | struct lo_data *lo = lo_data(req); | |
7387863d | 2653 | int res; |
1b209805 | 2654 | int fd; |
1b209805 VG |
2655 | |
2656 | fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino, | |
2657 | (void *)fi); | |
2658 | ||
a3fdbbc7 SH |
2659 | if (!inode) { |
2660 | fuse_reply_err(req, EBADF); | |
2661 | return; | |
2662 | } | |
1b209805 | 2663 | |
a3fdbbc7 SH |
2664 | if (!fi) { |
2665 | fd = lo_inode_open(lo, inode, O_RDWR); | |
2666 | if (fd < 0) { | |
2667 | res = -fd; | |
2668 | goto out; | |
1b209805 VG |
2669 | } |
2670 | } else { | |
73b4d19d | 2671 | fd = lo_fi_fd(req, fi); |
1b209805 VG |
2672 | } |
2673 | ||
7387863d | 2674 | if (datasync) { |
a3fdbbc7 | 2675 | res = fdatasync(fd) == -1 ? errno : 0; |
7387863d | 2676 | } else { |
a3fdbbc7 | 2677 | res = fsync(fd) == -1 ? errno : 0; |
1b209805 VG |
2678 | } |
2679 | if (!fi) { | |
2680 | close(fd); | |
7387863d | 2681 | } |
a3fdbbc7 SH |
2682 | out: |
2683 | lo_inode_put(lo, &inode); | |
2684 | fuse_reply_err(req, res); | |
7c6b6602 DDAG |
2685 | } |
2686 | ||
7387863d DDAG |
2687 | static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset, |
2688 | struct fuse_file_info *fi) | |
7c6b6602 | 2689 | { |
7387863d | 2690 | struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size); |
7c6b6602 | 2691 | |
d240314a EG |
2692 | fuse_log(FUSE_LOG_DEBUG, |
2693 | "lo_read(ino=%" PRIu64 ", size=%zd, " | |
2694 | "off=%lu)\n", | |
2695 | ino, size, (unsigned long)offset); | |
7c6b6602 | 2696 | |
7387863d | 2697 | buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; |
73b4d19d | 2698 | buf.buf[0].fd = lo_fi_fd(req, fi); |
7387863d | 2699 | buf.buf[0].pos = offset; |
7c6b6602 | 2700 | |
8c3fe75e | 2701 | fuse_reply_data(req, &buf); |
7c6b6602 DDAG |
2702 | } |
2703 | ||
2704 | static void lo_write_buf(fuse_req_t req, fuse_ino_t ino, | |
7387863d DDAG |
2705 | struct fuse_bufvec *in_buf, off_t off, |
2706 | struct fuse_file_info *fi) | |
7c6b6602 | 2707 | { |
7387863d DDAG |
2708 | (void)ino; |
2709 | ssize_t res; | |
2710 | struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf)); | |
ee884652 | 2711 | bool cap_fsetid_dropped = false; |
7387863d DDAG |
2712 | |
2713 | out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK; | |
73b4d19d | 2714 | out_buf.buf[0].fd = lo_fi_fd(req, fi); |
7387863d DDAG |
2715 | out_buf.buf[0].pos = off; |
2716 | ||
d240314a | 2717 | fuse_log(FUSE_LOG_DEBUG, |
d64907ac VG |
2718 | "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu kill_priv=%d)\n", |
2719 | ino, out_buf.buf[0].size, (unsigned long)off, fi->kill_priv); | |
7387863d | 2720 | |
e586edcb DDAG |
2721 | res = drop_security_capability(lo_data(req), out_buf.buf[0].fd); |
2722 | if (res) { | |
2723 | fuse_reply_err(req, res); | |
2724 | return; | |
2725 | } | |
2726 | ||
ee884652 VG |
2727 | /* |
2728 | * If kill_priv is set, drop CAP_FSETID which should lead to kernel | |
d64907ac VG |
2729 | * clearing setuid/setgid on file. Note, for WRITE, we need to do |
2730 | * this even if killpriv_v2 is not enabled. fuse direct write path | |
2731 | * relies on this. | |
ee884652 VG |
2732 | */ |
2733 | if (fi->kill_priv) { | |
2734 | res = drop_effective_cap("FSETID", &cap_fsetid_dropped); | |
2735 | if (res != 0) { | |
2736 | fuse_reply_err(req, res); | |
2737 | return; | |
2738 | } | |
2739 | } | |
2740 | ||
8c3fe75e | 2741 | res = fuse_buf_copy(&out_buf, in_buf); |
7387863d DDAG |
2742 | if (res < 0) { |
2743 | fuse_reply_err(req, -res); | |
2744 | } else { | |
2745 | fuse_reply_write(req, (size_t)res); | |
2746 | } | |
ee884652 VG |
2747 | |
2748 | if (cap_fsetid_dropped) { | |
2749 | res = gain_effective_cap("FSETID"); | |
2750 | if (res) { | |
2751 | fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n"); | |
2752 | } | |
2753 | } | |
7c6b6602 DDAG |
2754 | } |
2755 | ||
2756 | static void lo_statfs(fuse_req_t req, fuse_ino_t ino) | |
2757 | { | |
7387863d DDAG |
2758 | int res; |
2759 | struct statvfs stbuf; | |
2760 | ||
2761 | res = fstatvfs(lo_fd(req, ino), &stbuf); | |
2762 | if (res == -1) { | |
2763 | fuse_reply_err(req, errno); | |
2764 | } else { | |
2765 | fuse_reply_statfs(req, &stbuf); | |
2766 | } | |
7c6b6602 DDAG |
2767 | } |
2768 | ||
7387863d DDAG |
2769 | static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset, |
2770 | off_t length, struct fuse_file_info *fi) | |
7c6b6602 | 2771 | { |
7387863d DDAG |
2772 | int err = EOPNOTSUPP; |
2773 | (void)ino; | |
7c6b6602 | 2774 | |
9776457c | 2775 | #ifdef CONFIG_FALLOCATE |
73b4d19d | 2776 | err = fallocate(lo_fi_fd(req, fi), mode, offset, length); |
7387863d DDAG |
2777 | if (err < 0) { |
2778 | err = errno; | |
2779 | } | |
7c6b6602 | 2780 | |
9776457c | 2781 | #elif defined(CONFIG_POSIX_FALLOCATE) |
7387863d DDAG |
2782 | if (mode) { |
2783 | fuse_reply_err(req, EOPNOTSUPP); | |
2784 | return; | |
2785 | } | |
7c6b6602 | 2786 | |
73b4d19d | 2787 | err = posix_fallocate(lo_fi_fd(req, fi), offset, length); |
7c6b6602 DDAG |
2788 | #endif |
2789 | ||
7387863d | 2790 | fuse_reply_err(req, err); |
7c6b6602 DDAG |
2791 | } |
2792 | ||
2793 | static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi, | |
7387863d | 2794 | int op) |
7c6b6602 | 2795 | { |
7387863d DDAG |
2796 | int res; |
2797 | (void)ino; | |
7c6b6602 | 2798 | |
41af4459 SH |
2799 | if (!(op & LOCK_NB)) { |
2800 | /* | |
2801 | * Blocking flock can deadlock as there is only one thread | |
2802 | * serving the queue. | |
2803 | */ | |
2804 | fuse_reply_err(req, EOPNOTSUPP); | |
2805 | return; | |
2806 | } | |
2807 | ||
73b4d19d | 2808 | res = flock(lo_fi_fd(req, fi), op); |
7c6b6602 | 2809 | |
7387863d | 2810 | fuse_reply_err(req, res == -1 ? errno : 0); |
7c6b6602 DDAG |
2811 | } |
2812 | ||
6084633d DDAG |
/*
 * Rule types (low bits of XattrMapEntry flags)
 */

/* Matching attribute is passed through unmodified and rule processing
 * stops.  An empty key matches every attribute. */
#define XATTR_MAP_FLAG_OK           (1 << 0)

/* Matching attribute is unwanted: EPERM on write, hidden on read. */
#define XATTR_MAP_FLAG_BAD          (1 << 1)

/*
 * For attributes starting with 'key', prepend 'prepend'.  'key' may be
 * empty to prefix all attributes.  'key' is defined from the set/remove
 * point of view; the mapping is automatically reversed on read.
 */
#define XATTR_MAP_FLAG_PREFIX       (1 << 2)

/* Matching attribute is unsupported: ENOTSUP on write, hidden on read. */
#define XATTR_MAP_FLAG_UNSUPPORTED  (1 << 3)

/*
 * Rule scopes (high bits of XattrMapEntry flags)
 */

/* Rule applies to get/set/remove */
#define XATTR_MAP_FLAG_CLIENT       (1 << 16)

/* Rule applies to list */
#define XATTR_MAP_FLAG_SERVER       (1 << 17)

/* Rule applies to every operation */
#define XATTR_MAP_FLAG_ALL  (XATTR_MAP_FLAG_CLIENT | XATTR_MAP_FLAG_SERVER)
2844 | ||
2845 | static void add_xattrmap_entry(struct lo_data *lo, | |
2846 | const XattrMapEntry *new_entry) | |
2847 | { | |
2848 | XattrMapEntry *res = g_realloc_n(lo->xattr_map_list, | |
2849 | lo->xattr_map_nentries + 1, | |
2850 | sizeof(XattrMapEntry)); | |
2851 | res[lo->xattr_map_nentries++] = *new_entry; | |
2852 | ||
2853 | lo->xattr_map_list = res; | |
2854 | } | |
2855 | ||
2856 | static void free_xattrmap(struct lo_data *lo) | |
2857 | { | |
2858 | XattrMapEntry *map = lo->xattr_map_list; | |
2859 | size_t i; | |
2860 | ||
2861 | if (!map) { | |
2862 | return; | |
2863 | } | |
2864 | ||
2865 | for (i = 0; i < lo->xattr_map_nentries; i++) { | |
2866 | g_free(map[i].key); | |
2867 | g_free(map[i].prepend); | |
2868 | }; | |
2869 | ||
2870 | g_free(map); | |
2871 | lo->xattr_map_list = NULL; | |
2872 | lo->xattr_map_nentries = -1; | |
2873 | } | |
2874 | ||
1d84a021 DDAG |
2875 | /* |
2876 | * Handle the 'map' type, which is sugar for a set of commands | |
2877 | * for the common case of prefixing a subset or everything, | |
2878 | * and allowing anything not prefixed through. | |
2879 | * It must be the last entry in the stream, although there | |
2880 | * can be other entries before it. | |
2881 | * The form is: | |
2882 | * :map:key:prefix: | |
2883 | * | |
2884 | * key maybe empty in which case all entries are prefixed. | |
2885 | */ | |
2886 | static void parse_xattrmap_map(struct lo_data *lo, | |
2887 | const char *rule, char sep) | |
2888 | { | |
2889 | const char *tmp; | |
2890 | char *key; | |
2891 | char *prefix; | |
2892 | XattrMapEntry tmp_entry; | |
2893 | ||
2894 | if (*rule != sep) { | |
2895 | fuse_log(FUSE_LOG_ERR, | |
2896 | "%s: Expecting '%c' after 'map' keyword, found '%c'\n", | |
2897 | __func__, sep, *rule); | |
2898 | exit(1); | |
2899 | } | |
2900 | ||
2901 | rule++; | |
2902 | ||
2903 | /* At start of 'key' field */ | |
2904 | tmp = strchr(rule, sep); | |
2905 | if (!tmp) { | |
2906 | fuse_log(FUSE_LOG_ERR, | |
2907 | "%s: Missing '%c' at end of key field in map rule\n", | |
2908 | __func__, sep); | |
2909 | exit(1); | |
2910 | } | |
2911 | ||
2912 | key = g_strndup(rule, tmp - rule); | |
2913 | rule = tmp + 1; | |
2914 | ||
2915 | /* At start of prefix field */ | |
2916 | tmp = strchr(rule, sep); | |
2917 | if (!tmp) { | |
2918 | fuse_log(FUSE_LOG_ERR, | |
2919 | "%s: Missing '%c' at end of prefix field in map rule\n", | |
2920 | __func__, sep); | |
2921 | exit(1); | |
2922 | } | |
2923 | ||
2924 | prefix = g_strndup(rule, tmp - rule); | |
2925 | rule = tmp + 1; | |
2926 | ||
2927 | /* | |
2928 | * This should be the end of the string, we don't allow | |
2929 | * any more commands after 'map'. | |
2930 | */ | |
2931 | if (*rule) { | |
2932 | fuse_log(FUSE_LOG_ERR, | |
2933 | "%s: Expecting end of command after map, found '%c'\n", | |
2934 | __func__, *rule); | |
2935 | exit(1); | |
2936 | } | |
2937 | ||
2938 | /* 1st: Prefix matches/everything */ | |
2939 | tmp_entry.flags = XATTR_MAP_FLAG_PREFIX | XATTR_MAP_FLAG_ALL; | |
2940 | tmp_entry.key = g_strdup(key); | |
2941 | tmp_entry.prepend = g_strdup(prefix); | |
2942 | add_xattrmap_entry(lo, &tmp_entry); | |
2943 | ||
2944 | if (!*key) { | |
2945 | /* Prefix all case */ | |
2946 | ||
2947 | /* 2nd: Hide any non-prefixed entries on the host */ | |
2948 | tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_ALL; | |
2949 | tmp_entry.key = g_strdup(""); | |
2950 | tmp_entry.prepend = g_strdup(""); | |
2951 | add_xattrmap_entry(lo, &tmp_entry); | |
2952 | } else { | |
2953 | /* Prefix matching case */ | |
2954 | ||
2955 | /* 2nd: Hide non-prefixed but matching entries on the host */ | |
2956 | tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_SERVER; | |
2957 | tmp_entry.key = g_strdup(""); /* Not used */ | |
2958 | tmp_entry.prepend = g_strdup(key); | |
2959 | add_xattrmap_entry(lo, &tmp_entry); | |
2960 | ||
2961 | /* 3rd: Stop the client accessing prefixed attributes directly */ | |
2962 | tmp_entry.flags = XATTR_MAP_FLAG_BAD | XATTR_MAP_FLAG_CLIENT; | |
2963 | tmp_entry.key = g_strdup(prefix); | |
2964 | tmp_entry.prepend = g_strdup(""); /* Not used */ | |
2965 | add_xattrmap_entry(lo, &tmp_entry); | |
2966 | ||
2967 | /* 4th: Everything else is OK */ | |
2968 | tmp_entry.flags = XATTR_MAP_FLAG_OK | XATTR_MAP_FLAG_ALL; | |
2969 | tmp_entry.key = g_strdup(""); | |
2970 | tmp_entry.prepend = g_strdup(""); | |
2971 | add_xattrmap_entry(lo, &tmp_entry); | |
2972 | } | |
2973 | ||
2974 | g_free(key); | |
2975 | g_free(prefix); | |
2976 | } | |
2977 | ||
6084633d DDAG |
2978 | static void parse_xattrmap(struct lo_data *lo) |
2979 | { | |
2980 | const char *map = lo->xattrmap; | |
2981 | const char *tmp; | |
e586edcb | 2982 | int ret; |
6084633d DDAG |
2983 | |
2984 | lo->xattr_map_nentries = 0; | |
2985 | while (*map) { | |
2986 | XattrMapEntry tmp_entry; | |
2987 | char sep; | |
2988 | ||
2989 | if (isspace(*map)) { | |
2990 | map++; | |
2991 | continue; | |
2992 | } | |
2993 | /* The separator is the first non-space of the rule */ | |
2994 | sep = *map++; | |
2995 | if (!sep) { | |
2996 | break; | |
2997 | } | |
2998 | ||
2999 | tmp_entry.flags = 0; | |
3000 | /* Start of 'type' */ | |
3001 | if (strstart(map, "prefix", &map)) { | |
3002 | tmp_entry.flags |= XATTR_MAP_FLAG_PREFIX; | |
3003 | } else if (strstart(map, "ok", &map)) { | |
3004 | tmp_entry.flags |= XATTR_MAP_FLAG_OK; | |
3005 | } else if (strstart(map, "bad", &map)) { | |
3006 | tmp_entry.flags |= XATTR_MAP_FLAG_BAD; | |
5afc8df4 VG |
3007 | } else if (strstart(map, "unsupported", &map)) { |
3008 | tmp_entry.flags |= XATTR_MAP_FLAG_UNSUPPORTED; | |
1d84a021 DDAG |
3009 | } else if (strstart(map, "map", &map)) { |
3010 | /* | |
3011 | * map is sugar that adds a number of rules, and must be | |
3012 | * the last entry. | |
3013 | */ | |
3014 | parse_xattrmap_map(lo, map, sep); | |
e586edcb | 3015 | break; |
6084633d DDAG |
3016 | } else { |
3017 | fuse_log(FUSE_LOG_ERR, | |
3018 | "%s: Unexpected type;" | |
5afc8df4 VG |
3019 | "Expecting 'prefix', 'ok', 'bad', 'unsupported' or 'map'" |
3020 | " in rule %zu\n", __func__, lo->xattr_map_nentries); | |
6084633d DDAG |
3021 | exit(1); |
3022 | } | |
3023 | ||
3024 | if (*map++ != sep) { | |
3025 | fuse_log(FUSE_LOG_ERR, | |
3026 | "%s: Missing '%c' at end of type field of rule %zu\n", | |
3027 | __func__, sep, lo->xattr_map_nentries); | |
3028 | exit(1); | |
3029 | } | |
3030 | ||
3031 | /* Start of 'scope' */ | |
3032 | if (strstart(map, "client", &map)) { | |
3033 | tmp_entry.flags |= XATTR_MAP_FLAG_CLIENT; | |
3034 | } else if (strstart(map, "server", &map)) { | |
3035 | tmp_entry.flags |= XATTR_MAP_FLAG_SERVER; | |
3036 | } else if (strstart(map, "all", &map)) { | |
3037 | tmp_entry.flags |= XATTR_MAP_FLAG_ALL; | |
3038 | } else { | |
3039 | fuse_log(FUSE_LOG_ERR, | |
3040 | "%s: Unexpected scope;" | |
3041 | " Expecting 'client', 'server', or 'all', in rule %zu\n", | |
3042 | __func__, lo->xattr_map_nentries); | |
3043 | exit(1); | |
3044 | } | |
3045 | ||
3046 | if (*map++ != sep) { | |
3047 | fuse_log(FUSE_LOG_ERR, | |
3048 | "%s: Expecting '%c' found '%c'" | |
3049 | " after scope in rule %zu\n", | |
3050 | __func__, sep, *map, lo->xattr_map_nentries); | |
3051 | exit(1); | |
3052 | } | |
3053 | ||
3054 | /* At start of 'key' field */ | |
3055 | tmp = strchr(map, sep); | |
3056 | if (!tmp) { | |
3057 | fuse_log(FUSE_LOG_ERR, | |
3058 | "%s: Missing '%c' at end of key field of rule %zu", | |
3059 | __func__, sep, lo->xattr_map_nentries); | |
3060 | exit(1); | |
3061 | } | |
3062 | tmp_entry.key = g_strndup(map, tmp - map); | |
3063 | map = tmp + 1; | |
3064 | ||
3065 | /* At start of 'prepend' field */ | |
3066 | tmp = strchr(map, sep); | |
3067 | if (!tmp) { | |
3068 | fuse_log(FUSE_LOG_ERR, | |
3069 | "%s: Missing '%c' at end of prepend field of rule %zu", | |
3070 | __func__, sep, lo->xattr_map_nentries); | |
3071 | exit(1); | |
3072 | } | |
3073 | tmp_entry.prepend = g_strndup(map, tmp - map); | |
3074 | map = tmp + 1; | |
3075 | ||
3076 | add_xattrmap_entry(lo, &tmp_entry); | |
3077 | /* End of rule - go around again for another rule */ | |
3078 | } | |
3079 | ||
3080 | if (!lo->xattr_map_nentries) { | |
3081 | fuse_log(FUSE_LOG_ERR, "Empty xattr map\n"); | |
3082 | exit(1); | |
3083 | } | |
e586edcb DDAG |
3084 | |
3085 | ret = xattr_map_client(lo, "security.capability", | |
3086 | &lo->xattr_security_capability); | |
3087 | if (ret) { | |
3088 | fuse_log(FUSE_LOG_ERR, "Failed to map security.capability: %s\n", | |
3089 | strerror(ret)); | |
3090 | exit(1); | |
3091 | } | |
99c3ac6d DDAG |
3092 | if (!lo->xattr_security_capability || |
3093 | !strcmp(lo->xattr_security_capability, "security.capability")) { | |
e586edcb DDAG |
3094 | /* 1-1 mapping, don't need to do anything */ |
3095 | free(lo->xattr_security_capability); | |
3096 | lo->xattr_security_capability = NULL; | |
3097 | } | |
6084633d DDAG |
3098 | } |
3099 | ||
4f088dbf DDAG |
3100 | /* |
3101 | * For use with getxattr/setxattr/removexattr, where the client | |
3102 | * gives us a name and we may need to choose a different one. | |
3103 | * Allocates a buffer for the result placing it in *out_name. | |
3104 | * If there's no change then *out_name is not set. | |
3105 | * Returns 0 on success | |
3106 | * Can return -EPERM to indicate we block a given attribute | |
3107 | * (in which case out_name is not allocated) | |
3108 | * Can return -ENOMEM to indicate out_name couldn't be allocated. | |
3109 | */ | |
3110 | static int xattr_map_client(const struct lo_data *lo, const char *client_name, | |
3111 | char **out_name) | |
3112 | { | |
3113 | size_t i; | |
3114 | for (i = 0; i < lo->xattr_map_nentries; i++) { | |
3115 | const XattrMapEntry *cur_entry = lo->xattr_map_list + i; | |
3116 | ||
3117 | if ((cur_entry->flags & XATTR_MAP_FLAG_CLIENT) && | |
3118 | (strstart(client_name, cur_entry->key, NULL))) { | |
3119 | if (cur_entry->flags & XATTR_MAP_FLAG_BAD) { | |
3120 | return -EPERM; | |
3121 | } | |
5afc8df4 VG |
3122 | if (cur_entry->flags & XATTR_MAP_FLAG_UNSUPPORTED) { |
3123 | return -ENOTSUP; | |
3124 | } | |
4f088dbf DDAG |
3125 | if (cur_entry->flags & XATTR_MAP_FLAG_OK) { |
3126 | /* Unmodified name */ | |
3127 | return 0; | |
3128 | } | |
3129 | if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) { | |
3130 | *out_name = g_try_malloc(strlen(client_name) + | |
3131 | strlen(cur_entry->prepend) + 1); | |
3132 | if (!*out_name) { | |
3133 | return -ENOMEM; | |
3134 | } | |
3135 | sprintf(*out_name, "%s%s", cur_entry->prepend, client_name); | |
3136 | return 0; | |
3137 | } | |
3138 | } | |
3139 | } | |
3140 | ||
3141 | return -EPERM; | |
3142 | } | |
3143 | ||
6409cf19 DDAG |
3144 | /* |
3145 | * For use with listxattr where the server fs gives us a name and we may need | |
3146 | * to sanitize this for the client. | |
3147 | * Returns a pointer to the result in *out_name | |
3148 | * This is always the original string or the current string with some prefix | |
3149 | * removed; no reallocation is done. | |
3150 | * Returns 0 on success | |
3151 | * Can return -ENODATA to indicate the name should be dropped from the list. | |
3152 | */ | |
3153 | static int xattr_map_server(const struct lo_data *lo, const char *server_name, | |
3154 | const char **out_name) | |
3155 | { | |
3156 | size_t i; | |
3157 | const char *end; | |
3158 | ||
3159 | for (i = 0; i < lo->xattr_map_nentries; i++) { | |
3160 | const XattrMapEntry *cur_entry = lo->xattr_map_list + i; | |
3161 | ||
3162 | if ((cur_entry->flags & XATTR_MAP_FLAG_SERVER) && | |
3163 | (strstart(server_name, cur_entry->prepend, &end))) { | |
5afc8df4 VG |
3164 | if (cur_entry->flags & XATTR_MAP_FLAG_BAD || |
3165 | cur_entry->flags & XATTR_MAP_FLAG_UNSUPPORTED) { | |
6409cf19 DDAG |
3166 | return -ENODATA; |
3167 | } | |
3168 | if (cur_entry->flags & XATTR_MAP_FLAG_OK) { | |
3169 | *out_name = server_name; | |
3170 | return 0; | |
3171 | } | |
3172 | if (cur_entry->flags & XATTR_MAP_FLAG_PREFIX) { | |
3173 | /* Remove prefix */ | |
3174 | *out_name = end; | |
3175 | return 0; | |
3176 | } | |
3177 | } | |
3178 | } | |
3179 | ||
3180 | return -ENODATA; | |
3181 | } | |
3182 | ||
65a820d2 VG |
3183 | static bool block_xattr(struct lo_data *lo, const char *name) |
3184 | { | |
3185 | /* | |
3186 | * If user explicitly enabled posix_acl or did not provide any option, | |
3187 | * do not block acl. Otherwise block system.posix_acl_access and | |
3188 | * system.posix_acl_default xattrs. | |
3189 | */ | |
3190 | if (lo->user_posix_acl) { | |
3191 | return false; | |
3192 | } | |
3193 | if (!strcmp(name, "system.posix_acl_access") || | |
3194 | !strcmp(name, "system.posix_acl_default")) | |
3195 | return true; | |
3196 | ||
3197 | return false; | |
3198 | } | |
3199 | ||
3200 | /* | |
3201 | * Returns number of bytes in xattr_list after filtering on success. This | |
3202 | * could be zero as well if nothing is left after filtering. | |
3203 | * | |
3204 | * Returns negative error code on failure. | |
3205 | * xattr_list is modified in place. | |
3206 | */ | |
3207 | static int remove_blocked_xattrs(struct lo_data *lo, char *xattr_list, | |
3208 | unsigned in_size) | |
3209 | { | |
3210 | size_t out_index, in_index; | |
3211 | ||
3212 | /* | |
3213 | * As of now we only filter out acl xattrs. If acls are enabled or | |
3214 | * they have not been explicitly disabled, there is nothing to | |
3215 | * filter. | |
3216 | */ | |
3217 | if (lo->user_posix_acl) { | |
3218 | return in_size; | |
3219 | } | |
3220 | ||
3221 | out_index = 0; | |
3222 | in_index = 0; | |
3223 | while (in_index < in_size) { | |
3224 | char *in_ptr = xattr_list + in_index; | |
3225 | ||
3226 | /* Length of current attribute name */ | |
3227 | size_t in_len = strlen(xattr_list + in_index) + 1; | |
3228 | ||
3229 | if (!block_xattr(lo, in_ptr)) { | |
3230 | if (in_index != out_index) { | |
3231 | memmove(xattr_list + out_index, xattr_list + in_index, in_len); | |
3232 | } | |
3233 | out_index += in_len; | |
3234 | } | |
3235 | in_index += in_len; | |
3236 | } | |
3237 | return out_index; | |
3238 | } | |
3239 | ||
4f088dbf | 3240 | static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, |
7387863d | 3241 | size_t size) |
7c6b6602 | 3242 | { |
9f59d175 | 3243 | struct lo_data *lo = lo_data(req); |
c9a276f5 | 3244 | g_autofree char *value = NULL; |
7387863d | 3245 | char procname[64]; |
4f088dbf DDAG |
3246 | const char *name; |
3247 | char *mapped_name; | |
92fb57b8 | 3248 | struct lo_inode *inode; |
7387863d DDAG |
3249 | ssize_t ret; |
3250 | int saverr; | |
9f59d175 | 3251 | int fd = -1; |
7387863d | 3252 | |
65a820d2 VG |
3253 | if (block_xattr(lo, in_name)) { |
3254 | fuse_reply_err(req, EOPNOTSUPP); | |
3255 | return; | |
3256 | } | |
3257 | ||
4f088dbf DDAG |
3258 | mapped_name = NULL; |
3259 | name = in_name; | |
3260 | if (lo->xattrmap) { | |
3261 | ret = xattr_map_client(lo, in_name, &mapped_name); | |
3262 | if (ret < 0) { | |
3263 | if (ret == -EPERM) { | |
3264 | ret = -ENODATA; | |
3265 | } | |
3266 | fuse_reply_err(req, -ret); | |
3267 | return; | |
3268 | } | |
3269 | if (mapped_name) { | |
3270 | name = mapped_name; | |
3271 | } | |
3272 | } | |
3273 | ||
92fb57b8 SH |
3274 | inode = lo_inode(req, ino); |
3275 | if (!inode) { | |
3276 | fuse_reply_err(req, EBADF); | |
4f088dbf | 3277 | g_free(mapped_name); |
92fb57b8 SH |
3278 | return; |
3279 | } | |
3280 | ||
7387863d DDAG |
3281 | saverr = ENOSYS; |
3282 | if (!lo_data(req)->xattr) { | |
3283 | goto out; | |
3284 | } | |
3285 | ||
d240314a EG |
3286 | fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", |
3287 | ino, name, size); | |
7387863d | 3288 | |
16e15a73 | 3289 | if (size) { |
c9a276f5 | 3290 | value = g_try_malloc(size); |
16e15a73 MT |
3291 | if (!value) { |
3292 | goto out_err; | |
3293 | } | |
3294 | } | |
3295 | ||
9f59d175 | 3296 | sprintf(procname, "%i", inode->fd); |
bdfd6678 MT |
3297 | /* |
3298 | * It is not safe to open() non-regular/non-dir files in file server | |
3299 | * unless O_PATH is used, so use that method for regular files/dir | |
3300 | * only (as it seems giving less performance overhead). | |
3301 | * Otherwise, call fchdir() to avoid open(). | |
3302 | */ | |
3303 | if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { | |
3304 | fd = openat(lo->proc_self_fd, procname, O_RDONLY); | |
3305 | if (fd < 0) { | |
3306 | goto out_err; | |
3307 | } | |
3308 | ret = fgetxattr(fd, name, value, size); | |
5290fb62 | 3309 | saverr = ret == -1 ? errno : 0; |
bdfd6678 MT |
3310 | } else { |
3311 | /* fchdir should not fail here */ | |
0adb3aff | 3312 | FCHDIR_NOFAIL(lo->proc_self_fd); |
bdfd6678 | 3313 | ret = getxattr(procname, name, value, size); |
5290fb62 | 3314 | saverr = ret == -1 ? errno : 0; |
0adb3aff | 3315 | FCHDIR_NOFAIL(lo->root.fd); |
9f59d175 | 3316 | } |
7387863d | 3317 | |
16e15a73 | 3318 | if (ret == -1) { |
5290fb62 | 3319 | goto out; |
16e15a73 | 3320 | } |
7387863d | 3321 | if (size) { |
7387863d DDAG |
3322 | saverr = 0; |
3323 | if (ret == 0) { | |
3324 | goto out; | |
3325 | } | |
7387863d DDAG |
3326 | fuse_reply_buf(req, value, ret); |
3327 | } else { | |
7387863d DDAG |
3328 | fuse_reply_xattr(req, ret); |
3329 | } | |
7c6b6602 | 3330 | out_free: |
9f59d175 SH |
3331 | if (fd >= 0) { |
3332 | close(fd); | |
3333 | } | |
c241aa94 SH |
3334 | |
3335 | lo_inode_put(lo, &inode); | |
7387863d | 3336 | return; |
7c6b6602 DDAG |
3337 | |
3338 | out_err: | |
7387863d | 3339 | saverr = errno; |
7c6b6602 | 3340 | out: |
7387863d | 3341 | fuse_reply_err(req, saverr); |
4f088dbf | 3342 | g_free(mapped_name); |
7387863d | 3343 | goto out_free; |
7c6b6602 DDAG |
3344 | } |
3345 | ||
3346 | static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) | |
3347 | { | |
9f59d175 | 3348 | struct lo_data *lo = lo_data(req); |
c9a276f5 | 3349 | g_autofree char *value = NULL; |
7387863d | 3350 | char procname[64]; |
92fb57b8 | 3351 | struct lo_inode *inode; |
7387863d DDAG |
3352 | ssize_t ret; |
3353 | int saverr; | |
9f59d175 | 3354 | int fd = -1; |
7387863d | 3355 | |
92fb57b8 SH |
3356 | inode = lo_inode(req, ino); |
3357 | if (!inode) { | |
3358 | fuse_reply_err(req, EBADF); | |
3359 | return; | |
3360 | } | |
3361 | ||
7387863d DDAG |
3362 | saverr = ENOSYS; |
3363 | if (!lo_data(req)->xattr) { | |
3364 | goto out; | |
3365 | } | |
3366 | ||
d240314a EG |
3367 | fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino, |
3368 | size); | |
7387863d | 3369 | |
16e15a73 | 3370 | if (size) { |
c9a276f5 | 3371 | value = g_try_malloc(size); |
16e15a73 MT |
3372 | if (!value) { |
3373 | goto out_err; | |
3374 | } | |
3375 | } | |
3376 | ||
9f59d175 | 3377 | sprintf(procname, "%i", inode->fd); |
bdfd6678 MT |
3378 | if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { |
3379 | fd = openat(lo->proc_self_fd, procname, O_RDONLY); | |
3380 | if (fd < 0) { | |
3381 | goto out_err; | |
3382 | } | |
3383 | ret = flistxattr(fd, value, size); | |
5290fb62 | 3384 | saverr = ret == -1 ? errno : 0; |
bdfd6678 MT |
3385 | } else { |
3386 | /* fchdir should not fail here */ | |
0adb3aff | 3387 | FCHDIR_NOFAIL(lo->proc_self_fd); |
bdfd6678 | 3388 | ret = listxattr(procname, value, size); |
5290fb62 | 3389 | saverr = ret == -1 ? errno : 0; |
0adb3aff | 3390 | FCHDIR_NOFAIL(lo->root.fd); |
9f59d175 | 3391 | } |
7387863d | 3392 | |
16e15a73 | 3393 | if (ret == -1) { |
5290fb62 | 3394 | goto out; |
16e15a73 | 3395 | } |
7387863d | 3396 | if (size) { |
7387863d DDAG |
3397 | saverr = 0; |
3398 | if (ret == 0) { | |
3399 | goto out; | |
3400 | } | |
6409cf19 DDAG |
3401 | |
3402 | if (lo->xattr_map_list) { | |
3403 | /* | |
3404 | * Map the names back, some attributes might be dropped, | |
3405 | * some shortened, but not increased, so we shouldn't | |
3406 | * run out of room. | |
3407 | */ | |
3408 | size_t out_index, in_index; | |
3409 | out_index = 0; | |
3410 | in_index = 0; | |
3411 | while (in_index < ret) { | |
3412 | const char *map_out; | |
3413 | char *in_ptr = value + in_index; | |
3414 | /* Length of current attribute name */ | |
3415 | size_t in_len = strlen(value + in_index) + 1; | |
3416 | ||
3417 | int mapret = xattr_map_server(lo, in_ptr, &map_out); | |
3418 | if (mapret != -ENODATA && mapret != 0) { | |
3419 | /* Shouldn't happen */ | |
3420 | saverr = -mapret; | |
3421 | goto out; | |
3422 | } | |
3423 | if (mapret == 0) { | |
3424 | /* Either unchanged, or truncated */ | |
3425 | size_t out_len; | |
3426 | if (map_out != in_ptr) { | |
3427 | /* +1 copies the NIL */ | |
3428 | out_len = strlen(map_out) + 1; | |
3429 | } else { | |
3430 | /* No change */ | |
3431 | out_len = in_len; | |
3432 | } | |
3433 | /* | |
3434 | * Move result along, may still be needed for an unchanged | |
3435 | * entry if a previous entry was changed. | |
3436 | */ | |
3437 | memmove(value + out_index, map_out, out_len); | |
3438 | ||
3439 | out_index += out_len; | |
3440 | } | |
3441 | in_index += in_len; | |
3442 | } | |
3443 | ret = out_index; | |
3444 | if (ret == 0) { | |
3445 | goto out; | |
3446 | } | |
3447 | } | |
65a820d2 VG |
3448 | |
3449 | ret = remove_blocked_xattrs(lo, value, ret); | |
3450 | if (ret <= 0) { | |
3451 | saverr = -ret; | |
3452 | goto out; | |
3453 | } | |
7387863d DDAG |
3454 | fuse_reply_buf(req, value, ret); |
3455 | } else { | |
6409cf19 DDAG |
3456 | /* |
3457 | * xattrmap only ever shortens the result, | |
3458 | * so we don't need to do anything clever with the | |
3459 | * allocation length here. | |
3460 | */ | |
7387863d DDAG |
3461 | fuse_reply_xattr(req, ret); |
3462 | } | |
7c6b6602 | 3463 | out_free: |
9f59d175 SH |
3464 | if (fd >= 0) { |
3465 | close(fd); | |
3466 | } | |
c241aa94 SH |
3467 | |
3468 | lo_inode_put(lo, &inode); | |
7387863d | 3469 | return; |
7c6b6602 DDAG |
3470 | |
3471 | out_err: | |
7387863d | 3472 | saverr = errno; |
7c6b6602 | 3473 | out: |
7387863d DDAG |
3474 | fuse_reply_err(req, saverr); |
3475 | goto out_free; | |
7c6b6602 DDAG |
3476 | } |
3477 | ||
4f088dbf | 3478 | static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *in_name, |
c46ef954 VG |
3479 | const char *value, size_t size, int flags, |
3480 | uint32_t extra_flags) | |
7c6b6602 | 3481 | { |
7387863d | 3482 | char procname[64]; |
4f088dbf DDAG |
3483 | const char *name; |
3484 | char *mapped_name; | |
9f59d175 | 3485 | struct lo_data *lo = lo_data(req); |
92fb57b8 | 3486 | struct lo_inode *inode; |
7387863d DDAG |
3487 | ssize_t ret; |
3488 | int saverr; | |
9f59d175 | 3489 | int fd = -1; |
f1aa1774 VG |
3490 | bool switched_creds = false; |
3491 | bool cap_fsetid_dropped = false; | |
3492 | struct lo_cred old = {}; | |
7c6b6602 | 3493 | |
65a820d2 VG |
3494 | if (block_xattr(lo, in_name)) { |
3495 | fuse_reply_err(req, EOPNOTSUPP); | |
3496 | return; | |
3497 | } | |
3498 | ||
4f088dbf DDAG |
3499 | mapped_name = NULL; |
3500 | name = in_name; | |
3501 | if (lo->xattrmap) { | |
3502 | ret = xattr_map_client(lo, in_name, &mapped_name); | |
3503 | if (ret < 0) { | |
3504 | fuse_reply_err(req, -ret); | |
3505 | return; | |
3506 | } | |
3507 | if (mapped_name) { | |
3508 | name = mapped_name; | |
3509 | } | |
3510 | } | |
3511 | ||
92fb57b8 SH |
3512 | inode = lo_inode(req, ino); |
3513 | if (!inode) { | |
3514 | fuse_reply_err(req, EBADF); | |
4f088dbf | 3515 | g_free(mapped_name); |
92fb57b8 SH |
3516 | return; |
3517 | } | |
3518 | ||
7387863d DDAG |
3519 | saverr = ENOSYS; |
3520 | if (!lo_data(req)->xattr) { | |
3521 | goto out; | |
3522 | } | |
7c6b6602 | 3523 | |
d240314a EG |
3524 | fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64 |
3525 | ", name=%s value=%s size=%zd)\n", ino, name, value, size); | |
7c6b6602 | 3526 | |
9f59d175 | 3527 | sprintf(procname, "%i", inode->fd); |
f1aa1774 VG |
3528 | /* |
3529 | * If we are setting posix access acl and if SGID needs to be | |
3530 | * cleared, then switch to caller's gid and drop CAP_FSETID | |
3531 | * and that should make sure host kernel clears SGID. | |
3532 | * | |
3533 | * This probably will not work when we support idmapped mounts. | |
3534 | * In that case we will need to find a non-root gid and switch | |
3535 | * to it. (Instead of gid in request). Fix it when we support | |
3536 | * idmapped mounts. | |
3537 | */ | |
3538 | if (lo->posix_acl && !strcmp(name, "system.posix_acl_access") | |
3539 | && (extra_flags & FUSE_SETXATTR_ACL_KILL_SGID)) { | |
3540 | ret = lo_drop_cap_change_cred(req, &old, false, "FSETID", | |
3541 | &cap_fsetid_dropped); | |
3542 | if (ret) { | |
3543 | saverr = ret; | |
3544 | goto out; | |
3545 | } | |
3546 | switched_creds = true; | |
3547 | } | |
bdfd6678 MT |
3548 | if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { |
3549 | fd = openat(lo->proc_self_fd, procname, O_RDONLY); | |
3550 | if (fd < 0) { | |
3551 | saverr = errno; | |
3552 | goto out; | |
3553 | } | |
3554 | ret = fsetxattr(fd, name, value, size, flags); | |
5290fb62 | 3555 | saverr = ret == -1 ? errno : 0; |
bdfd6678 MT |
3556 | } else { |
3557 | /* fchdir should not fail here */ | |
0adb3aff | 3558 | FCHDIR_NOFAIL(lo->proc_self_fd); |
bdfd6678 | 3559 | ret = setxattr(procname, name, value, size, flags); |
5290fb62 | 3560 | saverr = ret == -1 ? errno : 0; |
0adb3aff | 3561 | FCHDIR_NOFAIL(lo->root.fd); |
9f59d175 | 3562 | } |
f1aa1774 VG |
3563 | if (switched_creds) { |
3564 | if (cap_fsetid_dropped) | |
3565 | lo_restore_cred_gain_cap(&old, false, "FSETID"); | |
3566 | else | |
3567 | lo_restore_cred(&old, false); | |
3568 | } | |
7c6b6602 | 3569 | |
7c6b6602 | 3570 | out: |
9f59d175 SH |
3571 | if (fd >= 0) { |
3572 | close(fd); | |
3573 | } | |
c241aa94 SH |
3574 | |
3575 | lo_inode_put(lo, &inode); | |
4f088dbf | 3576 | g_free(mapped_name); |
7387863d | 3577 | fuse_reply_err(req, saverr); |
7c6b6602 DDAG |
3578 | } |
3579 | ||
4f088dbf | 3580 | static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *in_name) |
7c6b6602 | 3581 | { |
7387863d | 3582 | char procname[64]; |
4f088dbf DDAG |
3583 | const char *name; |
3584 | char *mapped_name; | |
9f59d175 | 3585 | struct lo_data *lo = lo_data(req); |
92fb57b8 | 3586 | struct lo_inode *inode; |
7387863d DDAG |
3587 | ssize_t ret; |
3588 | int saverr; | |
9f59d175 | 3589 | int fd = -1; |
7c6b6602 | 3590 | |
65a820d2 VG |
3591 | if (block_xattr(lo, in_name)) { |
3592 | fuse_reply_err(req, EOPNOTSUPP); | |
3593 | return; | |
3594 | } | |
3595 | ||
4f088dbf DDAG |
3596 | mapped_name = NULL; |
3597 | name = in_name; | |
3598 | if (lo->xattrmap) { | |
3599 | ret = xattr_map_client(lo, in_name, &mapped_name); | |
3600 | if (ret < 0) { | |
3601 | fuse_reply_err(req, -ret); | |
3602 | return; | |
3603 | } | |
3604 | if (mapped_name) { | |
3605 | name = mapped_name; | |
3606 | } | |
3607 | } | |
3608 | ||
92fb57b8 SH |
3609 | inode = lo_inode(req, ino); |
3610 | if (!inode) { | |
3611 | fuse_reply_err(req, EBADF); | |
4f088dbf | 3612 | g_free(mapped_name); |
92fb57b8 SH |
3613 | return; |
3614 | } | |
3615 | ||
7387863d DDAG |
3616 | saverr = ENOSYS; |
3617 | if (!lo_data(req)->xattr) { | |
3618 | goto out; | |
3619 | } | |
7c6b6602 | 3620 | |
d240314a EG |
3621 | fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino, |
3622 | name); | |
7c6b6602 | 3623 | |
9f59d175 | 3624 | sprintf(procname, "%i", inode->fd); |
bdfd6678 MT |
3625 | if (S_ISREG(inode->filetype) || S_ISDIR(inode->filetype)) { |
3626 | fd = openat(lo->proc_self_fd, procname, O_RDONLY); | |
3627 | if (fd < 0) { | |
3628 | saverr = errno; | |
3629 | goto out; | |
3630 | } | |
3631 | ret = fremovexattr(fd, name); | |
5290fb62 | 3632 | saverr = ret == -1 ? errno : 0; |
bdfd6678 MT |
3633 | } else { |
3634 | /* fchdir should not fail here */ | |
0adb3aff | 3635 | FCHDIR_NOFAIL(lo->proc_self_fd); |
bdfd6678 | 3636 | ret = removexattr(procname, name); |
5290fb62 | 3637 | saverr = ret == -1 ? errno : 0; |
0adb3aff | 3638 | FCHDIR_NOFAIL(lo->root.fd); |
9f59d175 | 3639 | } |
7c6b6602 | 3640 | |
7c6b6602 | 3641 | out: |
9f59d175 SH |
3642 | if (fd >= 0) { |
3643 | close(fd); | |
3644 | } | |
c241aa94 SH |
3645 | |
3646 | lo_inode_put(lo, &inode); | |
4f088dbf | 3647 | g_free(mapped_name); |
7387863d | 3648 | fuse_reply_err(req, saverr); |
7c6b6602 DDAG |
3649 | } |
3650 | ||
#ifdef HAVE_COPY_FILE_RANGE
/*
 * FUSE copy_file_range handler: forward the request to copy_file_range()
 * on the two open files and reply with the number of bytes copied, or
 * with errno on failure.
 */
static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
                               struct fuse_file_info *fi_in, fuse_ino_t ino_out,
                               off_t off_out, struct fuse_file_info *fi_out,
                               size_t len, int flags)
{
    int in_fd = lo_fi_fd(req, fi_in);
    int out_fd = lo_fi_fd(req, fi_out);
    ssize_t copied;

    fuse_log(FUSE_LOG_DEBUG,
             "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, "
             "off=%ju, ino=%" PRIu64 "/fd=%d, "
             "off=%ju, size=%zd, flags=0x%x)\n",
             ino_in, in_fd, (intmax_t)off_in,
             ino_out, out_fd, (intmax_t)off_out, len, flags);

    copied = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags);
    if (copied >= 0) {
        fuse_reply_write(req, copied);
    } else {
        fuse_reply_err(req, errno);
    }
}
#endif
3678 | ||
3679 | static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence, | |
7387863d | 3680 | struct fuse_file_info *fi) |
7c6b6602 | 3681 | { |
7387863d DDAG |
3682 | off_t res; |
3683 | ||
3684 | (void)ino; | |
73b4d19d | 3685 | res = lseek(lo_fi_fd(req, fi), off, whence); |
7387863d DDAG |
3686 | if (res != -1) { |
3687 | fuse_reply_lseek(req, res); | |
3688 | } else { | |
3689 | fuse_reply_err(req, errno); | |
3690 | } | |
7c6b6602 DDAG |
3691 | } |
3692 | ||
45b04ef4 GK |
3693 | static int lo_do_syncfs(struct lo_data *lo, struct lo_inode *inode) |
3694 | { | |
3695 | int fd, ret = 0; | |
3696 | ||
3697 | fuse_log(FUSE_LOG_DEBUG, "lo_do_syncfs(ino=%" PRIu64 ")\n", | |
3698 | inode->fuse_ino); | |
3699 | ||
3700 | fd = lo_inode_open(lo, inode, O_RDONLY); | |
3701 | if (fd < 0) { | |
3702 | return -fd; | |
3703 | } | |
3704 | ||
3705 | if (syncfs(fd) < 0) { | |
3706 | ret = errno; | |
3707 | } | |
3708 | ||
3709 | close(fd); | |
3710 | return ret; | |
3711 | } | |
3712 | ||
3713 | static void lo_syncfs(fuse_req_t req, fuse_ino_t ino) | |
3714 | { | |
3715 | struct lo_data *lo = lo_data(req); | |
3716 | struct lo_inode *inode = lo_inode(req, ino); | |
3717 | int err; | |
3718 | ||
3719 | if (!inode) { | |
3720 | fuse_reply_err(req, EBADF); | |
3721 | return; | |
3722 | } | |
3723 | ||
3724 | err = lo_do_syncfs(lo, inode); | |
3725 | lo_inode_put(lo, &inode); | |
3726 | ||
3727 | /* | |
3728 | * If submounts aren't announced, the client only sends a request to | |
3729 | * sync the root inode. TODO: Track submounts internally and iterate | |
3730 | * over them as well. | |
3731 | */ | |
3732 | ||
3733 | fuse_reply_err(req, err); | |
3734 | } | |
3735 | ||
771b01eb DDAG |
3736 | static void lo_destroy(void *userdata) |
3737 | { | |
3738 | struct lo_data *lo = (struct lo_data *)userdata; | |
28f7a3b0 | 3739 | |
fe4c1579 | 3740 | pthread_mutex_lock(&lo->mutex); |
28f7a3b0 SH |
3741 | while (true) { |
3742 | GHashTableIter iter; | |
3743 | gpointer key, value; | |
3744 | ||
3745 | g_hash_table_iter_init(&iter, lo->inodes); | |
3746 | if (!g_hash_table_iter_next(&iter, &key, &value)) { | |
3747 | break; | |
3748 | } | |
3749 | ||
3750 | struct lo_inode *inode = value; | |
fe4c1579 | 3751 | unref_inode(lo, inode, inode->nlookup); |
28f7a3b0 | 3752 | } |
fe4c1579 | 3753 | pthread_mutex_unlock(&lo->mutex); |
771b01eb DDAG |
3754 | } |
3755 | ||
7c6b6602 | 3756 | static struct fuse_lowlevel_ops lo_oper = { |
7387863d DDAG |
3757 | .init = lo_init, |
3758 | .lookup = lo_lookup, | |
3759 | .mkdir = lo_mkdir, | |
3760 | .mknod = lo_mknod, | |
3761 | .symlink = lo_symlink, | |
3762 | .link = lo_link, | |
3763 | .unlink = lo_unlink, | |
3764 | .rmdir = lo_rmdir, | |
3765 | .rename = lo_rename, | |
3766 | .forget = lo_forget, | |
3767 | .forget_multi = lo_forget_multi, | |
3768 | .getattr = lo_getattr, | |
3769 | .setattr = lo_setattr, | |
3770 | .readlink = lo_readlink, | |
3771 | .opendir = lo_opendir, | |
3772 | .readdir = lo_readdir, | |
3773 | .readdirplus = lo_readdirplus, | |
3774 | .releasedir = lo_releasedir, | |
3775 | .fsyncdir = lo_fsyncdir, | |
3776 | .create = lo_create, | |
0e81414c VG |
3777 | .getlk = lo_getlk, |
3778 | .setlk = lo_setlk, | |
7387863d DDAG |
3779 | .open = lo_open, |
3780 | .release = lo_release, | |
3781 | .flush = lo_flush, | |
3782 | .fsync = lo_fsync, | |
3783 | .read = lo_read, | |
3784 | .write_buf = lo_write_buf, | |
3785 | .statfs = lo_statfs, | |
3786 | .fallocate = lo_fallocate, | |
3787 | .flock = lo_flock, | |
3788 | .getxattr = lo_getxattr, | |
3789 | .listxattr = lo_listxattr, | |
3790 | .setxattr = lo_setxattr, | |
3791 | .removexattr = lo_removexattr, | |
7c6b6602 | 3792 | #ifdef HAVE_COPY_FILE_RANGE |
7387863d | 3793 | .copy_file_range = lo_copy_file_range, |
7c6b6602 | 3794 | #endif |
7387863d | 3795 | .lseek = lo_lseek, |
45b04ef4 | 3796 | .syncfs = lo_syncfs, |
771b01eb | 3797 | .destroy = lo_destroy, |
7c6b6602 DDAG |
3798 | }; |
3799 | ||
/*
 * Print the vhost-user.json backend program capabilities to stdout as a
 * small JSON object.
 */
static void print_capabilities(void)
{
    fputs("{\n"
          " \"type\": \"fs\"\n"
          "}\n",
          stdout);
}
3807 | ||
66502bbc SH |
3808 | /* |
3809 | * Drop all Linux capabilities because the wait parent process only needs to | |
3810 | * sit in waitpid(2) and terminate. | |
3811 | */ | |
3812 | static void setup_wait_parent_capabilities(void) | |
3813 | { | |
3814 | capng_setpid(syscall(SYS_gettid)); | |
3815 | capng_clear(CAPNG_SELECT_BOTH); | |
3816 | capng_apply(CAPNG_SELECT_BOTH); | |
3817 | } | |
3818 | ||
d74830d1 | 3819 | /* |
8e1d4ef2 | 3820 | * Move to a new mount, net, and pid namespaces to isolate this process. |
d74830d1 | 3821 | */ |
8e1d4ef2 | 3822 | static void setup_namespaces(struct lo_data *lo, struct fuse_session *se) |
d74830d1 | 3823 | { |
8e1d4ef2 SH |
3824 | pid_t child; |
3825 | ||
3826 | /* | |
3827 | * Create a new pid namespace for *child* processes. We'll have to | |
3828 | * fork in order to enter the new pid namespace. A new mount namespace | |
3829 | * is also needed so that we can remount /proc for the new pid | |
3830 | * namespace. | |
3831 | * | |
3832 | * Our UNIX domain sockets have been created. Now we can move to | |
3833 | * an empty network namespace to prevent TCP/IP and other network | |
3834 | * activity in case this process is compromised. | |
3835 | */ | |
3836 | if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) { | |
3837 | fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n"); | |
3838 | exit(1); | |
3839 | } | |
3840 | ||
3841 | child = fork(); | |
3842 | if (child < 0) { | |
3843 | fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n"); | |
3844 | exit(1); | |
3845 | } | |
3846 | if (child > 0) { | |
3847 | pid_t waited; | |
3848 | int wstatus; | |
3849 | ||
66502bbc SH |
3850 | setup_wait_parent_capabilities(); |
3851 | ||
8e1d4ef2 SH |
3852 | /* The parent waits for the child */ |
3853 | do { | |
3854 | waited = waitpid(child, &wstatus, 0); | |
3855 | } while (waited < 0 && errno == EINTR && !se->exited); | |
3856 | ||
3857 | /* We were terminated by a signal, see fuse_signals.c */ | |
3858 | if (se->exited) { | |
3859 | exit(0); | |
3860 | } | |
3861 | ||
3862 | if (WIFEXITED(wstatus)) { | |
3863 | exit(WEXITSTATUS(wstatus)); | |
3864 | } | |
3865 | ||
3866 | exit(1); | |
3867 | } | |
3868 | ||
3869 | /* Send us SIGTERM when the parent thread terminates, see prctl(2) */ | |
3870 | prctl(PR_SET_PDEATHSIG, SIGTERM); | |
3871 | ||
3872 | /* | |
3873 | * If the mounts have shared propagation then we want to opt out so our | |
3874 | * mount changes don't affect the parent mount namespace. | |
3875 | */ | |
3876 | if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) { | |
3877 | fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n"); | |
3878 | exit(1); | |
3879 | } | |
3880 | ||
3881 | /* The child must remount /proc to use the new pid namespace */ | |
3882 | if (mount("proc", "/proc", "proc", | |
3883 | MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) { | |
3884 | fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n"); | |
3885 | exit(1); | |
3886 | } | |
3887 | ||
cb282e55 VG |
3888 | /* Get the /proc/self/task descriptor */ |
3889 | lo->proc_self_task = open("/proc/self/task/", O_PATH); | |
3890 | if (lo->proc_self_task == -1) { | |
3891 | fuse_log(FUSE_LOG_ERR, "open(/proc/self/task, O_PATH): %m\n"); | |
3892 | exit(1); | |
3893 | } | |
3894 | ||
3895 | lo->use_fscreate = is_fscreate_usable(lo); | |
3896 | ||
ebf10195 SH |
3897 | /* |
3898 | * We only need /proc/self/fd. Prevent ".." from accessing parent | |
3899 | * directories of /proc/self/fd by bind-mounting it over /proc. Since / was | |
3900 | * previously remounted with MS_REC | MS_SLAVE this mount change only | |
3901 | * affects our process. | |
3902 | */ | |
3903 | if (mount("/proc/self/fd", "/proc", NULL, MS_BIND, NULL) < 0) { | |
3904 | fuse_log(FUSE_LOG_ERR, "mount(/proc/self/fd, MS_BIND): %m\n"); | |
397ae982 MS |
3905 | exit(1); |
3906 | } | |
3907 | ||
ebf10195 SH |
3908 | /* Get the /proc (actually /proc/self/fd, see above) file descriptor */ |
3909 | lo->proc_self_fd = open("/proc", O_PATH); | |
8e1d4ef2 | 3910 | if (lo->proc_self_fd == -1) { |
ebf10195 | 3911 | fuse_log(FUSE_LOG_ERR, "open(/proc, O_PATH): %m\n"); |
d74830d1 SH |
3912 | exit(1); |
3913 | } | |
3914 | } | |
3915 | ||
2405f3c0 DDAG |
3916 | /* |
3917 | * Capture the capability state, we'll need to restore this for individual | |
3918 | * threads later; see load_capng. | |
3919 | */ | |
3920 | static void setup_capng(void) | |
3921 | { | |
3922 | /* Note this accesses /proc so has to happen before the sandbox */ | |
3923 | if (capng_get_caps_process()) { | |
3924 | fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n"); | |
3925 | exit(1); | |
3926 | } | |
3927 | pthread_mutex_init(&cap.mutex, NULL); | |
3928 | pthread_mutex_lock(&cap.mutex); | |
3929 | cap.saved = capng_save_state(); | |
3930 | if (!cap.saved) { | |
3931 | fuse_log(FUSE_LOG_ERR, "capng_save_state\n"); | |
3932 | exit(1); | |
3933 | } | |
3934 | pthread_mutex_unlock(&cap.mutex); | |
3935 | } | |
3936 | ||
3937 | static void cleanup_capng(void) | |
3938 | { | |
3939 | free(cap.saved); | |
3940 | cap.saved = NULL; | |
3941 | pthread_mutex_destroy(&cap.mutex); | |
3942 | } | |
3943 | ||
3944 | ||
8e1d4ef2 SH |
3945 | /* |
3946 | * Make the source directory our root so symlinks cannot escape and no other | |
3947 | * files are accessible. Assumes unshare(CLONE_NEWNS) was already called. | |
3948 | */ | |
3949 | static void setup_mounts(const char *source) | |
5baa3b8e SH |
3950 | { |
3951 | int oldroot; | |
3952 | int newroot; | |
3953 | ||
ace0829c | 3954 | if (mount(source, source, NULL, MS_BIND | MS_REC, NULL) < 0) { |
8e1d4ef2 SH |
3955 | fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source); |
3956 | exit(1); | |
3957 | } | |
3958 | ||
3959 | /* This magic is based on lxc's lxc_pivot_root() */ | |
5baa3b8e SH |
3960 | oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC); |
3961 | if (oldroot < 0) { | |
3962 | fuse_log(FUSE_LOG_ERR, "open(/): %m\n"); | |
3963 | exit(1); | |
3964 | } | |
3965 | ||
3966 | newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC); | |
3967 | if (newroot < 0) { | |
3968 | fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source); | |
3969 | exit(1); | |
3970 | } | |
3971 | ||
3972 | if (fchdir(newroot) < 0) { | |
3973 | fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); | |
3974 | exit(1); | |
3975 | } | |
3976 | ||
3977 | if (syscall(__NR_pivot_root, ".", ".") < 0) { | |
3978 | fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n"); | |
3979 | exit(1); | |
3980 | } | |
3981 | ||
3982 | if (fchdir(oldroot) < 0) { | |
3983 | fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n"); | |
3984 | exit(1); | |
3985 | } | |
3986 | ||
3987 | if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) { | |
3988 | fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n"); | |
3989 | exit(1); | |
3990 | } | |
3991 | ||
3992 | if (umount2(".", MNT_DETACH) < 0) { | |
3993 | fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n"); | |
3994 | exit(1); | |
3995 | } | |
3996 | ||
3997 | if (fchdir(newroot) < 0) { | |
3998 | fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n"); | |
3999 | exit(1); | |
4000 | } | |
4001 | ||
4002 | close(newroot); | |
4003 | close(oldroot); | |
4004 | } | |
4005 | ||
a59feb48 | 4006 | /* |
a65963ef | 4007 | * Only keep capabilities in allowlist that are needed for file system operation |
3005c099 | 4008 | * The (possibly NULL) modcaps_in string passed in is free'd before exit. |
a59feb48 | 4009 | */ |
3005c099 | 4010 | static void setup_capabilities(char *modcaps_in) |
a59feb48 | 4011 | { |
3005c099 | 4012 | char *modcaps = modcaps_in; |
a59feb48 SH |
4013 | pthread_mutex_lock(&cap.mutex); |
4014 | capng_restore_state(&cap.saved); | |
4015 | ||
4016 | /* | |
a65963ef PMD |
4017 | * Add to allowlist file system-related capabilities that are needed for a |
4018 | * file server to act like root. Drop everything else like networking and | |
a59feb48 SH |
4019 | * sysadmin capabilities. |
4020 | * | |
4021 | * Exclusions: | |
4022 | * 1. CAP_LINUX_IMMUTABLE is not included because it's only used via ioctl | |
4023 | * and we don't support that. | |
4024 | * 2. CAP_MAC_OVERRIDE is not included because it only seems to be | |
4025 | * used by the Smack LSM. Omit it until there is demand for it. | |
4026 | */ | |
4027 | capng_setpid(syscall(SYS_gettid)); | |
4028 | capng_clear(CAPNG_SELECT_BOTH); | |
55b22a60 | 4029 | if (capng_updatev(CAPNG_ADD, CAPNG_PERMITTED | CAPNG_EFFECTIVE, |
a59feb48 SH |
4030 | CAP_CHOWN, |
4031 | CAP_DAC_OVERRIDE, | |
a59feb48 SH |
4032 | CAP_FOWNER, |
4033 | CAP_FSETID, | |
4034 | CAP_SETGID, | |
4035 | CAP_SETUID, | |
4036 | CAP_MKNOD, | |
b1288dfa | 4037 | CAP_SETFCAP, |
55b22a60 DDAG |
4038 | -1)) { |
4039 | fuse_log(FUSE_LOG_ERR, "%s: capng_updatev failed\n", __func__); | |
4040 | exit(1); | |
4041 | } | |
b1288dfa | 4042 | |
3005c099 DDAG |
4043 | /* |
4044 | * The modcaps option is a colon separated list of caps, | |
4045 | * each preceded by either + or -. | |
4046 | */ | |
4047 | while (modcaps) { | |
4048 | capng_act_t action; | |
4049 | int cap; | |
4050 | ||
4051 | char *next = strchr(modcaps, ':'); | |
4052 | if (next) { | |
4053 | *next = '\0'; | |
4054 | next++; | |
4055 | } | |
4056 | ||
4057 | switch (modcaps[0]) { | |
4058 | case '+': | |
4059 | action = CAPNG_ADD; | |
4060 | break; | |
4061 | ||
4062 | case '-': | |
4063 | action = CAPNG_DROP; | |
4064 | break; | |
4065 | ||
4066 | default: | |
4067 | fuse_log(FUSE_LOG_ERR, | |
4068 | "%s: Expecting '+'/'-' in modcaps but found '%c'\n", | |
4069 | __func__, modcaps[0]); | |
4070 | exit(1); | |
4071 | } | |
4072 | cap = capng_name_to_capability(modcaps + 1); | |
4073 | if (cap < 0) { | |
4074 | fuse_log(FUSE_LOG_ERR, "%s: Unknown capability '%s'\n", __func__, | |
4075 | modcaps); | |
4076 | exit(1); | |
4077 | } | |
4078 | if (capng_update(action, CAPNG_PERMITTED | CAPNG_EFFECTIVE, cap)) { | |
4079 | fuse_log(FUSE_LOG_ERR, "%s: capng_update failed for '%s'\n", | |
4080 | __func__, modcaps); | |
4081 | exit(1); | |
4082 | } | |
4083 | ||
4084 | modcaps = next; | |
4085 | } | |
4086 | g_free(modcaps_in); | |
4087 | ||
55b22a60 DDAG |
4088 | if (capng_apply(CAPNG_SELECT_BOTH)) { |
4089 | fuse_log(FUSE_LOG_ERR, "%s: capng_apply failed\n", __func__); | |
4090 | exit(1); | |
4091 | } | |
a59feb48 SH |
4092 | |
4093 | cap.saved = capng_save_state(); | |
55b22a60 DDAG |
4094 | if (!cap.saved) { |
4095 | fuse_log(FUSE_LOG_ERR, "%s: capng_save_state failed\n", __func__); | |
4096 | exit(1); | |
4097 | } | |
a59feb48 SH |
4098 | pthread_mutex_unlock(&cap.mutex); |
4099 | } | |
4100 | ||
06844584 SH |
4101 | /* |
4102 | * Use chroot as a weaker sandbox for environments where the process is | |
4103 | * launched without CAP_SYS_ADMIN. | |
4104 | */ | |
4105 | static void setup_chroot(struct lo_data *lo) | |
4106 | { | |
4107 | lo->proc_self_fd = open("/proc/self/fd", O_PATH); | |
4108 | if (lo->proc_self_fd == -1) { | |
4109 | fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/fd\", O_PATH): %m\n"); | |
4110 | exit(1); | |
4111 | } | |
4112 | ||
cb282e55 VG |
4113 | lo->proc_self_task = open("/proc/self/task", O_PATH); |
4114 | if (lo->proc_self_fd == -1) { | |
4115 | fuse_log(FUSE_LOG_ERR, "open(\"/proc/self/task\", O_PATH): %m\n"); | |
4116 | exit(1); | |
4117 | } | |
4118 | ||
4119 | lo->use_fscreate = is_fscreate_usable(lo); | |
4120 | ||
06844584 SH |
4121 | /* |
4122 | * Make the shared directory the file system root so that FUSE_OPEN | |
4123 | * (lo_open()) cannot escape the shared directory by opening a symlink. | |
4124 | * | |
4125 | * The chroot(2) syscall is later disabled by seccomp and the | |
4126 | * CAP_SYS_CHROOT capability is dropped so that tampering with the chroot | |
4127 | * is not possible. | |
4128 | * | |
4129 | * However, it's still possible to escape the chroot via lo->proc_self_fd | |
4130 | * but that requires first gaining control of the process. | |
4131 | */ | |
4132 | if (chroot(lo->source) != 0) { | |
4133 | fuse_log(FUSE_LOG_ERR, "chroot(\"%s\"): %m\n", lo->source); | |
4134 | exit(1); | |
4135 | } | |
4136 | ||
4137 | /* Move into the chroot */ | |
4138 | if (chdir("/") != 0) { | |
4139 | fuse_log(FUSE_LOG_ERR, "chdir(\"/\"): %m\n"); | |
4140 | exit(1); | |
4141 | } | |
4142 | } | |
4143 | ||
5baa3b8e SH |
4144 | /* |
4145 | * Lock down this process to prevent access to other processes or files outside | |
4146 | * source directory. This reduces the impact of arbitrary code execution bugs. | |
4147 | */ | |
f185621d SH |
4148 | static void setup_sandbox(struct lo_data *lo, struct fuse_session *se, |
4149 | bool enable_syslog) | |
5baa3b8e | 4150 | { |
06844584 SH |
4151 | if (lo->sandbox == SANDBOX_NAMESPACE) { |
4152 | setup_namespaces(lo, se); | |
4153 | setup_mounts(lo->source); | |
4154 | } else { | |
4155 | setup_chroot(lo); | |
4156 | } | |
4157 | ||
f185621d | 4158 | setup_seccomp(enable_syslog); |
3005c099 | 4159 | setup_capabilities(g_strdup(lo->modcaps)); |
5baa3b8e SH |
4160 | } |
4161 | ||
6dbb7168 SH |
4162 | /* Set the maximum number of open file descriptors */ |
4163 | static void setup_nofile_rlimit(unsigned long rlimit_nofile) | |
01a6dc95 | 4164 | { |
6dbb7168 SH |
4165 | struct rlimit rlim = { |
4166 | .rlim_cur = rlimit_nofile, | |
4167 | .rlim_max = rlimit_nofile, | |
4168 | }; | |
01a6dc95 | 4169 | |
6dbb7168 | 4170 | if (rlimit_nofile == 0) { |
01a6dc95 SH |
4171 | return; /* nothing to do */ |
4172 | } | |
4173 | ||
01a6dc95 SH |
4174 | if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) { |
4175 | /* Ignore SELinux denials */ | |
4176 | if (errno == EPERM) { | |
4177 | return; | |
4178 | } | |
4179 | ||
4180 | fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n"); | |
4181 | exit(1); | |
4182 | } | |
4183 | } | |
4184 | ||
e4418354 | 4185 | G_GNUC_PRINTF(2, 0) |
f185621d SH |
4186 | static void log_func(enum fuse_log_level level, const char *fmt, va_list ap) |
4187 | { | |
36f38469 | 4188 | g_autofree char *localfmt = NULL; |
f16d15c9 | 4189 | char buf[64]; |
36f38469 | 4190 | |
d240314a EG |
4191 | if (current_log_level < level) { |
4192 | return; | |
4193 | } | |
4194 | ||
36f38469 | 4195 | if (current_log_level == FUSE_LOG_DEBUG) { |
bebc3c24 LE |
4196 | if (use_syslog) { |
4197 | /* no timestamp needed */ | |
50fb955a MM |
4198 | localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid), |
4199 | fmt); | |
bebc3c24 | 4200 | } else { |
d9a801f7 | 4201 | g_autoptr(GDateTime) now = g_date_time_new_now_utc(); |
f16d15c9 YO |
4202 | g_autofree char *nowstr = g_date_time_format(now, |
4203 | "%Y-%m-%d %H:%M:%S.%%06d%z"); | |
4204 | snprintf(buf, 64, nowstr, g_date_time_get_microsecond(now)); | |
d9a801f7 | 4205 | localfmt = g_strdup_printf("[%s] [ID: %08ld] %s", |
f16d15c9 | 4206 | buf, syscall(__NR_gettid), fmt); |
50fb955a | 4207 | } |
36f38469 MM |
4208 | fmt = localfmt; |
4209 | } | |
4210 | ||
f185621d SH |
4211 | if (use_syslog) { |
4212 | int priority = LOG_ERR; | |
4213 | switch (level) { | |
4214 | case FUSE_LOG_EMERG: | |
4215 | priority = LOG_EMERG; | |
4216 | break; | |
4217 | case FUSE_LOG_ALERT: | |
4218 | priority = LOG_ALERT; | |
4219 | break; | |
4220 | case FUSE_LOG_CRIT: | |
4221 | priority = LOG_CRIT; | |
4222 | break; | |
4223 | case FUSE_LOG_ERR: | |
4224 | priority = LOG_ERR; | |
4225 | break; | |
4226 | case FUSE_LOG_WARNING: | |
4227 | priority = LOG_WARNING; | |
4228 | break; | |
4229 | case FUSE_LOG_NOTICE: | |
4230 | priority = LOG_NOTICE; | |
4231 | break; | |
4232 | case FUSE_LOG_INFO: | |
4233 | priority = LOG_INFO; | |
4234 | break; | |
4235 | case FUSE_LOG_DEBUG: | |
4236 | priority = LOG_DEBUG; | |
4237 | break; | |
4238 | } | |
4239 | vsyslog(priority, fmt, ap); | |
4240 | } else { | |
4241 | vfprintf(stderr, fmt, ap); | |
4242 | } | |
4243 | } | |
4244 | ||
3ca8a2b1 MS |
4245 | static void setup_root(struct lo_data *lo, struct lo_inode *root) |
4246 | { | |
4247 | int fd, res; | |
4248 | struct stat stat; | |
d672fce6 | 4249 | uint64_t mnt_id; |
3ca8a2b1 MS |
4250 | |
4251 | fd = open("/", O_PATH); | |
4252 | if (fd == -1) { | |
4253 | fuse_log(FUSE_LOG_ERR, "open(%s, O_PATH): %m\n", lo->source); | |
4254 | exit(1); | |
4255 | } | |
4256 | ||
d672fce6 HR |
4257 | res = do_statx(lo, fd, "", &stat, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW, |
4258 | &mnt_id); | |
3ca8a2b1 MS |
4259 | if (res == -1) { |
4260 | fuse_log(FUSE_LOG_ERR, "fstatat(%s): %m\n", lo->source); | |
4261 | exit(1); | |
4262 | } | |
4263 | ||
bdfd6678 | 4264 | root->filetype = S_IFDIR; |
3ca8a2b1 | 4265 | root->fd = fd; |
bfc50a6e MS |
4266 | root->key.ino = stat.st_ino; |
4267 | root->key.dev = stat.st_dev; | |
d672fce6 | 4268 | root->key.mnt_id = mnt_id; |
1222f015 | 4269 | root->nlookup = 2; |
c241aa94 | 4270 | g_atomic_int_set(&root->refcount, 2); |
e7e8aa8a VG |
4271 | if (lo->posix_lock) { |
4272 | pthread_mutex_init(&root->plock_mutex, NULL); | |
4273 | root->posix_locks = g_hash_table_new_full( | |
4274 | g_direct_hash, g_direct_equal, NULL, posix_locks_value_destroy); | |
4275 | } | |
3ca8a2b1 MS |
4276 | } |
4277 | ||
bfc50a6e MS |
4278 | static guint lo_key_hash(gconstpointer key) |
4279 | { | |
4280 | const struct lo_key *lkey = key; | |
4281 | ||
d672fce6 | 4282 | return (guint)lkey->ino + (guint)lkey->dev + (guint)lkey->mnt_id; |
bfc50a6e MS |
4283 | } |
4284 | ||
4285 | static gboolean lo_key_equal(gconstpointer a, gconstpointer b) | |
4286 | { | |
4287 | const struct lo_key *la = a; | |
4288 | const struct lo_key *lb = b; | |
4289 | ||
d672fce6 | 4290 | return la->ino == lb->ino && la->dev == lb->dev && la->mnt_id == lb->mnt_id; |
bfc50a6e MS |
4291 | } |
4292 | ||
18a69cbb LB |
4293 | static void fuse_lo_data_cleanup(struct lo_data *lo) |
4294 | { | |
4295 | if (lo->inodes) { | |
4296 | g_hash_table_destroy(lo->inodes); | |
4297 | } | |
ad3bfe1b VG |
4298 | |
4299 | if (lo->root.posix_locks) { | |
4300 | g_hash_table_destroy(lo->root.posix_locks); | |
4301 | } | |
18a69cbb LB |
4302 | lo_map_destroy(&lo->fd_map); |
4303 | lo_map_destroy(&lo->dirp_map); | |
4304 | lo_map_destroy(&lo->ino_map); | |
4305 | ||
4306 | if (lo->proc_self_fd >= 0) { | |
4307 | close(lo->proc_self_fd); | |
4308 | } | |
4309 | ||
cb282e55 VG |
4310 | if (lo->proc_self_task >= 0) { |
4311 | close(lo->proc_self_task); | |
4312 | } | |
4313 | ||
18a69cbb LB |
4314 | if (lo->root.fd >= 0) { |
4315 | close(lo->root.fd); | |
4316 | } | |
4317 | ||
6084633d DDAG |
4318 | free(lo->xattrmap); |
4319 | free_xattrmap(lo); | |
e586edcb | 4320 | free(lo->xattr_security_capability); |
18a69cbb LB |
4321 | free(lo->source); |
4322 | } | |
4323 | ||
6d118c43 VG |
4324 | static void qemu_version(void) |
4325 | { | |
4326 | printf("virtiofsd version " QEMU_FULL_VERSION "\n" QEMU_COPYRIGHT "\n"); | |
4327 | } | |
4328 | ||
7c6b6602 DDAG |
4329 | int main(int argc, char *argv[]) |
4330 | { | |
7387863d DDAG |
4331 | struct fuse_args args = FUSE_ARGS_INIT(argc, argv); |
4332 | struct fuse_session *se; | |
4333 | struct fuse_cmdline_opts opts; | |
9f59d175 | 4334 | struct lo_data lo = { |
06844584 | 4335 | .sandbox = SANDBOX_NAMESPACE, |
9f59d175 SH |
4336 | .debug = 0, |
4337 | .writeback = 0, | |
88fc1079 | 4338 | .posix_lock = 0, |
e12a0eda | 4339 | .allow_direct_io = 0, |
9f59d175 | 4340 | .proc_self_fd = -1, |
cb282e55 | 4341 | .proc_self_task = -1, |
d64907ac | 4342 | .user_killpriv_v2 = -1, |
65a820d2 | 4343 | .user_posix_acl = -1, |
963061dc | 4344 | .user_security_label = -1, |
9f59d175 | 4345 | }; |
92fb57b8 | 4346 | struct lo_map_elem *root_elem; |
db2e026a | 4347 | struct lo_map_elem *reserve_elem; |
7387863d DDAG |
4348 | int ret = -1; |
4349 | ||
bebc3c24 LE |
4350 | /* Initialize time conversion information for localtime_r(). */ |
4351 | tzset(); | |
4352 | ||
7387863d DDAG |
4353 | /* Don't mask creation mode, kernel already did that */ |
4354 | umask(0); | |
4355 | ||
ff3995e2 DDAG |
4356 | qemu_init_exec_dir(argv[0]); |
4357 | ||
449e8171 VG |
4358 | drop_supplementary_groups(); |
4359 | ||
7387863d | 4360 | pthread_mutex_init(&lo.mutex, NULL); |
bfc50a6e | 4361 | lo.inodes = g_hash_table_new(lo_key_hash, lo_key_equal); |
7387863d | 4362 | lo.root.fd = -1; |
92fb57b8 | 4363 | lo.root.fuse_ino = FUSE_ROOT_ID; |
230e777b | 4364 | lo.cache = CACHE_AUTO; |
7387863d | 4365 | |
92fb57b8 SH |
4366 | /* |
4367 | * Set up the ino map like this: | |
4368 | * [0] Reserved (will not be used) | |
4369 | * [1] Root inode | |
4370 | */ | |
4371 | lo_map_init(&lo.ino_map); | |
db2e026a HL |
4372 | reserve_elem = lo_map_reserve(&lo.ino_map, 0); |
4373 | if (!reserve_elem) { | |
4374 | fuse_log(FUSE_LOG_ERR, "failed to alloc reserve_elem.\n"); | |
4375 | goto err_out1; | |
4376 | } | |
4377 | reserve_elem->in_use = false; | |
92fb57b8 | 4378 | root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino); |
db2e026a HL |
4379 | if (!root_elem) { |
4380 | fuse_log(FUSE_LOG_ERR, "failed to alloc root_elem.\n"); | |
4381 | goto err_out1; | |
4382 | } | |
92fb57b8 SH |
4383 | root_elem->inode = &lo.root; |
4384 | ||
b39bce12 | 4385 | lo_map_init(&lo.dirp_map); |
73b4d19d | 4386 | lo_map_init(&lo.fd_map); |
b39bce12 | 4387 | |
7387863d | 4388 | if (fuse_parse_cmdline(&args, &opts) != 0) { |
c6de8046 | 4389 | goto err_out1; |
7387863d | 4390 | } |
f185621d SH |
4391 | fuse_set_log_func(log_func); |
4392 | use_syslog = opts.syslog; | |
4393 | if (use_syslog) { | |
4394 | openlog("virtiofsd", LOG_PID, LOG_DAEMON); | |
4395 | } | |
c6de8046 | 4396 | |
7387863d | 4397 | if (opts.show_help) { |
67aab022 | 4398 | printf("usage: %s [options]\n\n", argv[0]); |
7387863d | 4399 | fuse_cmdline_help(); |
4ff075f7 | 4400 | printf(" -o source=PATH shared directory tree\n"); |
7387863d DDAG |
4401 | fuse_lowlevel_help(); |
4402 | ret = 0; | |
4403 | goto err_out1; | |
4404 | } else if (opts.show_version) { | |
6d118c43 | 4405 | qemu_version(); |
7387863d DDAG |
4406 | fuse_lowlevel_version(); |
4407 | ret = 0; | |
4408 | goto err_out1; | |
45018fbb SH |
4409 | } else if (opts.print_capabilities) { |
4410 | print_capabilities(); | |
4411 | ret = 0; | |
4412 | goto err_out1; | |
7387863d DDAG |
4413 | } |
4414 | ||
7387863d | 4415 | if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) { |
c6de8046 | 4416 | goto err_out1; |
7387863d DDAG |
4417 | } |
4418 | ||
d240314a EG |
4419 | if (opts.log_level != 0) { |
4420 | current_log_level = opts.log_level; | |
800ad114 MT |
4421 | } else { |
4422 | /* default log level is INFO */ | |
4423 | current_log_level = FUSE_LOG_INFO; | |
d240314a | 4424 | } |
7387863d | 4425 | lo.debug = opts.debug; |
d240314a EG |
4426 | if (lo.debug) { |
4427 | current_log_level = FUSE_LOG_DEBUG; | |
4428 | } | |
7387863d DDAG |
4429 | if (lo.source) { |
4430 | struct stat stat; | |
4431 | int res; | |
4432 | ||
4433 | res = lstat(lo.source, &stat); | |
4434 | if (res == -1) { | |
4435 | fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n", | |
4436 | lo.source); | |
4437 | exit(1); | |
4438 | } | |
4439 | if (!S_ISDIR(stat.st_mode)) { | |
4440 | fuse_log(FUSE_LOG_ERR, "source is not a directory\n"); | |
4441 | exit(1); | |
4442 | } | |
7387863d | 4443 | } else { |
eb68a33b | 4444 | lo.source = strdup("/"); |
7632b56c HL |
4445 | if (!lo.source) { |
4446 | fuse_log(FUSE_LOG_ERR, "failed to strdup source\n"); | |
4447 | goto err_out1; | |
4448 | } | |
7387863d | 4449 | } |
6084633d DDAG |
4450 | |
4451 | if (lo.xattrmap) { | |
a87d29e0 | 4452 | lo.xattr = 1; |
6084633d DDAG |
4453 | parse_xattrmap(&lo); |
4454 | } | |
4455 | ||
7387863d DDAG |
4456 | if (!lo.timeout_set) { |
4457 | switch (lo.cache) { | |
230e777b | 4458 | case CACHE_NONE: |
7387863d DDAG |
4459 | lo.timeout = 0.0; |
4460 | break; | |
4461 | ||
230e777b | 4462 | case CACHE_AUTO: |
7387863d DDAG |
4463 | lo.timeout = 1.0; |
4464 | break; | |
4465 | ||
4466 | case CACHE_ALWAYS: | |
4467 | lo.timeout = 86400.0; | |
4468 | break; | |
4469 | } | |
4470 | } else if (lo.timeout < 0) { | |
4471 | fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout); | |
4472 | exit(1); | |
4473 | } | |
4474 | ||
65a820d2 VG |
4475 | if (lo.user_posix_acl == 1 && !lo.xattr) { |
4476 | fuse_log(FUSE_LOG_ERR, "Can't enable posix ACLs. xattrs are disabled." | |
4477 | "\n"); | |
4478 | exit(1); | |
4479 | } | |
4480 | ||
d672fce6 HR |
4481 | lo.use_statx = true; |
4482 | ||
7387863d DDAG |
4483 | se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo); |
4484 | if (se == NULL) { | |
4485 | goto err_out1; | |
4486 | } | |
4487 | ||
4488 | if (fuse_set_signal_handlers(se) != 0) { | |
4489 | goto err_out2; | |
4490 | } | |
4491 | ||
67aab022 | 4492 | if (fuse_session_mount(se) != 0) { |
7387863d DDAG |
4493 | goto err_out3; |
4494 | } | |
4495 | ||
4496 | fuse_daemonize(opts.foreground); | |
4497 | ||
6dbb7168 | 4498 | setup_nofile_rlimit(opts.rlimit_nofile); |
01a6dc95 | 4499 | |
2405f3c0 DDAG |
4500 | /* Must be before sandbox since it wants /proc */ |
4501 | setup_capng(); | |
4502 | ||
f185621d | 4503 | setup_sandbox(&lo, se, opts.syslog); |
5baa3b8e | 4504 | |
3ca8a2b1 | 4505 | setup_root(&lo, &lo.root); |
7387863d | 4506 | /* Block until ctrl+c or fusermount -u */ |
f6f3573c | 4507 | ret = virtio_loop(se); |
7387863d DDAG |
4508 | |
4509 | fuse_session_unmount(se); | |
2405f3c0 | 4510 | cleanup_capng(); |
7c6b6602 | 4511 | err_out3: |
7387863d | 4512 | fuse_remove_signal_handlers(se); |
7c6b6602 | 4513 | err_out2: |
7387863d | 4514 | fuse_session_destroy(se); |
7c6b6602 | 4515 | err_out1: |
7387863d | 4516 | fuse_opt_free_args(&args); |
7c6b6602 | 4517 | |
18a69cbb | 4518 | fuse_lo_data_cleanup(&lo); |
eb68a33b | 4519 | |
7387863d | 4520 | return ret ? 1 : 0; |
7c6b6602 | 4521 | } |