]> git.proxmox.com Git - mirror_qemu.git/blame - tools/virtiofsd/passthrough_ll.c
virtiofsd: Parse flag FUSE_WRITE_KILL_PRIV
[mirror_qemu.git] / tools / virtiofsd / passthrough_ll.c
CommitLineData
/*
 * FUSE: Filesystem in Userspace
 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
 *
 * This program can be distributed under the terms of the GNU GPLv2.
 * See the file COPYING.
 */
7c6b6602 8
/*
 *
 * This file system mirrors the existing file system hierarchy of the
 * system, starting at the root file system. This is implemented by
 * just "passing through" all requests to the corresponding user-space
 * libc functions. In contrast to passthrough.c and passthrough_fh.c,
 * this implementation uses the low-level API. Its performance should
 * be the least bad among the three, but many operations are not
 * implemented. In particular, it is not possible to remove files (or
 * directories) because the code necessary to defer actual removal
 * until the file is not opened anymore would make the example much
 * more complicated.
 *
 * When writeback caching is enabled (-o writeback mount option), it
 * is only possible to write to files for which the mounting user has
 * read permissions. This is because the writeback cache requires the
 * kernel to be able to issue read requests for all files (which the
 * passthrough filesystem cannot satisfy if it can't read the file in
 * the underlying filesystem).
 *
 * Compile with:
 *
 *     gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o
 *     passthrough_ll
 *
 * ## Source code ##
 * \include passthrough_ll.c
 */
37
09863ebc 38#include "qemu/osdep.h"
f6f3573c 39#include "fuse_virtio.h"
09863ebc 40#include "fuse_lowlevel.h"
7c6b6602 41#include <assert.h>
7387863d 42#include <dirent.h>
7c6b6602
DDAG
43#include <errno.h>
44#include <inttypes.h>
7387863d 45#include <limits.h>
7c6b6602 46#include <pthread.h>
7387863d
DDAG
47#include <stdbool.h>
48#include <stddef.h>
49#include <stdio.h>
50#include <stdlib.h>
51#include <string.h>
7c6b6602 52#include <sys/file.h>
5baa3b8e 53#include <sys/mount.h>
8e1d4ef2 54#include <sys/prctl.h>
929cfb7a 55#include <sys/syscall.h>
8e1d4ef2
SH
56#include <sys/types.h>
57#include <sys/wait.h>
7c6b6602 58#include <sys/xattr.h>
7387863d 59#include <unistd.h>
7c6b6602
DDAG
60
61#include "passthrough_helpers.h"
4f8bde99 62#include "seccomp.h"
7c6b6602 63
25c13572
SH
/*
 * One slot of a struct lo_map.
 *
 * While in_use is true the union holds the mapped object; which member is
 * meaningful depends on the map the slot belongs to (inode, directory
 * handle or file descriptor).  While in_use is false the slot is on the
 * map's free list and `freelist` is the index of the next free slot, or -1
 * at the end of the list.
 */
struct lo_map_elem {
    union {
        struct lo_inode *inode;
        struct lo_dirp *dirp;
        int fd;
        ssize_t freelist;
    };
    bool in_use;
};
73
/*
 * Maps FUSE fh or ino values to internal objects.
 *
 * elems is a growable array indexed by the FUSE-visible key, nelems is its
 * current length and freelist is the index of the first unused slot (-1 when
 * every slot is in use).
 */
struct lo_map {
    struct lo_map_elem *elems;
    size_t nelems;
    ssize_t freelist;
};
80
7c6b6602 81struct lo_inode {
7387863d
DDAG
82 struct lo_inode *next; /* protected by lo->mutex */
83 struct lo_inode *prev; /* protected by lo->mutex */
84 int fd;
85 bool is_symlink;
86 ino_t ino;
87 dev_t dev;
88 uint64_t refcount; /* protected by lo->mutex */
92fb57b8 89 fuse_ino_t fuse_ino;
7c6b6602
DDAG
90};
91
929cfb7a
VG
/* Effective credentials saved by lo_change_cred() for lo_restore_cred() */
struct lo_cred {
    uid_t euid;
    gid_t egid;
};
96
/* Values of lo_data.cache, selected by the cache=never|auto|always options */
enum {
    CACHE_NEVER,
    CACHE_NORMAL,
    CACHE_ALWAYS,
};
102
103struct lo_data {
7387863d
DDAG
104 pthread_mutex_t mutex;
105 int debug;
5fe319a7 106 int norace;
7387863d
DDAG
107 int writeback;
108 int flock;
109 int xattr;
110 const char *source;
111 double timeout;
112 int cache;
113 int timeout_set;
114 struct lo_inode root; /* protected by lo->mutex */
92fb57b8 115 struct lo_map ino_map; /* protected by lo->mutex */
b39bce12 116 struct lo_map dirp_map; /* protected by lo->mutex */
73b4d19d 117 struct lo_map fd_map; /* protected by lo->mutex */
9f59d175
SH
118
119 /* An O_PATH file descriptor to /proc/self/fd/ */
120 int proc_self_fd;
7c6b6602
DDAG
121};
122
123static const struct fuse_opt lo_opts[] = {
7387863d
DDAG
124 { "writeback", offsetof(struct lo_data, writeback), 1 },
125 { "no_writeback", offsetof(struct lo_data, writeback), 0 },
126 { "source=%s", offsetof(struct lo_data, source), 0 },
127 { "flock", offsetof(struct lo_data, flock), 1 },
128 { "no_flock", offsetof(struct lo_data, flock), 0 },
129 { "xattr", offsetof(struct lo_data, xattr), 1 },
130 { "no_xattr", offsetof(struct lo_data, xattr), 0 },
131 { "timeout=%lf", offsetof(struct lo_data, timeout), 0 },
132 { "timeout=", offsetof(struct lo_data, timeout_set), 1 },
133 { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
134 { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
135 { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
5fe319a7 136 { "norace", offsetof(struct lo_data, norace), 1 },
7387863d 137 FUSE_OPT_END
7c6b6602
DDAG
138};
139
5fe319a7
MS
/* Forward declarations for helpers that are used before their definition */
static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n);

static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
143
25dae28c
SH
/* Return non-zero if `name` is exactly "." or "..", zero otherwise. */
static int is_dot_or_dotdot(const char *name)
{
    return name[0] == '.' &&
           (name[1] == '\0' || (name[1] == '.' && name[2] == '\0'));
}
149
/*
 * Is `path` a single path component that is not "." or ".."?
 *
 * Returns 0 if `path` contains a '/' or is "." or "..", non-zero otherwise.
 */
static int is_safe_path_component(const char *path)
{
    if (strchr(path, '/')) {
        return 0;
    }

    return !is_dot_or_dotdot(path);
}
5fe319a7 159
7c6b6602
DDAG
160static struct lo_data *lo_data(fuse_req_t req)
161{
7387863d 162 return (struct lo_data *)fuse_req_userdata(req);
7c6b6602
DDAG
163}
164
92fb57b8 165static void lo_map_init(struct lo_map *map)
25c13572
SH
166{
167 map->elems = NULL;
168 map->nelems = 0;
169 map->freelist = -1;
170}
171
92fb57b8 172static void lo_map_destroy(struct lo_map *map)
25c13572
SH
173{
174 free(map->elems);
175}
176
177static int lo_map_grow(struct lo_map *map, size_t new_nelems)
178{
179 struct lo_map_elem *new_elems;
180 size_t i;
181
182 if (new_nelems <= map->nelems) {
183 return 1;
184 }
185
186 new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems);
187 if (!new_elems) {
188 return 0;
189 }
190
191 for (i = map->nelems; i < new_nelems; i++) {
192 new_elems[i].freelist = i + 1;
193 new_elems[i].in_use = false;
194 }
195 new_elems[new_nelems - 1].freelist = -1;
196
197 map->elems = new_elems;
198 map->freelist = map->nelems;
199 map->nelems = new_nelems;
200 return 1;
201}
202
92fb57b8 203static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map)
25c13572
SH
204{
205 struct lo_map_elem *elem;
206
207 if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) {
208 return NULL;
209 }
210
211 elem = &map->elems[map->freelist];
212 map->freelist = elem->freelist;
213
214 elem->in_use = true;
215
216 return elem;
217}
218
92fb57b8 219static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key)
25c13572
SH
220{
221 ssize_t *prev;
222
223 if (!lo_map_grow(map, key + 1)) {
224 return NULL;
225 }
226
227 for (prev = &map->freelist; *prev != -1;
228 prev = &map->elems[*prev].freelist) {
229 if (*prev == key) {
230 struct lo_map_elem *elem = &map->elems[key];
231
232 *prev = elem->freelist;
233 elem->in_use = true;
234 return elem;
235 }
236 }
237 return NULL;
238}
239
92fb57b8 240static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key)
25c13572
SH
241{
242 if (key >= map->nelems) {
243 return NULL;
244 }
245 if (!map->elems[key].in_use) {
246 return NULL;
247 }
248 return &map->elems[key];
249}
250
92fb57b8 251static void lo_map_remove(struct lo_map *map, size_t key)
25c13572
SH
252{
253 struct lo_map_elem *elem;
254
255 if (key >= map->nelems) {
256 return;
257 }
258
259 elem = &map->elems[key];
260 if (!elem->in_use) {
261 return;
262 }
263
264 elem->in_use = false;
265
266 elem->freelist = map->freelist;
267 map->freelist = key;
268}
269
73b4d19d
SH
270/* Assumes lo->mutex is held */
271static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd)
272{
273 struct lo_map_elem *elem;
274
275 elem = lo_map_alloc_elem(&lo_data(req)->fd_map);
276 if (!elem) {
277 return -1;
278 }
279
280 elem->fd = fd;
281 return elem - lo_data(req)->fd_map.elems;
282}
283
b39bce12
SH
284/* Assumes lo->mutex is held */
285static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp)
286{
287 struct lo_map_elem *elem;
288
289 elem = lo_map_alloc_elem(&lo_data(req)->dirp_map);
290 if (!elem) {
291 return -1;
292 }
293
294 elem->dirp = dirp;
295 return elem - lo_data(req)->dirp_map.elems;
296}
297
92fb57b8
SH
298/* Assumes lo->mutex is held */
299static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
300{
301 struct lo_map_elem *elem;
302
303 elem = lo_map_alloc_elem(&lo_data(req)->ino_map);
304 if (!elem) {
305 return -1;
306 }
307
308 elem->inode = inode;
309 return elem - lo_data(req)->ino_map.elems;
310}
311
7c6b6602
DDAG
312static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
313{
92fb57b8
SH
314 struct lo_data *lo = lo_data(req);
315 struct lo_map_elem *elem;
316
317 pthread_mutex_lock(&lo->mutex);
318 elem = lo_map_get(&lo->ino_map, ino);
319 pthread_mutex_unlock(&lo->mutex);
320
321 if (!elem) {
322 return NULL;
7387863d 323 }
92fb57b8
SH
324
325 return elem->inode;
7c6b6602
DDAG
326}
327
328static int lo_fd(fuse_req_t req, fuse_ino_t ino)
329{
92fb57b8
SH
330 struct lo_inode *inode = lo_inode(req, ino);
331 return inode ? inode->fd : -1;
7c6b6602
DDAG
332}
333
334static bool lo_debug(fuse_req_t req)
335{
7387863d 336 return lo_data(req)->debug != 0;
7c6b6602
DDAG
337}
338
7387863d 339static void lo_init(void *userdata, struct fuse_conn_info *conn)
7c6b6602 340{
7387863d
DDAG
341 struct lo_data *lo = (struct lo_data *)userdata;
342
343 if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) {
344 conn->want |= FUSE_CAP_EXPORT_SUPPORT;
345 }
346
347 if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
348 if (lo->debug) {
349 fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n");
350 }
351 conn->want |= FUSE_CAP_WRITEBACK_CACHE;
352 }
353 if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) {
354 if (lo->debug) {
355 fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
356 }
357 conn->want |= FUSE_CAP_FLOCK_LOCKS;
358 }
7c6b6602
DDAG
359}
360
361static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
7387863d 362 struct fuse_file_info *fi)
7c6b6602 363{
7387863d
DDAG
364 int res;
365 struct stat buf;
366 struct lo_data *lo = lo_data(req);
7c6b6602 367
7387863d 368 (void)fi;
7c6b6602 369
7387863d
DDAG
370 res =
371 fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
372 if (res == -1) {
373 return (void)fuse_reply_err(req, errno);
374 }
7c6b6602 375
7387863d 376 fuse_reply_attr(req, &buf, lo->timeout);
7c6b6602
DDAG
377}
378
5fe319a7
MS
379static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
380 char path[PATH_MAX], struct lo_inode **parent)
7c6b6602 381{
7387863d 382 char procname[64];
5fe319a7
MS
383 char *last;
384 struct stat stat;
385 struct lo_inode *p;
386 int retries = 2;
387 int res;
388
389retry:
9f59d175 390 sprintf(procname, "%i", inode->fd);
5fe319a7 391
9f59d175 392 res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX);
5fe319a7
MS
393 if (res < 0) {
394 fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
395 goto fail_noretry;
396 }
397
398 if (res >= PATH_MAX) {
399 fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
400 goto fail_noretry;
401 }
402 path[res] = '\0';
403
404 last = strrchr(path, '/');
405 if (last == NULL) {
406 /* Shouldn't happen */
407 fuse_log(
408 FUSE_LOG_WARNING,
409 "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
410 goto fail_noretry;
411 }
412 if (last == path) {
413 p = &lo->root;
414 pthread_mutex_lock(&lo->mutex);
415 p->refcount++;
416 pthread_mutex_unlock(&lo->mutex);
417 } else {
418 *last = '\0';
419 res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
420 if (res == -1) {
421 if (!retries) {
422 fuse_log(FUSE_LOG_WARNING,
423 "%s: failed to stat parent: %m\n", __func__);
424 }
425 goto fail;
426 }
427 p = lo_find(lo, &stat);
428 if (p == NULL) {
429 if (!retries) {
430 fuse_log(FUSE_LOG_WARNING,
431 "%s: failed to find parent\n", __func__);
432 }
433 goto fail;
434 }
435 }
436 last++;
437 res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
438 if (res == -1) {
439 if (!retries) {
440 fuse_log(FUSE_LOG_WARNING,
441 "%s: failed to stat last\n", __func__);
442 }
443 goto fail_unref;
444 }
445 if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
446 if (!retries) {
447 fuse_log(FUSE_LOG_WARNING,
448 "%s: failed to match last\n", __func__);
449 }
450 goto fail_unref;
451 }
452 *parent = p;
453 memmove(path, last, strlen(last) + 1);
454
455 return 0;
456
457fail_unref:
458 unref_inode(lo, p, 1);
459fail:
460 if (retries) {
461 retries--;
462 goto retry;
463 }
464fail_noretry:
465 errno = EIO;
466 return -1;
467}
468
469static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
470 const struct timespec *tv)
471{
472 int res;
473 struct lo_inode *parent;
474 char path[PATH_MAX];
7387863d
DDAG
475
476 if (inode->is_symlink) {
5fe319a7 477 res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
7387863d
DDAG
478 if (res == -1 && errno == EINVAL) {
479 /* Sorry, no race free way to set times on symlink. */
5fe319a7
MS
480 if (lo->norace) {
481 errno = EPERM;
482 } else {
483 goto fallback;
484 }
7387863d
DDAG
485 }
486 return res;
487 }
9f59d175 488 sprintf(path, "%i", inode->fd);
5fe319a7 489
9f59d175 490 return utimensat(lo->proc_self_fd, path, tv, 0);
7387863d 491
5fe319a7
MS
492fallback:
493 res = lo_parent_and_name(lo, inode, path, &parent);
494 if (res != -1) {
495 res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
496 unref_inode(lo, parent, 1);
497 }
498
499 return res;
7c6b6602
DDAG
500}
501
73b4d19d
SH
502static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
503{
504 struct lo_data *lo = lo_data(req);
505 struct lo_map_elem *elem;
506
507 pthread_mutex_lock(&lo->mutex);
508 elem = lo_map_get(&lo->fd_map, fi->fh);
509 pthread_mutex_unlock(&lo->mutex);
510
511 if (!elem) {
512 return -1;
513 }
514
515 return elem->fd;
516}
517
7c6b6602 518static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
7387863d 519 int valid, struct fuse_file_info *fi)
7c6b6602 520{
7387863d
DDAG
521 int saverr;
522 char procname[64];
5fe319a7 523 struct lo_data *lo = lo_data(req);
92fb57b8
SH
524 struct lo_inode *inode;
525 int ifd;
7387863d 526 int res;
73b4d19d 527 int fd;
7387863d 528
92fb57b8
SH
529 inode = lo_inode(req, ino);
530 if (!inode) {
531 fuse_reply_err(req, EBADF);
532 return;
533 }
534
535 ifd = inode->fd;
536
73b4d19d
SH
537 /* If fi->fh is invalid we'll report EBADF later */
538 if (fi) {
539 fd = lo_fi_fd(req, fi);
540 }
541
7387863d
DDAG
542 if (valid & FUSE_SET_ATTR_MODE) {
543 if (fi) {
73b4d19d 544 res = fchmod(fd, attr->st_mode);
7387863d 545 } else {
9f59d175
SH
546 sprintf(procname, "%i", ifd);
547 res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0);
7387863d
DDAG
548 }
549 if (res == -1) {
550 goto out_err;
551 }
552 }
553 if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) {
554 uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1;
555 gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t)-1;
556
557 res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
558 if (res == -1) {
559 goto out_err;
560 }
561 }
562 if (valid & FUSE_SET_ATTR_SIZE) {
9f59d175
SH
563 int truncfd;
564
7387863d 565 if (fi) {
9f59d175 566 truncfd = fd;
7387863d 567 } else {
9f59d175
SH
568 sprintf(procname, "%i", ifd);
569 truncfd = openat(lo->proc_self_fd, procname, O_RDWR);
570 if (truncfd < 0) {
571 goto out_err;
572 }
573 }
574
575 res = ftruncate(truncfd, attr->st_size);
576 if (!fi) {
577 saverr = errno;
578 close(truncfd);
579 errno = saverr;
7387863d
DDAG
580 }
581 if (res == -1) {
582 goto out_err;
583 }
584 }
585 if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) {
586 struct timespec tv[2];
587
588 tv[0].tv_sec = 0;
589 tv[1].tv_sec = 0;
590 tv[0].tv_nsec = UTIME_OMIT;
591 tv[1].tv_nsec = UTIME_OMIT;
592
593 if (valid & FUSE_SET_ATTR_ATIME_NOW) {
594 tv[0].tv_nsec = UTIME_NOW;
595 } else if (valid & FUSE_SET_ATTR_ATIME) {
596 tv[0] = attr->st_atim;
597 }
598
599 if (valid & FUSE_SET_ATTR_MTIME_NOW) {
600 tv[1].tv_nsec = UTIME_NOW;
601 } else if (valid & FUSE_SET_ATTR_MTIME) {
602 tv[1] = attr->st_mtim;
603 }
604
605 if (fi) {
73b4d19d 606 res = futimens(fd, tv);
7387863d 607 } else {
5fe319a7 608 res = utimensat_empty(lo, inode, tv);
7387863d
DDAG
609 }
610 if (res == -1) {
611 goto out_err;
612 }
613 }
614
615 return lo_getattr(req, ino, fi);
7c6b6602
DDAG
616
617out_err:
7387863d
DDAG
618 saverr = errno;
619 fuse_reply_err(req, saverr);
7c6b6602
DDAG
620}
621
622static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
623{
7387863d
DDAG
624 struct lo_inode *p;
625 struct lo_inode *ret = NULL;
626
627 pthread_mutex_lock(&lo->mutex);
628 for (p = lo->root.next; p != &lo->root; p = p->next) {
629 if (p->ino == st->st_ino && p->dev == st->st_dev) {
630 assert(p->refcount > 0);
631 ret = p;
632 ret->refcount++;
633 break;
634 }
635 }
636 pthread_mutex_unlock(&lo->mutex);
637 return ret;
7c6b6602
DDAG
638}
639
640static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
7387863d 641 struct fuse_entry_param *e)
7c6b6602 642{
7387863d
DDAG
643 int newfd;
644 int res;
645 int saverr;
646 struct lo_data *lo = lo_data(req);
854684bc 647 struct lo_inode *inode, *dir = lo_inode(req, parent);
7387863d
DDAG
648
649 memset(e, 0, sizeof(*e));
650 e->attr_timeout = lo->timeout;
651 e->entry_timeout = lo->timeout;
652
854684bc
SH
653 /* Do not allow escaping root directory */
654 if (dir == &lo->root && strcmp(name, "..") == 0) {
655 name = ".";
656 }
657
7387863d
DDAG
658 newfd = openat(lo_fd(req, parent), name, O_PATH | O_NOFOLLOW);
659 if (newfd == -1) {
660 goto out_err;
661 }
662
663 res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
664 if (res == -1) {
665 goto out_err;
666 }
667
668 inode = lo_find(lo_data(req), &e->attr);
669 if (inode) {
670 close(newfd);
671 newfd = -1;
672 } else {
673 struct lo_inode *prev, *next;
674
675 saverr = ENOMEM;
676 inode = calloc(1, sizeof(struct lo_inode));
677 if (!inode) {
678 goto out_err;
679 }
680
681 inode->is_symlink = S_ISLNK(e->attr.st_mode);
682 inode->refcount = 1;
683 inode->fd = newfd;
684 inode->ino = e->attr.st_ino;
685 inode->dev = e->attr.st_dev;
686
687 pthread_mutex_lock(&lo->mutex);
92fb57b8 688 inode->fuse_ino = lo_add_inode_mapping(req, inode);
7387863d
DDAG
689 prev = &lo->root;
690 next = prev->next;
691 next->prev = inode;
692 inode->next = next;
693 inode->prev = prev;
694 prev->next = inode;
695 pthread_mutex_unlock(&lo->mutex);
696 }
92fb57b8 697 e->ino = inode->fuse_ino;
7387863d
DDAG
698
699 if (lo_debug(req)) {
700 fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n",
701 (unsigned long long)parent, name, (unsigned long long)e->ino);
702 }
703
704 return 0;
7c6b6602
DDAG
705
706out_err:
7387863d
DDAG
707 saverr = errno;
708 if (newfd != -1) {
709 close(newfd);
710 }
711 return saverr;
7c6b6602
DDAG
712}
713
714static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
715{
7387863d
DDAG
716 struct fuse_entry_param e;
717 int err;
718
719 if (lo_debug(req)) {
720 fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n",
721 parent, name);
722 }
723
25dae28c
SH
724 /*
725 * Don't use is_safe_path_component(), allow "." and ".." for NFS export
726 * support.
727 */
728 if (strchr(name, '/')) {
729 fuse_reply_err(req, EINVAL);
730 return;
731 }
732
7387863d
DDAG
733 err = lo_do_lookup(req, parent, name, &e);
734 if (err) {
735 fuse_reply_err(req, err);
736 } else {
737 fuse_reply_entry(req, &e);
738 }
7c6b6602
DDAG
739}
740
929cfb7a
VG
/*
 * On some archs, setres*id is limited to 2^16 but they
 * provide setres*id32 variants that allow 2^32.
 * Others just let setres*id do 2^32 anyway.
 *
 * OURSYS_setresgid/OURSYS_setresuid select the 32-bit variant whenever the
 * platform headers define it.
 */
#ifdef SYS_setresgid32
#define OURSYS_setresgid SYS_setresgid32
#else
#define OURSYS_setresgid SYS_setresgid
#endif

#ifdef SYS_setresuid32
#define OURSYS_setresuid SYS_setresuid32
#else
#define OURSYS_setresuid SYS_setresuid
#endif
757
758/*
759 * Change to uid/gid of caller so that file is created with
760 * ownership of caller.
761 * TODO: What about selinux context?
762 */
763static int lo_change_cred(fuse_req_t req, struct lo_cred *old)
764{
765 int res;
766
767 old->euid = geteuid();
768 old->egid = getegid();
769
770 res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1);
771 if (res == -1) {
772 return errno;
773 }
774
775 res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1);
776 if (res == -1) {
777 int errno_save = errno;
778
779 syscall(OURSYS_setresgid, -1, old->egid, -1);
780 return errno_save;
781 }
782
783 return 0;
784}
785
786/* Regain Privileges */
787static void lo_restore_cred(struct lo_cred *old)
788{
789 int res;
790
791 res = syscall(OURSYS_setresuid, -1, old->euid, -1);
792 if (res == -1) {
793 fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid);
794 exit(1);
795 }
796
797 res = syscall(OURSYS_setresgid, -1, old->egid, -1);
798 if (res == -1) {
799 fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid);
800 exit(1);
801 }
802}
803
7c6b6602 804static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
7387863d
DDAG
805 const char *name, mode_t mode, dev_t rdev,
806 const char *link)
7c6b6602 807{
7387863d
DDAG
808 int res;
809 int saverr;
92fb57b8 810 struct lo_inode *dir;
7387863d 811 struct fuse_entry_param e;
929cfb7a 812 struct lo_cred old = {};
7c6b6602 813
25dae28c
SH
814 if (!is_safe_path_component(name)) {
815 fuse_reply_err(req, EINVAL);
816 return;
817 }
818
92fb57b8
SH
819 dir = lo_inode(req, parent);
820 if (!dir) {
821 fuse_reply_err(req, EBADF);
822 return;
823 }
824
7387863d 825 saverr = ENOMEM;
7c6b6602 826
929cfb7a
VG
827 saverr = lo_change_cred(req, &old);
828 if (saverr) {
829 goto out;
830 }
831
7387863d 832 res = mknod_wrapper(dir->fd, name, link, mode, rdev);
7c6b6602 833
7387863d 834 saverr = errno;
929cfb7a
VG
835
836 lo_restore_cred(&old);
837
7387863d
DDAG
838 if (res == -1) {
839 goto out;
840 }
7c6b6602 841
7387863d
DDAG
842 saverr = lo_do_lookup(req, parent, name, &e);
843 if (saverr) {
844 goto out;
845 }
7c6b6602 846
7387863d
DDAG
847 if (lo_debug(req)) {
848 fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n",
849 (unsigned long long)parent, name, (unsigned long long)e.ino);
850 }
7c6b6602 851
7387863d
DDAG
852 fuse_reply_entry(req, &e);
853 return;
7c6b6602
DDAG
854
855out:
7387863d 856 fuse_reply_err(req, saverr);
7c6b6602
DDAG
857}
858
7387863d
DDAG
859static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name,
860 mode_t mode, dev_t rdev)
7c6b6602 861{
7387863d 862 lo_mknod_symlink(req, parent, name, mode, rdev, NULL);
7c6b6602
DDAG
863}
864
865static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name,
7387863d 866 mode_t mode)
7c6b6602 867{
7387863d 868 lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL);
7c6b6602
DDAG
869}
870
7387863d
DDAG
871static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
872 const char *name)
7c6b6602 873{
7387863d 874 lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
7c6b6602
DDAG
875}
876
5fe319a7
MS
877static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
878 int dfd, const char *name)
7c6b6602 879{
7387863d 880 int res;
5fe319a7
MS
881 struct lo_inode *parent;
882 char path[PATH_MAX];
7c6b6602 883
7387863d
DDAG
884 if (inode->is_symlink) {
885 res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
886 if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
887 /* Sorry, no race free way to hard-link a symlink. */
5fe319a7
MS
888 if (lo->norace) {
889 errno = EPERM;
890 } else {
891 goto fallback;
892 }
7387863d
DDAG
893 }
894 return res;
895 }
7c6b6602 896
9f59d175 897 sprintf(path, "%i", inode->fd);
5fe319a7 898
9f59d175 899 return linkat(lo->proc_self_fd, path, dfd, name, AT_SYMLINK_FOLLOW);
5fe319a7
MS
900
901fallback:
902 res = lo_parent_and_name(lo, inode, path, &parent);
903 if (res != -1) {
904 res = linkat(parent->fd, path, dfd, name, 0);
905 unref_inode(lo, parent, 1);
906 }
7c6b6602 907
5fe319a7 908 return res;
7c6b6602
DDAG
909}
910
911static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
7387863d 912 const char *name)
7c6b6602 913{
7387863d
DDAG
914 int res;
915 struct lo_data *lo = lo_data(req);
92fb57b8 916 struct lo_inode *inode;
7387863d
DDAG
917 struct fuse_entry_param e;
918 int saverr;
919
25dae28c
SH
920 if (!is_safe_path_component(name)) {
921 fuse_reply_err(req, EINVAL);
922 return;
923 }
924
92fb57b8
SH
925 inode = lo_inode(req, ino);
926 if (!inode) {
927 fuse_reply_err(req, EBADF);
928 return;
929 }
930
7387863d
DDAG
931 memset(&e, 0, sizeof(struct fuse_entry_param));
932 e.attr_timeout = lo->timeout;
933 e.entry_timeout = lo->timeout;
934
5fe319a7 935 res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
7387863d
DDAG
936 if (res == -1) {
937 goto out_err;
938 }
939
940 res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
941 if (res == -1) {
942 goto out_err;
943 }
944
945 pthread_mutex_lock(&lo->mutex);
946 inode->refcount++;
947 pthread_mutex_unlock(&lo->mutex);
92fb57b8 948 e.ino = inode->fuse_ino;
7387863d
DDAG
949
950 if (lo_debug(req)) {
951 fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n",
952 (unsigned long long)parent, name, (unsigned long long)e.ino);
953 }
954
955 fuse_reply_entry(req, &e);
956 return;
7c6b6602
DDAG
957
958out_err:
7387863d
DDAG
959 saverr = errno;
960 fuse_reply_err(req, saverr);
7c6b6602
DDAG
961}
962
963static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
964{
7387863d 965 int res;
25dae28c
SH
966 if (!is_safe_path_component(name)) {
967 fuse_reply_err(req, EINVAL);
968 return;
969 }
7c6b6602 970
7387863d 971 res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR);
7c6b6602 972
7387863d 973 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
974}
975
976static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
7387863d
DDAG
977 fuse_ino_t newparent, const char *newname,
978 unsigned int flags)
7c6b6602 979{
7387863d 980 int res;
7c6b6602 981
25dae28c
SH
982 if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
983 fuse_reply_err(req, EINVAL);
984 return;
985 }
986
7387863d
DDAG
987 if (flags) {
988 fuse_reply_err(req, EINVAL);
989 return;
990 }
7c6b6602 991
7387863d 992 res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname);
7c6b6602 993
7387863d 994 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
995}
996
997static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
998{
7387863d 999 int res;
7c6b6602 1000
25dae28c
SH
1001 if (!is_safe_path_component(name)) {
1002 fuse_reply_err(req, EINVAL);
1003 return;
1004 }
1005
7387863d 1006 res = unlinkat(lo_fd(req, parent), name, 0);
7c6b6602 1007
7387863d 1008 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1009}
1010
1011static void unref_inode(struct lo_data *lo, struct lo_inode *inode, uint64_t n)
1012{
7387863d
DDAG
1013 if (!inode) {
1014 return;
1015 }
1016
1017 pthread_mutex_lock(&lo->mutex);
1018 assert(inode->refcount >= n);
1019 inode->refcount -= n;
1020 if (!inode->refcount) {
1021 struct lo_inode *prev, *next;
1022
1023 prev = inode->prev;
1024 next = inode->next;
1025 next->prev = prev;
1026 prev->next = next;
1027
92fb57b8 1028 lo_map_remove(&lo->ino_map, inode->fuse_ino);
7387863d
DDAG
1029 pthread_mutex_unlock(&lo->mutex);
1030 close(inode->fd);
1031 free(inode);
7387863d
DDAG
1032 } else {
1033 pthread_mutex_unlock(&lo->mutex);
1034 }
7c6b6602
DDAG
1035}
1036
1037static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
1038{
7387863d 1039 struct lo_data *lo = lo_data(req);
92fb57b8
SH
1040 struct lo_inode *inode;
1041
1042 inode = lo_inode(req, ino);
1043 if (!inode) {
1044 return;
1045 }
7c6b6602 1046
7387863d
DDAG
1047 if (lo_debug(req)) {
1048 fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n",
1049 (unsigned long long)ino, (unsigned long long)inode->refcount,
1050 (unsigned long long)nlookup);
1051 }
7c6b6602 1052
7387863d 1053 unref_inode(lo, inode, nlookup);
7c6b6602
DDAG
1054}
1055
1056static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
1057{
7387863d
DDAG
1058 lo_forget_one(req, ino, nlookup);
1059 fuse_reply_none(req);
7c6b6602
DDAG
1060}
1061
1062static void lo_forget_multi(fuse_req_t req, size_t count,
7387863d 1063 struct fuse_forget_data *forgets)
7c6b6602 1064{
7387863d 1065 int i;
7c6b6602 1066
7387863d
DDAG
1067 for (i = 0; i < count; i++) {
1068 lo_forget_one(req, forgets[i].ino, forgets[i].nlookup);
1069 }
1070 fuse_reply_none(req);
7c6b6602
DDAG
1071}
1072
1073static void lo_readlink(fuse_req_t req, fuse_ino_t ino)
1074{
7387863d
DDAG
1075 char buf[PATH_MAX + 1];
1076 int res;
7c6b6602 1077
7387863d
DDAG
1078 res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf));
1079 if (res == -1) {
1080 return (void)fuse_reply_err(req, errno);
1081 }
7c6b6602 1082
7387863d
DDAG
1083 if (res == sizeof(buf)) {
1084 return (void)fuse_reply_err(req, ENAMETOOLONG);
1085 }
7c6b6602 1086
7387863d 1087 buf[res] = '\0';
7c6b6602 1088
7387863d 1089 fuse_reply_readlink(req, buf);
7c6b6602
DDAG
1090}
1091
/*
 * State of one open directory handle.
 *
 * entry holds a directory entry that has been read from dp but not yet
 * consumed (NULL if there is none); offset is the stream position the next
 * read is expected to continue from.
 */
struct lo_dirp {
    DIR *dp;
    struct dirent *entry;
    off_t offset;
};
1097
b39bce12 1098static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi)
7c6b6602 1099{
b39bce12
SH
1100 struct lo_data *lo = lo_data(req);
1101 struct lo_map_elem *elem;
1102
1103 pthread_mutex_lock(&lo->mutex);
1104 elem = lo_map_get(&lo->dirp_map, fi->fh);
1105 pthread_mutex_unlock(&lo->mutex);
1106 if (!elem) {
1107 return NULL;
1108 }
1109
1110 return elem->dirp;
7c6b6602
DDAG
1111}
1112
7387863d
DDAG
1113static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
1114 struct fuse_file_info *fi)
7c6b6602 1115{
7387863d
DDAG
1116 int error = ENOMEM;
1117 struct lo_data *lo = lo_data(req);
1118 struct lo_dirp *d;
1119 int fd;
b39bce12 1120 ssize_t fh;
7387863d
DDAG
1121
1122 d = calloc(1, sizeof(struct lo_dirp));
1123 if (d == NULL) {
1124 goto out_err;
1125 }
1126
1127 fd = openat(lo_fd(req, ino), ".", O_RDONLY);
1128 if (fd == -1) {
1129 goto out_errno;
1130 }
1131
1132 d->dp = fdopendir(fd);
1133 if (d->dp == NULL) {
1134 goto out_errno;
1135 }
1136
1137 d->offset = 0;
1138 d->entry = NULL;
1139
b39bce12
SH
1140 pthread_mutex_lock(&lo->mutex);
1141 fh = lo_add_dirp_mapping(req, d);
1142 pthread_mutex_unlock(&lo->mutex);
1143 if (fh == -1) {
1144 goto out_err;
1145 }
1146
1147 fi->fh = fh;
7387863d
DDAG
1148 if (lo->cache == CACHE_ALWAYS) {
1149 fi->keep_cache = 1;
1150 }
1151 fuse_reply_open(req, fi);
1152 return;
7c6b6602
DDAG
1153
1154out_errno:
7387863d 1155 error = errno;
7c6b6602 1156out_err:
7387863d 1157 if (d) {
b39bce12
SH
1158 if (d->dp) {
1159 closedir(d->dp);
1160 }
7387863d
DDAG
1161 if (fd != -1) {
1162 close(fd);
1163 }
1164 free(d);
1165 }
1166 fuse_reply_err(req, error);
7c6b6602
DDAG
1167}
1168
7c6b6602 1169static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
7387863d 1170 off_t offset, struct fuse_file_info *fi, int plus)
7c6b6602 1171{
752272da 1172 struct lo_data *lo = lo_data(req);
b39bce12 1173 struct lo_dirp *d;
752272da 1174 struct lo_inode *dinode;
b39bce12 1175 char *buf = NULL;
7387863d
DDAG
1176 char *p;
1177 size_t rem = size;
752272da 1178 int err = EBADF;
7387863d 1179
752272da
SH
1180 dinode = lo_inode(req, ino);
1181 if (!dinode) {
1182 goto error;
1183 }
7387863d 1184
b39bce12
SH
1185 d = lo_dirp(req, fi);
1186 if (!d) {
1187 goto error;
1188 }
1189
752272da 1190 err = ENOMEM;
7387863d
DDAG
1191 buf = calloc(1, size);
1192 if (!buf) {
7387863d
DDAG
1193 goto error;
1194 }
1195 p = buf;
1196
1197 if (offset != d->offset) {
1198 seekdir(d->dp, offset);
1199 d->entry = NULL;
1200 d->offset = offset;
1201 }
1202 while (1) {
1203 size_t entsize;
1204 off_t nextoff;
1205 const char *name;
1206
1207 if (!d->entry) {
1208 errno = 0;
1209 d->entry = readdir(d->dp);
1210 if (!d->entry) {
1211 if (errno) { /* Error */
1212 err = errno;
1213 goto error;
1214 } else { /* End of stream */
1215 break;
1216 }
1217 }
1218 }
1219 nextoff = d->entry->d_off;
1220 name = d->entry->d_name;
752272da 1221
7387863d 1222 fuse_ino_t entry_ino = 0;
752272da
SH
1223 struct fuse_entry_param e = (struct fuse_entry_param){
1224 .attr.st_ino = d->entry->d_ino,
1225 .attr.st_mode = d->entry->d_type << 12,
1226 };
1227
1228 /* Hide root's parent directory */
1229 if (dinode == &lo->root && strcmp(name, "..") == 0) {
1230 e.attr.st_ino = lo->root.ino;
1231 e.attr.st_mode = DT_DIR << 12;
1232 }
1233
7387863d 1234 if (plus) {
752272da 1235 if (!is_dot_or_dotdot(name)) {
7387863d
DDAG
1236 err = lo_do_lookup(req, ino, name, &e);
1237 if (err) {
1238 goto error;
1239 }
1240 entry_ino = e.ino;
1241 }
1242
1243 entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff);
1244 } else {
752272da 1245 entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff);
7387863d
DDAG
1246 }
1247 if (entsize > rem) {
1248 if (entry_ino != 0) {
1249 lo_forget_one(req, entry_ino, 1);
1250 }
1251 break;
1252 }
1253
1254 p += entsize;
1255 rem -= entsize;
1256
1257 d->entry = NULL;
1258 d->offset = nextoff;
1259 }
7c6b6602
DDAG
1260
1261 err = 0;
1262error:
7387863d
DDAG
1263 /*
1264 * If there's an error, we can only signal it if we haven't stored
1265 * any entries yet - otherwise we'd end up with wrong lookup
1266 * counts for the entries that are already in the buffer. So we
1267 * return what we've collected until that point.
1268 */
1269 if (err && rem == size) {
1270 fuse_reply_err(req, err);
1271 } else {
1272 fuse_reply_buf(req, buf, size - rem);
1273 }
7c6b6602
DDAG
1274 free(buf);
1275}
1276
1277static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
7387863d 1278 off_t offset, struct fuse_file_info *fi)
7c6b6602 1279{
7387863d 1280 lo_do_readdir(req, ino, size, offset, fi, 0);
7c6b6602
DDAG
1281}
1282
1283static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size,
7387863d 1284 off_t offset, struct fuse_file_info *fi)
7c6b6602 1285{
7387863d 1286 lo_do_readdir(req, ino, size, offset, fi, 1);
7c6b6602
DDAG
1287}
1288
7387863d
DDAG
1289static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
1290 struct fuse_file_info *fi)
7c6b6602 1291{
b39bce12
SH
1292 struct lo_data *lo = lo_data(req);
1293 struct lo_dirp *d;
1294
7387863d 1295 (void)ino;
b39bce12
SH
1296
1297 d = lo_dirp(req, fi);
1298 if (!d) {
1299 fuse_reply_err(req, EBADF);
1300 return;
1301 }
1302
1303 pthread_mutex_lock(&lo->mutex);
1304 lo_map_remove(&lo->dirp_map, fi->fh);
1305 pthread_mutex_unlock(&lo->mutex);
1306
7387863d
DDAG
1307 closedir(d->dp);
1308 free(d);
1309 fuse_reply_err(req, 0);
7c6b6602
DDAG
1310}
1311
1312static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
7387863d 1313 mode_t mode, struct fuse_file_info *fi)
7c6b6602 1314{
7387863d
DDAG
1315 int fd;
1316 struct lo_data *lo = lo_data(req);
1317 struct fuse_entry_param e;
1318 int err;
929cfb7a 1319 struct lo_cred old = {};
7387863d
DDAG
1320
1321 if (lo_debug(req)) {
1322 fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n",
1323 parent, name);
1324 }
1325
25dae28c
SH
1326 if (!is_safe_path_component(name)) {
1327 fuse_reply_err(req, EINVAL);
1328 return;
1329 }
1330
929cfb7a
VG
1331 err = lo_change_cred(req, &old);
1332 if (err) {
1333 goto out;
1334 }
1335
7387863d
DDAG
1336 fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
1337 mode);
929cfb7a
VG
1338 err = fd == -1 ? errno : 0;
1339 lo_restore_cred(&old);
7387863d 1340
929cfb7a 1341 if (!err) {
73b4d19d
SH
1342 ssize_t fh;
1343
1344 pthread_mutex_lock(&lo->mutex);
1345 fh = lo_add_fd_mapping(req, fd);
1346 pthread_mutex_unlock(&lo->mutex);
1347 if (fh == -1) {
1348 close(fd);
1349 fuse_reply_err(req, ENOMEM);
1350 return;
1351 }
1352
1353 fi->fh = fh;
929cfb7a
VG
1354 err = lo_do_lookup(req, parent, name, &e);
1355 }
7387863d
DDAG
1356 if (lo->cache == CACHE_NEVER) {
1357 fi->direct_io = 1;
1358 } else if (lo->cache == CACHE_ALWAYS) {
1359 fi->keep_cache = 1;
1360 }
1361
929cfb7a 1362out:
7387863d
DDAG
1363 if (err) {
1364 fuse_reply_err(req, err);
1365 } else {
1366 fuse_reply_create(req, &e, fi);
1367 }
7c6b6602
DDAG
1368}
1369
1370static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
7387863d 1371 struct fuse_file_info *fi)
7c6b6602 1372{
7387863d 1373 int res;
b39bce12
SH
1374 struct lo_dirp *d;
1375 int fd;
1376
7387863d 1377 (void)ino;
b39bce12
SH
1378
1379 d = lo_dirp(req, fi);
1380 if (!d) {
1381 fuse_reply_err(req, EBADF);
1382 return;
1383 }
1384
1385 fd = dirfd(d->dp);
7387863d
DDAG
1386 if (datasync) {
1387 res = fdatasync(fd);
1388 } else {
1389 res = fsync(fd);
1390 }
1391 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1392}
1393
1394static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
1395{
7387863d 1396 int fd;
73b4d19d 1397 ssize_t fh;
7387863d
DDAG
1398 char buf[64];
1399 struct lo_data *lo = lo_data(req);
1400
1401 if (lo_debug(req)) {
1402 fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
1403 fi->flags);
1404 }
1405
1406 /*
1407 * With writeback cache, kernel may send read requests even
1408 * when userspace opened write-only
1409 */
1410 if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
1411 fi->flags &= ~O_ACCMODE;
1412 fi->flags |= O_RDWR;
1413 }
1414
1415 /*
1416 * With writeback cache, O_APPEND is handled by the kernel.
1417 * This breaks atomicity (since the file may change in the
1418 * underlying filesystem, so that the kernel's idea of the
1419 * end of the file isn't accurate anymore). In this example,
1420 * we just accept that. A more rigorous filesystem may want
1421 * to return an error here
1422 */
1423 if (lo->writeback && (fi->flags & O_APPEND)) {
1424 fi->flags &= ~O_APPEND;
1425 }
1426
9f59d175
SH
1427 sprintf(buf, "%i", lo_fd(req, ino));
1428 fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
7387863d
DDAG
1429 if (fd == -1) {
1430 return (void)fuse_reply_err(req, errno);
1431 }
1432
73b4d19d
SH
1433 pthread_mutex_lock(&lo->mutex);
1434 fh = lo_add_fd_mapping(req, fd);
1435 pthread_mutex_unlock(&lo->mutex);
1436 if (fh == -1) {
1437 close(fd);
1438 fuse_reply_err(req, ENOMEM);
1439 return;
1440 }
1441
1442 fi->fh = fh;
7387863d
DDAG
1443 if (lo->cache == CACHE_NEVER) {
1444 fi->direct_io = 1;
1445 } else if (lo->cache == CACHE_ALWAYS) {
1446 fi->keep_cache = 1;
1447 }
1448 fuse_reply_open(req, fi);
7c6b6602
DDAG
1449}
1450
7387863d
DDAG
1451static void lo_release(fuse_req_t req, fuse_ino_t ino,
1452 struct fuse_file_info *fi)
7c6b6602 1453{
73b4d19d
SH
1454 struct lo_data *lo = lo_data(req);
1455 int fd;
1456
7387863d 1457 (void)ino;
7c6b6602 1458
73b4d19d
SH
1459 fd = lo_fi_fd(req, fi);
1460
1461 pthread_mutex_lock(&lo->mutex);
1462 lo_map_remove(&lo->fd_map, fi->fh);
1463 pthread_mutex_unlock(&lo->mutex);
1464
1465 close(fd);
7387863d 1466 fuse_reply_err(req, 0);
7c6b6602
DDAG
1467}
1468
1469static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
1470{
7387863d
DDAG
1471 int res;
1472 (void)ino;
73b4d19d 1473 res = close(dup(lo_fi_fd(req, fi)));
7387863d 1474 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1475}
1476
1477static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
7387863d 1478 struct fuse_file_info *fi)
7c6b6602 1479{
7387863d 1480 int res;
1b209805
VG
1481 int fd;
1482 char *buf;
1483
1484 fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino,
1485 (void *)fi);
1486
1487 if (!fi) {
9f59d175
SH
1488 struct lo_data *lo = lo_data(req);
1489
1490 res = asprintf(&buf, "%i", lo_fd(req, ino));
1b209805
VG
1491 if (res == -1) {
1492 return (void)fuse_reply_err(req, errno);
1493 }
1494
9f59d175 1495 fd = openat(lo->proc_self_fd, buf, O_RDWR);
1b209805
VG
1496 free(buf);
1497 if (fd == -1) {
1498 return (void)fuse_reply_err(req, errno);
1499 }
1500 } else {
73b4d19d 1501 fd = lo_fi_fd(req, fi);
1b209805
VG
1502 }
1503
7387863d 1504 if (datasync) {
1b209805 1505 res = fdatasync(fd);
7387863d 1506 } else {
1b209805
VG
1507 res = fsync(fd);
1508 }
1509 if (!fi) {
1510 close(fd);
7387863d
DDAG
1511 }
1512 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1513}
1514
7387863d
DDAG
1515static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
1516 struct fuse_file_info *fi)
7c6b6602 1517{
7387863d 1518 struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size);
7c6b6602 1519
7387863d
DDAG
1520 if (lo_debug(req)) {
1521 fuse_log(FUSE_LOG_DEBUG,
1522 "lo_read(ino=%" PRIu64 ", size=%zd, "
1523 "off=%lu)\n",
1524 ino, size, (unsigned long)offset);
1525 }
7c6b6602 1526
7387863d 1527 buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
73b4d19d 1528 buf.buf[0].fd = lo_fi_fd(req, fi);
7387863d 1529 buf.buf[0].pos = offset;
7c6b6602 1530
8c3fe75e 1531 fuse_reply_data(req, &buf);
7c6b6602
DDAG
1532}
1533
1534static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
7387863d
DDAG
1535 struct fuse_bufvec *in_buf, off_t off,
1536 struct fuse_file_info *fi)
7c6b6602 1537{
7387863d
DDAG
1538 (void)ino;
1539 ssize_t res;
1540 struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
1541
1542 out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
73b4d19d 1543 out_buf.buf[0].fd = lo_fi_fd(req, fi);
7387863d
DDAG
1544 out_buf.buf[0].pos = off;
1545
1546 if (lo_debug(req)) {
1547 fuse_log(FUSE_LOG_DEBUG,
1548 "lo_write(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino,
1549 out_buf.buf[0].size, (unsigned long)off);
1550 }
1551
8c3fe75e 1552 res = fuse_buf_copy(&out_buf, in_buf);
7387863d
DDAG
1553 if (res < 0) {
1554 fuse_reply_err(req, -res);
1555 } else {
1556 fuse_reply_write(req, (size_t)res);
1557 }
7c6b6602
DDAG
1558}
1559
1560static void lo_statfs(fuse_req_t req, fuse_ino_t ino)
1561{
7387863d
DDAG
1562 int res;
1563 struct statvfs stbuf;
1564
1565 res = fstatvfs(lo_fd(req, ino), &stbuf);
1566 if (res == -1) {
1567 fuse_reply_err(req, errno);
1568 } else {
1569 fuse_reply_statfs(req, &stbuf);
1570 }
7c6b6602
DDAG
1571}
1572
7387863d
DDAG
1573static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
1574 off_t length, struct fuse_file_info *fi)
7c6b6602 1575{
7387863d
DDAG
1576 int err = EOPNOTSUPP;
1577 (void)ino;
7c6b6602 1578
9776457c 1579#ifdef CONFIG_FALLOCATE
73b4d19d 1580 err = fallocate(lo_fi_fd(req, fi), mode, offset, length);
7387863d
DDAG
1581 if (err < 0) {
1582 err = errno;
1583 }
7c6b6602 1584
9776457c 1585#elif defined(CONFIG_POSIX_FALLOCATE)
7387863d
DDAG
1586 if (mode) {
1587 fuse_reply_err(req, EOPNOTSUPP);
1588 return;
1589 }
7c6b6602 1590
73b4d19d 1591 err = posix_fallocate(lo_fi_fd(req, fi), offset, length);
7c6b6602
DDAG
1592#endif
1593
7387863d 1594 fuse_reply_err(req, err);
7c6b6602
DDAG
1595}
1596
1597static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
7387863d 1598 int op)
7c6b6602 1599{
7387863d
DDAG
1600 int res;
1601 (void)ino;
7c6b6602 1602
73b4d19d 1603 res = flock(lo_fi_fd(req, fi), op);
7c6b6602 1604
7387863d 1605 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1606}
1607
1608static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
7387863d 1609 size_t size)
7c6b6602 1610{
9f59d175 1611 struct lo_data *lo = lo_data(req);
7387863d
DDAG
1612 char *value = NULL;
1613 char procname[64];
92fb57b8 1614 struct lo_inode *inode;
7387863d
DDAG
1615 ssize_t ret;
1616 int saverr;
9f59d175 1617 int fd = -1;
7387863d 1618
92fb57b8
SH
1619 inode = lo_inode(req, ino);
1620 if (!inode) {
1621 fuse_reply_err(req, EBADF);
1622 return;
1623 }
1624
7387863d
DDAG
1625 saverr = ENOSYS;
1626 if (!lo_data(req)->xattr) {
1627 goto out;
1628 }
1629
1630 if (lo_debug(req)) {
1631 fuse_log(FUSE_LOG_DEBUG,
1632 "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n", ino, name,
1633 size);
1634 }
1635
1636 if (inode->is_symlink) {
1637 /* Sorry, no race free way to getxattr on symlink. */
1638 saverr = EPERM;
1639 goto out;
1640 }
1641
9f59d175
SH
1642 sprintf(procname, "%i", inode->fd);
1643 fd = openat(lo->proc_self_fd, procname, O_RDONLY);
1644 if (fd < 0) {
1645 goto out_err;
1646 }
7387863d
DDAG
1647
1648 if (size) {
1649 value = malloc(size);
1650 if (!value) {
1651 goto out_err;
1652 }
1653
9f59d175 1654 ret = fgetxattr(fd, name, value, size);
7387863d
DDAG
1655 if (ret == -1) {
1656 goto out_err;
1657 }
1658 saverr = 0;
1659 if (ret == 0) {
1660 goto out;
1661 }
1662
1663 fuse_reply_buf(req, value, ret);
1664 } else {
9f59d175 1665 ret = fgetxattr(fd, name, NULL, 0);
7387863d
DDAG
1666 if (ret == -1) {
1667 goto out_err;
1668 }
1669
1670 fuse_reply_xattr(req, ret);
1671 }
7c6b6602 1672out_free:
7387863d 1673 free(value);
9f59d175
SH
1674
1675 if (fd >= 0) {
1676 close(fd);
1677 }
7387863d 1678 return;
7c6b6602
DDAG
1679
1680out_err:
7387863d 1681 saverr = errno;
7c6b6602 1682out:
7387863d
DDAG
1683 fuse_reply_err(req, saverr);
1684 goto out_free;
7c6b6602
DDAG
1685}
1686
1687static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
1688{
9f59d175 1689 struct lo_data *lo = lo_data(req);
7387863d
DDAG
1690 char *value = NULL;
1691 char procname[64];
92fb57b8 1692 struct lo_inode *inode;
7387863d
DDAG
1693 ssize_t ret;
1694 int saverr;
9f59d175 1695 int fd = -1;
7387863d 1696
92fb57b8
SH
1697 inode = lo_inode(req, ino);
1698 if (!inode) {
1699 fuse_reply_err(req, EBADF);
1700 return;
1701 }
1702
7387863d
DDAG
1703 saverr = ENOSYS;
1704 if (!lo_data(req)->xattr) {
1705 goto out;
1706 }
1707
1708 if (lo_debug(req)) {
1709 fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n",
1710 ino, size);
1711 }
1712
1713 if (inode->is_symlink) {
1714 /* Sorry, no race free way to listxattr on symlink. */
1715 saverr = EPERM;
1716 goto out;
1717 }
1718
9f59d175
SH
1719 sprintf(procname, "%i", inode->fd);
1720 fd = openat(lo->proc_self_fd, procname, O_RDONLY);
1721 if (fd < 0) {
1722 goto out_err;
1723 }
7387863d
DDAG
1724
1725 if (size) {
1726 value = malloc(size);
1727 if (!value) {
1728 goto out_err;
1729 }
1730
9f59d175 1731 ret = flistxattr(fd, value, size);
7387863d
DDAG
1732 if (ret == -1) {
1733 goto out_err;
1734 }
1735 saverr = 0;
1736 if (ret == 0) {
1737 goto out;
1738 }
1739
1740 fuse_reply_buf(req, value, ret);
1741 } else {
9f59d175 1742 ret = flistxattr(fd, NULL, 0);
7387863d
DDAG
1743 if (ret == -1) {
1744 goto out_err;
1745 }
1746
1747 fuse_reply_xattr(req, ret);
1748 }
7c6b6602 1749out_free:
7387863d 1750 free(value);
9f59d175
SH
1751
1752 if (fd >= 0) {
1753 close(fd);
1754 }
7387863d 1755 return;
7c6b6602
DDAG
1756
1757out_err:
7387863d 1758 saverr = errno;
7c6b6602 1759out:
7387863d
DDAG
1760 fuse_reply_err(req, saverr);
1761 goto out_free;
7c6b6602
DDAG
1762}
1763
1764static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
7387863d 1765 const char *value, size_t size, int flags)
7c6b6602 1766{
7387863d 1767 char procname[64];
9f59d175 1768 struct lo_data *lo = lo_data(req);
92fb57b8 1769 struct lo_inode *inode;
7387863d
DDAG
1770 ssize_t ret;
1771 int saverr;
9f59d175 1772 int fd = -1;
7c6b6602 1773
92fb57b8
SH
1774 inode = lo_inode(req, ino);
1775 if (!inode) {
1776 fuse_reply_err(req, EBADF);
1777 return;
1778 }
1779
7387863d
DDAG
1780 saverr = ENOSYS;
1781 if (!lo_data(req)->xattr) {
1782 goto out;
1783 }
7c6b6602 1784
7387863d
DDAG
1785 if (lo_debug(req)) {
1786 fuse_log(FUSE_LOG_DEBUG,
1787 "lo_setxattr(ino=%" PRIu64 ", name=%s value=%s size=%zd)\n",
1788 ino, name, value, size);
1789 }
7c6b6602 1790
7387863d
DDAG
1791 if (inode->is_symlink) {
1792 /* Sorry, no race free way to setxattr on symlink. */
1793 saverr = EPERM;
1794 goto out;
1795 }
7c6b6602 1796
9f59d175
SH
1797 sprintf(procname, "%i", inode->fd);
1798 fd = openat(lo->proc_self_fd, procname, O_RDWR);
1799 if (fd < 0) {
1800 saverr = errno;
1801 goto out;
1802 }
7c6b6602 1803
9f59d175 1804 ret = fsetxattr(fd, name, value, size, flags);
7387863d 1805 saverr = ret == -1 ? errno : 0;
7c6b6602
DDAG
1806
1807out:
9f59d175
SH
1808 if (fd >= 0) {
1809 close(fd);
1810 }
7387863d 1811 fuse_reply_err(req, saverr);
7c6b6602
DDAG
1812}
1813
1814static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name)
1815{
7387863d 1816 char procname[64];
9f59d175 1817 struct lo_data *lo = lo_data(req);
92fb57b8 1818 struct lo_inode *inode;
7387863d
DDAG
1819 ssize_t ret;
1820 int saverr;
9f59d175 1821 int fd = -1;
7c6b6602 1822
92fb57b8
SH
1823 inode = lo_inode(req, ino);
1824 if (!inode) {
1825 fuse_reply_err(req, EBADF);
1826 return;
1827 }
1828
7387863d
DDAG
1829 saverr = ENOSYS;
1830 if (!lo_data(req)->xattr) {
1831 goto out;
1832 }
7c6b6602 1833
7387863d
DDAG
1834 if (lo_debug(req)) {
1835 fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n",
1836 ino, name);
1837 }
7c6b6602 1838
7387863d
DDAG
1839 if (inode->is_symlink) {
1840 /* Sorry, no race free way to setxattr on symlink. */
1841 saverr = EPERM;
1842 goto out;
1843 }
7c6b6602 1844
9f59d175
SH
1845 sprintf(procname, "%i", inode->fd);
1846 fd = openat(lo->proc_self_fd, procname, O_RDWR);
1847 if (fd < 0) {
1848 saverr = errno;
1849 goto out;
1850 }
7c6b6602 1851
9f59d175 1852 ret = fremovexattr(fd, name);
7387863d 1853 saverr = ret == -1 ? errno : 0;
7c6b6602
DDAG
1854
1855out:
9f59d175
SH
1856 if (fd >= 0) {
1857 close(fd);
1858 }
7387863d 1859 fuse_reply_err(req, saverr);
7c6b6602
DDAG
1860}
1861
#ifdef HAVE_COPY_FILE_RANGE
/*
 * copy_file_range handler: copies up to @len bytes from offset @off_in of
 * the file behind @fi_in to offset @off_out of the file behind @fi_out
 * using copy_file_range(), and replies with the number of bytes copied.
 */
static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
                               struct fuse_file_info *fi_in, fuse_ino_t ino_out,
                               off_t off_out, struct fuse_file_info *fi_out,
                               size_t len, int flags)
{
    int in_fd, out_fd;
    ssize_t res;

    in_fd = lo_fi_fd(req, fi_in);
    out_fd = lo_fi_fd(req, fi_out);

    /* off_t is cast so the arguments match the %lu conversions */
    fuse_log(FUSE_LOG_DEBUG,
             "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, "
             "off=%lu, ino=%" PRIu64 "/fd=%d, "
             "off=%lu, size=%zd, flags=0x%x)\n",
             ino_in, in_fd, (unsigned long)off_in, ino_out, out_fd,
             (unsigned long)off_out, len, flags);

    res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags);
    if (res < 0) {
        /* fuse_reply_err() takes a positive errno value */
        fuse_reply_err(req, errno);
    } else {
        fuse_reply_write(req, res);
    }
}
#endif
1888
1889static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
7387863d 1890 struct fuse_file_info *fi)
7c6b6602 1891{
7387863d
DDAG
1892 off_t res;
1893
1894 (void)ino;
73b4d19d 1895 res = lseek(lo_fi_fd(req, fi), off, whence);
7387863d
DDAG
1896 if (res != -1) {
1897 fuse_reply_lseek(req, res);
1898 } else {
1899 fuse_reply_err(req, errno);
1900 }
7c6b6602
DDAG
1901}
1902
1903static struct fuse_lowlevel_ops lo_oper = {
7387863d
DDAG
1904 .init = lo_init,
1905 .lookup = lo_lookup,
1906 .mkdir = lo_mkdir,
1907 .mknod = lo_mknod,
1908 .symlink = lo_symlink,
1909 .link = lo_link,
1910 .unlink = lo_unlink,
1911 .rmdir = lo_rmdir,
1912 .rename = lo_rename,
1913 .forget = lo_forget,
1914 .forget_multi = lo_forget_multi,
1915 .getattr = lo_getattr,
1916 .setattr = lo_setattr,
1917 .readlink = lo_readlink,
1918 .opendir = lo_opendir,
1919 .readdir = lo_readdir,
1920 .readdirplus = lo_readdirplus,
1921 .releasedir = lo_releasedir,
1922 .fsyncdir = lo_fsyncdir,
1923 .create = lo_create,
1924 .open = lo_open,
1925 .release = lo_release,
1926 .flush = lo_flush,
1927 .fsync = lo_fsync,
1928 .read = lo_read,
1929 .write_buf = lo_write_buf,
1930 .statfs = lo_statfs,
1931 .fallocate = lo_fallocate,
1932 .flock = lo_flock,
1933 .getxattr = lo_getxattr,
1934 .listxattr = lo_listxattr,
1935 .setxattr = lo_setxattr,
1936 .removexattr = lo_removexattr,
7c6b6602 1937#ifdef HAVE_COPY_FILE_RANGE
7387863d 1938 .copy_file_range = lo_copy_file_range,
7c6b6602 1939#endif
7387863d 1940 .lseek = lo_lseek,
7c6b6602
DDAG
1941};
1942
45018fbb
SH
/* Print vhost-user.json backend program capabilities */
static void print_capabilities(void)
{
    printf("{\n"
           " \"type\": \"fs\"\n"
           "}\n");
}
1950
d74830d1 1951/*
8e1d4ef2 1952 * Move to a new mount, net, and pid namespaces to isolate this process.
d74830d1 1953 */
8e1d4ef2 1954static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
d74830d1 1955{
8e1d4ef2
SH
1956 pid_t child;
1957
1958 /*
1959 * Create a new pid namespace for *child* processes. We'll have to
1960 * fork in order to enter the new pid namespace. A new mount namespace
1961 * is also needed so that we can remount /proc for the new pid
1962 * namespace.
1963 *
1964 * Our UNIX domain sockets have been created. Now we can move to
1965 * an empty network namespace to prevent TCP/IP and other network
1966 * activity in case this process is compromised.
1967 */
1968 if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) {
1969 fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n");
1970 exit(1);
1971 }
1972
1973 child = fork();
1974 if (child < 0) {
1975 fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n");
1976 exit(1);
1977 }
1978 if (child > 0) {
1979 pid_t waited;
1980 int wstatus;
1981
1982 /* The parent waits for the child */
1983 do {
1984 waited = waitpid(child, &wstatus, 0);
1985 } while (waited < 0 && errno == EINTR && !se->exited);
1986
1987 /* We were terminated by a signal, see fuse_signals.c */
1988 if (se->exited) {
1989 exit(0);
1990 }
1991
1992 if (WIFEXITED(wstatus)) {
1993 exit(WEXITSTATUS(wstatus));
1994 }
1995
1996 exit(1);
1997 }
1998
1999 /* Send us SIGTERM when the parent thread terminates, see prctl(2) */
2000 prctl(PR_SET_PDEATHSIG, SIGTERM);
2001
2002 /*
2003 * If the mounts have shared propagation then we want to opt out so our
2004 * mount changes don't affect the parent mount namespace.
2005 */
2006 if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
2007 fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n");
2008 exit(1);
2009 }
2010
2011 /* The child must remount /proc to use the new pid namespace */
2012 if (mount("proc", "/proc", "proc",
2013 MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) {
2014 fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n");
2015 exit(1);
2016 }
2017
2018 /* Now we can get our /proc/self/fd directory file descriptor */
2019 lo->proc_self_fd = open("/proc/self/fd", O_PATH);
2020 if (lo->proc_self_fd == -1) {
2021 fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
d74830d1
SH
2022 exit(1);
2023 }
2024}
2025
8e1d4ef2
SH
2026/*
2027 * Make the source directory our root so symlinks cannot escape and no other
2028 * files are accessible. Assumes unshare(CLONE_NEWNS) was already called.
2029 */
2030static void setup_mounts(const char *source)
5baa3b8e
SH
2031{
2032 int oldroot;
2033 int newroot;
2034
8e1d4ef2
SH
2035 if (mount(source, source, NULL, MS_BIND, NULL) < 0) {
2036 fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
2037 exit(1);
2038 }
2039
2040 /* This magic is based on lxc's lxc_pivot_root() */
5baa3b8e
SH
2041 oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
2042 if (oldroot < 0) {
2043 fuse_log(FUSE_LOG_ERR, "open(/): %m\n");
2044 exit(1);
2045 }
2046
2047 newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
2048 if (newroot < 0) {
2049 fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source);
2050 exit(1);
2051 }
2052
2053 if (fchdir(newroot) < 0) {
2054 fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
2055 exit(1);
2056 }
2057
2058 if (syscall(__NR_pivot_root, ".", ".") < 0) {
2059 fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n");
2060 exit(1);
2061 }
2062
2063 if (fchdir(oldroot) < 0) {
2064 fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n");
2065 exit(1);
2066 }
2067
2068 if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) {
2069 fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n");
2070 exit(1);
2071 }
2072
2073 if (umount2(".", MNT_DETACH) < 0) {
2074 fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n");
2075 exit(1);
2076 }
2077
2078 if (fchdir(newroot) < 0) {
2079 fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
2080 exit(1);
2081 }
2082
2083 close(newroot);
2084 close(oldroot);
2085}
2086
5baa3b8e
SH
2087/*
2088 * Lock down this process to prevent access to other processes or files outside
2089 * source directory. This reduces the impact of arbitrary code execution bugs.
2090 */
8e1d4ef2 2091static void setup_sandbox(struct lo_data *lo, struct fuse_session *se)
5baa3b8e 2092{
8e1d4ef2
SH
2093 setup_namespaces(lo, se);
2094 setup_mounts(lo->source);
4f8bde99 2095 setup_seccomp();
5baa3b8e
SH
2096}
2097
7c6b6602
DDAG
2098int main(int argc, char *argv[])
2099{
7387863d
DDAG
2100 struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
2101 struct fuse_session *se;
2102 struct fuse_cmdline_opts opts;
9f59d175
SH
2103 struct lo_data lo = {
2104 .debug = 0,
2105 .writeback = 0,
2106 .proc_self_fd = -1,
2107 };
92fb57b8 2108 struct lo_map_elem *root_elem;
7387863d
DDAG
2109 int ret = -1;
2110
2111 /* Don't mask creation mode, kernel already did that */
2112 umask(0);
2113
2114 pthread_mutex_init(&lo.mutex, NULL);
2115 lo.root.next = lo.root.prev = &lo.root;
2116 lo.root.fd = -1;
92fb57b8 2117 lo.root.fuse_ino = FUSE_ROOT_ID;
7387863d
DDAG
2118 lo.cache = CACHE_NORMAL;
2119
92fb57b8
SH
2120 /*
2121 * Set up the ino map like this:
2122 * [0] Reserved (will not be used)
2123 * [1] Root inode
2124 */
2125 lo_map_init(&lo.ino_map);
2126 lo_map_reserve(&lo.ino_map, 0)->in_use = false;
2127 root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino);
2128 root_elem->inode = &lo.root;
2129
b39bce12 2130 lo_map_init(&lo.dirp_map);
73b4d19d 2131 lo_map_init(&lo.fd_map);
b39bce12 2132
7387863d
DDAG
2133 if (fuse_parse_cmdline(&args, &opts) != 0) {
2134 return 1;
2135 }
2136 if (opts.show_help) {
67aab022 2137 printf("usage: %s [options]\n\n", argv[0]);
7387863d 2138 fuse_cmdline_help();
4ff075f7 2139 printf(" -o source=PATH shared directory tree\n");
7387863d
DDAG
2140 fuse_lowlevel_help();
2141 ret = 0;
2142 goto err_out1;
2143 } else if (opts.show_version) {
2144 fuse_lowlevel_version();
2145 ret = 0;
2146 goto err_out1;
45018fbb
SH
2147 } else if (opts.print_capabilities) {
2148 print_capabilities();
2149 ret = 0;
2150 goto err_out1;
7387863d
DDAG
2151 }
2152
7387863d
DDAG
2153 if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) {
2154 return 1;
2155 }
2156
2157 lo.debug = opts.debug;
2158 lo.root.refcount = 2;
2159 if (lo.source) {
2160 struct stat stat;
2161 int res;
2162
2163 res = lstat(lo.source, &stat);
2164 if (res == -1) {
2165 fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n",
2166 lo.source);
2167 exit(1);
2168 }
2169 if (!S_ISDIR(stat.st_mode)) {
2170 fuse_log(FUSE_LOG_ERR, "source is not a directory\n");
2171 exit(1);
2172 }
2173
2174 } else {
2175 lo.source = "/";
2176 }
2177 lo.root.is_symlink = false;
2178 if (!lo.timeout_set) {
2179 switch (lo.cache) {
2180 case CACHE_NEVER:
2181 lo.timeout = 0.0;
2182 break;
2183
2184 case CACHE_NORMAL:
2185 lo.timeout = 1.0;
2186 break;
2187
2188 case CACHE_ALWAYS:
2189 lo.timeout = 86400.0;
2190 break;
2191 }
2192 } else if (lo.timeout < 0) {
2193 fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout);
2194 exit(1);
2195 }
2196
2197 lo.root.fd = open(lo.source, O_PATH);
5baa3b8e 2198
7387863d
DDAG
2199 if (lo.root.fd == -1) {
2200 fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source);
2201 exit(1);
2202 }
2203
2204 se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);
2205 if (se == NULL) {
2206 goto err_out1;
2207 }
2208
2209 if (fuse_set_signal_handlers(se) != 0) {
2210 goto err_out2;
2211 }
2212
67aab022 2213 if (fuse_session_mount(se) != 0) {
7387863d
DDAG
2214 goto err_out3;
2215 }
2216
2217 fuse_daemonize(opts.foreground);
2218
8e1d4ef2 2219 setup_sandbox(&lo, se);
5baa3b8e 2220
7387863d 2221 /* Block until ctrl+c or fusermount -u */
f6f3573c 2222 ret = virtio_loop(se);
7387863d
DDAG
2223
2224 fuse_session_unmount(se);
7c6b6602 2225err_out3:
7387863d 2226 fuse_remove_signal_handlers(se);
7c6b6602 2227err_out2:
7387863d 2228 fuse_session_destroy(se);
7c6b6602 2229err_out1:
7387863d 2230 fuse_opt_free_args(&args);
7c6b6602 2231
73b4d19d 2232 lo_map_destroy(&lo.fd_map);
b39bce12 2233 lo_map_destroy(&lo.dirp_map);
92fb57b8
SH
2234 lo_map_destroy(&lo.ino_map);
2235
9f59d175
SH
2236 if (lo.proc_self_fd >= 0) {
2237 close(lo.proc_self_fd);
2238 }
2239
7387863d
DDAG
2240 if (lo.root.fd >= 0) {
2241 close(lo.root.fd);
2242 }
7c6b6602 2243
7387863d 2244 return ret ? 1 : 0;
7c6b6602 2245}