]> git.proxmox.com Git - mirror_qemu.git/blame - tools/virtiofsd/passthrough_ll.c
virtiofsd: fail when parent inode isn't known in lo_do_lookup()
[mirror_qemu.git] / tools / virtiofsd / passthrough_ll.c
CommitLineData
7c6b6602 1/*
7387863d
DDAG
2 * FUSE: Filesystem in Userspace
3 * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
4 *
5 * This program can be distributed under the terms of the GNU GPLv2.
6 * See the file COPYING.
7 */
7c6b6602 8
7387863d 9/*
7c6b6602
DDAG
10 *
11 * This file system mirrors the existing file system hierarchy of the
12 * system, starting at the root file system. This is implemented by
13 * just "passing through" all requests to the corresponding user-space
14 * libc functions. In contrast to passthrough.c and passthrough_fh.c,
15 * this implementation uses the low-level API. Its performance should
16 * be the least bad among the three, but many operations are not
17 * implemented. In particular, it is not possible to remove files (or
18 * directories) because the code necessary to defer actual removal
19 * until the file is not opened anymore would make the example much
20 * more complicated.
21 *
22 * When writeback caching is enabled (-o writeback mount option), it
23 * is only possible to write to files for which the mounting user has
24 * read permissions. This is because the writeback cache requires the
25 * kernel to be able to issue read requests for all files (which the
26 * passthrough filesystem cannot satisfy if it can't read the file in
27 * the underlying filesystem).
28 *
29 * Compile with:
30 *
7387863d
DDAG
31 * gcc -Wall passthrough_ll.c `pkg-config fuse3 --cflags --libs` -o
32 * passthrough_ll
7c6b6602
DDAG
33 *
34 * ## Source code ##
35 * \include passthrough_ll.c
36 */
37
09863ebc 38#include "qemu/osdep.h"
50fb955a 39#include "qemu/timer.h"
f6f3573c 40#include "fuse_virtio.h"
d240314a 41#include "fuse_log.h"
09863ebc 42#include "fuse_lowlevel.h"
7c6b6602 43#include <assert.h>
2405f3c0 44#include <cap-ng.h>
7387863d 45#include <dirent.h>
7c6b6602 46#include <errno.h>
36f38469 47#include <glib.h>
7c6b6602 48#include <inttypes.h>
7387863d 49#include <limits.h>
7c6b6602 50#include <pthread.h>
7387863d
DDAG
51#include <stdbool.h>
52#include <stddef.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
7c6b6602 56#include <sys/file.h>
5baa3b8e 57#include <sys/mount.h>
8e1d4ef2 58#include <sys/prctl.h>
01a6dc95 59#include <sys/resource.h>
929cfb7a 60#include <sys/syscall.h>
8e1d4ef2
SH
61#include <sys/types.h>
62#include <sys/wait.h>
7c6b6602 63#include <sys/xattr.h>
f185621d 64#include <syslog.h>
7387863d 65#include <unistd.h>
7c6b6602
DDAG
66
67#include "passthrough_helpers.h"
4f8bde99 68#include "seccomp.h"
7c6b6602 69
25c13572
SH
/*
 * One slot of a struct lo_map.  A slot either carries one of the mapped
 * objects (inode, dirp or fd) or, while it is unused, the index of the
 * next unused slot in `freelist`.
 */
struct lo_map_elem {
    union {
        struct lo_inode *inode;
        struct lo_dirp *dirp;
        int fd;
        ssize_t freelist;
    };
    bool in_use; /* true while the slot holds a mapped object */
};
79
/*
 * Maps FUSE fh or ino values to internal objects.
 *
 * The key is the index into `elems`.  Unused slots are chained through
 * their `freelist` member, starting at `freelist` here; -1 ends the chain.
 */
struct lo_map {
    struct lo_map_elem *elems;
    size_t nelems;
    ssize_t freelist;
};
86
7c6b6602 87struct lo_inode {
7387863d
DDAG
88 struct lo_inode *next; /* protected by lo->mutex */
89 struct lo_inode *prev; /* protected by lo->mutex */
90 int fd;
91 bool is_symlink;
92 ino_t ino;
93 dev_t dev;
94 uint64_t refcount; /* protected by lo->mutex */
92fb57b8 95 fuse_ino_t fuse_ino;
7c6b6602
DDAG
96};
97
929cfb7a
VG
/* Effective uid/gid saved while the caller's credentials are assumed */
struct lo_cred {
    uid_t euid;
    gid_t egid;
};
102
/* Values stored in lo_data.cache by the cache=never|auto|always options */
enum {
    CACHE_NEVER,
    CACHE_NORMAL,
    CACHE_ALWAYS,
};
108
109struct lo_data {
7387863d
DDAG
110 pthread_mutex_t mutex;
111 int debug;
5fe319a7 112 int norace;
7387863d
DDAG
113 int writeback;
114 int flock;
115 int xattr;
116 const char *source;
117 double timeout;
118 int cache;
119 int timeout_set;
59aef494
MS
120 int readdirplus_set;
121 int readdirplus_clear;
7387863d 122 struct lo_inode root; /* protected by lo->mutex */
92fb57b8 123 struct lo_map ino_map; /* protected by lo->mutex */
b39bce12 124 struct lo_map dirp_map; /* protected by lo->mutex */
73b4d19d 125 struct lo_map fd_map; /* protected by lo->mutex */
9f59d175
SH
126
127 /* An O_PATH file descriptor to /proc/self/fd/ */
128 int proc_self_fd;
7c6b6602
DDAG
129};
130
131static const struct fuse_opt lo_opts[] = {
7387863d
DDAG
132 { "writeback", offsetof(struct lo_data, writeback), 1 },
133 { "no_writeback", offsetof(struct lo_data, writeback), 0 },
134 { "source=%s", offsetof(struct lo_data, source), 0 },
135 { "flock", offsetof(struct lo_data, flock), 1 },
136 { "no_flock", offsetof(struct lo_data, flock), 0 },
137 { "xattr", offsetof(struct lo_data, xattr), 1 },
138 { "no_xattr", offsetof(struct lo_data, xattr), 0 },
139 { "timeout=%lf", offsetof(struct lo_data, timeout), 0 },
140 { "timeout=", offsetof(struct lo_data, timeout_set), 1 },
141 { "cache=never", offsetof(struct lo_data, cache), CACHE_NEVER },
142 { "cache=auto", offsetof(struct lo_data, cache), CACHE_NORMAL },
143 { "cache=always", offsetof(struct lo_data, cache), CACHE_ALWAYS },
5fe319a7 144 { "norace", offsetof(struct lo_data, norace), 1 },
59aef494
MS
145 { "readdirplus", offsetof(struct lo_data, readdirplus_set), 1 },
146 { "no_readdirplus", offsetof(struct lo_data, readdirplus_clear), 1 },
7387863d 147 FUSE_OPT_END
7c6b6602 148};
static bool use_syslog = false;
static int current_log_level;

/* Forward declaration: used by helpers defined before the function body */
static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
                                 uint64_t n);

/* Saved libcap-ng state; `saved` is accessed with `mutex` held */
static struct {
    pthread_mutex_t mutex;
    void *saved;
} cap;
/* Set once the current thread has loaded cap-ng state from cap.saved */
static __thread bool cap_loaded = false;

/* Forward declaration: used by lo_parent_and_name() */
static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st);
162
25dae28c
SH
/* Returns non-zero when `name` is exactly "." or "..", 0 otherwise. */
static int is_dot_or_dotdot(const char *name)
{
    if (name[0] != '.') {
        return 0;
    }
    if (name[1] == '\0') {
        return 1;
    }
    return name[1] == '.' && name[2] == '\0';
}
168
/*
 * Is `path` a single path component that is not "." or ".."?
 * Returns non-zero if so, 0 if it contains a '/' or is a dot entry.
 */
static int is_safe_path_component(const char *path)
{
    return strchr(path, '/') == NULL && !is_dot_or_dotdot(path);
}
5fe319a7 178
7c6b6602
DDAG
179static struct lo_data *lo_data(fuse_req_t req)
180{
7387863d 181 return (struct lo_data *)fuse_req_userdata(req);
7c6b6602
DDAG
182}
183
2405f3c0
DDAG
184/*
185 * Load capng's state from our saved state if the current thread
186 * hadn't previously been loaded.
187 * returns 0 on success
188 */
189static int load_capng(void)
190{
191 if (!cap_loaded) {
192 pthread_mutex_lock(&cap.mutex);
193 capng_restore_state(&cap.saved);
194 /*
195 * restore_state free's the saved copy
196 * so make another.
197 */
198 cap.saved = capng_save_state();
199 if (!cap.saved) {
200 fuse_log(FUSE_LOG_ERR, "capng_save_state (thread)\n");
201 return -EINVAL;
202 }
203 pthread_mutex_unlock(&cap.mutex);
204
205 /*
206 * We want to use the loaded state for our pid,
207 * not the original
208 */
209 capng_setpid(syscall(SYS_gettid));
210 cap_loaded = true;
211 }
212 return 0;
213}
214
ee884652
VG
215/*
216 * Helpers for dropping and regaining effective capabilities. Returns 0
217 * on success, error otherwise
218 */
219static int drop_effective_cap(const char *cap_name, bool *cap_dropped)
220{
221 int cap, ret;
222
223 cap = capng_name_to_capability(cap_name);
224 if (cap < 0) {
225 ret = errno;
226 fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n",
227 cap_name, strerror(errno));
228 goto out;
229 }
230
231 if (load_capng()) {
232 ret = errno;
233 fuse_log(FUSE_LOG_ERR, "load_capng() failed\n");
234 goto out;
235 }
236
237 /* We dont have this capability in effective set already. */
238 if (!capng_have_capability(CAPNG_EFFECTIVE, cap)) {
239 ret = 0;
240 goto out;
241 }
242
243 if (capng_update(CAPNG_DROP, CAPNG_EFFECTIVE, cap)) {
244 ret = errno;
245 fuse_log(FUSE_LOG_ERR, "capng_update(DROP,) failed\n");
246 goto out;
247 }
248
249 if (capng_apply(CAPNG_SELECT_CAPS)) {
250 ret = errno;
251 fuse_log(FUSE_LOG_ERR, "drop:capng_apply() failed\n");
252 goto out;
253 }
254
255 ret = 0;
256 if (cap_dropped) {
257 *cap_dropped = true;
258 }
259
260out:
261 return ret;
262}
263
264static int gain_effective_cap(const char *cap_name)
265{
266 int cap;
267 int ret = 0;
268
269 cap = capng_name_to_capability(cap_name);
270 if (cap < 0) {
271 ret = errno;
272 fuse_log(FUSE_LOG_ERR, "capng_name_to_capability(%s) failed:%s\n",
273 cap_name, strerror(errno));
274 goto out;
275 }
276
277 if (load_capng()) {
278 ret = errno;
279 fuse_log(FUSE_LOG_ERR, "load_capng() failed\n");
280 goto out;
281 }
282
283 if (capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap)) {
284 ret = errno;
285 fuse_log(FUSE_LOG_ERR, "capng_update(ADD,) failed\n");
286 goto out;
287 }
288
289 if (capng_apply(CAPNG_SELECT_CAPS)) {
290 ret = errno;
291 fuse_log(FUSE_LOG_ERR, "gain:capng_apply() failed\n");
292 goto out;
293 }
294 ret = 0;
295
296out:
297 return ret;
298}
299
92fb57b8 300static void lo_map_init(struct lo_map *map)
25c13572
SH
301{
302 map->elems = NULL;
303 map->nelems = 0;
304 map->freelist = -1;
305}
306
92fb57b8 307static void lo_map_destroy(struct lo_map *map)
25c13572
SH
308{
309 free(map->elems);
310}
311
312static int lo_map_grow(struct lo_map *map, size_t new_nelems)
313{
314 struct lo_map_elem *new_elems;
315 size_t i;
316
317 if (new_nelems <= map->nelems) {
318 return 1;
319 }
320
321 new_elems = realloc(map->elems, sizeof(map->elems[0]) * new_nelems);
322 if (!new_elems) {
323 return 0;
324 }
325
326 for (i = map->nelems; i < new_nelems; i++) {
327 new_elems[i].freelist = i + 1;
328 new_elems[i].in_use = false;
329 }
330 new_elems[new_nelems - 1].freelist = -1;
331
332 map->elems = new_elems;
333 map->freelist = map->nelems;
334 map->nelems = new_nelems;
335 return 1;
336}
337
92fb57b8 338static struct lo_map_elem *lo_map_alloc_elem(struct lo_map *map)
25c13572
SH
339{
340 struct lo_map_elem *elem;
341
342 if (map->freelist == -1 && !lo_map_grow(map, map->nelems + 256)) {
343 return NULL;
344 }
345
346 elem = &map->elems[map->freelist];
347 map->freelist = elem->freelist;
348
349 elem->in_use = true;
350
351 return elem;
352}
353
92fb57b8 354static struct lo_map_elem *lo_map_reserve(struct lo_map *map, size_t key)
25c13572
SH
355{
356 ssize_t *prev;
357
358 if (!lo_map_grow(map, key + 1)) {
359 return NULL;
360 }
361
362 for (prev = &map->freelist; *prev != -1;
363 prev = &map->elems[*prev].freelist) {
364 if (*prev == key) {
365 struct lo_map_elem *elem = &map->elems[key];
366
367 *prev = elem->freelist;
368 elem->in_use = true;
369 return elem;
370 }
371 }
372 return NULL;
373}
374
92fb57b8 375static struct lo_map_elem *lo_map_get(struct lo_map *map, size_t key)
25c13572
SH
376{
377 if (key >= map->nelems) {
378 return NULL;
379 }
380 if (!map->elems[key].in_use) {
381 return NULL;
382 }
383 return &map->elems[key];
384}
385
92fb57b8 386static void lo_map_remove(struct lo_map *map, size_t key)
25c13572
SH
387{
388 struct lo_map_elem *elem;
389
390 if (key >= map->nelems) {
391 return;
392 }
393
394 elem = &map->elems[key];
395 if (!elem->in_use) {
396 return;
397 }
398
399 elem->in_use = false;
400
401 elem->freelist = map->freelist;
402 map->freelist = key;
403}
404
73b4d19d
SH
405/* Assumes lo->mutex is held */
406static ssize_t lo_add_fd_mapping(fuse_req_t req, int fd)
407{
408 struct lo_map_elem *elem;
409
410 elem = lo_map_alloc_elem(&lo_data(req)->fd_map);
411 if (!elem) {
412 return -1;
413 }
414
415 elem->fd = fd;
416 return elem - lo_data(req)->fd_map.elems;
417}
418
b39bce12
SH
419/* Assumes lo->mutex is held */
420static ssize_t lo_add_dirp_mapping(fuse_req_t req, struct lo_dirp *dirp)
421{
422 struct lo_map_elem *elem;
423
424 elem = lo_map_alloc_elem(&lo_data(req)->dirp_map);
425 if (!elem) {
426 return -1;
427 }
428
429 elem->dirp = dirp;
430 return elem - lo_data(req)->dirp_map.elems;
431}
432
92fb57b8
SH
433/* Assumes lo->mutex is held */
434static ssize_t lo_add_inode_mapping(fuse_req_t req, struct lo_inode *inode)
435{
436 struct lo_map_elem *elem;
437
438 elem = lo_map_alloc_elem(&lo_data(req)->ino_map);
439 if (!elem) {
440 return -1;
441 }
442
443 elem->inode = inode;
444 return elem - lo_data(req)->ino_map.elems;
445}
446
7c6b6602
DDAG
447static struct lo_inode *lo_inode(fuse_req_t req, fuse_ino_t ino)
448{
92fb57b8
SH
449 struct lo_data *lo = lo_data(req);
450 struct lo_map_elem *elem;
451
452 pthread_mutex_lock(&lo->mutex);
453 elem = lo_map_get(&lo->ino_map, ino);
454 pthread_mutex_unlock(&lo->mutex);
455
456 if (!elem) {
457 return NULL;
7387863d 458 }
92fb57b8
SH
459
460 return elem->inode;
7c6b6602
DDAG
461}
462
463static int lo_fd(fuse_req_t req, fuse_ino_t ino)
464{
92fb57b8
SH
465 struct lo_inode *inode = lo_inode(req, ino);
466 return inode ? inode->fd : -1;
7c6b6602
DDAG
467}
468
7387863d 469static void lo_init(void *userdata, struct fuse_conn_info *conn)
7c6b6602 470{
7387863d
DDAG
471 struct lo_data *lo = (struct lo_data *)userdata;
472
473 if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) {
474 conn->want |= FUSE_CAP_EXPORT_SUPPORT;
475 }
476
477 if (lo->writeback && conn->capable & FUSE_CAP_WRITEBACK_CACHE) {
d240314a 478 fuse_log(FUSE_LOG_DEBUG, "lo_init: activating writeback\n");
7387863d
DDAG
479 conn->want |= FUSE_CAP_WRITEBACK_CACHE;
480 }
481 if (lo->flock && conn->capable & FUSE_CAP_FLOCK_LOCKS) {
d240314a 482 fuse_log(FUSE_LOG_DEBUG, "lo_init: activating flock locks\n");
7387863d
DDAG
483 conn->want |= FUSE_CAP_FLOCK_LOCKS;
484 }
59aef494
MS
485 if ((lo->cache == CACHE_NEVER && !lo->readdirplus_set) ||
486 lo->readdirplus_clear) {
ddcbabcb
MS
487 fuse_log(FUSE_LOG_DEBUG, "lo_init: disabling readdirplus\n");
488 conn->want &= ~FUSE_CAP_READDIRPLUS;
489 }
7c6b6602
DDAG
490}
491
492static void lo_getattr(fuse_req_t req, fuse_ino_t ino,
7387863d 493 struct fuse_file_info *fi)
7c6b6602 494{
7387863d
DDAG
495 int res;
496 struct stat buf;
497 struct lo_data *lo = lo_data(req);
7c6b6602 498
7387863d 499 (void)fi;
7c6b6602 500
7387863d
DDAG
501 res =
502 fstatat(lo_fd(req, ino), "", &buf, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
503 if (res == -1) {
504 return (void)fuse_reply_err(req, errno);
505 }
7c6b6602 506
7387863d 507 fuse_reply_attr(req, &buf, lo->timeout);
7c6b6602
DDAG
508}
509
5fe319a7
MS
510static int lo_parent_and_name(struct lo_data *lo, struct lo_inode *inode,
511 char path[PATH_MAX], struct lo_inode **parent)
7c6b6602 512{
7387863d 513 char procname[64];
5fe319a7
MS
514 char *last;
515 struct stat stat;
516 struct lo_inode *p;
517 int retries = 2;
518 int res;
519
520retry:
9f59d175 521 sprintf(procname, "%i", inode->fd);
5fe319a7 522
9f59d175 523 res = readlinkat(lo->proc_self_fd, procname, path, PATH_MAX);
5fe319a7
MS
524 if (res < 0) {
525 fuse_log(FUSE_LOG_WARNING, "%s: readlink failed: %m\n", __func__);
526 goto fail_noretry;
527 }
528
529 if (res >= PATH_MAX) {
530 fuse_log(FUSE_LOG_WARNING, "%s: readlink overflowed\n", __func__);
531 goto fail_noretry;
532 }
533 path[res] = '\0';
534
535 last = strrchr(path, '/');
536 if (last == NULL) {
537 /* Shouldn't happen */
538 fuse_log(
539 FUSE_LOG_WARNING,
540 "%s: INTERNAL ERROR: bad path read from proc\n", __func__);
541 goto fail_noretry;
542 }
543 if (last == path) {
544 p = &lo->root;
545 pthread_mutex_lock(&lo->mutex);
546 p->refcount++;
547 pthread_mutex_unlock(&lo->mutex);
548 } else {
549 *last = '\0';
550 res = fstatat(AT_FDCWD, last == path ? "/" : path, &stat, 0);
551 if (res == -1) {
552 if (!retries) {
553 fuse_log(FUSE_LOG_WARNING,
554 "%s: failed to stat parent: %m\n", __func__);
555 }
556 goto fail;
557 }
558 p = lo_find(lo, &stat);
559 if (p == NULL) {
560 if (!retries) {
561 fuse_log(FUSE_LOG_WARNING,
562 "%s: failed to find parent\n", __func__);
563 }
564 goto fail;
565 }
566 }
567 last++;
568 res = fstatat(p->fd, last, &stat, AT_SYMLINK_NOFOLLOW);
569 if (res == -1) {
570 if (!retries) {
571 fuse_log(FUSE_LOG_WARNING,
572 "%s: failed to stat last\n", __func__);
573 }
574 goto fail_unref;
575 }
576 if (stat.st_dev != inode->dev || stat.st_ino != inode->ino) {
577 if (!retries) {
578 fuse_log(FUSE_LOG_WARNING,
579 "%s: failed to match last\n", __func__);
580 }
581 goto fail_unref;
582 }
583 *parent = p;
584 memmove(path, last, strlen(last) + 1);
585
586 return 0;
587
588fail_unref:
95d27157 589 unref_inode_lolocked(lo, p, 1);
5fe319a7
MS
590fail:
591 if (retries) {
592 retries--;
593 goto retry;
594 }
595fail_noretry:
596 errno = EIO;
597 return -1;
598}
599
600static int utimensat_empty(struct lo_data *lo, struct lo_inode *inode,
601 const struct timespec *tv)
602{
603 int res;
604 struct lo_inode *parent;
605 char path[PATH_MAX];
7387863d
DDAG
606
607 if (inode->is_symlink) {
5fe319a7 608 res = utimensat(inode->fd, "", tv, AT_EMPTY_PATH);
7387863d
DDAG
609 if (res == -1 && errno == EINVAL) {
610 /* Sorry, no race free way to set times on symlink. */
5fe319a7
MS
611 if (lo->norace) {
612 errno = EPERM;
613 } else {
614 goto fallback;
615 }
7387863d
DDAG
616 }
617 return res;
618 }
9f59d175 619 sprintf(path, "%i", inode->fd);
5fe319a7 620
9f59d175 621 return utimensat(lo->proc_self_fd, path, tv, 0);
7387863d 622
5fe319a7
MS
623fallback:
624 res = lo_parent_and_name(lo, inode, path, &parent);
625 if (res != -1) {
626 res = utimensat(parent->fd, path, tv, AT_SYMLINK_NOFOLLOW);
95d27157 627 unref_inode_lolocked(lo, parent, 1);
5fe319a7
MS
628 }
629
630 return res;
7c6b6602
DDAG
631}
632
73b4d19d
SH
633static int lo_fi_fd(fuse_req_t req, struct fuse_file_info *fi)
634{
635 struct lo_data *lo = lo_data(req);
636 struct lo_map_elem *elem;
637
638 pthread_mutex_lock(&lo->mutex);
639 elem = lo_map_get(&lo->fd_map, fi->fh);
640 pthread_mutex_unlock(&lo->mutex);
641
642 if (!elem) {
643 return -1;
644 }
645
646 return elem->fd;
647}
648
7c6b6602 649static void lo_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
7387863d 650 int valid, struct fuse_file_info *fi)
7c6b6602 651{
7387863d
DDAG
652 int saverr;
653 char procname[64];
5fe319a7 654 struct lo_data *lo = lo_data(req);
92fb57b8
SH
655 struct lo_inode *inode;
656 int ifd;
7387863d 657 int res;
73b4d19d 658 int fd;
7387863d 659
92fb57b8
SH
660 inode = lo_inode(req, ino);
661 if (!inode) {
662 fuse_reply_err(req, EBADF);
663 return;
664 }
665
666 ifd = inode->fd;
667
73b4d19d
SH
668 /* If fi->fh is invalid we'll report EBADF later */
669 if (fi) {
670 fd = lo_fi_fd(req, fi);
671 }
672
7387863d
DDAG
673 if (valid & FUSE_SET_ATTR_MODE) {
674 if (fi) {
73b4d19d 675 res = fchmod(fd, attr->st_mode);
7387863d 676 } else {
9f59d175
SH
677 sprintf(procname, "%i", ifd);
678 res = fchmodat(lo->proc_self_fd, procname, attr->st_mode, 0);
7387863d
DDAG
679 }
680 if (res == -1) {
681 goto out_err;
682 }
683 }
684 if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) {
685 uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : (uid_t)-1;
686 gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : (gid_t)-1;
687
688 res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
689 if (res == -1) {
690 goto out_err;
691 }
692 }
693 if (valid & FUSE_SET_ATTR_SIZE) {
9f59d175
SH
694 int truncfd;
695
7387863d 696 if (fi) {
9f59d175 697 truncfd = fd;
7387863d 698 } else {
9f59d175
SH
699 sprintf(procname, "%i", ifd);
700 truncfd = openat(lo->proc_self_fd, procname, O_RDWR);
701 if (truncfd < 0) {
702 goto out_err;
703 }
704 }
705
706 res = ftruncate(truncfd, attr->st_size);
707 if (!fi) {
708 saverr = errno;
709 close(truncfd);
710 errno = saverr;
7387863d
DDAG
711 }
712 if (res == -1) {
713 goto out_err;
714 }
715 }
716 if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) {
717 struct timespec tv[2];
718
719 tv[0].tv_sec = 0;
720 tv[1].tv_sec = 0;
721 tv[0].tv_nsec = UTIME_OMIT;
722 tv[1].tv_nsec = UTIME_OMIT;
723
724 if (valid & FUSE_SET_ATTR_ATIME_NOW) {
725 tv[0].tv_nsec = UTIME_NOW;
726 } else if (valid & FUSE_SET_ATTR_ATIME) {
727 tv[0] = attr->st_atim;
728 }
729
730 if (valid & FUSE_SET_ATTR_MTIME_NOW) {
731 tv[1].tv_nsec = UTIME_NOW;
732 } else if (valid & FUSE_SET_ATTR_MTIME) {
733 tv[1] = attr->st_mtim;
734 }
735
736 if (fi) {
73b4d19d 737 res = futimens(fd, tv);
7387863d 738 } else {
5fe319a7 739 res = utimensat_empty(lo, inode, tv);
7387863d
DDAG
740 }
741 if (res == -1) {
742 goto out_err;
743 }
744 }
745
746 return lo_getattr(req, ino, fi);
7c6b6602
DDAG
747
748out_err:
7387863d
DDAG
749 saverr = errno;
750 fuse_reply_err(req, saverr);
7c6b6602
DDAG
751}
752
753static struct lo_inode *lo_find(struct lo_data *lo, struct stat *st)
754{
7387863d
DDAG
755 struct lo_inode *p;
756 struct lo_inode *ret = NULL;
757
758 pthread_mutex_lock(&lo->mutex);
759 for (p = lo->root.next; p != &lo->root; p = p->next) {
760 if (p->ino == st->st_ino && p->dev == st->st_dev) {
761 assert(p->refcount > 0);
762 ret = p;
763 ret->refcount++;
764 break;
765 }
766 }
767 pthread_mutex_unlock(&lo->mutex);
768 return ret;
7c6b6602
DDAG
769}
770
771static int lo_do_lookup(fuse_req_t req, fuse_ino_t parent, const char *name,
7387863d 772 struct fuse_entry_param *e)
7c6b6602 773{
7387863d
DDAG
774 int newfd;
775 int res;
776 int saverr;
777 struct lo_data *lo = lo_data(req);
854684bc 778 struct lo_inode *inode, *dir = lo_inode(req, parent);
7387863d 779
9de4fab5
MS
780 /*
781 * name_to_handle_at() and open_by_handle_at() can reach here with fuse
782 * mount point in guest, but we don't have its inode info in the
783 * ino_map.
784 */
785 if (!dir) {
786 return ENOENT;
787 }
788
7387863d
DDAG
789 memset(e, 0, sizeof(*e));
790 e->attr_timeout = lo->timeout;
791 e->entry_timeout = lo->timeout;
792
854684bc
SH
793 /* Do not allow escaping root directory */
794 if (dir == &lo->root && strcmp(name, "..") == 0) {
795 name = ".";
796 }
797
9de4fab5 798 newfd = openat(dir->fd, name, O_PATH | O_NOFOLLOW);
7387863d
DDAG
799 if (newfd == -1) {
800 goto out_err;
801 }
802
803 res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
804 if (res == -1) {
805 goto out_err;
806 }
807
9de4fab5 808 inode = lo_find(lo, &e->attr);
7387863d
DDAG
809 if (inode) {
810 close(newfd);
811 newfd = -1;
812 } else {
813 struct lo_inode *prev, *next;
814
815 saverr = ENOMEM;
816 inode = calloc(1, sizeof(struct lo_inode));
817 if (!inode) {
818 goto out_err;
819 }
820
821 inode->is_symlink = S_ISLNK(e->attr.st_mode);
822 inode->refcount = 1;
823 inode->fd = newfd;
9de4fab5 824 newfd = -1;
7387863d
DDAG
825 inode->ino = e->attr.st_ino;
826 inode->dev = e->attr.st_dev;
827
828 pthread_mutex_lock(&lo->mutex);
92fb57b8 829 inode->fuse_ino = lo_add_inode_mapping(req, inode);
7387863d
DDAG
830 prev = &lo->root;
831 next = prev->next;
832 next->prev = inode;
833 inode->next = next;
834 inode->prev = prev;
835 prev->next = inode;
836 pthread_mutex_unlock(&lo->mutex);
837 }
92fb57b8 838 e->ino = inode->fuse_ino;
7387863d 839
d240314a
EG
840 fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
841 name, (unsigned long long)e->ino);
7387863d
DDAG
842
843 return 0;
7c6b6602
DDAG
844
845out_err:
7387863d
DDAG
846 saverr = errno;
847 if (newfd != -1) {
848 close(newfd);
849 }
850 return saverr;
7c6b6602
DDAG
851}
852
853static void lo_lookup(fuse_req_t req, fuse_ino_t parent, const char *name)
854{
7387863d
DDAG
855 struct fuse_entry_param e;
856 int err;
857
d240314a
EG
858 fuse_log(FUSE_LOG_DEBUG, "lo_lookup(parent=%" PRIu64 ", name=%s)\n", parent,
859 name);
7387863d 860
25dae28c
SH
861 /*
862 * Don't use is_safe_path_component(), allow "." and ".." for NFS export
863 * support.
864 */
865 if (strchr(name, '/')) {
866 fuse_reply_err(req, EINVAL);
867 return;
868 }
869
7387863d
DDAG
870 err = lo_do_lookup(req, parent, name, &e);
871 if (err) {
872 fuse_reply_err(req, err);
873 } else {
874 fuse_reply_entry(req, &e);
875 }
7c6b6602
DDAG
876}
877
929cfb7a
VG
/*
 * On some archs, setres*id is limited to 2^16 but they
 * provide setres*id32 variants that allow 2^32.
 * Others just let setres*id do 2^32 anyway.
 *
 * OURSYS_setres{g,u}id name the widest variant that is available.
 */
#if defined(SYS_setresgid32)
#define OURSYS_setresgid SYS_setresgid32
#else
#define OURSYS_setresgid SYS_setresgid
#endif

#if defined(SYS_setresuid32)
#define OURSYS_setresuid SYS_setresuid32
#else
#define OURSYS_setresuid SYS_setresuid
#endif
894
895/*
896 * Change to uid/gid of caller so that file is created with
897 * ownership of caller.
898 * TODO: What about selinux context?
899 */
900static int lo_change_cred(fuse_req_t req, struct lo_cred *old)
901{
902 int res;
903
904 old->euid = geteuid();
905 old->egid = getegid();
906
907 res = syscall(OURSYS_setresgid, -1, fuse_req_ctx(req)->gid, -1);
908 if (res == -1) {
909 return errno;
910 }
911
912 res = syscall(OURSYS_setresuid, -1, fuse_req_ctx(req)->uid, -1);
913 if (res == -1) {
914 int errno_save = errno;
915
916 syscall(OURSYS_setresgid, -1, old->egid, -1);
917 return errno_save;
918 }
919
920 return 0;
921}
922
923/* Regain Privileges */
924static void lo_restore_cred(struct lo_cred *old)
925{
926 int res;
927
928 res = syscall(OURSYS_setresuid, -1, old->euid, -1);
929 if (res == -1) {
930 fuse_log(FUSE_LOG_ERR, "seteuid(%u): %m\n", old->euid);
931 exit(1);
932 }
933
934 res = syscall(OURSYS_setresgid, -1, old->egid, -1);
935 if (res == -1) {
936 fuse_log(FUSE_LOG_ERR, "setegid(%u): %m\n", old->egid);
937 exit(1);
938 }
939}
940
7c6b6602 941static void lo_mknod_symlink(fuse_req_t req, fuse_ino_t parent,
7387863d
DDAG
942 const char *name, mode_t mode, dev_t rdev,
943 const char *link)
7c6b6602 944{
7387863d
DDAG
945 int res;
946 int saverr;
92fb57b8 947 struct lo_inode *dir;
7387863d 948 struct fuse_entry_param e;
929cfb7a 949 struct lo_cred old = {};
7c6b6602 950
25dae28c
SH
951 if (!is_safe_path_component(name)) {
952 fuse_reply_err(req, EINVAL);
953 return;
954 }
955
92fb57b8
SH
956 dir = lo_inode(req, parent);
957 if (!dir) {
958 fuse_reply_err(req, EBADF);
959 return;
960 }
961
7387863d 962 saverr = ENOMEM;
7c6b6602 963
929cfb7a
VG
964 saverr = lo_change_cred(req, &old);
965 if (saverr) {
966 goto out;
967 }
968
7387863d 969 res = mknod_wrapper(dir->fd, name, link, mode, rdev);
7c6b6602 970
7387863d 971 saverr = errno;
929cfb7a
VG
972
973 lo_restore_cred(&old);
974
7387863d
DDAG
975 if (res == -1) {
976 goto out;
977 }
7c6b6602 978
7387863d
DDAG
979 saverr = lo_do_lookup(req, parent, name, &e);
980 if (saverr) {
981 goto out;
982 }
7c6b6602 983
d240314a
EG
984 fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
985 name, (unsigned long long)e.ino);
7c6b6602 986
7387863d
DDAG
987 fuse_reply_entry(req, &e);
988 return;
7c6b6602
DDAG
989
990out:
7387863d 991 fuse_reply_err(req, saverr);
7c6b6602
DDAG
992}
993
7387863d
DDAG
994static void lo_mknod(fuse_req_t req, fuse_ino_t parent, const char *name,
995 mode_t mode, dev_t rdev)
7c6b6602 996{
7387863d 997 lo_mknod_symlink(req, parent, name, mode, rdev, NULL);
7c6b6602
DDAG
998}
999
1000static void lo_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name,
7387863d 1001 mode_t mode)
7c6b6602 1002{
7387863d 1003 lo_mknod_symlink(req, parent, name, S_IFDIR | mode, 0, NULL);
7c6b6602
DDAG
1004}
1005
7387863d
DDAG
1006static void lo_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
1007 const char *name)
7c6b6602 1008{
7387863d 1009 lo_mknod_symlink(req, parent, name, S_IFLNK, 0, link);
7c6b6602
DDAG
1010}
1011
5fe319a7
MS
1012static int linkat_empty_nofollow(struct lo_data *lo, struct lo_inode *inode,
1013 int dfd, const char *name)
7c6b6602 1014{
7387863d 1015 int res;
5fe319a7
MS
1016 struct lo_inode *parent;
1017 char path[PATH_MAX];
7c6b6602 1018
7387863d
DDAG
1019 if (inode->is_symlink) {
1020 res = linkat(inode->fd, "", dfd, name, AT_EMPTY_PATH);
1021 if (res == -1 && (errno == ENOENT || errno == EINVAL)) {
1022 /* Sorry, no race free way to hard-link a symlink. */
5fe319a7
MS
1023 if (lo->norace) {
1024 errno = EPERM;
1025 } else {
1026 goto fallback;
1027 }
7387863d
DDAG
1028 }
1029 return res;
1030 }
7c6b6602 1031
9f59d175 1032 sprintf(path, "%i", inode->fd);
5fe319a7 1033
9f59d175 1034 return linkat(lo->proc_self_fd, path, dfd, name, AT_SYMLINK_FOLLOW);
5fe319a7
MS
1035
1036fallback:
1037 res = lo_parent_and_name(lo, inode, path, &parent);
1038 if (res != -1) {
1039 res = linkat(parent->fd, path, dfd, name, 0);
95d27157 1040 unref_inode_lolocked(lo, parent, 1);
5fe319a7 1041 }
7c6b6602 1042
5fe319a7 1043 return res;
7c6b6602
DDAG
1044}
1045
1046static void lo_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
7387863d 1047 const char *name)
7c6b6602 1048{
7387863d
DDAG
1049 int res;
1050 struct lo_data *lo = lo_data(req);
92fb57b8 1051 struct lo_inode *inode;
7387863d
DDAG
1052 struct fuse_entry_param e;
1053 int saverr;
1054
25dae28c
SH
1055 if (!is_safe_path_component(name)) {
1056 fuse_reply_err(req, EINVAL);
1057 return;
1058 }
1059
92fb57b8
SH
1060 inode = lo_inode(req, ino);
1061 if (!inode) {
1062 fuse_reply_err(req, EBADF);
1063 return;
1064 }
1065
7387863d
DDAG
1066 memset(&e, 0, sizeof(struct fuse_entry_param));
1067 e.attr_timeout = lo->timeout;
1068 e.entry_timeout = lo->timeout;
1069
5fe319a7 1070 res = linkat_empty_nofollow(lo, inode, lo_fd(req, parent), name);
7387863d
DDAG
1071 if (res == -1) {
1072 goto out_err;
1073 }
1074
1075 res = fstatat(inode->fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
1076 if (res == -1) {
1077 goto out_err;
1078 }
1079
1080 pthread_mutex_lock(&lo->mutex);
1081 inode->refcount++;
1082 pthread_mutex_unlock(&lo->mutex);
92fb57b8 1083 e.ino = inode->fuse_ino;
7387863d 1084
d240314a
EG
1085 fuse_log(FUSE_LOG_DEBUG, " %lli/%s -> %lli\n", (unsigned long long)parent,
1086 name, (unsigned long long)e.ino);
7387863d
DDAG
1087
1088 fuse_reply_entry(req, &e);
1089 return;
7c6b6602
DDAG
1090
1091out_err:
7387863d
DDAG
1092 saverr = errno;
1093 fuse_reply_err(req, saverr);
7c6b6602
DDAG
1094}
1095
1096static void lo_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name)
1097{
7387863d 1098 int res;
25dae28c
SH
1099 if (!is_safe_path_component(name)) {
1100 fuse_reply_err(req, EINVAL);
1101 return;
1102 }
7c6b6602 1103
7387863d 1104 res = unlinkat(lo_fd(req, parent), name, AT_REMOVEDIR);
7c6b6602 1105
7387863d 1106 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1107}
1108
1109static void lo_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
7387863d
DDAG
1110 fuse_ino_t newparent, const char *newname,
1111 unsigned int flags)
7c6b6602 1112{
7387863d 1113 int res;
7c6b6602 1114
25dae28c
SH
1115 if (!is_safe_path_component(name) || !is_safe_path_component(newname)) {
1116 fuse_reply_err(req, EINVAL);
1117 return;
1118 }
1119
7387863d 1120 if (flags) {
f0ab7d6f 1121#ifndef SYS_renameat2
7387863d 1122 fuse_reply_err(req, EINVAL);
f0ab7d6f
MS
1123#else
1124 res = syscall(SYS_renameat2, lo_fd(req, parent), name,
1125 lo_fd(req, newparent), newname, flags);
1126 if (res == -1 && errno == ENOSYS) {
1127 fuse_reply_err(req, EINVAL);
1128 } else {
1129 fuse_reply_err(req, res == -1 ? errno : 0);
1130 }
1131#endif
7387863d
DDAG
1132 return;
1133 }
7c6b6602 1134
7387863d 1135 res = renameat(lo_fd(req, parent), name, lo_fd(req, newparent), newname);
7c6b6602 1136
7387863d 1137 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1138}
1139
1140static void lo_unlink(fuse_req_t req, fuse_ino_t parent, const char *name)
1141{
7387863d 1142 int res;
7c6b6602 1143
25dae28c
SH
1144 if (!is_safe_path_component(name)) {
1145 fuse_reply_err(req, EINVAL);
1146 return;
1147 }
1148
7387863d 1149 res = unlinkat(lo_fd(req, parent), name, 0);
7c6b6602 1150
7387863d 1151 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1152}
1153
95d27157
MS
1154static void unref_inode_lolocked(struct lo_data *lo, struct lo_inode *inode,
1155 uint64_t n)
7c6b6602 1156{
7387863d
DDAG
1157 if (!inode) {
1158 return;
1159 }
1160
1161 pthread_mutex_lock(&lo->mutex);
1162 assert(inode->refcount >= n);
1163 inode->refcount -= n;
1164 if (!inode->refcount) {
1165 struct lo_inode *prev, *next;
1166
1167 prev = inode->prev;
1168 next = inode->next;
1169 next->prev = prev;
1170 prev->next = next;
1171
92fb57b8 1172 lo_map_remove(&lo->ino_map, inode->fuse_ino);
7387863d
DDAG
1173 pthread_mutex_unlock(&lo->mutex);
1174 close(inode->fd);
1175 free(inode);
7387863d
DDAG
1176 } else {
1177 pthread_mutex_unlock(&lo->mutex);
1178 }
7c6b6602
DDAG
1179}
1180
1181static void lo_forget_one(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
1182{
7387863d 1183 struct lo_data *lo = lo_data(req);
92fb57b8
SH
1184 struct lo_inode *inode;
1185
1186 inode = lo_inode(req, ino);
1187 if (!inode) {
1188 return;
1189 }
7c6b6602 1190
d240314a
EG
1191 fuse_log(FUSE_LOG_DEBUG, " forget %lli %lli -%lli\n",
1192 (unsigned long long)ino, (unsigned long long)inode->refcount,
1193 (unsigned long long)nlookup);
7c6b6602 1194
95d27157 1195 unref_inode_lolocked(lo, inode, nlookup);
7c6b6602
DDAG
1196}
1197
1198static void lo_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup)
1199{
7387863d
DDAG
1200 lo_forget_one(req, ino, nlookup);
1201 fuse_reply_none(req);
7c6b6602
DDAG
1202}
1203
1204static void lo_forget_multi(fuse_req_t req, size_t count,
7387863d 1205 struct fuse_forget_data *forgets)
7c6b6602 1206{
7387863d 1207 int i;
7c6b6602 1208
7387863d
DDAG
1209 for (i = 0; i < count; i++) {
1210 lo_forget_one(req, forgets[i].ino, forgets[i].nlookup);
1211 }
1212 fuse_reply_none(req);
7c6b6602
DDAG
1213}
1214
1215static void lo_readlink(fuse_req_t req, fuse_ino_t ino)
1216{
7387863d
DDAG
1217 char buf[PATH_MAX + 1];
1218 int res;
7c6b6602 1219
7387863d
DDAG
1220 res = readlinkat(lo_fd(req, ino), "", buf, sizeof(buf));
1221 if (res == -1) {
1222 return (void)fuse_reply_err(req, errno);
1223 }
7c6b6602 1224
7387863d
DDAG
1225 if (res == sizeof(buf)) {
1226 return (void)fuse_reply_err(req, ENAMETOOLONG);
1227 }
7c6b6602 1228
7387863d 1229 buf[res] = '\0';
7c6b6602 1230
7387863d 1231 fuse_reply_readlink(req, buf);
7c6b6602
DDAG
1232}
1233
/* State kept for one open directory handle */
struct lo_dirp {
    DIR *dp;              /* directory stream used for readdir()/seekdir() */
    struct dirent *entry; /* entry read but not yet sent to the client, or NULL */
    off_t offset;         /* offset the stream is currently positioned at */
};
1239
b39bce12 1240static struct lo_dirp *lo_dirp(fuse_req_t req, struct fuse_file_info *fi)
7c6b6602 1241{
b39bce12
SH
1242 struct lo_data *lo = lo_data(req);
1243 struct lo_map_elem *elem;
1244
1245 pthread_mutex_lock(&lo->mutex);
1246 elem = lo_map_get(&lo->dirp_map, fi->fh);
1247 pthread_mutex_unlock(&lo->mutex);
1248 if (!elem) {
1249 return NULL;
1250 }
1251
1252 return elem->dirp;
7c6b6602
DDAG
1253}
1254
7387863d
DDAG
1255static void lo_opendir(fuse_req_t req, fuse_ino_t ino,
1256 struct fuse_file_info *fi)
7c6b6602 1257{
7387863d
DDAG
1258 int error = ENOMEM;
1259 struct lo_data *lo = lo_data(req);
1260 struct lo_dirp *d;
1261 int fd;
b39bce12 1262 ssize_t fh;
7387863d
DDAG
1263
1264 d = calloc(1, sizeof(struct lo_dirp));
1265 if (d == NULL) {
1266 goto out_err;
1267 }
1268
1269 fd = openat(lo_fd(req, ino), ".", O_RDONLY);
1270 if (fd == -1) {
1271 goto out_errno;
1272 }
1273
1274 d->dp = fdopendir(fd);
1275 if (d->dp == NULL) {
1276 goto out_errno;
1277 }
1278
1279 d->offset = 0;
1280 d->entry = NULL;
1281
b39bce12
SH
1282 pthread_mutex_lock(&lo->mutex);
1283 fh = lo_add_dirp_mapping(req, d);
1284 pthread_mutex_unlock(&lo->mutex);
1285 if (fh == -1) {
1286 goto out_err;
1287 }
1288
1289 fi->fh = fh;
7387863d
DDAG
1290 if (lo->cache == CACHE_ALWAYS) {
1291 fi->keep_cache = 1;
1292 }
1293 fuse_reply_open(req, fi);
1294 return;
7c6b6602
DDAG
1295
1296out_errno:
7387863d 1297 error = errno;
7c6b6602 1298out_err:
7387863d 1299 if (d) {
b39bce12
SH
1300 if (d->dp) {
1301 closedir(d->dp);
1302 }
7387863d
DDAG
1303 if (fd != -1) {
1304 close(fd);
1305 }
1306 free(d);
1307 }
1308 fuse_reply_err(req, error);
7c6b6602
DDAG
1309}
1310
7c6b6602 1311static void lo_do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
7387863d 1312 off_t offset, struct fuse_file_info *fi, int plus)
7c6b6602 1313{
752272da 1314 struct lo_data *lo = lo_data(req);
b39bce12 1315 struct lo_dirp *d;
752272da 1316 struct lo_inode *dinode;
b39bce12 1317 char *buf = NULL;
7387863d
DDAG
1318 char *p;
1319 size_t rem = size;
752272da 1320 int err = EBADF;
7387863d 1321
752272da
SH
1322 dinode = lo_inode(req, ino);
1323 if (!dinode) {
1324 goto error;
1325 }
7387863d 1326
b39bce12
SH
1327 d = lo_dirp(req, fi);
1328 if (!d) {
1329 goto error;
1330 }
1331
752272da 1332 err = ENOMEM;
7387863d
DDAG
1333 buf = calloc(1, size);
1334 if (!buf) {
7387863d
DDAG
1335 goto error;
1336 }
1337 p = buf;
1338
1339 if (offset != d->offset) {
1340 seekdir(d->dp, offset);
1341 d->entry = NULL;
1342 d->offset = offset;
1343 }
1344 while (1) {
1345 size_t entsize;
1346 off_t nextoff;
1347 const char *name;
1348
1349 if (!d->entry) {
1350 errno = 0;
1351 d->entry = readdir(d->dp);
1352 if (!d->entry) {
1353 if (errno) { /* Error */
1354 err = errno;
1355 goto error;
1356 } else { /* End of stream */
1357 break;
1358 }
1359 }
1360 }
1361 nextoff = d->entry->d_off;
1362 name = d->entry->d_name;
752272da 1363
7387863d 1364 fuse_ino_t entry_ino = 0;
752272da
SH
1365 struct fuse_entry_param e = (struct fuse_entry_param){
1366 .attr.st_ino = d->entry->d_ino,
1367 .attr.st_mode = d->entry->d_type << 12,
1368 };
1369
1370 /* Hide root's parent directory */
1371 if (dinode == &lo->root && strcmp(name, "..") == 0) {
1372 e.attr.st_ino = lo->root.ino;
1373 e.attr.st_mode = DT_DIR << 12;
1374 }
1375
7387863d 1376 if (plus) {
752272da 1377 if (!is_dot_or_dotdot(name)) {
7387863d
DDAG
1378 err = lo_do_lookup(req, ino, name, &e);
1379 if (err) {
1380 goto error;
1381 }
1382 entry_ino = e.ino;
1383 }
1384
1385 entsize = fuse_add_direntry_plus(req, p, rem, name, &e, nextoff);
1386 } else {
752272da 1387 entsize = fuse_add_direntry(req, p, rem, name, &e.attr, nextoff);
7387863d
DDAG
1388 }
1389 if (entsize > rem) {
1390 if (entry_ino != 0) {
1391 lo_forget_one(req, entry_ino, 1);
1392 }
1393 break;
1394 }
1395
1396 p += entsize;
1397 rem -= entsize;
1398
1399 d->entry = NULL;
1400 d->offset = nextoff;
1401 }
7c6b6602
DDAG
1402
1403 err = 0;
1404error:
7387863d
DDAG
1405 /*
1406 * If there's an error, we can only signal it if we haven't stored
1407 * any entries yet - otherwise we'd end up with wrong lookup
1408 * counts for the entries that are already in the buffer. So we
1409 * return what we've collected until that point.
1410 */
1411 if (err && rem == size) {
1412 fuse_reply_err(req, err);
1413 } else {
1414 fuse_reply_buf(req, buf, size - rem);
1415 }
7c6b6602
DDAG
1416 free(buf);
1417}
1418
1419static void lo_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
7387863d 1420 off_t offset, struct fuse_file_info *fi)
7c6b6602 1421{
7387863d 1422 lo_do_readdir(req, ino, size, offset, fi, 0);
7c6b6602
DDAG
1423}
1424
1425static void lo_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size,
7387863d 1426 off_t offset, struct fuse_file_info *fi)
7c6b6602 1427{
7387863d 1428 lo_do_readdir(req, ino, size, offset, fi, 1);
7c6b6602
DDAG
1429}
1430
7387863d
DDAG
1431static void lo_releasedir(fuse_req_t req, fuse_ino_t ino,
1432 struct fuse_file_info *fi)
7c6b6602 1433{
b39bce12
SH
1434 struct lo_data *lo = lo_data(req);
1435 struct lo_dirp *d;
1436
7387863d 1437 (void)ino;
b39bce12
SH
1438
1439 d = lo_dirp(req, fi);
1440 if (!d) {
1441 fuse_reply_err(req, EBADF);
1442 return;
1443 }
1444
1445 pthread_mutex_lock(&lo->mutex);
1446 lo_map_remove(&lo->dirp_map, fi->fh);
1447 pthread_mutex_unlock(&lo->mutex);
1448
7387863d
DDAG
1449 closedir(d->dp);
1450 free(d);
1451 fuse_reply_err(req, 0);
7c6b6602
DDAG
1452}
1453
1454static void lo_create(fuse_req_t req, fuse_ino_t parent, const char *name,
7387863d 1455 mode_t mode, struct fuse_file_info *fi)
7c6b6602 1456{
7387863d
DDAG
1457 int fd;
1458 struct lo_data *lo = lo_data(req);
1459 struct fuse_entry_param e;
1460 int err;
929cfb7a 1461 struct lo_cred old = {};
7387863d 1462
d240314a
EG
1463 fuse_log(FUSE_LOG_DEBUG, "lo_create(parent=%" PRIu64 ", name=%s)\n", parent,
1464 name);
7387863d 1465
25dae28c
SH
1466 if (!is_safe_path_component(name)) {
1467 fuse_reply_err(req, EINVAL);
1468 return;
1469 }
1470
929cfb7a
VG
1471 err = lo_change_cred(req, &old);
1472 if (err) {
1473 goto out;
1474 }
1475
7387863d
DDAG
1476 fd = openat(lo_fd(req, parent), name, (fi->flags | O_CREAT) & ~O_NOFOLLOW,
1477 mode);
929cfb7a
VG
1478 err = fd == -1 ? errno : 0;
1479 lo_restore_cred(&old);
7387863d 1480
929cfb7a 1481 if (!err) {
73b4d19d
SH
1482 ssize_t fh;
1483
1484 pthread_mutex_lock(&lo->mutex);
1485 fh = lo_add_fd_mapping(req, fd);
1486 pthread_mutex_unlock(&lo->mutex);
1487 if (fh == -1) {
1488 close(fd);
1489 fuse_reply_err(req, ENOMEM);
1490 return;
1491 }
1492
1493 fi->fh = fh;
929cfb7a
VG
1494 err = lo_do_lookup(req, parent, name, &e);
1495 }
7387863d
DDAG
1496 if (lo->cache == CACHE_NEVER) {
1497 fi->direct_io = 1;
1498 } else if (lo->cache == CACHE_ALWAYS) {
1499 fi->keep_cache = 1;
1500 }
1501
929cfb7a 1502out:
7387863d
DDAG
1503 if (err) {
1504 fuse_reply_err(req, err);
1505 } else {
1506 fuse_reply_create(req, &e, fi);
1507 }
7c6b6602
DDAG
1508}
1509
1510static void lo_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
7387863d 1511 struct fuse_file_info *fi)
7c6b6602 1512{
7387863d 1513 int res;
b39bce12
SH
1514 struct lo_dirp *d;
1515 int fd;
1516
7387863d 1517 (void)ino;
b39bce12
SH
1518
1519 d = lo_dirp(req, fi);
1520 if (!d) {
1521 fuse_reply_err(req, EBADF);
1522 return;
1523 }
1524
1525 fd = dirfd(d->dp);
7387863d
DDAG
1526 if (datasync) {
1527 res = fdatasync(fd);
1528 } else {
1529 res = fsync(fd);
1530 }
1531 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1532}
1533
1534static void lo_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
1535{
7387863d 1536 int fd;
73b4d19d 1537 ssize_t fh;
7387863d
DDAG
1538 char buf[64];
1539 struct lo_data *lo = lo_data(req);
1540
d240314a
EG
1541 fuse_log(FUSE_LOG_DEBUG, "lo_open(ino=%" PRIu64 ", flags=%d)\n", ino,
1542 fi->flags);
7387863d
DDAG
1543
1544 /*
1545 * With writeback cache, kernel may send read requests even
1546 * when userspace opened write-only
1547 */
1548 if (lo->writeback && (fi->flags & O_ACCMODE) == O_WRONLY) {
1549 fi->flags &= ~O_ACCMODE;
1550 fi->flags |= O_RDWR;
1551 }
1552
1553 /*
1554 * With writeback cache, O_APPEND is handled by the kernel.
1555 * This breaks atomicity (since the file may change in the
1556 * underlying filesystem, so that the kernel's idea of the
1557 * end of the file isn't accurate anymore). In this example,
1558 * we just accept that. A more rigorous filesystem may want
1559 * to return an error here
1560 */
1561 if (lo->writeback && (fi->flags & O_APPEND)) {
1562 fi->flags &= ~O_APPEND;
1563 }
1564
9f59d175
SH
1565 sprintf(buf, "%i", lo_fd(req, ino));
1566 fd = openat(lo->proc_self_fd, buf, fi->flags & ~O_NOFOLLOW);
7387863d
DDAG
1567 if (fd == -1) {
1568 return (void)fuse_reply_err(req, errno);
1569 }
1570
73b4d19d
SH
1571 pthread_mutex_lock(&lo->mutex);
1572 fh = lo_add_fd_mapping(req, fd);
1573 pthread_mutex_unlock(&lo->mutex);
1574 if (fh == -1) {
1575 close(fd);
1576 fuse_reply_err(req, ENOMEM);
1577 return;
1578 }
1579
1580 fi->fh = fh;
7387863d
DDAG
1581 if (lo->cache == CACHE_NEVER) {
1582 fi->direct_io = 1;
1583 } else if (lo->cache == CACHE_ALWAYS) {
1584 fi->keep_cache = 1;
1585 }
1586 fuse_reply_open(req, fi);
7c6b6602
DDAG
1587}
1588
7387863d
DDAG
1589static void lo_release(fuse_req_t req, fuse_ino_t ino,
1590 struct fuse_file_info *fi)
7c6b6602 1591{
73b4d19d
SH
1592 struct lo_data *lo = lo_data(req);
1593 int fd;
1594
7387863d 1595 (void)ino;
7c6b6602 1596
73b4d19d
SH
1597 fd = lo_fi_fd(req, fi);
1598
1599 pthread_mutex_lock(&lo->mutex);
1600 lo_map_remove(&lo->fd_map, fi->fh);
1601 pthread_mutex_unlock(&lo->mutex);
1602
1603 close(fd);
7387863d 1604 fuse_reply_err(req, 0);
7c6b6602
DDAG
1605}
1606
1607static void lo_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
1608{
7387863d
DDAG
1609 int res;
1610 (void)ino;
73b4d19d 1611 res = close(dup(lo_fi_fd(req, fi)));
7387863d 1612 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1613}
1614
1615static void lo_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
7387863d 1616 struct fuse_file_info *fi)
7c6b6602 1617{
7387863d 1618 int res;
1b209805
VG
1619 int fd;
1620 char *buf;
1621
1622 fuse_log(FUSE_LOG_DEBUG, "lo_fsync(ino=%" PRIu64 ", fi=0x%p)\n", ino,
1623 (void *)fi);
1624
1625 if (!fi) {
9f59d175
SH
1626 struct lo_data *lo = lo_data(req);
1627
1628 res = asprintf(&buf, "%i", lo_fd(req, ino));
1b209805
VG
1629 if (res == -1) {
1630 return (void)fuse_reply_err(req, errno);
1631 }
1632
9f59d175 1633 fd = openat(lo->proc_self_fd, buf, O_RDWR);
1b209805
VG
1634 free(buf);
1635 if (fd == -1) {
1636 return (void)fuse_reply_err(req, errno);
1637 }
1638 } else {
73b4d19d 1639 fd = lo_fi_fd(req, fi);
1b209805
VG
1640 }
1641
7387863d 1642 if (datasync) {
1b209805 1643 res = fdatasync(fd);
7387863d 1644 } else {
1b209805
VG
1645 res = fsync(fd);
1646 }
1647 if (!fi) {
1648 close(fd);
7387863d
DDAG
1649 }
1650 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1651}
1652
7387863d
DDAG
1653static void lo_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t offset,
1654 struct fuse_file_info *fi)
7c6b6602 1655{
7387863d 1656 struct fuse_bufvec buf = FUSE_BUFVEC_INIT(size);
7c6b6602 1657
d240314a
EG
1658 fuse_log(FUSE_LOG_DEBUG,
1659 "lo_read(ino=%" PRIu64 ", size=%zd, "
1660 "off=%lu)\n",
1661 ino, size, (unsigned long)offset);
7c6b6602 1662
7387863d 1663 buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
73b4d19d 1664 buf.buf[0].fd = lo_fi_fd(req, fi);
7387863d 1665 buf.buf[0].pos = offset;
7c6b6602 1666
8c3fe75e 1667 fuse_reply_data(req, &buf);
7c6b6602
DDAG
1668}
1669
1670static void lo_write_buf(fuse_req_t req, fuse_ino_t ino,
7387863d
DDAG
1671 struct fuse_bufvec *in_buf, off_t off,
1672 struct fuse_file_info *fi)
7c6b6602 1673{
7387863d
DDAG
1674 (void)ino;
1675 ssize_t res;
1676 struct fuse_bufvec out_buf = FUSE_BUFVEC_INIT(fuse_buf_size(in_buf));
ee884652 1677 bool cap_fsetid_dropped = false;
7387863d
DDAG
1678
1679 out_buf.buf[0].flags = FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK;
73b4d19d 1680 out_buf.buf[0].fd = lo_fi_fd(req, fi);
7387863d
DDAG
1681 out_buf.buf[0].pos = off;
1682
d240314a
EG
1683 fuse_log(FUSE_LOG_DEBUG,
1684 "lo_write_buf(ino=%" PRIu64 ", size=%zd, off=%lu)\n", ino,
1685 out_buf.buf[0].size, (unsigned long)off);
7387863d 1686
ee884652
VG
1687 /*
1688 * If kill_priv is set, drop CAP_FSETID which should lead to kernel
1689 * clearing setuid/setgid on file.
1690 */
1691 if (fi->kill_priv) {
1692 res = drop_effective_cap("FSETID", &cap_fsetid_dropped);
1693 if (res != 0) {
1694 fuse_reply_err(req, res);
1695 return;
1696 }
1697 }
1698
8c3fe75e 1699 res = fuse_buf_copy(&out_buf, in_buf);
7387863d
DDAG
1700 if (res < 0) {
1701 fuse_reply_err(req, -res);
1702 } else {
1703 fuse_reply_write(req, (size_t)res);
1704 }
ee884652
VG
1705
1706 if (cap_fsetid_dropped) {
1707 res = gain_effective_cap("FSETID");
1708 if (res) {
1709 fuse_log(FUSE_LOG_ERR, "Failed to gain CAP_FSETID\n");
1710 }
1711 }
7c6b6602
DDAG
1712}
1713
1714static void lo_statfs(fuse_req_t req, fuse_ino_t ino)
1715{
7387863d
DDAG
1716 int res;
1717 struct statvfs stbuf;
1718
1719 res = fstatvfs(lo_fd(req, ino), &stbuf);
1720 if (res == -1) {
1721 fuse_reply_err(req, errno);
1722 } else {
1723 fuse_reply_statfs(req, &stbuf);
1724 }
7c6b6602
DDAG
1725}
1726
7387863d
DDAG
1727static void lo_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, off_t offset,
1728 off_t length, struct fuse_file_info *fi)
7c6b6602 1729{
7387863d
DDAG
1730 int err = EOPNOTSUPP;
1731 (void)ino;
7c6b6602 1732
9776457c 1733#ifdef CONFIG_FALLOCATE
73b4d19d 1734 err = fallocate(lo_fi_fd(req, fi), mode, offset, length);
7387863d
DDAG
1735 if (err < 0) {
1736 err = errno;
1737 }
7c6b6602 1738
9776457c 1739#elif defined(CONFIG_POSIX_FALLOCATE)
7387863d
DDAG
1740 if (mode) {
1741 fuse_reply_err(req, EOPNOTSUPP);
1742 return;
1743 }
7c6b6602 1744
73b4d19d 1745 err = posix_fallocate(lo_fi_fd(req, fi), offset, length);
7c6b6602
DDAG
1746#endif
1747
7387863d 1748 fuse_reply_err(req, err);
7c6b6602
DDAG
1749}
1750
1751static void lo_flock(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi,
7387863d 1752 int op)
7c6b6602 1753{
7387863d
DDAG
1754 int res;
1755 (void)ino;
7c6b6602 1756
73b4d19d 1757 res = flock(lo_fi_fd(req, fi), op);
7c6b6602 1758
7387863d 1759 fuse_reply_err(req, res == -1 ? errno : 0);
7c6b6602
DDAG
1760}
1761
1762static void lo_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
7387863d 1763 size_t size)
7c6b6602 1764{
9f59d175 1765 struct lo_data *lo = lo_data(req);
7387863d
DDAG
1766 char *value = NULL;
1767 char procname[64];
92fb57b8 1768 struct lo_inode *inode;
7387863d
DDAG
1769 ssize_t ret;
1770 int saverr;
9f59d175 1771 int fd = -1;
7387863d 1772
92fb57b8
SH
1773 inode = lo_inode(req, ino);
1774 if (!inode) {
1775 fuse_reply_err(req, EBADF);
1776 return;
1777 }
1778
7387863d
DDAG
1779 saverr = ENOSYS;
1780 if (!lo_data(req)->xattr) {
1781 goto out;
1782 }
1783
d240314a
EG
1784 fuse_log(FUSE_LOG_DEBUG, "lo_getxattr(ino=%" PRIu64 ", name=%s size=%zd)\n",
1785 ino, name, size);
7387863d
DDAG
1786
1787 if (inode->is_symlink) {
1788 /* Sorry, no race free way to getxattr on symlink. */
1789 saverr = EPERM;
1790 goto out;
1791 }
1792
9f59d175
SH
1793 sprintf(procname, "%i", inode->fd);
1794 fd = openat(lo->proc_self_fd, procname, O_RDONLY);
1795 if (fd < 0) {
1796 goto out_err;
1797 }
7387863d
DDAG
1798
1799 if (size) {
1800 value = malloc(size);
1801 if (!value) {
1802 goto out_err;
1803 }
1804
9f59d175 1805 ret = fgetxattr(fd, name, value, size);
7387863d
DDAG
1806 if (ret == -1) {
1807 goto out_err;
1808 }
1809 saverr = 0;
1810 if (ret == 0) {
1811 goto out;
1812 }
1813
1814 fuse_reply_buf(req, value, ret);
1815 } else {
9f59d175 1816 ret = fgetxattr(fd, name, NULL, 0);
7387863d
DDAG
1817 if (ret == -1) {
1818 goto out_err;
1819 }
1820
1821 fuse_reply_xattr(req, ret);
1822 }
7c6b6602 1823out_free:
7387863d 1824 free(value);
9f59d175
SH
1825
1826 if (fd >= 0) {
1827 close(fd);
1828 }
7387863d 1829 return;
7c6b6602
DDAG
1830
1831out_err:
7387863d 1832 saverr = errno;
7c6b6602 1833out:
7387863d
DDAG
1834 fuse_reply_err(req, saverr);
1835 goto out_free;
7c6b6602
DDAG
1836}
1837
1838static void lo_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size)
1839{
9f59d175 1840 struct lo_data *lo = lo_data(req);
7387863d
DDAG
1841 char *value = NULL;
1842 char procname[64];
92fb57b8 1843 struct lo_inode *inode;
7387863d
DDAG
1844 ssize_t ret;
1845 int saverr;
9f59d175 1846 int fd = -1;
7387863d 1847
92fb57b8
SH
1848 inode = lo_inode(req, ino);
1849 if (!inode) {
1850 fuse_reply_err(req, EBADF);
1851 return;
1852 }
1853
7387863d
DDAG
1854 saverr = ENOSYS;
1855 if (!lo_data(req)->xattr) {
1856 goto out;
1857 }
1858
d240314a
EG
1859 fuse_log(FUSE_LOG_DEBUG, "lo_listxattr(ino=%" PRIu64 ", size=%zd)\n", ino,
1860 size);
7387863d
DDAG
1861
1862 if (inode->is_symlink) {
1863 /* Sorry, no race free way to listxattr on symlink. */
1864 saverr = EPERM;
1865 goto out;
1866 }
1867
9f59d175
SH
1868 sprintf(procname, "%i", inode->fd);
1869 fd = openat(lo->proc_self_fd, procname, O_RDONLY);
1870 if (fd < 0) {
1871 goto out_err;
1872 }
7387863d
DDAG
1873
1874 if (size) {
1875 value = malloc(size);
1876 if (!value) {
1877 goto out_err;
1878 }
1879
9f59d175 1880 ret = flistxattr(fd, value, size);
7387863d
DDAG
1881 if (ret == -1) {
1882 goto out_err;
1883 }
1884 saverr = 0;
1885 if (ret == 0) {
1886 goto out;
1887 }
1888
1889 fuse_reply_buf(req, value, ret);
1890 } else {
9f59d175 1891 ret = flistxattr(fd, NULL, 0);
7387863d
DDAG
1892 if (ret == -1) {
1893 goto out_err;
1894 }
1895
1896 fuse_reply_xattr(req, ret);
1897 }
7c6b6602 1898out_free:
7387863d 1899 free(value);
9f59d175
SH
1900
1901 if (fd >= 0) {
1902 close(fd);
1903 }
7387863d 1904 return;
7c6b6602
DDAG
1905
1906out_err:
7387863d 1907 saverr = errno;
7c6b6602 1908out:
7387863d
DDAG
1909 fuse_reply_err(req, saverr);
1910 goto out_free;
7c6b6602
DDAG
1911}
1912
1913static void lo_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
7387863d 1914 const char *value, size_t size, int flags)
7c6b6602 1915{
7387863d 1916 char procname[64];
9f59d175 1917 struct lo_data *lo = lo_data(req);
92fb57b8 1918 struct lo_inode *inode;
7387863d
DDAG
1919 ssize_t ret;
1920 int saverr;
9f59d175 1921 int fd = -1;
7c6b6602 1922
92fb57b8
SH
1923 inode = lo_inode(req, ino);
1924 if (!inode) {
1925 fuse_reply_err(req, EBADF);
1926 return;
1927 }
1928
7387863d
DDAG
1929 saverr = ENOSYS;
1930 if (!lo_data(req)->xattr) {
1931 goto out;
1932 }
7c6b6602 1933
d240314a
EG
1934 fuse_log(FUSE_LOG_DEBUG, "lo_setxattr(ino=%" PRIu64
1935 ", name=%s value=%s size=%zd)\n", ino, name, value, size);
7c6b6602 1936
7387863d
DDAG
1937 if (inode->is_symlink) {
1938 /* Sorry, no race free way to setxattr on symlink. */
1939 saverr = EPERM;
1940 goto out;
1941 }
7c6b6602 1942
9f59d175
SH
1943 sprintf(procname, "%i", inode->fd);
1944 fd = openat(lo->proc_self_fd, procname, O_RDWR);
1945 if (fd < 0) {
1946 saverr = errno;
1947 goto out;
1948 }
7c6b6602 1949
9f59d175 1950 ret = fsetxattr(fd, name, value, size, flags);
7387863d 1951 saverr = ret == -1 ? errno : 0;
7c6b6602
DDAG
1952
1953out:
9f59d175
SH
1954 if (fd >= 0) {
1955 close(fd);
1956 }
7387863d 1957 fuse_reply_err(req, saverr);
7c6b6602
DDAG
1958}
1959
1960static void lo_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name)
1961{
7387863d 1962 char procname[64];
9f59d175 1963 struct lo_data *lo = lo_data(req);
92fb57b8 1964 struct lo_inode *inode;
7387863d
DDAG
1965 ssize_t ret;
1966 int saverr;
9f59d175 1967 int fd = -1;
7c6b6602 1968
92fb57b8
SH
1969 inode = lo_inode(req, ino);
1970 if (!inode) {
1971 fuse_reply_err(req, EBADF);
1972 return;
1973 }
1974
7387863d
DDAG
1975 saverr = ENOSYS;
1976 if (!lo_data(req)->xattr) {
1977 goto out;
1978 }
7c6b6602 1979
d240314a
EG
1980 fuse_log(FUSE_LOG_DEBUG, "lo_removexattr(ino=%" PRIu64 ", name=%s)\n", ino,
1981 name);
7c6b6602 1982
7387863d
DDAG
1983 if (inode->is_symlink) {
1984 /* Sorry, no race free way to setxattr on symlink. */
1985 saverr = EPERM;
1986 goto out;
1987 }
7c6b6602 1988
9f59d175
SH
1989 sprintf(procname, "%i", inode->fd);
1990 fd = openat(lo->proc_self_fd, procname, O_RDWR);
1991 if (fd < 0) {
1992 saverr = errno;
1993 goto out;
1994 }
7c6b6602 1995
9f59d175 1996 ret = fremovexattr(fd, name);
7387863d 1997 saverr = ret == -1 ? errno : 0;
7c6b6602
DDAG
1998
1999out:
9f59d175
SH
2000 if (fd >= 0) {
2001 close(fd);
2002 }
7387863d 2003 fuse_reply_err(req, saverr);
7c6b6602
DDAG
2004}
2005
#ifdef HAVE_COPY_FILE_RANGE
/*
 * FUSE copy_file_range handler: copy up to @len bytes from the open file
 * @fi_in at @off_in to the open file @fi_out at @off_out and reply with
 * the number of bytes copied.
 *
 * On failure the positive errno value is passed to fuse_reply_err(), the
 * same way every other handler in this file reports errors.
 */
static void lo_copy_file_range(fuse_req_t req, fuse_ino_t ino_in, off_t off_in,
                               struct fuse_file_info *fi_in, fuse_ino_t ino_out,
                               off_t off_out, struct fuse_file_info *fi_out,
                               size_t len, int flags)
{
    int in_fd, out_fd;
    ssize_t res;

    in_fd = lo_fi_fd(req, fi_in);
    out_fd = lo_fi_fd(req, fi_out);

    /* off_t is cast so the arguments match the %lu conversions */
    fuse_log(FUSE_LOG_DEBUG,
             "lo_copy_file_range(ino=%" PRIu64 "/fd=%d, "
             "off=%lu, ino=%" PRIu64 "/fd=%d, "
             "off=%lu, size=%zd, flags=0x%x)\n",
             ino_in, in_fd, (unsigned long)off_in, ino_out, out_fd,
             (unsigned long)off_out, len, flags);

    res = copy_file_range(in_fd, &off_in, out_fd, &off_out, len, flags);
    if (res < 0) {
        fuse_reply_err(req, errno);
    } else {
        fuse_reply_write(req, res);
    }
}
#endif
2032
2033static void lo_lseek(fuse_req_t req, fuse_ino_t ino, off_t off, int whence,
7387863d 2034 struct fuse_file_info *fi)
7c6b6602 2035{
7387863d
DDAG
2036 off_t res;
2037
2038 (void)ino;
73b4d19d 2039 res = lseek(lo_fi_fd(req, fi), off, whence);
7387863d
DDAG
2040 if (res != -1) {
2041 fuse_reply_lseek(req, res);
2042 } else {
2043 fuse_reply_err(req, errno);
2044 }
7c6b6602
DDAG
2045}
2046
2047static struct fuse_lowlevel_ops lo_oper = {
7387863d
DDAG
2048 .init = lo_init,
2049 .lookup = lo_lookup,
2050 .mkdir = lo_mkdir,
2051 .mknod = lo_mknod,
2052 .symlink = lo_symlink,
2053 .link = lo_link,
2054 .unlink = lo_unlink,
2055 .rmdir = lo_rmdir,
2056 .rename = lo_rename,
2057 .forget = lo_forget,
2058 .forget_multi = lo_forget_multi,
2059 .getattr = lo_getattr,
2060 .setattr = lo_setattr,
2061 .readlink = lo_readlink,
2062 .opendir = lo_opendir,
2063 .readdir = lo_readdir,
2064 .readdirplus = lo_readdirplus,
2065 .releasedir = lo_releasedir,
2066 .fsyncdir = lo_fsyncdir,
2067 .create = lo_create,
2068 .open = lo_open,
2069 .release = lo_release,
2070 .flush = lo_flush,
2071 .fsync = lo_fsync,
2072 .read = lo_read,
2073 .write_buf = lo_write_buf,
2074 .statfs = lo_statfs,
2075 .fallocate = lo_fallocate,
2076 .flock = lo_flock,
2077 .getxattr = lo_getxattr,
2078 .listxattr = lo_listxattr,
2079 .setxattr = lo_setxattr,
2080 .removexattr = lo_removexattr,
7c6b6602 2081#ifdef HAVE_COPY_FILE_RANGE
7387863d 2082 .copy_file_range = lo_copy_file_range,
7c6b6602 2083#endif
7387863d 2084 .lseek = lo_lseek,
7c6b6602
DDAG
2085};
2086
45018fbb
SH
/*
 * Print the vhost-user.json backend program capabilities to stdout: a JSON
 * object whose only member is "type": "fs".
 */
static void print_capabilities(void)
{
    static const char capabilities[] =
        "{\n"
        " \"type\": \"fs\"\n"
        "}\n";

    fputs(capabilities, stdout);
}
2094
d74830d1 2095/*
8e1d4ef2 2096 * Move to a new mount, net, and pid namespaces to isolate this process.
d74830d1 2097 */
8e1d4ef2 2098static void setup_namespaces(struct lo_data *lo, struct fuse_session *se)
d74830d1 2099{
8e1d4ef2
SH
2100 pid_t child;
2101
2102 /*
2103 * Create a new pid namespace for *child* processes. We'll have to
2104 * fork in order to enter the new pid namespace. A new mount namespace
2105 * is also needed so that we can remount /proc for the new pid
2106 * namespace.
2107 *
2108 * Our UNIX domain sockets have been created. Now we can move to
2109 * an empty network namespace to prevent TCP/IP and other network
2110 * activity in case this process is compromised.
2111 */
2112 if (unshare(CLONE_NEWPID | CLONE_NEWNS | CLONE_NEWNET) != 0) {
2113 fuse_log(FUSE_LOG_ERR, "unshare(CLONE_NEWPID | CLONE_NEWNS): %m\n");
2114 exit(1);
2115 }
2116
2117 child = fork();
2118 if (child < 0) {
2119 fuse_log(FUSE_LOG_ERR, "fork() failed: %m\n");
2120 exit(1);
2121 }
2122 if (child > 0) {
2123 pid_t waited;
2124 int wstatus;
2125
2126 /* The parent waits for the child */
2127 do {
2128 waited = waitpid(child, &wstatus, 0);
2129 } while (waited < 0 && errno == EINTR && !se->exited);
2130
2131 /* We were terminated by a signal, see fuse_signals.c */
2132 if (se->exited) {
2133 exit(0);
2134 }
2135
2136 if (WIFEXITED(wstatus)) {
2137 exit(WEXITSTATUS(wstatus));
2138 }
2139
2140 exit(1);
2141 }
2142
2143 /* Send us SIGTERM when the parent thread terminates, see prctl(2) */
2144 prctl(PR_SET_PDEATHSIG, SIGTERM);
2145
2146 /*
2147 * If the mounts have shared propagation then we want to opt out so our
2148 * mount changes don't affect the parent mount namespace.
2149 */
2150 if (mount(NULL, "/", NULL, MS_REC | MS_SLAVE, NULL) < 0) {
2151 fuse_log(FUSE_LOG_ERR, "mount(/, MS_REC|MS_SLAVE): %m\n");
2152 exit(1);
2153 }
2154
2155 /* The child must remount /proc to use the new pid namespace */
2156 if (mount("proc", "/proc", "proc",
2157 MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RELATIME, NULL) < 0) {
2158 fuse_log(FUSE_LOG_ERR, "mount(/proc): %m\n");
2159 exit(1);
2160 }
2161
2162 /* Now we can get our /proc/self/fd directory file descriptor */
2163 lo->proc_self_fd = open("/proc/self/fd", O_PATH);
2164 if (lo->proc_self_fd == -1) {
2165 fuse_log(FUSE_LOG_ERR, "open(/proc/self/fd, O_PATH): %m\n");
d74830d1
SH
2166 exit(1);
2167 }
2168}
2169
2405f3c0
DDAG
2170/*
2171 * Capture the capability state, we'll need to restore this for individual
2172 * threads later; see load_capng.
2173 */
2174static void setup_capng(void)
2175{
2176 /* Note this accesses /proc so has to happen before the sandbox */
2177 if (capng_get_caps_process()) {
2178 fuse_log(FUSE_LOG_ERR, "capng_get_caps_process\n");
2179 exit(1);
2180 }
2181 pthread_mutex_init(&cap.mutex, NULL);
2182 pthread_mutex_lock(&cap.mutex);
2183 cap.saved = capng_save_state();
2184 if (!cap.saved) {
2185 fuse_log(FUSE_LOG_ERR, "capng_save_state\n");
2186 exit(1);
2187 }
2188 pthread_mutex_unlock(&cap.mutex);
2189}
2190
2191static void cleanup_capng(void)
2192{
2193 free(cap.saved);
2194 cap.saved = NULL;
2195 pthread_mutex_destroy(&cap.mutex);
2196}
2197
2198
8e1d4ef2
SH
2199/*
2200 * Make the source directory our root so symlinks cannot escape and no other
2201 * files are accessible. Assumes unshare(CLONE_NEWNS) was already called.
2202 */
2203static void setup_mounts(const char *source)
5baa3b8e
SH
2204{
2205 int oldroot;
2206 int newroot;
2207
8e1d4ef2
SH
2208 if (mount(source, source, NULL, MS_BIND, NULL) < 0) {
2209 fuse_log(FUSE_LOG_ERR, "mount(%s, %s, MS_BIND): %m\n", source, source);
2210 exit(1);
2211 }
2212
2213 /* This magic is based on lxc's lxc_pivot_root() */
5baa3b8e
SH
2214 oldroot = open("/", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
2215 if (oldroot < 0) {
2216 fuse_log(FUSE_LOG_ERR, "open(/): %m\n");
2217 exit(1);
2218 }
2219
2220 newroot = open(source, O_DIRECTORY | O_RDONLY | O_CLOEXEC);
2221 if (newroot < 0) {
2222 fuse_log(FUSE_LOG_ERR, "open(%s): %m\n", source);
2223 exit(1);
2224 }
2225
2226 if (fchdir(newroot) < 0) {
2227 fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
2228 exit(1);
2229 }
2230
2231 if (syscall(__NR_pivot_root, ".", ".") < 0) {
2232 fuse_log(FUSE_LOG_ERR, "pivot_root(., .): %m\n");
2233 exit(1);
2234 }
2235
2236 if (fchdir(oldroot) < 0) {
2237 fuse_log(FUSE_LOG_ERR, "fchdir(oldroot): %m\n");
2238 exit(1);
2239 }
2240
2241 if (mount("", ".", "", MS_SLAVE | MS_REC, NULL) < 0) {
2242 fuse_log(FUSE_LOG_ERR, "mount(., MS_SLAVE | MS_REC): %m\n");
2243 exit(1);
2244 }
2245
2246 if (umount2(".", MNT_DETACH) < 0) {
2247 fuse_log(FUSE_LOG_ERR, "umount2(., MNT_DETACH): %m\n");
2248 exit(1);
2249 }
2250
2251 if (fchdir(newroot) < 0) {
2252 fuse_log(FUSE_LOG_ERR, "fchdir(newroot): %m\n");
2253 exit(1);
2254 }
2255
2256 close(newroot);
2257 close(oldroot);
2258}
2259
5baa3b8e
SH
2260/*
2261 * Lock down this process to prevent access to other processes or files outside
2262 * source directory. This reduces the impact of arbitrary code execution bugs.
2263 */
f185621d
SH
2264static void setup_sandbox(struct lo_data *lo, struct fuse_session *se,
2265 bool enable_syslog)
5baa3b8e 2266{
8e1d4ef2
SH
2267 setup_namespaces(lo, se);
2268 setup_mounts(lo->source);
f185621d 2269 setup_seccomp(enable_syslog);
5baa3b8e
SH
2270}
2271
01a6dc95
SH
2272/* Raise the maximum number of open file descriptors */
2273static void setup_nofile_rlimit(void)
2274{
2275 const rlim_t max_fds = 1000000;
2276 struct rlimit rlim;
2277
2278 if (getrlimit(RLIMIT_NOFILE, &rlim) < 0) {
2279 fuse_log(FUSE_LOG_ERR, "getrlimit(RLIMIT_NOFILE): %m\n");
2280 exit(1);
2281 }
2282
2283 if (rlim.rlim_cur >= max_fds) {
2284 return; /* nothing to do */
2285 }
2286
2287 rlim.rlim_cur = max_fds;
2288 rlim.rlim_max = max_fds;
2289
2290 if (setrlimit(RLIMIT_NOFILE, &rlim) < 0) {
2291 /* Ignore SELinux denials */
2292 if (errno == EPERM) {
2293 return;
2294 }
2295
2296 fuse_log(FUSE_LOG_ERR, "setrlimit(RLIMIT_NOFILE): %m\n");
2297 exit(1);
2298 }
2299}
2300
f185621d
SH
2301static void log_func(enum fuse_log_level level, const char *fmt, va_list ap)
2302{
36f38469
MM
2303 g_autofree char *localfmt = NULL;
2304
d240314a
EG
2305 if (current_log_level < level) {
2306 return;
2307 }
2308
36f38469 2309 if (current_log_level == FUSE_LOG_DEBUG) {
50fb955a
MM
2310 if (!use_syslog) {
2311 localfmt = g_strdup_printf("[%" PRId64 "] [ID: %08ld] %s",
2312 get_clock(), syscall(__NR_gettid), fmt);
2313 } else {
2314 localfmt = g_strdup_printf("[ID: %08ld] %s", syscall(__NR_gettid),
2315 fmt);
2316 }
36f38469
MM
2317 fmt = localfmt;
2318 }
2319
f185621d
SH
2320 if (use_syslog) {
2321 int priority = LOG_ERR;
2322 switch (level) {
2323 case FUSE_LOG_EMERG:
2324 priority = LOG_EMERG;
2325 break;
2326 case FUSE_LOG_ALERT:
2327 priority = LOG_ALERT;
2328 break;
2329 case FUSE_LOG_CRIT:
2330 priority = LOG_CRIT;
2331 break;
2332 case FUSE_LOG_ERR:
2333 priority = LOG_ERR;
2334 break;
2335 case FUSE_LOG_WARNING:
2336 priority = LOG_WARNING;
2337 break;
2338 case FUSE_LOG_NOTICE:
2339 priority = LOG_NOTICE;
2340 break;
2341 case FUSE_LOG_INFO:
2342 priority = LOG_INFO;
2343 break;
2344 case FUSE_LOG_DEBUG:
2345 priority = LOG_DEBUG;
2346 break;
2347 }
2348 vsyslog(priority, fmt, ap);
2349 } else {
2350 vfprintf(stderr, fmt, ap);
2351 }
2352}
2353
7c6b6602
DDAG
2354int main(int argc, char *argv[])
2355{
7387863d
DDAG
2356 struct fuse_args args = FUSE_ARGS_INIT(argc, argv);
2357 struct fuse_session *se;
2358 struct fuse_cmdline_opts opts;
9f59d175
SH
2359 struct lo_data lo = {
2360 .debug = 0,
2361 .writeback = 0,
2362 .proc_self_fd = -1,
2363 };
92fb57b8 2364 struct lo_map_elem *root_elem;
7387863d
DDAG
2365 int ret = -1;
2366
2367 /* Don't mask creation mode, kernel already did that */
2368 umask(0);
2369
2370 pthread_mutex_init(&lo.mutex, NULL);
2371 lo.root.next = lo.root.prev = &lo.root;
2372 lo.root.fd = -1;
92fb57b8 2373 lo.root.fuse_ino = FUSE_ROOT_ID;
7387863d
DDAG
2374 lo.cache = CACHE_NORMAL;
2375
92fb57b8
SH
2376 /*
2377 * Set up the ino map like this:
2378 * [0] Reserved (will not be used)
2379 * [1] Root inode
2380 */
2381 lo_map_init(&lo.ino_map);
2382 lo_map_reserve(&lo.ino_map, 0)->in_use = false;
2383 root_elem = lo_map_reserve(&lo.ino_map, lo.root.fuse_ino);
2384 root_elem->inode = &lo.root;
2385
b39bce12 2386 lo_map_init(&lo.dirp_map);
73b4d19d 2387 lo_map_init(&lo.fd_map);
b39bce12 2388
7387863d
DDAG
2389 if (fuse_parse_cmdline(&args, &opts) != 0) {
2390 return 1;
2391 }
f185621d
SH
2392 fuse_set_log_func(log_func);
2393 use_syslog = opts.syslog;
2394 if (use_syslog) {
2395 openlog("virtiofsd", LOG_PID, LOG_DAEMON);
2396 }
7387863d 2397 if (opts.show_help) {
67aab022 2398 printf("usage: %s [options]\n\n", argv[0]);
7387863d 2399 fuse_cmdline_help();
4ff075f7 2400 printf(" -o source=PATH shared directory tree\n");
7387863d
DDAG
2401 fuse_lowlevel_help();
2402 ret = 0;
2403 goto err_out1;
2404 } else if (opts.show_version) {
2405 fuse_lowlevel_version();
2406 ret = 0;
2407 goto err_out1;
45018fbb
SH
2408 } else if (opts.print_capabilities) {
2409 print_capabilities();
2410 ret = 0;
2411 goto err_out1;
7387863d
DDAG
2412 }
2413
7387863d
DDAG
2414 if (fuse_opt_parse(&args, &lo, lo_opts, NULL) == -1) {
2415 return 1;
2416 }
2417
d240314a
EG
2418 /*
2419 * log_level is 0 if not configured via cmd options (0 is LOG_EMERG,
2420 * and we don't use this log level).
2421 */
2422 if (opts.log_level != 0) {
2423 current_log_level = opts.log_level;
2424 }
7387863d 2425 lo.debug = opts.debug;
d240314a
EG
2426 if (lo.debug) {
2427 current_log_level = FUSE_LOG_DEBUG;
2428 }
7387863d 2429 lo.root.refcount = 2;
d240314a 2430
7387863d
DDAG
2431 if (lo.source) {
2432 struct stat stat;
2433 int res;
2434
2435 res = lstat(lo.source, &stat);
2436 if (res == -1) {
2437 fuse_log(FUSE_LOG_ERR, "failed to stat source (\"%s\"): %m\n",
2438 lo.source);
2439 exit(1);
2440 }
2441 if (!S_ISDIR(stat.st_mode)) {
2442 fuse_log(FUSE_LOG_ERR, "source is not a directory\n");
2443 exit(1);
2444 }
2445
2446 } else {
2447 lo.source = "/";
2448 }
2449 lo.root.is_symlink = false;
2450 if (!lo.timeout_set) {
2451 switch (lo.cache) {
2452 case CACHE_NEVER:
2453 lo.timeout = 0.0;
2454 break;
2455
2456 case CACHE_NORMAL:
2457 lo.timeout = 1.0;
2458 break;
2459
2460 case CACHE_ALWAYS:
2461 lo.timeout = 86400.0;
2462 break;
2463 }
2464 } else if (lo.timeout < 0) {
2465 fuse_log(FUSE_LOG_ERR, "timeout is negative (%lf)\n", lo.timeout);
2466 exit(1);
2467 }
2468
2469 lo.root.fd = open(lo.source, O_PATH);
5baa3b8e 2470
7387863d
DDAG
2471 if (lo.root.fd == -1) {
2472 fuse_log(FUSE_LOG_ERR, "open(\"%s\", O_PATH): %m\n", lo.source);
2473 exit(1);
2474 }
2475
2476 se = fuse_session_new(&args, &lo_oper, sizeof(lo_oper), &lo);
2477 if (se == NULL) {
2478 goto err_out1;
2479 }
2480
2481 if (fuse_set_signal_handlers(se) != 0) {
2482 goto err_out2;
2483 }
2484
67aab022 2485 if (fuse_session_mount(se) != 0) {
7387863d
DDAG
2486 goto err_out3;
2487 }
2488
2489 fuse_daemonize(opts.foreground);
2490
01a6dc95
SH
2491 setup_nofile_rlimit();
2492
2405f3c0
DDAG
2493 /* Must be before sandbox since it wants /proc */
2494 setup_capng();
2495
f185621d 2496 setup_sandbox(&lo, se, opts.syslog);
5baa3b8e 2497
7387863d 2498 /* Block until ctrl+c or fusermount -u */
f6f3573c 2499 ret = virtio_loop(se);
7387863d
DDAG
2500
2501 fuse_session_unmount(se);
2405f3c0 2502 cleanup_capng();
7c6b6602 2503err_out3:
7387863d 2504 fuse_remove_signal_handlers(se);
7c6b6602 2505err_out2:
7387863d 2506 fuse_session_destroy(se);
7c6b6602 2507err_out1:
7387863d 2508 fuse_opt_free_args(&args);
7c6b6602 2509
73b4d19d 2510 lo_map_destroy(&lo.fd_map);
b39bce12 2511 lo_map_destroy(&lo.dirp_map);
92fb57b8
SH
2512 lo_map_destroy(&lo.ino_map);
2513
9f59d175
SH
2514 if (lo.proc_self_fd >= 0) {
2515 close(lo.proc_self_fd);
2516 }
2517
7387863d
DDAG
2518 if (lo.root.fd >= 0) {
2519 close(lo.root.fd);
2520 }
7c6b6602 2521
7387863d 2522 return ret ? 1 : 0;
7c6b6602 2523}