1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
19 #include "common/CommandTable.h"
20 #include "common/Finisher.h"
21 #include "common/Timer.h"
22 #include "common/ceph_mutex.h"
23 #include "common/cmdparse.h"
24 #include "common/compiler_extensions.h"
25 #include "include/common_fwd.h"
26 #include "include/cephfs/ceph_ll_client.h"
27 #include "include/filepath.h"
28 #include "include/interval_set.h"
29 #include "include/lru.h"
30 #include "include/types.h"
31 #include "include/unordered_map.h"
32 #include "include/unordered_set.h"
33 #include "include/cephfs/metrics/Types.h"
34 #include "mds/mdstypes.h"
35 #include "include/cephfs/types.h"
36 #include "msg/Dispatcher.h"
37 #include "msg/MessageRef.h"
38 #include "msg/Messenger.h"
39 #include "osdc/ObjectCacher.h"
43 #include "MetaSession.h"
68 class WritebackHandler
;
72 class destructive_lock_ref_t
;
// MDSCommandOp: a command-operation type that publicly derives from CommandOp.
// NOTE(review): only the constructor is visible in this excerpt; any other
// members of the class are not shown here.
94 class MDSCommandOp
: public CommandOp
// Explicit constructor: forwards the id `t` (a ceph_tid_t) unchanged to the
// CommandOp base-class constructor and does nothing else.
99 explicit MDSCommandOp(ceph_tid_t t
) : CommandOp(t
) {}
102 /* error code for ceph_fuse */
// Both codes are negative and start at -(1<<16), i.e. -65536 and -65537.
103 #define CEPH_FUSE_NO_MDS_UP -((1<<16)+0) /* no mds up detected in ceph_fuse */
104 #define CEPH_FUSE_LAST -((1<<16)+1) /* (unused) */
106 // ============================================
107 // types for my local metadata cache
110 - Dentries live in an LRU loop. They get expired based on last access.
111 see include/lru.h. Items can be bumped to "mid" or "top" of list, etc.
112 - Inode has ref count for each Fh, Dir, or Dentry that points to it.
113 - when Inode ref goes to 0, it's expired.
114 - when Dir is empty, it's removed (and its Inode ref--)
// Name-only constructor: initialises d_name from `s` and sets stmask to 0.
// NOTE(review): `st` does not appear in this member-initializer list.
120 explicit DirEntry(const std::string
&s
) : d_name(s
), stmask(0) {}
// Full constructor: initialises d_name from `n`, st from `s`, and stmask
// from `stm`.
121 DirEntry(const std::string
&n
, struct stat
& s
, int stm
)
122 : d_name(n
), st(s
), stmask(stm
) {}
137 class ceph_lock_state_t
;
139 // ========================================================
// dir_result_t: state of an open directory stream, including the 64-bit
// position encoding used for `offset` (see the layout comment on `offset`).
// NOTE(review): this excerpt omits several lines of the struct (closing
// braces, parts of some function bodies, some members); comments below only
// describe what is visible.
142 struct dir_result_t
{
// Number of low bits of a position that hold the "low" part; the "high"
// part starts at bit SHIFT (see make_fpos()).
143 static const int SHIFT
= 28;
// All-ones mask covering the low SHIFT bits.
144 static const int64_t MASK
= (1 << SHIFT
) - 1;
// 0xFF placed at bit SHIFT + 24 (= 52). set_hash_order() ORs it into
// `offset`, and hash_order() / fpos_high() test for it.
145 static const int64_t HASH
= 0xFFULL
<< (SHIFT
+ 24); // impossible frag bits
// Single bit at position SHIFT + 32 (= 60). set_end() ORs it into `offset`
// and at_end() tests it.
146 static const loff_t END
= 1ULL << (SHIFT
+ 32);
// --- nested `dentry` record (its opening line is not visible here) ---
151 std::string alternate_name
;
// Offset-only constructor: sets `offset` and leaves the other members
// default-initialised.
153 explicit dentry(int64_t o
) : offset(o
) {}
// Full constructor: takes name, alternate name and inode by value and moves
// each into the corresponding member.
154 dentry(int64_t o
, std::string n
, std::string an
, InodeRef in
) :
155 offset(o
), name(std::move(n
)), alternate_name(std::move(an
)), inode(std::move(in
)) {}
// Comparator between a dentry and a raw position: true when the dentry's
// offset orders strictly before `off` according to fpos_cmp().
157 struct dentry_off_lt
{
158 bool operator()(const dentry
& d
, int64_t off
) const {
159 return dir_result_t::fpos_cmp(d
.offset
, off
) < 0;
// Constructor taking the directory inode and the caller's permissions;
// declared here, defined outside this excerpt.
164 explicit dir_result_t(Inode
*in
, const UserPerm
& perms
);
// Compose a position from a high part `h` and a low part `l`:
// v = (h << SHIFT) | l. The assertion requires that the HASH bit pattern is
// not already fully set in v at that point.
// NOTE(review): how `hash` is applied and the return statement are not
// visible in this excerpt.
167 static uint64_t make_fpos(unsigned h
, unsigned l
, bool hash
) {
168 uint64_t v
= ((uint64_t)h
<< SHIFT
) | (uint64_t)l
;
172 ceph_assert((v
& HASH
) != HASH
);
// Extract the high part of position `p`: clear bit END and everything above
// it, then shift right by SHIFT. When all HASH bits are set in `p`, the
// value is passed through ceph_frag_value() before being returned.
// NOTE(review): the return for the non-HASH case is not visible here.
175 static unsigned fpos_high(uint64_t p
) {
176 unsigned v
= (p
& (END
-1)) >> SHIFT
;
177 if ((p
& HASH
) == HASH
)
178 return ceph_frag_value(v
);
// Extract the low part of position `p`.
// NOTE(review): the body is not visible in this excerpt.
181 static unsigned fpos_low(uint64_t p
) {
// Three-way comparison of two positions. The high parts are compared with
// ceph_frag_compare(); the low parts are then compared numerically, with the
// final return yielding -1 or 1.
// NOTE(review): the statements that consume `c` and the equal-low-parts
// result are not visible in this excerpt.
184 static int fpos_cmp(uint64_t l
, uint64_t r
) {
185 int c
= ceph_frag_compare(fpos_high(l
), fpos_high(r
));
188 if (fpos_low(l
) == fpos_low(r
))
190 return fpos_low(l
) < fpos_low(r
) ? -1 : 1;
// High / low part of the current `offset`.
193 unsigned offset_high() { return fpos_high(offset
); }
194 unsigned offset_low() { return fpos_low(offset
); }
// Mark / test the END bit on the current `offset`.
196 void set_end() { offset
|= END
; }
197 bool at_end() { return (offset
& END
); }
// Mark / test the HASH bit pattern on the current `offset`; hash_order() is
// true only when every HASH bit is set.
199 void set_hash_order() { offset
|= HASH
; }
200 bool hash_order() { return (offset
& HASH
) == HASH
; }
// NOTE(review): the enclosing function header and its branch condition are
// not visible. The two returns test the current high part against
// buffer_frag, once via contains() and once via equality with frag_t(...).
206 return buffer_frag
.contains(offset_high());
208 return buffer_frag
== frag_t(offset_high());
// Current position in the directory stream, encoded as described below.
222 int64_t offset
; // hash order:
223 // (0xff << 52) | ((24 bits hash) << 28) |
224 // (the nth entry has hash collision);
226 // ((frag value) << 28) | (the nth entry in frag);
228 unsigned next_offset
; // offset of next chunk (last_name's + 1)
229 std::string last_name
; // last entry in previous chunk
// NOTE(review): the meaning of the next three counters is not established by
// this excerpt; see their uses in the implementation.
231 uint64_t release_count
;
232 uint64_t ordered_count
;
233 unsigned cache_index
;
234 int start_shared_gen
; // dir shared_gen at start of readdir
// Buffered directory entries (vector of the nested `dentry` record).
239 std::vector
<dentry
> buffer
;
243 class Client
: public Dispatcher
, public md_config_obs_t
{
245 friend class C_Block_Sync
; // Calls block map and protected helpers
246 friend class C_Client_CacheInvalidate
; // calls ino_invalidate_cb
247 friend class C_Client_DentryInvalidate
; // calls dentry_invalidate_cb
248 friend class C_Client_FlushComplete
; // calls put_inode()
249 friend class C_Client_Remount
;
250 friend class C_Client_RequestInterrupt
;
251 friend class C_Deleg_Timeout
; // Asserts on client_lock, called when a delegation is unreturned
252 friend class C_Client_CacheRelease
; // Asserts on client_lock
253 friend class SyntheticClient
;
254 friend void intrusive_ptr_release(Inode
*in
);
255 template <typename T
> friend struct RWRefState
;
256 template <typename T
> friend class RWRef
;
258 using Dispatcher::cct
;
259 using clock
= ceph::coarse_mono_clock
;
// Callback type used for directory enumeration: a pointer to a function that
// takes an opaque pointer, a dirent, a ceph_statx, an offset and an Inode, and
// returns int. It is the type of the `cb` parameter of readdir_r_cb().
261 typedef int (*add_dirent_cb_t
)(void *p
, struct dirent
*de
, struct ceph_statx
*stx
, off_t off
, Inode
*in
);
263 struct walk_dentry_result
{
265 std::string alternate_name
;
// CommandHook: nested class deriving publicly from AdminSocketHook.
// NOTE(review): several lines of this class (including part of call()'s
// parameter list and any data members) are not visible in this excerpt.
268 class CommandHook
: public AdminSocketHook
{
// Explicit constructor taking the Client this hook is associated with.
270 explicit CommandHook(Client
*client
);
// Overrides the base-class call(); receives the command string, the parsed
// command map and an output bufferlist. Defined outside this excerpt.
271 int call(std::string_view command
, const cmdmap_t
& cmdmap
,
275 bufferlist
& out
) override
;
280 // snapshot info returned via get_snap_info(). nothing to do
281 // with SnapInfo on the MDS.
284 std::map
<std::string
, std::string
> metadata
;
// Construct a Client from a Messenger, a MonClient and an Objecter; defined
// outside this excerpt.
287 Client(Messenger
*m
, MonClient
*mc
, Objecter
*objecter_
);
// Copying is disabled.
288 Client(const Client
&) = delete;
// Construction from a (const) rvalue Client is disabled as well.
289 Client(const Client
&&) = delete;
// Virtual destructor, overriding the base-class destructor.
290 virtual ~Client() override
;
// Build a UserPerm from the configuration of the given CephContext.
// Uses client_mount_uid / client_mount_gid when they are >= 0; otherwise -1
// is used (converted to uid_t / gid_t respectively).
292 static UserPerm
pick_my_perms(CephContext
*c
) {
293 uid_t uid
= c
->_conf
->client_mount_uid
>= 0 ? c
->_conf
->client_mount_uid
: -1;
294 gid_t gid
= c
->_conf
->client_mount_gid
>= 0 ? c
->_conf
->client_mount_gid
: -1;
295 return UserPerm(uid
, gid
);
// Instance variant: build a UserPerm from this client's user_id / group_id.
// A value >= 0 is used as-is; otherwise -1 is used (converted to
// uid_t / gid_t respectively).
297 UserPerm
pick_my_perms() {
298 uid_t uid
= user_id
>= 0 ? user_id
: -1;
299 gid_t gid
= group_id
>= 0 ? group_id
: -1;
300 return UserPerm(uid
, gid
);
303 int mount(const std::string
&mount_root
, const UserPerm
& perms
,
304 bool require_mds
=false, const std::string
&fs_name
="");
// Returns the result of asking mount_state whether its current state is
// CLIENT_UNMOUNTING.
306 bool is_unmounting() const {
307 return mount_state
.check_current_state(CLIENT_UNMOUNTING
);
// Returns the result of asking mount_state whether its current state is
// CLIENT_MOUNTED.
309 bool is_mounted() const {
310 return mount_state
.check_current_state(CLIENT_MOUNTED
);
// Returns the result of asking mount_state whether its current state is
// CLIENT_MOUNTING.
312 bool is_mounting() const {
313 return mount_state
.check_current_state(CLIENT_MOUNTING
);
// Returns the result of asking initialize_state (a separate state object from
// mount_state) whether its current state is CLIENT_INITIALIZED.
315 bool is_initialized() const {
316 return initialize_state
.check_current_state(CLIENT_INITIALIZED
);
320 void set_uuid(const std::string
& uuid
);
321 void set_session_timeout(unsigned timeout
);
322 int start_reclaim(const std::string
& uuid
, unsigned flags
,
323 const std::string
& fs_name
);
324 void finish_reclaim();
326 fs_cluster_id_t
get_fs_cid() {
331 const std::string
&mds_spec
,
332 const std::vector
<std::string
>& cmd
,
333 const bufferlist
& inbl
,
334 bufferlist
*poutbl
, std::string
*prs
, Context
*onfinish
);
336 // these should (more or less) mirror the actual system calls.
337 int statfs(const char *path
, struct statvfs
*stbuf
, const UserPerm
& perms
);
340 int chdir(const char *s
, std::string
&new_cwd
, const UserPerm
& perms
);
341 void _getcwd(std::string
& cwd
, const UserPerm
& perms
);
342 void getcwd(std::string
& cwd
, const UserPerm
& perms
);
345 int opendir(const char *name
, dir_result_t
**dirpp
, const UserPerm
& perms
);
346 int fdopendir(int dirfd
, dir_result_t
**dirpp
, const UserPerm
& perms
);
347 int closedir(dir_result_t
*dirp
);
350 * Fill a directory listing from dirp, invoking cb for each entry
351 * with the given pointer, the dirent, the struct stat, the stmask,
354 * Returns 0 if it reached the end of the directory.
355 * If @a cb returns a negative error code, stop and return that.
357 int readdir_r_cb(dir_result_t
*dirp
, add_dirent_cb_t cb
, void *p
,
358 unsigned want
=0, unsigned flags
=AT_STATX_DONT_SYNC
,
361 struct dirent
* readdir(dir_result_t
*d
);
362 int readdir_r(dir_result_t
*dirp
, struct dirent
*de
);
363 int readdirplus_r(dir_result_t
*dirp
, struct dirent
*de
, struct ceph_statx
*stx
, unsigned want
, unsigned flags
, Inode
**out
);
366 * Get the next snapshot delta entry.
369 int readdir_snapdiff(dir_result_t
* dir1
, snapid_t snap2
,
370 struct dirent
* out_de
, snapid_t
* out_snap
);
372 int getdir(const char *relpath
, std::list
<std::string
>& names
,
373 const UserPerm
& perms
); // get the whole dir at once.
376 * Returns the length of the buffer that got filled in, or -errno.
377 * If it returns -CEPHFS_ERANGE you just need to increase the size of the
378 * buffer and try again.
380 int _getdents(dir_result_t
*dirp
, char *buf
, int buflen
, bool ful
); // get a bunch of dentries at once
// Thin wrapper: forwards to _getdents() with its last argument (`ful`) set to
// true and returns its result unchanged.
381 int getdents(dir_result_t
*dirp
, char *buf
, int buflen
) {
382 return _getdents(dirp
, buf
, buflen
, true);
// Thin wrapper: forwards to _getdents() with its last argument (`ful`) set to
// false and returns its result unchanged.
384 int getdnames(dir_result_t
*dirp
, char *buf
, int buflen
) {
385 return _getdents(dirp
, buf
, buflen
, false);
388 void rewinddir(dir_result_t
*dirp
);
389 loff_t
telldir(dir_result_t
*dirp
);
390 void seekdir(dir_result_t
*dirp
, loff_t offset
);
392 int may_delete(const char *relpath
, const UserPerm
& perms
);
393 int link(const char *existing
, const char *newname
, const UserPerm
& perm
, std::string alternate_name
="");
394 int unlink(const char *path
, const UserPerm
& perm
);
395 int unlinkat(int dirfd
, const char *relpath
, int flags
, const UserPerm
& perm
);
396 int rename(const char *from
, const char *to
, const UserPerm
& perm
, std::string alternate_name
="");
399 int mkdir(const char *path
, mode_t mode
, const UserPerm
& perm
, std::string alternate_name
="");
400 int mkdirat(int dirfd
, const char *relpath
, mode_t mode
, const UserPerm
& perm
,
401 std::string alternate_name
="");
402 int mkdirs(const char *path
, mode_t mode
, const UserPerm
& perms
);
403 int rmdir(const char *path
, const UserPerm
& perms
);
406 int readlink(const char *path
, char *buf
, loff_t size
, const UserPerm
& perms
);
407 int readlinkat(int dirfd
, const char *relpath
, char *buf
, loff_t size
, const UserPerm
& perms
);
409 int symlink(const char *existing
, const char *newname
, const UserPerm
& perms
, std::string alternate_name
="");
410 int symlinkat(const char *target
, int dirfd
, const char *relpath
, const UserPerm
& perms
,
411 std::string alternate_name
="");
413 // path traversal for high-level interface
414 int walk(std::string_view path
, struct walk_dentry_result
* result
, const UserPerm
& perms
, bool followsym
=true);
417 unsigned statx_to_mask(unsigned int flags
, unsigned int want
);
418 int stat(const char *path
, struct stat
*stbuf
, const UserPerm
& perms
,
419 frag_info_t
*dirstat
=0, int mask
=CEPH_STAT_CAP_INODE_ALL
);
420 int statx(const char *path
, struct ceph_statx
*stx
,
421 const UserPerm
& perms
,
422 unsigned int want
, unsigned int flags
);
423 int lstat(const char *path
, struct stat
*stbuf
, const UserPerm
& perms
,
424 frag_info_t
*dirstat
=0, int mask
=CEPH_STAT_CAP_INODE_ALL
);
426 int setattr(const char *relpath
, struct stat
*attr
, int mask
,
427 const UserPerm
& perms
);
428 int setattrx(const char *relpath
, struct ceph_statx
*stx
, int mask
,
429 const UserPerm
& perms
, int flags
=0);
430 int fsetattr(int fd
, struct stat
*attr
, int mask
, const UserPerm
& perms
);
431 int fsetattrx(int fd
, struct ceph_statx
*stx
, int mask
, const UserPerm
& perms
);
432 int chmod(const char *path
, mode_t mode
, const UserPerm
& perms
);
433 int fchmod(int fd
, mode_t mode
, const UserPerm
& perms
);
434 int chmodat(int dirfd
, const char *relpath
, mode_t mode
, int flags
, const UserPerm
& perms
);
435 int lchmod(const char *path
, mode_t mode
, const UserPerm
& perms
);
436 int chown(const char *path
, uid_t new_uid
, gid_t new_gid
,
437 const UserPerm
& perms
);
438 int fchown(int fd
, uid_t new_uid
, gid_t new_gid
, const UserPerm
& perms
);
439 int lchown(const char *path
, uid_t new_uid
, gid_t new_gid
,
440 const UserPerm
& perms
);
441 int chownat(int dirfd
, const char *relpath
, uid_t new_uid
, gid_t new_gid
,
442 int flags
, const UserPerm
& perms
);
443 int utime(const char *path
, struct utimbuf
*buf
, const UserPerm
& perms
);
444 int lutime(const char *path
, struct utimbuf
*buf
, const UserPerm
& perms
);
445 int futime(int fd
, struct utimbuf
*buf
, const UserPerm
& perms
);
446 int utimes(const char *relpath
, struct timeval times
[2], const UserPerm
& perms
);
447 int lutimes(const char *relpath
, struct timeval times
[2], const UserPerm
& perms
);
448 int futimes(int fd
, struct timeval times
[2], const UserPerm
& perms
);
449 int futimens(int fd
, struct timespec times
[2], const UserPerm
& perms
);
450 int utimensat(int dirfd
, const char *relpath
, struct timespec times
[2], int flags
,
451 const UserPerm
& perms
);
452 int flock(int fd
, int operation
, uint64_t owner
);
453 int truncate(const char *path
, loff_t size
, const UserPerm
& perms
);
456 int mknod(const char *path
, mode_t mode
, const UserPerm
& perms
, dev_t rdev
=0);
458 int create_and_open(int dirfd
, const char *relpath
, int flags
, const UserPerm
& perms
,
459 mode_t mode
, int stripe_unit
, int stripe_count
, int object_size
,
460 const char *data_pool
, std::string alternate_name
);
// Convenience overload of open(): forwards to the overload that also takes
// stripe_unit, stripe_count, object_size and data_pool, passing 0, 0, 0 and
// NULL for them. `mode` defaults to 0 and `alternate_name` to "".
461 int open(const char *path
, int flags
, const UserPerm
& perms
, mode_t mode
=0, std::string alternate_name
="") {
462 return open(path
, flags
, perms
, mode
, 0, 0, 0, NULL
, alternate_name
);
464 int open(const char *path
, int flags
, const UserPerm
& perms
,
465 mode_t mode
, int stripe_unit
, int stripe_count
, int object_size
,
466 const char *data_pool
, std::string alternate_name
="");
467 int openat(int dirfd
, const char *relpath
, int flags
, const UserPerm
& perms
,
468 mode_t mode
, int stripe_unit
, int stripe_count
,
469 int object_size
, const char *data_pool
, std::string alternate_name
);
// Convenience overload of openat(): forwards to the overload that also takes
// stripe_unit, stripe_count, object_size and data_pool, passing 0, 0, 0 and
// NULL for them. `mode` defaults to 0 and `alternate_name` to "".
470 int openat(int dirfd
, const char *path
, int flags
, const UserPerm
& perms
, mode_t mode
=0,
471 std::string alternate_name
="") {
472 return openat(dirfd
, path
, flags
, perms
, mode
, 0, 0, 0, NULL
, alternate_name
);
475 int lookup_hash(inodeno_t ino
, inodeno_t dirino
, const char *name
,
476 const UserPerm
& perms
);
477 int lookup_ino(inodeno_t ino
, const UserPerm
& perms
, Inode
**inode
=NULL
);
478 int lookup_name(Inode
*in
, Inode
*parent
, const UserPerm
& perms
);
481 loff_t
lseek(int fd
, loff_t offset
, int whence
);
482 int read(int fd
, char *buf
, loff_t size
, loff_t offset
=-1);
483 int preadv(int fd
, const struct iovec
*iov
, int iovcnt
, loff_t offset
=-1);
484 int write(int fd
, const char *buf
, loff_t size
, loff_t offset
=-1);
485 int pwritev(int fd
, const struct iovec
*iov
, int iovcnt
, loff_t offset
=-1);
486 int fake_write_size(int fd
, loff_t size
);
487 int ftruncate(int fd
, loff_t size
, const UserPerm
& perms
);
488 int fsync(int fd
, bool syncdataonly
);
489 int fstat(int fd
, struct stat
*stbuf
, const UserPerm
& perms
,
490 int mask
=CEPH_STAT_CAP_INODE_ALL
);
491 int fstatx(int fd
, struct ceph_statx
*stx
, const UserPerm
& perms
,
492 unsigned int want
, unsigned int flags
);
493 int statxat(int dirfd
, const char *relpath
,
494 struct ceph_statx
*stx
, const UserPerm
& perms
,
495 unsigned int want
, unsigned int flags
);
496 int fallocate(int fd
, int mode
, loff_t offset
, loff_t length
);
498 // full path xattr ops
499 int getxattr(const char *path
, const char *name
, void *value
, size_t size
,
500 const UserPerm
& perms
);
501 int lgetxattr(const char *path
, const char *name
, void *value
, size_t size
,
502 const UserPerm
& perms
);
503 int fgetxattr(int fd
, const char *name
, void *value
, size_t size
,
504 const UserPerm
& perms
);
505 int listxattr(const char *path
, char *list
, size_t size
, const UserPerm
& perms
);
506 int llistxattr(const char *path
, char *list
, size_t size
, const UserPerm
& perms
);
507 int flistxattr(int fd
, char *list
, size_t size
, const UserPerm
& perms
);
508 int removexattr(const char *path
, const char *name
, const UserPerm
& perms
);
509 int lremovexattr(const char *path
, const char *name
, const UserPerm
& perms
);
510 int fremovexattr(int fd
, const char *name
, const UserPerm
& perms
);
511 int setxattr(const char *path
, const char *name
, const void *value
,
512 size_t size
, int flags
, const UserPerm
& perms
);
513 int lsetxattr(const char *path
, const char *name
, const void *value
,
514 size_t size
, int flags
, const UserPerm
& perms
);
515 int fsetxattr(int fd
, const char *name
, const void *value
, size_t size
,
516 int flags
, const UserPerm
& perms
);
519 int64_t drop_caches();
521 int get_snap_info(const char *path
, const UserPerm
&perms
, SnapInfo
*snap_info
);
524 int lazyio(int fd
, int enable
);
525 int lazyio_propagate(int fd
, loff_t offset
, size_t count
);
526 int lazyio_synchronize(int fd
, loff_t offset
, size_t count
);
528 // expose file layout
529 int describe_layout(const char *path
, file_layout_t
* layout
,
530 const UserPerm
& perms
);
531 int fdescribe_layout(int fd
, file_layout_t
* layout
);
532 int get_file_stripe_address(int fd
, loff_t offset
, std::vector
<entity_addr_t
>& address
);
533 int get_file_extent_osds(int fd
, loff_t off
, loff_t
*len
, std::vector
<int>& osds
);
534 int get_osd_addr(int osd
, entity_addr_t
& addr
);
537 int64_t get_default_pool_id();
541 int get_pool_replication(int64_t pool
);
542 int64_t get_pool_id(const char *pool_name
);
543 std::string
get_pool_name(int64_t pool
);
544 int get_osd_crush_location(int id
, std::vector
<std::pair
<std::string
, std::string
> >& path
);
546 int enumerate_layout(int fd
, std::vector
<ObjectExtent
>& result
,
547 loff_t length
, loff_t offset
);
549 int mksnap(const char *path
, const char *name
, const UserPerm
& perm
,
550 mode_t mode
=0, const std::map
<std::string
, std::string
> &metadata
={});
551 int rmsnap(const char *path
, const char *name
, const UserPerm
& perm
, bool check_perms
=false);
553 // Inode permission checking
554 int inode_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
);
557 int get_caps_issued(int fd
);
558 int get_caps_issued(const char *path
, const UserPerm
& perms
);
560 snapid_t
ll_get_snapid(Inode
*in
);
// Return the vinodeno_t for `in` as computed by _get_vino().
// A std::lock_guard on client_lock is held for the duration of the call.
561 vinodeno_t
ll_get_vino(Inode
*in
) {
562 std::lock_guard
lock(client_lock
);
563 return _get_vino(in
);
565 // get inode from faked ino
566 Inode
*ll_get_inode(ino_t ino
);
567 Inode
*ll_get_inode(vinodeno_t vino
);
568 int ll_lookup(Inode
*parent
, const char *name
, struct stat
*attr
,
569 Inode
**out
, const UserPerm
& perms
);
570 int ll_lookup_inode(struct inodeno_t ino
, const UserPerm
& perms
, Inode
**inode
);
571 int ll_lookup_vino(vinodeno_t vino
, const UserPerm
& perms
, Inode
**inode
);
572 int ll_lookupx(Inode
*parent
, const char *name
, Inode
**out
,
573 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
574 const UserPerm
& perms
);
575 bool ll_forget(Inode
*in
, uint64_t count
);
576 bool ll_put(Inode
*in
);
577 int ll_get_snap_ref(snapid_t snap
);
579 int ll_getattr(Inode
*in
, struct stat
*st
, const UserPerm
& perms
);
580 int ll_getattrx(Inode
*in
, struct ceph_statx
*stx
, unsigned int want
,
581 unsigned int flags
, const UserPerm
& perms
);
582 int ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
583 const UserPerm
& perms
);
584 int ll_setattr(Inode
*in
, struct stat
*st
, int mask
,
585 const UserPerm
& perms
);
586 int ll_getxattr(Inode
*in
, const char *name
, void *value
, size_t size
,
587 const UserPerm
& perms
);
588 int ll_setxattr(Inode
*in
, const char *name
, const void *value
, size_t size
,
589 int flags
, const UserPerm
& perms
);
590 int ll_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
);
591 int ll_listxattr(Inode
*in
, char *list
, size_t size
, const UserPerm
& perms
);
592 int ll_opendir(Inode
*in
, int flags
, dir_result_t
**dirpp
,
593 const UserPerm
& perms
);
594 int ll_releasedir(dir_result_t
* dirp
);
595 int ll_fsyncdir(dir_result_t
* dirp
);
596 int ll_readlink(Inode
*in
, char *buf
, size_t bufsize
, const UserPerm
& perms
);
597 int ll_mknod(Inode
*in
, const char *name
, mode_t mode
, dev_t rdev
,
598 struct stat
*attr
, Inode
**out
, const UserPerm
& perms
);
599 int ll_mknodx(Inode
*parent
, const char *name
, mode_t mode
, dev_t rdev
,
600 Inode
**out
, struct ceph_statx
*stx
, unsigned want
,
601 unsigned flags
, const UserPerm
& perms
);
602 int ll_mkdir(Inode
*in
, const char *name
, mode_t mode
, struct stat
*attr
,
603 Inode
**out
, const UserPerm
& perm
);
604 int ll_mkdirx(Inode
*parent
, const char *name
, mode_t mode
, Inode
**out
,
605 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
606 const UserPerm
& perms
);
607 int ll_symlink(Inode
*in
, const char *name
, const char *value
,
608 struct stat
*attr
, Inode
**out
, const UserPerm
& perms
);
609 int ll_symlinkx(Inode
*parent
, const char *name
, const char *value
,
610 Inode
**out
, struct ceph_statx
*stx
, unsigned want
,
611 unsigned flags
, const UserPerm
& perms
);
612 int ll_unlink(Inode
*in
, const char *name
, const UserPerm
& perm
);
613 int ll_rmdir(Inode
*in
, const char *name
, const UserPerm
& perms
);
614 int ll_rename(Inode
*parent
, const char *name
, Inode
*newparent
,
615 const char *newname
, const UserPerm
& perm
);
616 int ll_link(Inode
*in
, Inode
*newparent
, const char *newname
,
617 const UserPerm
& perm
);
618 int ll_open(Inode
*in
, int flags
, Fh
**fh
, const UserPerm
& perms
);
619 int _ll_create(Inode
*parent
, const char *name
, mode_t mode
,
620 int flags
, InodeRef
*in
, int caps
, Fh
**fhp
,
621 const UserPerm
& perms
);
622 int ll_create(Inode
*parent
, const char *name
, mode_t mode
, int flags
,
623 struct stat
*attr
, Inode
**out
, Fh
**fhp
,
624 const UserPerm
& perms
);
625 int ll_createx(Inode
*parent
, const char *name
, mode_t mode
,
626 int oflags
, Inode
**outp
, Fh
**fhp
,
627 struct ceph_statx
*stx
, unsigned want
, unsigned lflags
,
628 const UserPerm
& perms
);
629 int ll_read_block(Inode
*in
, uint64_t blockid
, char *buf
, uint64_t offset
,
630 uint64_t length
, file_layout_t
* layout
);
632 int ll_write_block(Inode
*in
, uint64_t blockid
,
633 char* buf
, uint64_t offset
,
634 uint64_t length
, file_layout_t
* layout
,
635 uint64_t snapseq
, uint32_t sync
);
636 int ll_commit_blocks(Inode
*in
, uint64_t offset
, uint64_t length
);
638 int ll_statfs(Inode
*in
, struct statvfs
*stbuf
, const UserPerm
& perms
);
639 int ll_walk(const char* name
, Inode
**i
, struct ceph_statx
*stx
,
640 unsigned int want
, unsigned int flags
, const UserPerm
& perms
);
641 uint32_t ll_stripe_unit(Inode
*in
);
642 int ll_file_layout(Inode
*in
, file_layout_t
*layout
);
643 uint64_t ll_snap_seq(Inode
*in
);
645 int ll_read(Fh
*fh
, loff_t off
, loff_t len
, bufferlist
*bl
);
646 int ll_write(Fh
*fh
, loff_t off
, loff_t len
, const char *data
);
647 int64_t ll_readv(struct Fh
*fh
, const struct iovec
*iov
, int iovcnt
, int64_t off
);
648 int64_t ll_writev(struct Fh
*fh
, const struct iovec
*iov
, int iovcnt
, int64_t off
);
649 loff_t
ll_lseek(Fh
*fh
, loff_t offset
, int whence
);
650 int ll_flush(Fh
*fh
);
651 int ll_fsync(Fh
*fh
, bool syncdataonly
);
652 int ll_sync_inode(Inode
*in
, bool syncdataonly
);
653 int ll_fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
);
654 int ll_release(Fh
*fh
);
655 int ll_getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
);
656 int ll_setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
);
657 int ll_flock(Fh
*fh
, int cmd
, uint64_t owner
);
658 int ll_lazyio(Fh
*fh
, int enable
);
659 int ll_file_layout(Fh
*fh
, file_layout_t
*layout
);
660 void ll_interrupt(void *d
);
// True when acl_type is anything other than NO_ACL.
661 bool ll_handle_umask() {
662 return acl_type
!= NO_ACL
;
665 int ll_get_stripe_osd(struct Inode
*in
, uint64_t blockno
,
666 file_layout_t
* layout
);
667 uint64_t ll_get_internal_offset(struct Inode
*in
, uint64_t blockno
);
669 int ll_num_osds(void);
670 int ll_osdaddr(int osd
, uint32_t *addr
);
671 int ll_osdaddr(int osd
, char* buf
, size_t size
);
673 void _ll_register_callbacks(struct ceph_client_callback_args
*args
);
674 void ll_register_callbacks(struct ceph_client_callback_args
*args
); // deprecated
675 int ll_register_callbacks2(struct ceph_client_callback_args
*args
);
676 std::pair
<int, bool> test_dentry_handling(bool can_invalidate
);
678 const char** get_tracked_conf_keys() const override
;
679 void handle_conf_change(const ConfigProxy
& conf
,
680 const std::set
<std::string
> &changed
) override
;
// Accessor: returns the current value of deleg_timeout.
681 uint32_t get_deleg_timeout() { return deleg_timeout
; }
682 int set_deleg_timeout(uint32_t timeout
);
683 int ll_delegation(Fh
*fh
, unsigned cmd
, ceph_deleg_cb_t cb
, void *priv
);
// Returns the entity name reported by the messenger (delegates to
// messenger->get_myname()).
685 entity_name_t
get_myname() { return messenger
->get_myname(); }
686 void wait_on_list(std::list
<ceph::condition_variable
*>& ls
);
687 void signal_cond_list(std::list
<ceph::condition_variable
*>& ls
);
689 void set_filer_flags(int flags
);
690 void clear_filer_flags(int flags
);
692 void tear_down_cache();
694 void update_metadata(std::string
const &k
, std::string
const &v
);
// Accessor: returns the value of whoami.
696 client_t
get_nodeid() { return whoami
; }
698 inodeno_t
get_root_ino();
702 virtual void shutdown();
705 void cancel_commands(const MDSMap
& newmap
);
706 void handle_mds_map(const MConstRef
<MMDSMap
>& m
);
707 void handle_fs_map(const MConstRef
<MFSMap
>& m
);
708 void handle_fs_map_user(const MConstRef
<MFSMapUser
>& m
);
709 void handle_osd_map(const MConstRef
<MOSDMap
>& m
);
711 void handle_lease(const MConstRef
<MClientLease
>& m
);
714 int uninline_data(Inode
*in
, Context
*onfinish
);
717 void check_cap_issue(Inode
*in
, unsigned issued
);
718 void add_update_cap(Inode
*in
, MetaSession
*session
, uint64_t cap_id
,
719 unsigned issued
, unsigned wanted
, unsigned seq
, unsigned mseq
,
720 inodeno_t realm
, int flags
, const UserPerm
& perms
);
721 void remove_cap(Cap
*cap
, bool queue_release
);
722 void remove_all_caps(Inode
*in
);
723 void remove_session_caps(MetaSession
*session
, int err
);
724 int mark_caps_flushing(Inode
*in
, ceph_tid_t
*ptid
);
725 void adjust_session_flushing_caps(Inode
*in
, MetaSession
*old_s
, MetaSession
*new_s
);
726 void flush_caps_sync();
727 void kick_flushing_caps(Inode
*in
, MetaSession
*session
);
728 void kick_flushing_caps(MetaSession
*session
);
729 void early_kick_flushing_caps(MetaSession
*session
);
730 int get_caps(Fh
*fh
, int need
, int want
, int *have
, loff_t endoff
);
731 int get_caps_used(Inode
*in
);
733 void maybe_update_snaprealm(SnapRealm
*realm
, snapid_t snap_created
, snapid_t snap_highwater
,
734 std::vector
<snapid_t
>& snaps
);
736 void handle_quota(const MConstRef
<MClientQuota
>& m
);
737 void handle_snap(const MConstRef
<MClientSnap
>& m
);
738 void handle_caps(const MConstRef
<MClientCaps
>& m
);
739 void handle_cap_import(MetaSession
*session
, Inode
*in
, const MConstRef
<MClientCaps
>& m
);
740 void handle_cap_export(MetaSession
*session
, Inode
*in
, const MConstRef
<MClientCaps
>& m
);
741 void handle_cap_trunc(MetaSession
*session
, Inode
*in
, const MConstRef
<MClientCaps
>& m
);
742 void handle_cap_flush_ack(MetaSession
*session
, Inode
*in
, Cap
*cap
, const MConstRef
<MClientCaps
>& m
);
743 void handle_cap_flushsnap_ack(MetaSession
*session
, Inode
*in
, const MConstRef
<MClientCaps
>& m
);
744 void handle_cap_grant(MetaSession
*session
, Inode
*in
, Cap
*cap
, const MConstRef
<MClientCaps
>& m
);
745 void cap_delay_requeue(Inode
*in
);
747 void send_cap(Inode
*in
, MetaSession
*session
, Cap
*cap
, int flags
,
748 int used
, int want
, int retain
, int flush
,
749 ceph_tid_t flush_tid
);
751 void send_flush_snap(Inode
*in
, MetaSession
*session
, snapid_t follows
, CapSnap
& capsnap
);
753 void flush_snaps(Inode
*in
);
754 void get_cap_ref(Inode
*in
, int cap
);
755 void put_cap_ref(Inode
*in
, int cap
);
756 void wait_sync_caps(Inode
*in
, ceph_tid_t want
);
757 void wait_sync_caps(ceph_tid_t want
);
758 void queue_cap_snap(Inode
*in
, SnapContext
&old_snapc
);
759 void finish_cap_snap(Inode
*in
, CapSnap
&capsnap
, int used
);
761 void _schedule_invalidate_dentry_callback(Dentry
*dn
, bool del
);
762 void _async_dentry_invalidate(vinodeno_t dirino
, vinodeno_t ino
, std::string
& name
);
763 void _try_to_trim_inode(Inode
*in
, bool sched_inval
);
765 void _schedule_invalidate_callback(Inode
*in
, int64_t off
, int64_t len
);
766 void _invalidate_inode_cache(Inode
*in
);
767 void _invalidate_inode_cache(Inode
*in
, int64_t off
, int64_t len
);
768 void _async_invalidate(vinodeno_t ino
, int64_t off
, int64_t len
);
770 void _schedule_ino_release_callback(Inode
*in
);
771 void _async_inode_release(vinodeno_t ino
);
773 bool _release(Inode
*in
);
776 * Initiate a flush of the data associated with the given inode.
777 * If you specify a Context, you are responsible for holding an inode
778 * reference for the duration of the flush. If not, _flush() will
779 * take the reference for you.
780 * @param in The Inode whose data you wish to flush.
781 * @param c The Context you wish us to complete once the data is
782 * flushed. If already flushed, this will be called in-line.
784 * @returns true if the data was already flushed, false otherwise.
786 bool _flush(Inode
*in
, Context
*c
);
787 void _flush_range(Inode
*in
, int64_t off
, uint64_t size
);
788 void _flushed(Inode
*in
);
789 void flush_set_callback(ObjectCacher::ObjectSet
*oset
);
791 void close_release(Inode
*in
);
792 void close_safe(Inode
*in
);
794 void lock_fh_pos(Fh
*f
);
795 void unlock_fh_pos(Fh
*f
);
798 void update_dir_dist(Inode
*in
, DirStat
*st
, mds_rank_t from
);
800 void clear_dir_complete_and_ordered(Inode
*diri
, bool complete
);
801 void insert_readdir_results(MetaRequest
*request
, MetaSession
*session
,
802 Inode
*diri
, Inode
*diri_other
);
803 Inode
* insert_trace(MetaRequest
*request
, MetaSession
*session
);
804 void update_inode_file_size(Inode
*in
, int issued
, uint64_t size
,
805 uint64_t truncate_seq
, uint64_t truncate_size
);
806 void update_inode_file_time(Inode
*in
, int issued
, uint64_t time_warp_seq
,
807 utime_t ctime
, utime_t mtime
, utime_t atime
);
809 Inode
*add_update_inode(InodeStat
*st
, utime_t ttl
, MetaSession
*session
,
810 const UserPerm
& request_perms
);
811 Dentry
*insert_dentry_inode(Dir
*dir
, const std::string
& dname
, LeaseStat
*dlease
,
812 Inode
*in
, utime_t from
, MetaSession
*session
,
813 Dentry
*old_dentry
= NULL
);
814 void update_dentry_lease(Dentry
*dn
, LeaseStat
*dlease
, utime_t from
, MetaSession
*session
);
// Accessor: returns the value of the _use_faked_inos flag.
816 bool use_faked_inos() { return _use_faked_inos
; }
817 vinodeno_t
map_faked_ino(ino_t ino
);
819 //notify the mds to flush the mdlog
820 void flush_mdlog_sync(Inode
*in
);
821 void flush_mdlog_sync();
822 void flush_mdlog(MetaSession
*session
);
825 void renew_caps(MetaSession
*session
);
826 void flush_cap_releases();
827 void renew_and_flush_cap_releases();
829 void start_tick_thread();
// Adds `size` to the running total_read_size counter.
// NOTE(review): one line of the body is not visible in this excerpt — confirm
// against the full source before relying on this description being complete.
831 void update_read_io_size(size_t size
) {
833 total_read_size
+= size
;
// Adds `size` to the running total_write_size counter.
// NOTE(review): one line of the body is not visible in this excerpt — confirm
// against the full source before relying on this description being complete.
836 void update_write_io_size(size_t size
) {
838 total_write_size
+= size
;
841 void inc_dentry_nr() {
844 void dec_dentry_nr() {
// Returns the tuple (dlease_hits, dlease_misses, dentry_nr). Despite the
// name, the stored values are returned as-is; no ratio is computed here.
853 std::tuple
<uint64_t, uint64_t, uint64_t> get_dlease_hit_rates() {
854 return std::make_tuple(dlease_hits
, dlease_misses
, dentry_nr
);
// Returns the pair (cap_hits, cap_misses). Despite the name, the stored values
// are returned as-is; no ratio is computed here.
863 std::pair
<uint64_t, uint64_t> get_cap_hit_rates() {
864 return std::make_pair(cap_hits
, cap_misses
);
867 void inc_opened_files() {
870 void dec_opened_files() {
// Returns the pair (opened_files, number of entries in inode_map).
873 std::pair
<uint64_t, uint64_t> get_opened_files_rates() {
874 return std::make_pair(opened_files
, inode_map
.size());
877 void inc_pinned_icaps() {
880 void dec_pinned_icaps(uint64_t nr
=1) {
// Returns the pair (pinned_icaps, number of entries in inode_map).
883 std::pair
<uint64_t, uint64_t> get_pinned_icaps_rates() {
884 return std::make_pair(pinned_icaps
, inode_map
.size());
887 void inc_opened_inodes() {
890 void dec_opened_inodes() {
893 std::pair
<uint64_t, uint64_t> get_opened_inodes_rates() {
894 return std::make_pair(opened_inodes
, inode_map
.size());
897 /* timer_lock for 'timer' */
898 ceph::mutex timer_lock
= ceph::make_mutex("Client::timer_lock");
902 std::thread upkeeper
;
903 ceph::condition_variable upkeep_cond
;
904 bool tick_thread_stopped
= false;
906 std::unique_ptr
<PerfCounters
> logger
;
907 std::unique_ptr
<MDSMap
> mdsmap
;
909 bool fuse_default_permissions
;
910 bool _collect_and_send_global_metrics
;
913 std::list
<ceph::condition_variable
*> waiting_for_reclaim
;
914 /* Flags for check_caps() */
915 static const unsigned CHECK_CAPS_NODELAY
= 0x1;
916 static const unsigned CHECK_CAPS_SYNCHRONOUS
= 0x2;
918 void check_caps(Inode
*in
, unsigned flags
);
920 void set_cap_epoch_barrier(epoch_t e
);
922 void handle_command_reply(const MConstRef
<MCommandReply
>& m
);
923 int fetch_fsmap(bool user
);
925 const std::string
&mds_spec
,
926 std::vector
<mds_gid_t
> *targets
);
928 void get_session_metadata(std::map
<std::string
, std::string
> *meta
) const;
929 bool have_open_session(mds_rank_t mds
);
930 void got_mds_push(MetaSession
*s
);
931 MetaSessionRef
_get_mds_session(mds_rank_t mds
, Connection
*con
); ///< return session for mds *and* con; null otherwise
932 MetaSessionRef
_get_or_open_mds_session(mds_rank_t mds
);
933 MetaSessionRef
_open_mds_session(mds_rank_t mds
);
934 void _close_mds_session(MetaSession
*s
);
935 void _closed_mds_session(MetaSession
*s
, int err
=0, bool rejected
=false);
936 bool _any_stale_sessions() const;
937 void _kick_stale_sessions();
938 void handle_client_session(const MConstRef
<MClientSession
>& m
);
939 void send_reconnect(MetaSession
*s
);
940 void resend_unsafe_requests(MetaSession
*s
);
941 void wait_unsafe_requests();
943 void dump_mds_requests(Formatter
*f
);
944 void dump_mds_sessions(Formatter
*f
, bool cap_dump
=false);
946 int make_request(MetaRequest
*req
, const UserPerm
& perms
,
947 InodeRef
*ptarget
= 0, bool *pcreated
= 0,
948 mds_rank_t use_mds
=-1, bufferlist
*pdirbl
=0,
949 size_t feature_needed
=ULONG_MAX
);
950 void put_request(MetaRequest
*request
);
951 void unregister_request(MetaRequest
*request
);
953 int verify_reply_trace(int r
, MetaSession
*session
, MetaRequest
*request
,
954 const MConstRef
<MClientReply
>& reply
,
955 InodeRef
*ptarget
, bool *pcreated
,
956 const UserPerm
& perms
);
957 void encode_cap_releases(MetaRequest
*request
, mds_rank_t mds
);
958 int encode_inode_release(Inode
*in
, MetaRequest
*req
,
959 mds_rank_t mds
, int drop
,
960 int unless
,int force
=0);
961 void encode_dentry_release(Dentry
*dn
, MetaRequest
*req
,
962 mds_rank_t mds
, int drop
, int unless
);
963 mds_rank_t
choose_target_mds(MetaRequest
*req
, Inode
** phash_diri
=NULL
);
964 void connect_mds_targets(mds_rank_t mds
);
965 void send_request(MetaRequest
*request
, MetaSession
*session
,
966 bool drop_cap_releases
=false);
967 MRef
<MClientRequest
> build_client_request(MetaRequest
*request
, mds_rank_t mds
);
968 void kick_requests(MetaSession
*session
);
969 void kick_requests_closed(MetaSession
*session
);
970 void handle_client_request_forward(const MConstRef
<MClientRequestForward
>& reply
);
971 void handle_client_reply(const MConstRef
<MClientReply
>& reply
);
972 bool is_dir_operation(MetaRequest
*request
);
974 int path_walk(const filepath
& fp
, struct walk_dentry_result
* result
, const UserPerm
& perms
, bool followsym
=true, int mask
=0,
975 InodeRef dirinode
=nullptr);
976 int path_walk(const filepath
& fp
, InodeRef
*end
, const UserPerm
& perms
,
977 bool followsym
=true, int mask
=0, InodeRef dirinode
=nullptr);
979 // fake inode numbers for 32-bit ino_t
980 void _assign_faked_ino(Inode
*in
);
981 void _assign_faked_root(Inode
*in
);
982 void _release_faked_ino(Inode
*in
);
983 void _reset_faked_inos();
984 vinodeno_t
_map_faked_ino(ino_t ino
);
986 // Optional extra metadata about me to send to the MDS
987 void populate_metadata(const std::string
&mount_root
);
989 SnapRealm
*get_snap_realm(inodeno_t r
);
990 SnapRealm
*get_snap_realm_maybe(inodeno_t r
);
991 void put_snap_realm(SnapRealm
*realm
);
992 bool adjust_realm_parent(SnapRealm
*realm
, inodeno_t parent
);
993 void update_snap_trace(MetaSession
*session
, const bufferlist
& bl
, SnapRealm
**realm_ret
, bool must_flush
=true);
994 void invalidate_snaprealm_and_children(SnapRealm
*realm
);
996 void refresh_snapdir_attrs(Inode
*in
, Inode
*diri
);
997 Inode
*open_snapdir(Inode
*diri
);
1000 int fd
= free_fd_set
.range_start();
1001 free_fd_set
.erase(fd
, 1);
1004 void put_fd(int fd
) {
1005 free_fd_set
.insert(fd
, 1);
1009 * Resolve file descriptor, or return NULL.
1011 Fh
*get_filehandle(int fd
) {
1012 auto it
= fd_map
.find(fd
);
1013 if (it
== fd_map
.end())
1017 int get_fd_inode(int fd
, InodeRef
*in
);
1020 void wake_up_session_caps(MetaSession
*s
, bool reconnect
);
1022 void wait_on_context_list(std::list
<Context
*>& ls
);
1023 void signal_context_list(std::list
<Context
*>& ls
);
1025 // -- metadata cache stuff
1027 // decrease inode ref. delete if dangling.
1028 void _put_inode(Inode
*in
, int n
);
1029 void delay_put_inodes(bool wakeup
=false);
1030 void put_inode(Inode
*in
, int n
=1);
1031 void close_dir(Dir
*dir
);
1033 int subscribe_mdsmap(const std::string
&fs_name
="");
1035 void _abort_mds_sessions(int err
);
1037 // same as unmount() but for when the client_lock is already held
1038 void _unmount(bool abort
);
1040 //int get_cache_size() { return lru.lru_get_size(); }
1043 * Don't call this with in==NULL; use get_or_create for that.
1044 * Pass dn as NULL unless you're trying to add
1045 * a new inode to a pre-created Dentry.
1047 Dentry
* link(Dir
*dir
, const std::string
& name
, Inode
*in
, Dentry
*dn
);
1048 void unlink(Dentry
*dn
, bool keepdir
, bool keepdentry
);
1050 int fill_stat(Inode
*in
, struct stat
*st
, frag_info_t
*dirstat
=0, nest_info_t
*rstat
=0);
1051 int fill_stat(InodeRef
& in
, struct stat
*st
, frag_info_t
*dirstat
=0, nest_info_t
*rstat
=0) {
1052 return fill_stat(in
.get(), st
, dirstat
, rstat
);
1055 void fill_statx(Inode
*in
, unsigned int mask
, struct ceph_statx
*stx
);
1056 void fill_statx(InodeRef
& in
, unsigned int mask
, struct ceph_statx
*stx
) {
1057 return fill_statx(in
.get(), mask
, stx
);
1060 void touch_dn(Dentry
*dn
);
1063 void trim_cache(bool trim_kernel_dcache
=false);
1064 void trim_cache_for_reconnect(MetaSession
*s
);
1065 void trim_dentry(Dentry
*dn
);
1066 void trim_caps(MetaSession
*s
, uint64_t max
);
1067 void _invalidate_kernel_dcache();
1068 void _trim_negative_child_dentries(InodeRef
& in
);
1070 void dump_inode(Formatter
*f
, Inode
*in
, set
<Inode
*>& did
, bool disconnected
);
1071 void dump_cache(Formatter
*f
); // debug
1074 void force_session_readonly(MetaSession
*s
);
1076 void dump_status(Formatter
*f
); // debug
1078 bool ms_dispatch2(const MessageRef
& m
) override
;
1080 void ms_handle_connect(Connection
*con
) override
;
1081 bool ms_handle_reset(Connection
*con
) override
;
1082 void ms_handle_remote_reset(Connection
*con
) override
;
1083 bool ms_handle_refused(Connection
*con
) override
;
1087 Inode
* get_quota_root(Inode
*in
, const UserPerm
& perms
, quota_max_t type
=QUOTA_ANY
);
1088 bool check_quota_condition(Inode
*in
, const UserPerm
& perms
,
1089 std::function
<bool (const Inode
&)> test
);
1090 bool is_quota_files_exceeded(Inode
*in
, const UserPerm
& perms
);
1091 bool is_quota_bytes_exceeded(Inode
*in
, int64_t new_bytes
,
1092 const UserPerm
& perms
);
1093 bool is_quota_bytes_approaching(Inode
*in
, const UserPerm
& perms
);
1095 int check_pool_perm(Inode
*in
, int need
);
1097 void handle_client_reclaim_reply(const MConstRef
<MClientReclaimReply
>& reply
);
1100 * Call this when an OSDMap is seen with a full flag (global or per pool)
1103 * @param pool the pool ID affected, or -1 if all.
1105 void _handle_full_flag(int64_t pool
);
1107 void _close_sessions();
1112 * The basic housekeeping parts of init (perf counters, admin socket)
1113 * that are independent of how objecters/monclient/messengers are
1116 void _finish_init();
1118 // global client lock
1119 // - protects Client and buffer cache both!
1120 ceph::mutex client_lock
= ceph::make_mutex("Client::client_lock");
1122 std::map
<snapid_t
, int> ll_snap_ref
;
1124 InodeRef root
= nullptr;
1125 map
<Inode
*, InodeRef
> root_parents
;
1126 Inode
* root_ancestor
= nullptr;
1127 LRU lru
; // LRU list of Dentry objects in our local metadata cache.
1131 std::unique_ptr
<Filer
> filer
;
1132 std::unique_ptr
<ObjectCacher
> objectcacher
;
1133 std::unique_ptr
<WritebackHandler
> writeback_handler
;
1135 Messenger
*messenger
;
1136 MonClient
*monclient
;
1141 /* The state migration mechanism */
1143 /* For the initialize_state */
1144 CLIENT_NEW
, // The initial state for the initialize_state or after Client::shutdown()
1145 CLIENT_INITIALIZING
, // At the beginning of the Client::init()
1146 CLIENT_INITIALIZED
, // At the end of Client::init()
1148 /* For the mount_state */
1149 CLIENT_UNMOUNTED
, // The initial state for the mount_state or after unmounted
1150 CLIENT_MOUNTING
, // At the beginning of Client::mount()
1151 CLIENT_MOUNTED
, // At the end of Client::mount()
1152 CLIENT_UNMOUNTING
, // At the beginning of the Client::_unmount()
1155 typedef enum _state state_t
;
1156 using RWRef_t
= RWRef
<state_t
>;
1158 struct mount_state_t
: public RWRefState
<state_t
> {
1160 bool is_valid_state(state_t state
) const override
{
1162 case Client::CLIENT_MOUNTING
:
1163 case Client::CLIENT_MOUNTED
:
1164 case Client::CLIENT_UNMOUNTING
:
1165 case Client::CLIENT_UNMOUNTED
:
1172 int check_reader_state(state_t require
) const override
{
1173 if (require
== Client::CLIENT_MOUNTING
&&
1174 (state
== Client::CLIENT_MOUNTING
|| state
== Client::CLIENT_MOUNTED
))
1180 /* The state migration check */
1181 int check_writer_state(state_t require
) const override
{
1182 if (require
== Client::CLIENT_MOUNTING
&&
1183 state
== Client::CLIENT_UNMOUNTED
)
1185 else if (require
== Client::CLIENT_MOUNTED
&&
1186 state
== Client::CLIENT_MOUNTING
)
1188 else if (require
== Client::CLIENT_UNMOUNTING
&&
1189 state
== Client::CLIENT_MOUNTED
)
1191 else if (require
== Client::CLIENT_UNMOUNTED
&&
1192 state
== Client::CLIENT_UNMOUNTING
)
1198 mount_state_t(state_t state
, const char *lockname
, uint64_t reader_cnt
=0)
1199 : RWRefState (state
, lockname
, reader_cnt
) {}
1203 struct initialize_state_t
: public RWRefState
<state_t
> {
1205 bool is_valid_state(state_t state
) const override
{
1207 case Client::CLIENT_NEW
:
1208 case Client::CLIENT_INITIALIZING
:
1209 case Client::CLIENT_INITIALIZED
:
1216 int check_reader_state(state_t require
) const override
{
1217 if (require
== Client::CLIENT_INITIALIZED
&&
1218 state
>= Client::CLIENT_INITIALIZED
)
1224 /* The state migration check */
1225 int check_writer_state(state_t require
) const override
{
1226 if (require
== Client::CLIENT_INITIALIZING
&&
1227 (state
== Client::CLIENT_NEW
))
1229 else if (require
== Client::CLIENT_INITIALIZED
&&
1230 (state
== Client::CLIENT_INITIALIZING
))
1232 else if (require
== Client::CLIENT_NEW
&&
1233 (state
== Client::CLIENT_INITIALIZED
))
1239 initialize_state_t(state_t state
, const char *lockname
, uint64_t reader_cnt
=0)
1240 : RWRefState (state
, lockname
, reader_cnt
) {}
1241 ~initialize_state_t() {}
1244 struct mount_state_t mount_state
;
1245 struct initialize_state_t initialize_state
;
1248 struct C_Readahead
: public Context
{
1249 C_Readahead(Client
*c
, Fh
*f
);
1250 ~C_Readahead() override
;
1251 void finish(int r
) override
;
1258 * These define virtual xattrs exposing the recursive directory
1259 * statistics and layout metadata.
1262 const std::string name
;
1263 size_t (Client::*getxattr_cb
)(Inode
*in
, char *val
, size_t size
);
1264 int (Client::*setxattr_cb
)(Inode
*in
, const void *val
, size_t size
,
1265 const UserPerm
& perms
);
1267 bool (Client::*exists_cb
)(Inode
*in
);
1282 typedef std::function
<void(dir_result_t
*, MetaRequest
*, InodeRef
&, frag_t
)> fill_readdir_args_cb_t
;
1284 std::unique_ptr
<CephContext
, std::function
<void(CephContext
*)>> cct_deleter
;
1286 /* Flags for VXattr */
1287 static const unsigned VXATTR_RSTAT
= 0x1;
1288 static const unsigned VXATTR_DIRSTAT
= 0x2;
1290 static const VXattr _dir_vxattrs
[];
1291 static const VXattr _file_vxattrs
[];
1292 static const VXattr _common_vxattrs
[];
1295 bool is_reserved_vino(vinodeno_t
&vino
);
1297 void fill_dirent(struct dirent
*de
, const char *name
, int type
, uint64_t ino
, loff_t next_off
);
1299 int _opendir(Inode
*in
, dir_result_t
**dirpp
, const UserPerm
& perms
);
1300 void _readdir_drop_dirp_buffer(dir_result_t
*dirp
);
1301 bool _readdir_have_frag(dir_result_t
*dirp
);
1302 void _readdir_next_frag(dir_result_t
*dirp
);
1303 void _readdir_rechoose_frag(dir_result_t
*dirp
);
1304 int _readdir_get_frag(int op
, dir_result_t
*dirp
,
1305 fill_readdir_args_cb_t fill_req_cb
);
1306 int _readdir_cache_cb(dir_result_t
*dirp
, add_dirent_cb_t cb
, void *p
, int caps
, bool getref
);
1307 int _readdir_r_cb(int op
,
1310 fill_readdir_args_cb_t fill_cb
,
1317 void _closedir(dir_result_t
*dirp
);
1320 void _fragmap_remove_non_leaves(Inode
*in
);
1321 void _fragmap_remove_stopped_mds(Inode
*in
, mds_rank_t mds
);
1323 void _ll_get(Inode
*in
);
1324 int _ll_put(Inode
*in
, uint64_t num
);
1325 void _ll_drop_pins();
1327 Fh
*_create_fh(Inode
*in
, int flags
, int cmode
, const UserPerm
& perms
);
1328 int _release_fh(Fh
*fh
);
1329 void _put_fh(Fh
*fh
);
1331 std::pair
<int, bool> _do_remount(bool retry_on_error
);
1333 int _read_sync(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
, bool *checkeof
);
1334 int _read_async(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
);
1336 bool _dentry_valid(const Dentry
*dn
);
1338 // internal interface
1339 // call these with client_lock held!
1340 int _do_lookup(Inode
*dir
, const std::string
& name
, int mask
, InodeRef
*target
,
1341 const UserPerm
& perms
);
1343 int _lookup(Inode
*dir
, const std::string
& dname
, int mask
, InodeRef
*target
,
1344 const UserPerm
& perm
, std::string
* alternate_name
=nullptr,
1345 bool is_rename
=false);
1347 int _link(Inode
*in
, Inode
*dir
, const char *name
, const UserPerm
& perm
, std::string alternate_name
,
1349 int _unlink(Inode
*dir
, const char *name
, const UserPerm
& perm
);
1350 int _rename(Inode
*olddir
, const char *oname
, Inode
*ndir
, const char *nname
, const UserPerm
& perm
, std::string alternate_name
);
1351 int _mkdir(Inode
*dir
, const char *name
, mode_t mode
, const UserPerm
& perm
,
1352 InodeRef
*inp
= 0, const std::map
<std::string
, std::string
> &metadata
={},
1353 std::string alternate_name
="");
1354 int _rmdir(Inode
*dir
, const char *name
, const UserPerm
& perms
);
1355 int _symlink(Inode
*dir
, const char *name
, const char *target
,
1356 const UserPerm
& perms
, std::string alternate_name
, InodeRef
*inp
= 0);
1357 int _mknod(Inode
*dir
, const char *name
, mode_t mode
, dev_t rdev
,
1358 const UserPerm
& perms
, InodeRef
*inp
= 0);
1359 int _do_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
1360 const UserPerm
& perms
, InodeRef
*inp
,
1361 std::vector
<uint8_t>* aux
=nullptr);
1362 void stat_to_statx(struct stat
*st
, struct ceph_statx
*stx
);
1363 int __setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
1364 const UserPerm
& perms
, InodeRef
*inp
= 0);
1365 int _setattrx(InodeRef
&in
, struct ceph_statx
*stx
, int mask
,
1366 const UserPerm
& perms
);
1367 int _setattr(InodeRef
&in
, struct stat
*attr
, int mask
,
1368 const UserPerm
& perms
);
1369 int _ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
1370 const UserPerm
& perms
, InodeRef
*inp
= 0);
1371 int _getattr(Inode
*in
, int mask
, const UserPerm
& perms
, bool force
=false);
1372 int _getattr(InodeRef
&in
, int mask
, const UserPerm
& perms
, bool force
=false) {
1373 return _getattr(in
.get(), mask
, perms
, force
);
1375 int _readlink(Inode
*in
, char *buf
, size_t size
);
1376 int _getxattr(Inode
*in
, const char *name
, void *value
, size_t len
,
1377 const UserPerm
& perms
);
1378 int _getxattr(InodeRef
&in
, const char *name
, void *value
, size_t len
,
1379 const UserPerm
& perms
);
1380 int _getvxattr(Inode
*in
, const UserPerm
& perms
, const char *attr_name
,
1381 ssize_t size
, void *value
, mds_rank_t rank
);
1382 int _listxattr(Inode
*in
, char *names
, size_t len
, const UserPerm
& perms
);
1383 int _do_setxattr(Inode
*in
, const char *name
, const void *value
, size_t len
,
1384 int flags
, const UserPerm
& perms
);
1385 int _setxattr(Inode
*in
, const char *name
, const void *value
, size_t len
,
1386 int flags
, const UserPerm
& perms
);
1387 int _setxattr(InodeRef
&in
, const char *name
, const void *value
, size_t len
,
1388 int flags
, const UserPerm
& perms
);
1389 int _setxattr_check_data_pool(std::string
& name
, std::string
& value
, const OSDMap
*osdmap
);
1390 void _setxattr_maybe_wait_for_osdmap(const char *name
, const void *value
, size_t len
);
1391 int _removexattr(Inode
*in
, const char *nm
, const UserPerm
& perms
);
1392 int _removexattr(InodeRef
&in
, const char *nm
, const UserPerm
& perms
);
1393 int _open(Inode
*in
, int flags
, mode_t mode
, Fh
**fhp
,
1394 const UserPerm
& perms
);
1395 int _renew_caps(Inode
*in
);
1396 int _create(Inode
*in
, const char *name
, int flags
, mode_t mode
, InodeRef
*inp
,
1397 Fh
**fhp
, int stripe_unit
, int stripe_count
, int object_size
,
1398 const char *data_pool
, bool *created
, const UserPerm
&perms
,
1399 std::string alternate_name
);
1401 loff_t
_lseek(Fh
*fh
, loff_t offset
, int whence
);
1402 int64_t _read(Fh
*fh
, int64_t offset
, uint64_t size
, bufferlist
*bl
);
1403 int64_t _write(Fh
*fh
, int64_t offset
, uint64_t size
, const char *buf
,
1404 const struct iovec
*iov
, int iovcnt
);
1405 int64_t _preadv_pwritev_locked(Fh
*fh
, const struct iovec
*iov
,
1406 unsigned iovcnt
, int64_t offset
,
1407 bool write
, bool clamp_to_int
);
1408 int _preadv_pwritev(int fd
, const struct iovec
*iov
, unsigned iovcnt
,
1409 int64_t offset
, bool write
);
1411 int _fsync(Fh
*fh
, bool syncdataonly
);
1412 int _fsync(Inode
*in
, bool syncdataonly
);
1414 int clear_suid_sgid(Inode
*in
, const UserPerm
& perms
, bool defer
=false);
1415 int _fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
);
1416 int _getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
);
1417 int _setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
);
1418 int _flock(Fh
*fh
, int cmd
, uint64_t owner
);
1419 int _lazyio(Fh
*fh
, int enable
);
1421 Dentry
*get_or_create(Inode
*dir
, const char* name
);
1423 int xattr_permission(Inode
*in
, const char *name
, unsigned want
,
1424 const UserPerm
& perms
);
1425 int may_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
1426 const UserPerm
& perms
);
1427 int may_open(Inode
*in
, int flags
, const UserPerm
& perms
);
1428 int may_lookup(Inode
*dir
, const UserPerm
& perms
);
1429 int may_create(Inode
*dir
, const UserPerm
& perms
);
1430 int may_delete(Inode
*dir
, const char *name
, const UserPerm
& perms
);
1431 int may_hardlink(Inode
*in
, const UserPerm
& perms
);
1433 int _getattr_for_perm(Inode
*in
, const UserPerm
& perms
);
1435 vinodeno_t
_get_vino(Inode
*in
);
1437 bool _vxattrcb_fscrypt_auth_exists(Inode
*in
);
1438 size_t _vxattrcb_fscrypt_auth(Inode
*in
, char *val
, size_t size
);
1439 int _vxattrcb_fscrypt_auth_set(Inode
*in
, const void *val
, size_t size
, const UserPerm
& perms
);
1440 bool _vxattrcb_fscrypt_file_exists(Inode
*in
);
1441 size_t _vxattrcb_fscrypt_file(Inode
*in
, char *val
, size_t size
);
1442 int _vxattrcb_fscrypt_file_set(Inode
*in
, const void *val
, size_t size
, const UserPerm
& perms
);
1443 bool _vxattrcb_quota_exists(Inode
*in
);
1444 size_t _vxattrcb_quota(Inode
*in
, char *val
, size_t size
);
1445 size_t _vxattrcb_quota_max_bytes(Inode
*in
, char *val
, size_t size
);
1446 size_t _vxattrcb_quota_max_files(Inode
*in
, char *val
, size_t size
);
1448 bool _vxattrcb_layout_exists(Inode
*in
);
1449 size_t _vxattrcb_layout(Inode
*in
, char *val
, size_t size
);
1450 size_t _vxattrcb_layout_stripe_unit(Inode
*in
, char *val
, size_t size
);
1451 size_t _vxattrcb_layout_stripe_count(Inode
*in
, char *val
, size_t size
);
1452 size_t _vxattrcb_layout_object_size(Inode
*in
, char *val
, size_t size
);
1453 size_t _vxattrcb_layout_pool(Inode
*in
, char *val
, size_t size
);
1454 size_t _vxattrcb_layout_pool_namespace(Inode
*in
, char *val
, size_t size
);
1455 size_t _vxattrcb_dir_entries(Inode
*in
, char *val
, size_t size
);
1456 size_t _vxattrcb_dir_files(Inode
*in
, char *val
, size_t size
);
1457 size_t _vxattrcb_dir_subdirs(Inode
*in
, char *val
, size_t size
);
1458 size_t _vxattrcb_dir_rentries(Inode
*in
, char *val
, size_t size
);
1459 size_t _vxattrcb_dir_rfiles(Inode
*in
, char *val
, size_t size
);
1460 size_t _vxattrcb_dir_rsubdirs(Inode
*in
, char *val
, size_t size
);
1461 size_t _vxattrcb_dir_rsnaps(Inode
*in
, char *val
, size_t size
);
1462 size_t _vxattrcb_dir_rbytes(Inode
*in
, char *val
, size_t size
);
1463 size_t _vxattrcb_dir_rctime(Inode
*in
, char *val
, size_t size
);
1465 bool _vxattrcb_dir_pin_exists(Inode
*in
);
1466 size_t _vxattrcb_dir_pin(Inode
*in
, char *val
, size_t size
);
1468 bool _vxattrcb_snap_btime_exists(Inode
*in
);
1469 size_t _vxattrcb_snap_btime(Inode
*in
, char *val
, size_t size
);
1471 size_t _vxattrcb_caps(Inode
*in
, char *val
, size_t size
);
1473 bool _vxattrcb_mirror_info_exists(Inode
*in
);
1474 size_t _vxattrcb_mirror_info(Inode
*in
, char *val
, size_t size
);
1476 size_t _vxattrcb_cluster_fsid(Inode
*in
, char *val
, size_t size
);
1477 size_t _vxattrcb_client_id(Inode
*in
, char *val
, size_t size
);
1479 static const VXattr
*_get_vxattrs(Inode
*in
);
1480 static const VXattr
*_match_vxattr(Inode
*in
, const char *name
);
1482 int _do_filelock(Inode
*in
, Fh
*fh
, int lock_type
, int op
, int sleep
,
1483 struct flock
*fl
, uint64_t owner
, bool removing
=false);
1484 int _interrupt_filelock(MetaRequest
*req
);
1485 void _encode_filelocks(Inode
*in
, bufferlist
& bl
);
1486 void _release_filelocks(Fh
*fh
);
1487 void _update_lock_state(struct flock
*fl
, uint64_t owner
, ceph_lock_state_t
*lock_state
);
1489 int _posix_acl_create(Inode
*dir
, mode_t
*mode
, bufferlist
& xattrs_bl
,
1490 const UserPerm
& perms
);
1491 int _posix_acl_chmod(Inode
*in
, mode_t mode
, const UserPerm
& perms
);
1492 int _posix_acl_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
);
1494 mds_rank_t
_get_random_up_mds() const;
1496 int _ll_getattr(Inode
*in
, int caps
, const UserPerm
& perms
);
1497 int _lookup_parent(Inode
*in
, const UserPerm
& perms
, Inode
**parent
=NULL
);
1498 int _lookup_name(Inode
*in
, Inode
*parent
, const UserPerm
& perms
);
1499 int _lookup_vino(vinodeno_t ino
, const UserPerm
& perms
, Inode
**inode
=NULL
);
1500 bool _ll_forget(Inode
*in
, uint64_t count
);
1502 void collect_and_send_metrics();
1503 void collect_and_send_global_metrics();
1505 void update_io_stat_metadata(utime_t latency
);
1506 void update_io_stat_read(utime_t latency
);
1507 void update_io_stat_write(utime_t latency
);
1509 uint32_t deleg_timeout
= 0;
1511 client_switch_interrupt_callback_t switch_interrupt_cb
= nullptr;
1512 client_remount_callback_t remount_cb
= nullptr;
1513 client_ino_callback_t ino_invalidate_cb
= nullptr;
1514 client_dentry_callback_t dentry_invalidate_cb
= nullptr;
1515 client_umask_callback_t umask_cb
= nullptr;
1516 client_ino_release_t ino_release_cb
= nullptr;
1517 void *callback_handle
= nullptr;
1518 bool can_invalidate_dentries
= false;
1520 Finisher async_ino_invalidator
;
1521 Finisher async_dentry_invalidator
;
1522 Finisher interrupt_finisher
;
1523 Finisher remount_finisher
;
1524 Finisher async_ino_releasor
;
1525 Finisher objecter_finisher
;
1527 ceph::coarse_mono_time last_cap_renew
;
1529 CommandHook m_command_hook
;
1531 int user_id
, group_id
;
1532 int acl_type
= NO_ACL
;
1534 epoch_t cap_epoch_barrier
= 0;
1537 map
<mds_rank_t
, MetaSessionRef
> mds_sessions
; // mds -> push seq
1538 std::set
<mds_rank_t
> mds_ranks_closing
; // mds ranks currently tearing down sessions
1539 std::list
<ceph::condition_variable
*> waiting_for_mdsmap
;
1541 // FSMap, for when using mds_command
1542 std::list
<ceph::condition_variable
*> waiting_for_fsmap
;
1543 std::unique_ptr
<FSMap
> fsmap
;
1544 std::unique_ptr
<FSMapUser
> fsmap_user
;
1546 // This mutex only protects command_table
1547 ceph::mutex command_lock
= ceph::make_mutex("Client::command_lock");
1548 // MDS command state
1549 CommandTable
<MDSCommandOp
> command_table
;
1551 bool _use_faked_inos
;
1554 fs_cluster_id_t fscid
;
1556 // file handles, etc.
1557 interval_set
<int> free_fd_set
; // unused fds
1558 ceph::unordered_map
<int, Fh
*> fd_map
;
1559 set
<Fh
*> ll_unclosed_fh_set
;
1560 ceph::unordered_set
<dir_result_t
*> opened_dirs
;
1561 uint64_t fd_gen
= 1;
1563 bool mount_aborted
= false;
1564 bool blocklisted
= false;
1566 ceph::unordered_map
<vinodeno_t
, Inode
*> inode_map
;
1567 ceph::unordered_map
<ino_t
, vinodeno_t
> faked_ino_map
;
1568 interval_set
<ino_t
> free_faked_inos
;
1569 ino_t last_used_faked_ino
;
1570 ino_t last_used_faked_root
;
1572 int local_osd
= -CEPHFS_ENXIO
;
1573 epoch_t local_osd_epoch
= 0;
1576 ceph_tid_t last_tid
= 0;
1577 ceph_tid_t oldest_tid
= 0; // oldest incomplete mds request, excluding setfilelock requests
1578 map
<ceph_tid_t
, MetaRequest
*> mds_requests
;
1581 ceph_tid_t last_flush_tid
= 1;
1583 xlist
<Inode
*> delayed_list
;
1584 int num_flushing_caps
= 0;
1585 ceph::unordered_map
<inodeno_t
,SnapRealm
*> snap_realms
;
1586 std::map
<std::string
, std::string
> metadata
;
1588 ceph::coarse_mono_time last_auto_reconnect
;
1589 std::chrono::seconds caps_release_delay
, mount_timeout
;
1591 std::ofstream traceout
;
1593 ceph::condition_variable mount_cond
, sync_cond
;
1595 std::map
<std::pair
<int64_t,std::string
>, int> pool_perms
;
1596 std::list
<ceph::condition_variable
*> waiting_for_pool_perm
;
1598 std::list
<ceph::condition_variable
*> waiting_for_rename
;
1600 uint64_t retries_on_invalidate
= 0;
1603 int reclaim_errno
= 0;
1604 epoch_t reclaim_osd_epoch
= 0;
1605 entity_addrvec_t reclaim_target_addrs
;
1607 // dentry lease metrics
1608 uint64_t dentry_nr
= 0;
1609 uint64_t dlease_hits
= 0;
1610 uint64_t dlease_misses
= 0;
1612 uint64_t cap_hits
= 0;
1613 uint64_t cap_misses
= 0;
1615 uint64_t opened_files
= 0;
1616 uint64_t pinned_icaps
= 0;
1617 uint64_t opened_inodes
= 0;
1619 uint64_t total_read_ops
= 0;
1620 uint64_t total_read_size
= 0;
1622 uint64_t total_write_ops
= 0;
1623 uint64_t total_write_size
= 0;
1625 ceph::spinlock delay_i_lock
;
1626 std::map
<Inode
*,int> delay_i_release
;
1628 uint64_t nr_metadata_request
= 0;
1629 uint64_t nr_read_request
= 0;
1630 uint64_t nr_write_request
= 0;
1634 * Specialization of Client that manages its own Objecter instance
1635 * and handles init/shutdown of messenger/monclient
1637 class StandaloneClient
: public Client
1640 StandaloneClient(Messenger
*m
, MonClient
*mc
, boost::asio::io_context
& ictx
);
1642 ~StandaloneClient() override
;
1644 int init() override
;
1645 void shutdown() override
;