1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 * Ceph - scalable distributed file system
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
19 #include "common/CommandTable.h"
20 #include "common/Finisher.h"
21 #include "common/Mutex.h"
22 #include "common/Timer.h"
23 #include "common/cmdparse.h"
24 #include "common/compiler_extensions.h"
25 #include "include/cephfs/ceph_statx.h"
26 #include "include/filepath.h"
27 #include "include/interval_set.h"
28 #include "include/lru.h"
29 #include "include/types.h"
30 #include "include/unordered_map.h"
31 #include "include/unordered_set.h"
32 #include "mds/mdstypes.h"
33 #include "msg/Dispatcher.h"
34 #include "msg/MessageRef.h"
35 #include "msg/Messenger.h"
36 #include "osdc/ObjectCacher.h"
39 #include "MetaSession.h"
64 class WritebackHandler
;
81 class MDSCommandOp
: public CommandOp
86 explicit MDSCommandOp(ceph_tid_t t
) : CommandOp(t
) {}
89 /* error codes for ceph_fuse */
90 #define CEPH_FUSE_NO_MDS_UP -((1<<16)+0) /* no mds up detected in ceph_fuse */
91 #define CEPH_FUSE_LAST -((1<<16)+1) /* (unused) */
93 // ============================================
94 // types for my local metadata cache
97 - Dentries live in an LRU loop. they get expired based on last access.
98 see include/lru.h. items can be bumped to "mid" or "top" of list, etc.
99 - Inode has ref count for each Fh, Dir, or Dentry that points to it.
100 - when Inode ref goes to 0, it's expired.
101 - when Dir is empty, it's removed (and its Inode ref--)
107 explicit DirEntry(const string
&s
) : d_name(s
), stmask(0) {}
108 DirEntry(const string
&n
, struct stat
& s
, int stm
) : d_name(n
), st(s
), stmask(stm
) {}
123 class ceph_lock_state_t
;
// Callback taking an opaque handle, an inode (vinodeno_t) and an
// offset/length pair.
// NOTE(review): presumably the inode cache invalidation hook -- confirm against callers.
126 typedef void (*client_ino_callback_t
)(void *handle
, vinodeno_t ino
, int64_t off
, int64_t len
);
// Callback taking an opaque handle, the directory's vinodeno_t, the entry's
// vinodeno_t and the entry name (passed by non-const reference).
// NOTE(review): presumably the dentry invalidation hook -- confirm against callers.
128 typedef void (*client_dentry_callback_t
)(void *handle
, vinodeno_t dirino
,
129 vinodeno_t ino
, string
& name
);
// Callback taking only the opaque handle; returns an int.
// NOTE(review): return value presumably 0 / negative errno -- confirm.
130 typedef int (*client_remount_callback_t
)(void *handle
);
// Callback taking the opaque handle plus an additional opaque data pointer.
132 typedef void(*client_switch_interrupt_callback_t
)(void *handle
, void *data
);
// Callback taking only the opaque handle; returns a mode_t.
// NOTE(review): presumably the caller's umask -- confirm.
133 typedef mode_t (*client_umask_callback_t
)(void *handle
);
135 /* Callback for delegation recalls */
136 typedef void (*ceph_deleg_cb_t
)(Fh
*fh
, void *priv
);
138 struct client_callback_args
{
140 client_ino_callback_t ino_cb
;
141 client_dentry_callback_t dentry_cb
;
142 client_switch_interrupt_callback_t switch_intr_cb
;
143 client_remount_callback_t remount_cb
;
144 client_umask_callback_t umask_cb
;
147 // ========================================================
150 struct dir_result_t
{
151 static const int SHIFT
= 28;
152 static const int64_t MASK
= (1 << SHIFT
) - 1;
153 static const int64_t HASH
= 0xFFULL
<< (SHIFT
+ 24); // impossible frag bits
154 static const loff_t END
= 1ULL << (SHIFT
+ 32);
160 explicit dentry(int64_t o
) : offset(o
) {}
161 dentry(int64_t o
, const string
& n
, const InodeRef
& in
) :
162 offset(o
), name(n
), inode(in
) {}
164 struct dentry_off_lt
{
165 bool operator()(const dentry
& d
, int64_t off
) const {
166 return dir_result_t::fpos_cmp(d
.offset
, off
) < 0;
171 explicit dir_result_t(Inode
*in
, const UserPerm
& perms
);
174 static uint64_t make_fpos(unsigned h
, unsigned l
, bool hash
) {
175 uint64_t v
= ((uint64_t)h
<< SHIFT
) | (uint64_t)l
;
179 ceph_assert((v
& HASH
) != HASH
);
182 static unsigned fpos_high(uint64_t p
) {
183 unsigned v
= (p
& (END
-1)) >> SHIFT
;
184 if ((p
& HASH
) == HASH
)
185 return ceph_frag_value(v
);
188 static unsigned fpos_low(uint64_t p
) {
191 static int fpos_cmp(uint64_t l
, uint64_t r
) {
192 int c
= ceph_frag_compare(fpos_high(l
), fpos_high(r
));
195 if (fpos_low(l
) == fpos_low(r
))
197 return fpos_low(l
) < fpos_low(r
) ? -1 : 1;
200 unsigned offset_high() { return fpos_high(offset
); }
201 unsigned offset_low() { return fpos_low(offset
); }
203 void set_end() { offset
|= END
; }
204 bool at_end() { return (offset
& END
); }
206 void set_hash_order() { offset
|= HASH
; }
207 bool hash_order() { return (offset
& HASH
) == HASH
; }
213 return buffer_frag
.contains(offset_high());
215 return buffer_frag
== frag_t(offset_high());
229 int64_t offset
; // hash order:
230 // (0xff << 52) | ((24 bits hash) << 28) |
231 // (the nth entry has hash collision);
233 // ((frag value) << 28) | (the nth entry in frag);
235 unsigned next_offset
; // offset of next chunk (last_name's + 1)
236 string last_name
; // last entry in previous chunk
238 uint64_t release_count
;
239 uint64_t ordered_count
;
240 unsigned cache_index
;
241 int start_shared_gen
; // dir shared_gen at start of readdir
246 vector
<dentry
> buffer
;
249 class Client
: public Dispatcher
, public md_config_obs_t
{
251 friend class C_Block_Sync
; // Calls block map and protected helpers
252 friend class C_Client_CacheInvalidate
; // calls ino_invalidate_cb
253 friend class C_Client_DentryInvalidate
; // calls dentry_invalidate_cb
254 friend class C_Client_FlushComplete
; // calls put_inode()
255 friend class C_Client_Remount
;
256 friend class C_Client_RequestInterrupt
;
257 friend class C_Deleg_Timeout
; // Asserts on client_lock, called when a delegation is unreturned
258 friend class SyntheticClient
;
259 friend void intrusive_ptr_release(Inode
*in
);
261 using Dispatcher::cct
;
263 typedef int (*add_dirent_cb_t
)(void *p
, struct dirent
*de
, struct ceph_statx
*stx
, off_t off
, Inode
*in
);
265 class CommandHook
: public AdminSocketHook
{
267 explicit CommandHook(Client
*client
);
268 bool call(std::string_view command
, const cmdmap_t
& cmdmap
,
269 std::string_view format
, bufferlist
& out
) override
;
274 Client(Messenger
*m
, MonClient
*mc
, Objecter
*objecter_
);
275 Client(const Client
&) = delete;
276 Client(const Client
&&) = delete;
277 virtual ~Client() override
;
279 static UserPerm
pick_my_perms(CephContext
*c
) {
280 uid_t uid
= c
->_conf
->client_mount_uid
>= 0 ? c
->_conf
->client_mount_uid
: -1;
281 gid_t gid
= c
->_conf
->client_mount_gid
>= 0 ? c
->_conf
->client_mount_gid
: -1;
282 return UserPerm(uid
, gid
);
284 UserPerm
pick_my_perms() {
285 uid_t uid
= user_id
>= 0 ? user_id
: -1;
286 gid_t gid
= group_id
>= 0 ? group_id
: -1;
287 return UserPerm(uid
, gid
);
290 int mount(const std::string
&mount_root
, const UserPerm
& perms
,
291 bool require_mds
=false, const std::string
&fs_name
="");
295 void set_uuid(const std::string
& uuid
);
296 void set_session_timeout(unsigned timeout
);
297 int start_reclaim(const std::string
& uuid
, unsigned flags
,
298 const std::string
& fs_name
);
299 void finish_reclaim();
301 fs_cluster_id_t
get_fs_cid() {
306 const std::string
&mds_spec
,
307 const std::vector
<std::string
>& cmd
,
308 const bufferlist
& inbl
,
309 bufferlist
*poutbl
, std::string
*prs
, Context
*onfinish
);
311 // these should (more or less) mirror the actual system calls.
312 int statfs(const char *path
, struct statvfs
*stbuf
, const UserPerm
& perms
);
315 int chdir(const char *s
, std::string
&new_cwd
, const UserPerm
& perms
);
316 void _getcwd(std::string
& cwd
, const UserPerm
& perms
);
317 void getcwd(std::string
& cwd
, const UserPerm
& perms
);
320 int opendir(const char *name
, dir_result_t
**dirpp
, const UserPerm
& perms
);
321 int closedir(dir_result_t
*dirp
);
324 * Fill a directory listing from dirp, invoking cb for each entry
325 * with the given pointer, the dirent, the struct ceph_statx,
328 * Returns 0 if it reached the end of the directory.
329 * If @a cb returns a negative error code, stop and return that.
331 int readdir_r_cb(dir_result_t
*dirp
, add_dirent_cb_t cb
, void *p
,
332 unsigned want
=0, unsigned flags
=AT_NO_ATTR_SYNC
,
335 struct dirent
* readdir(dir_result_t
*d
);
336 int readdir_r(dir_result_t
*dirp
, struct dirent
*de
);
337 int readdirplus_r(dir_result_t
*dirp
, struct dirent
*de
, struct ceph_statx
*stx
, unsigned want
, unsigned flags
, Inode
**out
);
339 int getdir(const char *relpath
, list
<string
>& names
,
340 const UserPerm
& perms
); // get the whole dir at once.
343 * Returns the length of the buffer that got filled in, or -errno.
344 * If it returns -ERANGE you just need to increase the size of the
345 * buffer and try again.
347 int _getdents(dir_result_t
*dirp
, char *buf
, int buflen
, bool ful
); // get a bunch of dentries at once
348 int getdents(dir_result_t
*dirp
, char *buf
, int buflen
) {
349 return _getdents(dirp
, buf
, buflen
, true);
351 int getdnames(dir_result_t
*dirp
, char *buf
, int buflen
) {
352 return _getdents(dirp
, buf
, buflen
, false);
355 void rewinddir(dir_result_t
*dirp
);
356 loff_t
telldir(dir_result_t
*dirp
);
357 void seekdir(dir_result_t
*dirp
, loff_t offset
);
359 int link(const char *existing
, const char *newname
, const UserPerm
& perm
);
360 int unlink(const char *path
, const UserPerm
& perm
);
361 int rename(const char *from
, const char *to
, const UserPerm
& perm
);
364 int mkdir(const char *path
, mode_t mode
, const UserPerm
& perm
);
365 int mkdirs(const char *path
, mode_t mode
, const UserPerm
& perms
);
366 int rmdir(const char *path
, const UserPerm
& perms
);
369 int readlink(const char *path
, char *buf
, loff_t size
, const UserPerm
& perms
);
371 int symlink(const char *existing
, const char *newname
, const UserPerm
& perms
);
374 unsigned statx_to_mask(unsigned int flags
, unsigned int want
);
375 int stat(const char *path
, struct stat
*stbuf
, const UserPerm
& perms
,
376 frag_info_t
*dirstat
=0, int mask
=CEPH_STAT_CAP_INODE_ALL
);
377 int statx(const char *path
, struct ceph_statx
*stx
,
378 const UserPerm
& perms
,
379 unsigned int want
, unsigned int flags
);
380 int lstat(const char *path
, struct stat
*stbuf
, const UserPerm
& perms
,
381 frag_info_t
*dirstat
=0, int mask
=CEPH_STAT_CAP_INODE_ALL
);
383 int setattr(const char *relpath
, struct stat
*attr
, int mask
,
384 const UserPerm
& perms
);
385 int setattrx(const char *relpath
, struct ceph_statx
*stx
, int mask
,
386 const UserPerm
& perms
, int flags
=0);
387 int fsetattr(int fd
, struct stat
*attr
, int mask
, const UserPerm
& perms
);
388 int fsetattrx(int fd
, struct ceph_statx
*stx
, int mask
, const UserPerm
& perms
);
389 int chmod(const char *path
, mode_t mode
, const UserPerm
& perms
);
390 int fchmod(int fd
, mode_t mode
, const UserPerm
& perms
);
391 int lchmod(const char *path
, mode_t mode
, const UserPerm
& perms
);
392 int chown(const char *path
, uid_t new_uid
, gid_t new_gid
,
393 const UserPerm
& perms
);
394 int fchown(int fd
, uid_t new_uid
, gid_t new_gid
, const UserPerm
& perms
);
395 int lchown(const char *path
, uid_t new_uid
, gid_t new_gid
,
396 const UserPerm
& perms
);
397 int utime(const char *path
, struct utimbuf
*buf
, const UserPerm
& perms
);
398 int lutime(const char *path
, struct utimbuf
*buf
, const UserPerm
& perms
);
399 int futime(int fd
, struct utimbuf
*buf
, const UserPerm
& perms
);
400 int utimes(const char *relpath
, struct timeval times
[2], const UserPerm
& perms
);
401 int lutimes(const char *relpath
, struct timeval times
[2], const UserPerm
& perms
);
402 int futimes(int fd
, struct timeval times
[2], const UserPerm
& perms
);
403 int futimens(int fd
, struct timespec times
[2], const UserPerm
& perms
);
404 int flock(int fd
, int operation
, uint64_t owner
);
405 int truncate(const char *path
, loff_t size
, const UserPerm
& perms
);
408 int mknod(const char *path
, mode_t mode
, const UserPerm
& perms
, dev_t rdev
=0);
409 int open(const char *path
, int flags
, const UserPerm
& perms
, mode_t mode
=0);
410 int open(const char *path
, int flags
, const UserPerm
& perms
,
411 mode_t mode
, int stripe_unit
, int stripe_count
, int object_size
,
412 const char *data_pool
);
413 int lookup_hash(inodeno_t ino
, inodeno_t dirino
, const char *name
,
414 const UserPerm
& perms
);
415 int lookup_ino(inodeno_t ino
, const UserPerm
& perms
, Inode
**inode
=NULL
);
416 int lookup_name(Inode
*in
, Inode
*parent
, const UserPerm
& perms
);
418 loff_t
lseek(int fd
, loff_t offset
, int whence
);
419 int read(int fd
, char *buf
, loff_t size
, loff_t offset
=-1);
420 int preadv(int fd
, const struct iovec
*iov
, int iovcnt
, loff_t offset
=-1);
421 int write(int fd
, const char *buf
, loff_t size
, loff_t offset
=-1);
422 int pwritev(int fd
, const struct iovec
*iov
, int iovcnt
, loff_t offset
=-1);
423 int fake_write_size(int fd
, loff_t size
);
424 int ftruncate(int fd
, loff_t size
, const UserPerm
& perms
);
425 int fsync(int fd
, bool syncdataonly
);
426 int fstat(int fd
, struct stat
*stbuf
, const UserPerm
& perms
,
427 int mask
=CEPH_STAT_CAP_INODE_ALL
);
428 int fstatx(int fd
, struct ceph_statx
*stx
, const UserPerm
& perms
,
429 unsigned int want
, unsigned int flags
);
430 int fallocate(int fd
, int mode
, loff_t offset
, loff_t length
);
432 // full path xattr ops
433 int getxattr(const char *path
, const char *name
, void *value
, size_t size
,
434 const UserPerm
& perms
);
435 int lgetxattr(const char *path
, const char *name
, void *value
, size_t size
,
436 const UserPerm
& perms
);
437 int fgetxattr(int fd
, const char *name
, void *value
, size_t size
,
438 const UserPerm
& perms
);
439 int listxattr(const char *path
, char *list
, size_t size
, const UserPerm
& perms
);
440 int llistxattr(const char *path
, char *list
, size_t size
, const UserPerm
& perms
);
441 int flistxattr(int fd
, char *list
, size_t size
, const UserPerm
& perms
);
442 int removexattr(const char *path
, const char *name
, const UserPerm
& perms
);
443 int lremovexattr(const char *path
, const char *name
, const UserPerm
& perms
);
444 int fremovexattr(int fd
, const char *name
, const UserPerm
& perms
);
445 int setxattr(const char *path
, const char *name
, const void *value
,
446 size_t size
, int flags
, const UserPerm
& perms
);
447 int lsetxattr(const char *path
, const char *name
, const void *value
,
448 size_t size
, int flags
, const UserPerm
& perms
);
449 int fsetxattr(int fd
, const char *name
, const void *value
, size_t size
,
450 int flags
, const UserPerm
& perms
);
453 int64_t drop_caches();
456 int lazyio(int fd
, int enable
);
457 int lazyio_propogate(int fd
, loff_t offset
, size_t count
);
458 int lazyio_synchronize(int fd
, loff_t offset
, size_t count
);
460 // expose file layout
461 int describe_layout(const char *path
, file_layout_t
* layout
,
462 const UserPerm
& perms
);
463 int fdescribe_layout(int fd
, file_layout_t
* layout
);
464 int get_file_stripe_address(int fd
, loff_t offset
, vector
<entity_addr_t
>& address
);
465 int get_file_extent_osds(int fd
, loff_t off
, loff_t
*len
, vector
<int>& osds
);
466 int get_osd_addr(int osd
, entity_addr_t
& addr
);
469 int64_t get_default_pool_id();
473 int get_pool_replication(int64_t pool
);
474 int64_t get_pool_id(const char *pool_name
);
475 string
get_pool_name(int64_t pool
);
476 int get_osd_crush_location(int id
, vector
<pair
<string
, string
> >& path
);
478 int enumerate_layout(int fd
, vector
<ObjectExtent
>& result
,
479 loff_t length
, loff_t offset
);
481 int mksnap(const char *path
, const char *name
, const UserPerm
& perm
);
482 int rmsnap(const char *path
, const char *name
, const UserPerm
& perm
);
484 // Inode permission checking
485 int inode_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
);
488 int get_caps_issued(int fd
);
489 int get_caps_issued(const char *path
, const UserPerm
& perms
);
491 snapid_t
ll_get_snapid(Inode
*in
);
492 vinodeno_t
ll_get_vino(Inode
*in
) {
493 std::lock_guard
lock(client_lock
);
494 return _get_vino(in
);
496 // get inode from faked ino
497 Inode
*ll_get_inode(ino_t ino
);
498 Inode
*ll_get_inode(vinodeno_t vino
);
499 int ll_lookup(Inode
*parent
, const char *name
, struct stat
*attr
,
500 Inode
**out
, const UserPerm
& perms
);
501 int ll_lookup_inode(struct inodeno_t ino
, const UserPerm
& perms
, Inode
**inode
);
502 int ll_lookupx(Inode
*parent
, const char *name
, Inode
**out
,
503 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
504 const UserPerm
& perms
);
505 bool ll_forget(Inode
*in
, int count
);
506 bool ll_put(Inode
*in
);
507 int ll_get_snap_ref(snapid_t snap
);
509 int ll_getattr(Inode
*in
, struct stat
*st
, const UserPerm
& perms
);
510 int ll_getattrx(Inode
*in
, struct ceph_statx
*stx
, unsigned int want
,
511 unsigned int flags
, const UserPerm
& perms
);
512 int ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
513 const UserPerm
& perms
);
514 int ll_setattr(Inode
*in
, struct stat
*st
, int mask
,
515 const UserPerm
& perms
);
516 int ll_getxattr(Inode
*in
, const char *name
, void *value
, size_t size
,
517 const UserPerm
& perms
);
518 int ll_setxattr(Inode
*in
, const char *name
, const void *value
, size_t size
,
519 int flags
, const UserPerm
& perms
);
520 int ll_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
);
521 int ll_listxattr(Inode
*in
, char *list
, size_t size
, const UserPerm
& perms
);
522 int ll_opendir(Inode
*in
, int flags
, dir_result_t
**dirpp
,
523 const UserPerm
& perms
);
524 int ll_releasedir(dir_result_t
* dirp
);
525 int ll_fsyncdir(dir_result_t
* dirp
);
526 int ll_readlink(Inode
*in
, char *buf
, size_t bufsize
, const UserPerm
& perms
);
527 int ll_mknod(Inode
*in
, const char *name
, mode_t mode
, dev_t rdev
,
528 struct stat
*attr
, Inode
**out
, const UserPerm
& perms
);
529 int ll_mknodx(Inode
*parent
, const char *name
, mode_t mode
, dev_t rdev
,
530 Inode
**out
, struct ceph_statx
*stx
, unsigned want
,
531 unsigned flags
, const UserPerm
& perms
);
532 int ll_mkdir(Inode
*in
, const char *name
, mode_t mode
, struct stat
*attr
,
533 Inode
**out
, const UserPerm
& perm
);
534 int ll_mkdirx(Inode
*parent
, const char *name
, mode_t mode
, Inode
**out
,
535 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
536 const UserPerm
& perms
);
537 int ll_symlink(Inode
*in
, const char *name
, const char *value
,
538 struct stat
*attr
, Inode
**out
, const UserPerm
& perms
);
539 int ll_symlinkx(Inode
*parent
, const char *name
, const char *value
,
540 Inode
**out
, struct ceph_statx
*stx
, unsigned want
,
541 unsigned flags
, const UserPerm
& perms
);
542 int ll_unlink(Inode
*in
, const char *name
, const UserPerm
& perm
);
543 int ll_rmdir(Inode
*in
, const char *name
, const UserPerm
& perms
);
544 int ll_rename(Inode
*parent
, const char *name
, Inode
*newparent
,
545 const char *newname
, const UserPerm
& perm
);
546 int ll_link(Inode
*in
, Inode
*newparent
, const char *newname
,
547 const UserPerm
& perm
);
548 int ll_open(Inode
*in
, int flags
, Fh
**fh
, const UserPerm
& perms
);
549 int _ll_create(Inode
*parent
, const char *name
, mode_t mode
,
550 int flags
, InodeRef
*in
, int caps
, Fh
**fhp
,
551 const UserPerm
& perms
);
552 int ll_create(Inode
*parent
, const char *name
, mode_t mode
, int flags
,
553 struct stat
*attr
, Inode
**out
, Fh
**fhp
,
554 const UserPerm
& perms
);
555 int ll_createx(Inode
*parent
, const char *name
, mode_t mode
,
556 int oflags
, Inode
**outp
, Fh
**fhp
,
557 struct ceph_statx
*stx
, unsigned want
, unsigned lflags
,
558 const UserPerm
& perms
);
559 int ll_read_block(Inode
*in
, uint64_t blockid
, char *buf
, uint64_t offset
,
560 uint64_t length
, file_layout_t
* layout
);
562 int ll_write_block(Inode
*in
, uint64_t blockid
,
563 char* buf
, uint64_t offset
,
564 uint64_t length
, file_layout_t
* layout
,
565 uint64_t snapseq
, uint32_t sync
);
566 int ll_commit_blocks(Inode
*in
, uint64_t offset
, uint64_t length
);
568 int ll_statfs(Inode
*in
, struct statvfs
*stbuf
, const UserPerm
& perms
);
569 int ll_walk(const char* name
, Inode
**i
, struct ceph_statx
*stx
,
570 unsigned int want
, unsigned int flags
, const UserPerm
& perms
);
571 uint32_t ll_stripe_unit(Inode
*in
);
572 int ll_file_layout(Inode
*in
, file_layout_t
*layout
);
573 uint64_t ll_snap_seq(Inode
*in
);
575 int ll_read(Fh
*fh
, loff_t off
, loff_t len
, bufferlist
*bl
);
576 int ll_write(Fh
*fh
, loff_t off
, loff_t len
, const char *data
);
577 int64_t ll_readv(struct Fh
*fh
, const struct iovec
*iov
, int iovcnt
, int64_t off
);
578 int64_t ll_writev(struct Fh
*fh
, const struct iovec
*iov
, int iovcnt
, int64_t off
);
579 loff_t
ll_lseek(Fh
*fh
, loff_t offset
, int whence
);
580 int ll_flush(Fh
*fh
);
581 int ll_fsync(Fh
*fh
, bool syncdataonly
);
582 int ll_sync_inode(Inode
*in
, bool syncdataonly
);
583 int ll_fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
);
584 int ll_release(Fh
*fh
);
585 int ll_getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
);
586 int ll_setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
);
587 int ll_flock(Fh
*fh
, int cmd
, uint64_t owner
);
588 int ll_lazyio(Fh
*fh
, int enable
);
589 int ll_file_layout(Fh
*fh
, file_layout_t
*layout
);
590 void ll_interrupt(void *d
);
591 bool ll_handle_umask() {
592 return acl_type
!= NO_ACL
;
595 int ll_get_stripe_osd(struct Inode
*in
, uint64_t blockno
,
596 file_layout_t
* layout
);
597 uint64_t ll_get_internal_offset(struct Inode
*in
, uint64_t blockno
);
599 int ll_num_osds(void);
600 int ll_osdaddr(int osd
, uint32_t *addr
);
601 int ll_osdaddr(int osd
, char* buf
, size_t size
);
603 void ll_register_callbacks(struct client_callback_args
*args
);
604 int test_dentry_handling(bool can_invalidate
);
606 const char** get_tracked_conf_keys() const override
;
607 void handle_conf_change(const ConfigProxy
& conf
,
608 const std::set
<std::string
> &changed
) override
;
609 uint32_t get_deleg_timeout() { return deleg_timeout
; }
610 int set_deleg_timeout(uint32_t timeout
);
611 int ll_delegation(Fh
*fh
, unsigned cmd
, ceph_deleg_cb_t cb
, void *priv
);
613 entity_name_t
get_myname() { return messenger
->get_myname(); }
614 void wait_on_list(list
<Cond
*>& ls
);
615 void signal_cond_list(list
<Cond
*>& ls
);
617 void set_filer_flags(int flags
);
618 void clear_filer_flags(int flags
);
620 void tear_down_cache();
622 void update_metadata(std::string
const &k
, std::string
const &v
);
624 client_t
get_nodeid() { return whoami
; }
626 inodeno_t
get_root_ino();
630 virtual void shutdown();
633 void handle_mds_map(const MConstRef
<MMDSMap
>& m
);
634 void handle_fs_map(const MConstRef
<MFSMap
>& m
);
635 void handle_fs_map_user(const MConstRef
<MFSMapUser
>& m
);
636 void handle_osd_map(const MConstRef
<MOSDMap
>& m
);
638 void handle_lease(const MConstRef
<MClientLease
>& m
);
641 int uninline_data(Inode
*in
, Context
*onfinish
);
644 void check_cap_issue(Inode
*in
, unsigned issued
);
645 void add_update_cap(Inode
*in
, MetaSession
*session
, uint64_t cap_id
,
646 unsigned issued
, unsigned wanted
, unsigned seq
, unsigned mseq
,
647 inodeno_t realm
, int flags
, const UserPerm
& perms
);
648 void remove_cap(Cap
*cap
, bool queue_release
);
649 void remove_all_caps(Inode
*in
);
650 void remove_session_caps(MetaSession
*session
);
651 int mark_caps_flushing(Inode
*in
, ceph_tid_t
*ptid
);
652 void adjust_session_flushing_caps(Inode
*in
, MetaSession
*old_s
, MetaSession
*new_s
);
653 void flush_caps_sync();
654 void flush_caps(Inode
*in
, MetaSession
*session
, bool sync
=false);
655 void kick_flushing_caps(MetaSession
*session
);
656 void early_kick_flushing_caps(MetaSession
*session
);
657 int get_caps(Inode
*in
, int need
, int want
, int *have
, loff_t endoff
);
658 int get_caps_used(Inode
*in
);
660 void maybe_update_snaprealm(SnapRealm
*realm
, snapid_t snap_created
, snapid_t snap_highwater
,
661 vector
<snapid_t
>& snaps
);
663 void handle_quota(const MConstRef
<MClientQuota
>& m
);
664 void handle_snap(const MConstRef
<MClientSnap
>& m
);
665 void handle_caps(const MConstRef
<MClientCaps
>& m
);
666 void handle_cap_import(MetaSession
*session
, Inode
*in
, const MConstRef
<MClientCaps
>& m
);
667 void handle_cap_export(MetaSession
*session
, Inode
*in
, const MConstRef
<MClientCaps
>& m
);
668 void handle_cap_trunc(MetaSession
*session
, Inode
*in
, const MConstRef
<MClientCaps
>& m
);
669 void handle_cap_flush_ack(MetaSession
*session
, Inode
*in
, Cap
*cap
, const MConstRef
<MClientCaps
>& m
);
670 void handle_cap_flushsnap_ack(MetaSession
*session
, Inode
*in
, const MConstRef
<MClientCaps
>& m
);
671 void handle_cap_grant(MetaSession
*session
, Inode
*in
, Cap
*cap
, const MConstRef
<MClientCaps
>& m
);
672 void cap_delay_requeue(Inode
*in
);
673 void send_cap(Inode
*in
, MetaSession
*session
, Cap
*cap
, bool sync
,
674 int used
, int want
, int retain
, int flush
,
675 ceph_tid_t flush_tid
);
677 void get_cap_ref(Inode
*in
, int cap
);
678 void put_cap_ref(Inode
*in
, int cap
);
679 void flush_snaps(Inode
*in
, bool all_again
=false);
680 void wait_sync_caps(Inode
*in
, ceph_tid_t want
);
681 void wait_sync_caps(ceph_tid_t want
);
682 void queue_cap_snap(Inode
*in
, SnapContext
&old_snapc
);
683 void finish_cap_snap(Inode
*in
, CapSnap
&capsnap
, int used
);
684 void _flushed_cap_snap(Inode
*in
, snapid_t seq
);
686 void _schedule_invalidate_dentry_callback(Dentry
*dn
, bool del
);
687 void _async_dentry_invalidate(vinodeno_t dirino
, vinodeno_t ino
, string
& name
);
688 void _try_to_trim_inode(Inode
*in
, bool sched_inval
);
690 void _schedule_invalidate_callback(Inode
*in
, int64_t off
, int64_t len
);
691 void _invalidate_inode_cache(Inode
*in
);
692 void _invalidate_inode_cache(Inode
*in
, int64_t off
, int64_t len
);
693 void _async_invalidate(vinodeno_t ino
, int64_t off
, int64_t len
);
694 bool _release(Inode
*in
);
697 * Initiate a flush of the data associated with the given inode.
698 * If you specify a Context, you are responsible for holding an inode
699 * reference for the duration of the flush. If not, _flush() will
700 * take the reference for you.
701 * @param in The Inode whose data you wish to flush.
702 * @param c The Context you wish us to complete once the data is
703 * flushed. If already flushed, this will be called in-line.
705 * @returns true if the data was already flushed, false otherwise.
707 bool _flush(Inode
*in
, Context
*c
);
708 void _flush_range(Inode
*in
, int64_t off
, uint64_t size
);
709 void _flushed(Inode
*in
);
710 void flush_set_callback(ObjectCacher::ObjectSet
*oset
);
712 void close_release(Inode
*in
);
713 void close_safe(Inode
*in
);
715 void lock_fh_pos(Fh
*f
);
716 void unlock_fh_pos(Fh
*f
);
719 void update_dir_dist(Inode
*in
, DirStat
*st
);
721 void clear_dir_complete_and_ordered(Inode
*diri
, bool complete
);
722 void insert_readdir_results(MetaRequest
*request
, MetaSession
*session
, Inode
*diri
);
723 Inode
* insert_trace(MetaRequest
*request
, MetaSession
*session
);
724 void update_inode_file_size(Inode
*in
, int issued
, uint64_t size
,
725 uint64_t truncate_seq
, uint64_t truncate_size
);
726 void update_inode_file_time(Inode
*in
, int issued
, uint64_t time_warp_seq
,
727 utime_t ctime
, utime_t mtime
, utime_t atime
);
729 Inode
*add_update_inode(InodeStat
*st
, utime_t ttl
, MetaSession
*session
,
730 const UserPerm
& request_perms
);
731 Dentry
*insert_dentry_inode(Dir
*dir
, const string
& dname
, LeaseStat
*dlease
,
732 Inode
*in
, utime_t from
, MetaSession
*session
,
733 Dentry
*old_dentry
= NULL
);
734 void update_dentry_lease(Dentry
*dn
, LeaseStat
*dlease
, utime_t from
, MetaSession
*session
);
736 bool use_faked_inos() { return _use_faked_inos
; }
737 vinodeno_t
map_faked_ino(ino_t ino
);
739 //notify the mds to flush the mdlog
740 void flush_mdlog_sync();
741 void flush_mdlog(MetaSession
*session
);
744 void renew_caps(MetaSession
*session
);
745 void flush_cap_releases();
748 xlist
<Inode
*> &get_dirty_list() { return dirty_list
; }
752 std::unique_ptr
<PerfCounters
> logger
;
753 std::unique_ptr
<MDSMap
> mdsmap
;
757 /* Flags for check_caps() */
758 static const unsigned CHECK_CAPS_NODELAY
= 0x1;
759 static const unsigned CHECK_CAPS_SYNCHRONOUS
= 0x2;
762 bool is_initialized() const { return initialized
; }
764 void check_caps(Inode
*in
, unsigned flags
);
766 void set_cap_epoch_barrier(epoch_t e
);
768 void handle_command_reply(const MConstRef
<MCommandReply
>& m
);
769 int fetch_fsmap(bool user
);
771 const std::string
&mds_spec
,
772 std::vector
<mds_gid_t
> *targets
);
774 void get_session_metadata(std::map
<std::string
, std::string
> *meta
) const;
775 bool have_open_session(mds_rank_t mds
);
776 void got_mds_push(MetaSession
*s
);
777 MetaSession
*_get_mds_session(mds_rank_t mds
, Connection
*con
); ///< return session for mds *and* con; null otherwise
778 MetaSession
*_get_or_open_mds_session(mds_rank_t mds
);
779 MetaSession
*_open_mds_session(mds_rank_t mds
);
780 void _close_mds_session(MetaSession
*s
);
781 void _closed_mds_session(MetaSession
*s
);
782 bool _any_stale_sessions() const;
783 void _kick_stale_sessions();
784 void handle_client_session(const MConstRef
<MClientSession
>& m
);
785 void send_reconnect(MetaSession
*s
);
786 void resend_unsafe_requests(MetaSession
*s
);
787 void wait_unsafe_requests();
789 void _sync_write_commit(Inode
*in
);
791 void dump_mds_requests(Formatter
*f
);
792 void dump_mds_sessions(Formatter
*f
);
794 int make_request(MetaRequest
*req
, const UserPerm
& perms
,
795 InodeRef
*ptarget
= 0, bool *pcreated
= 0,
796 mds_rank_t use_mds
=-1, bufferlist
*pdirbl
=0);
797 void put_request(MetaRequest
*request
);
798 void unregister_request(MetaRequest
*request
);
800 int verify_reply_trace(int r
, MetaRequest
*request
, const MConstRef
<MClientReply
>& reply
,
801 InodeRef
*ptarget
, bool *pcreated
,
802 const UserPerm
& perms
);
803 void encode_cap_releases(MetaRequest
*request
, mds_rank_t mds
);
804 int encode_inode_release(Inode
*in
, MetaRequest
*req
,
805 mds_rank_t mds
, int drop
,
806 int unless
,int force
=0);
807 void encode_dentry_release(Dentry
*dn
, MetaRequest
*req
,
808 mds_rank_t mds
, int drop
, int unless
);
809 mds_rank_t
choose_target_mds(MetaRequest
*req
, Inode
** phash_diri
=NULL
);
810 void connect_mds_targets(mds_rank_t mds
);
811 void send_request(MetaRequest
*request
, MetaSession
*session
,
812 bool drop_cap_releases
=false);
813 MRef
<MClientRequest
> build_client_request(MetaRequest
*request
);
814 void kick_requests(MetaSession
*session
);
815 void kick_requests_closed(MetaSession
*session
);
816 void handle_client_request_forward(const MConstRef
<MClientRequestForward
>& reply
);
817 void handle_client_reply(const MConstRef
<MClientReply
>& reply
);
818 bool is_dir_operation(MetaRequest
*request
);
820 // fake inode number for 32-bits ino_t
821 void _assign_faked_ino(Inode
*in
);
822 void _assign_faked_root(Inode
*in
);
823 void _release_faked_ino(Inode
*in
);
824 void _reset_faked_inos();
825 vinodeno_t
_map_faked_ino(ino_t ino
);
827 // Optional extra metadata about me to send to the MDS
828 void populate_metadata(const std::string
&mount_root
);
830 SnapRealm
*get_snap_realm(inodeno_t r
);
831 SnapRealm
*get_snap_realm_maybe(inodeno_t r
);
832 void put_snap_realm(SnapRealm
*realm
);
833 bool adjust_realm_parent(SnapRealm
*realm
, inodeno_t parent
);
834 void update_snap_trace(const bufferlist
& bl
, SnapRealm
**realm_ret
, bool must_flush
=true);
835 void invalidate_snaprealm_and_children(SnapRealm
*realm
);
837 Inode
*open_snapdir(Inode
*diri
);
840 int fd
= free_fd_set
.range_start();
841 free_fd_set
.erase(fd
, 1);
844 void put_fd(int fd
) {
845 free_fd_set
.insert(fd
, 1);
849 * Resolve file descriptor, or return NULL.
851 Fh
*get_filehandle(int fd
) {
852 ceph::unordered_map
<int, Fh
*>::iterator p
= fd_map
.find(fd
);
853 if (p
== fd_map
.end())
// Acts on one MetaSession; `reconnect` is a caller-supplied flag.
859 void wake_up_session_caps(MetaSession
*s
, bool reconnect
);
// Both take a list<Context*> by non-const reference.
// NOTE(review): presumably one blocks on the list and the other completes
// the queued contexts -- confirm.
861 void wait_on_context_list(list
<Context
*>& ls
);
862 void signal_context_list(list
<Context
*>& ls
);
864 // -- metadata cache stuff
866 // decrease inode ref. delete if dangling.
// n is the number of references to drop and defaults to 1.
867 void put_inode(Inode
*in
, int n
=1);
868 void close_dir(Dir
*dir
);
// fs_name defaults to the empty string; returns int.
// NOTE(review): presumably 0 / negative errno -- confirm.
870 int subscribe_mdsmap(const std::string
&fs_name
="");
872 void _abort_mds_sessions(int err
);
874 // same as unmount() but for when the client_lock is already held
875 void _unmount(bool abort
);
877 //int get_cache_size() { return lru.lru_get_size(); }
880 * Don't call this with in==NULL, use get_or_create for that
881 * leave dn set to default NULL unless you're trying to add
882 * a new inode to a pre-created Dentry
// Links inode `in` under `name` in `dir` and returns a Dentry*.
// NOTE(review): the comment above mentions a "default NULL" for dn, but
// this declaration has no default argument for dn -- the comment looks
// stale; callers must pass dn explicitly.
884 Dentry
* link(Dir
*dir
, const string
& name
, Inode
*in
, Dentry
*dn
);
// keepdir / keepdentry are caller-supplied flags.
// NOTE(review): presumably they suppress removal of the now-empty Dir and
// of the Dentry itself -- confirm.
885 void unlink(Dentry
*dn
, bool keepdir
, bool keepdentry
);
887 // path traversal for high-level interface
// The result is returned through `end` (an InodeRef*); followsym defaults
// to true and mask to 0.
888 int path_walk(const filepath
& fp
, InodeRef
*end
, const UserPerm
& perms
,
889 bool followsym
=true, int mask
=0);
// Fill a struct stat from an Inode. dirstat and rstat are optional output
// pointers that default to 0.
// NOTE(review): other members in this class use nullptr; `=0` here could be
// spelled nullptr for consistency.
891 int fill_stat(Inode
*in
, struct stat
*st
, frag_info_t
*dirstat
=0, nest_info_t
*rstat
=0);
// InodeRef convenience overload: forwards in.get() and the remaining
// arguments unchanged to the Inode* overload.
892 int fill_stat(InodeRef
& in
, struct stat
*st
, frag_info_t
*dirstat
=0, nest_info_t
*rstat
=0) {
893 return fill_stat(in
.get(), st
, dirstat
, rstat
);
// Fill a struct ceph_statx from an Inode; `mask` is an unsigned bit mask
// supplied by the caller.
896 void fill_statx(Inode
*in
, unsigned int mask
, struct ceph_statx
*stx
);
// InodeRef convenience overload: forwards in.get(), mask and stx to the
// Inode* overload.
897 void fill_statx(InodeRef
& in
, unsigned int mask
, struct ceph_statx
*stx
) {
898 return fill_statx(in
.get(), mask
, stx
);
// NOTE(review): presumably marks the dentry as recently used in `lru` --
// confirm.
901 void touch_dn(Dentry
*dn
);
// Cache trimming helpers. trim_cache()'s flag defaults to false; trim_caps()
// takes a session and a uint64_t limit.
904 void trim_cache(bool trim_kernel_dcache
=false);
905 void trim_cache_for_reconnect(MetaSession
*s
);
906 void trim_dentry(Dentry
*dn
);
907 void trim_caps(MetaSession
*s
, uint64_t max
);
908 void _invalidate_kernel_dcache();
909 void _trim_negative_child_dentries(InodeRef
& in
);
// Dump helpers writing to a Formatter. dump_inode() also takes a
// set<Inode*> by reference and a `disconnected` flag.
// NOTE(review): `did` presumably tracks inodes already dumped -- confirm.
911 void dump_inode(Formatter
*f
, Inode
*in
, set
<Inode
*>& did
, bool disconnected
);
912 void dump_cache(Formatter
*f
); // debug
915 void force_session_readonly(MetaSession
*s
);
917 void dump_status(Formatter
*f
); // debug
// Messenger callbacks. Every method here is marked `override`, so each
// replaces a virtual declared in a base class.
// NOTE(review): presumably the Dispatcher interface from msg/Dispatcher.h;
// the base-class list is declared elsewhere -- confirm.
919 bool ms_dispatch2(const MessageRef
& m
) override
;
// Connection lifecycle notifications; each receives the affected Connection.
921 void ms_handle_connect(Connection
*con
) override
;
922 bool ms_handle_reset(Connection
*con
) override
;
923 void ms_handle_remote_reset(Connection
*con
) override
;
924 bool ms_handle_refused(Connection
*con
) override
;
// The authorizer is returned through the AuthAuthorizer** out-parameter.
925 bool ms_get_authorizer(int dest_type
, AuthAuthorizer
**authorizer
) override
;
// Quota helpers. get_quota_root() returns an Inode* for `in`.
929 Inode
* get_quota_root(Inode
*in
, const UserPerm
& perms
);
// `test` is a caller-supplied predicate over a const Inode&.
// NOTE(review): presumably evaluated on `in` and its quota ancestors --
// confirm.
930 bool check_quota_condition(Inode
*in
, const UserPerm
& perms
,
931 std::function
<bool (const Inode
&)> test
);
932 bool is_quota_files_exceeded(Inode
*in
, const UserPerm
& perms
);
// new_bytes is a signed 64-bit byte count supplied by the caller.
933 bool is_quota_bytes_exceeded(Inode
*in
, int64_t new_bytes
,
934 const UserPerm
& perms
);
935 bool is_quota_bytes_approaching(Inode
*in
, const UserPerm
& perms
);
// `need` is an int.
// NOTE(review): presumably a mask of required pool access bits -- confirm.
937 int check_pool_perm(Inode
*in
, int need
);
// Handler for MClientReclaimReply messages, taken as a const MConstRef
// reference.
939 void handle_client_reclaim_reply(const MConstRef
<MClientReclaimReply
>& reply
);
942 * Call this when an OSDMap is seen with a full flag (global or per pool)
945 * @param pool the pool ID affected, or -1 if all.
947 void _handle_full_flag(int64_t pool
);
949 void _close_sessions();
952 * The basic housekeeping parts of init (perf counters, admin socket)
953 * that is independent of how objecters/monclient/messengers are
958 // global client lock
959 // - protects Client and buffer cache both!
// Per-snapid integer counter.
// NOTE(review): presumably a reference count used by the ll_* interface --
// confirm.
962 std::map
<snapid_t
, int> ll_snap_ref
;
// Root inode pointers start out null; root_parents maps a raw Inode* to an
// InodeRef.
964 Inode
* root
= nullptr;
965 map
<Inode
*, InodeRef
> root_parents
;
966 Inode
* root_ancestor
= nullptr;
967 LRU lru
; // LRU list of Dentries in our local metadata cache.
// Helper objects held through std::unique_ptr.
971 std::unique_ptr
<Filer
> filer
;
972 std::unique_ptr
<ObjectCacher
> objectcacher
;
973 std::unique_ptr
<WritebackHandler
> writeback_handler
;
// Held as raw pointers with no in-class initializer.
// NOTE(review): ownership is not expressed by the type -- confirm who
// creates and destroys these.
975 Messenger
*messenger
;
976 MonClient
*monclient
;
// Context subclass constructed from a Client and an Fh; it overrides the
// destructor and finish(int).
// NOTE(review): presumably the completion callback for readahead I/O on the
// file handle -- confirm in the definition.
983 struct C_Readahead
: public Context
{
984 C_Readahead(Client
*c
, Fh
*f
);
985 ~C_Readahead() override
;
986 void finish(int r
) override
;
993 * These define virtual xattrs exposing the recursive directory
994 * statistics and layout metadata.
// Pointer to the Client member function that produces the value: it
// receives the inode, an output buffer and the buffer size, and returns
// size_t.
998 size_t (Client::*getxattr_cb
)(Inode
*in
, char *val
, size_t size
);
999 bool readonly
, hidden
;
// Pointer to a Client member predicate on the inode.
// NOTE(review): presumably reports whether the vxattr applies to that
// inode -- confirm.
1000 bool (Client::*exists_cb
)(Inode
*in
);
1016 /* Flags for VXattr */
1017 static const unsigned VXATTR_RSTAT
= 0x1;
// Tables of VXattr entries, declared without a bound here, so their
// definitions (and sizes) live elsewhere.
1019 static const VXattr _dir_vxattrs
[];
1020 static const VXattr _file_vxattrs
[];
// Populate a struct dirent from a name, type, inode number and the offset
// of the next entry.
1024 void fill_dirent(struct dirent
*de
, const char *name
, int type
, uint64_t ino
, loff_t next_off
);
// Directory stream helpers. _opendir() returns the new dir_result_t through
// dirpp; the _readdir_* helpers operate on an existing dir_result_t.
1026 int _opendir(Inode
*in
, dir_result_t
**dirpp
, const UserPerm
& perms
);
1027 void _readdir_drop_dirp_buffer(dir_result_t
*dirp
);
1028 bool _readdir_have_frag(dir_result_t
*dirp
);
1029 void _readdir_next_frag(dir_result_t
*dirp
);
1030 void _readdir_rechoose_frag(dir_result_t
*dirp
);
1031 int _readdir_get_frag(dir_result_t
*dirp
);
// cb is an add_dirent_cb_t callback and p an opaque pointer passed with it.
// NOTE(review): presumably serves entries from the local dentry cache --
// confirm.
1032 int _readdir_cache_cb(dir_result_t
*dirp
, add_dirent_cb_t cb
, void *p
, int caps
, bool getref
);
1033 void _closedir(dir_result_t
*dirp
);
// Fragment-map maintenance on an Inode; the second variant also takes the
// mds_rank_t concerned.
1036 void _fragmap_remove_non_leaves(Inode
*in
);
1037 void _fragmap_remove_stopped_mds(Inode
*in
, mds_rank_t mds
);
// Low-level (ll) reference helpers. _ll_put() takes a count and returns int.
// NOTE(review): meaning of the return value is not visible here -- confirm.
1039 void _ll_get(Inode
*in
);
1040 int _ll_put(Inode
*in
, int num
);
1041 void _ll_drop_pins();
// Fh lifecycle: _create_fh() returns a new Fh* for an inode; _release_fh()
// and _put_fh() take an existing Fh.
1043 Fh
*_create_fh(Inode
*in
, int flags
, int cmode
, const UserPerm
& perms
);
1044 int _release_fh(Fh
*fh
);
1045 void _put_fh(Fh
*fh
);
1047 int _do_remount(bool retry_on_error
);
// Read paths: both fill `bl` for the range [off, off+len) of file handle f;
// the sync variant additionally takes a bool* `checkeof` out-parameter.
1049 int _read_sync(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
, bool *checkeof
);
1050 int _read_async(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
);
1052 // internal interface
1053 // call these with client_lock held!
// Name lookup in directory inode `dir`; the result is returned through
// `target` (an InodeRef*).
1054 int _do_lookup(Inode
*dir
, const string
& name
, int mask
, InodeRef
*target
,
1055 const UserPerm
& perms
);
1057 int _lookup(Inode
*dir
, const string
& dname
, int mask
, InodeRef
*target
,
1058 const UserPerm
& perm
);
// Namespace operations; each takes the directory inode(s) involved, entry
// name(s) as C strings and the caller's UserPerm.
1060 int _link(Inode
*in
, Inode
*dir
, const char *name
, const UserPerm
& perm
,
1062 int _unlink(Inode
*dir
, const char *name
, const UserPerm
& perm
);
1063 int _rename(Inode
*olddir
, const char *oname
, Inode
*ndir
, const char *nname
, const UserPerm
& perm
);
1064 int _mkdir(Inode
*dir
, const char *name
, mode_t mode
, const UserPerm
& perm
,
1066 int _rmdir(Inode
*dir
, const char *name
, const UserPerm
& perms
);
// `inp` is an optional InodeRef* out-parameter defaulting to 0.
// NOTE(review): could be spelled nullptr, as used for members in this class.
1067 int _symlink(Inode
*dir
, const char *name
, const char *target
,
1068 const UserPerm
& perms
, InodeRef
*inp
= 0);
1069 int _mknod(Inode
*dir
, const char *name
, mode_t mode
, dev_t rdev
,
1070 const UserPerm
& perms
, InodeRef
*inp
= 0);
// Attribute setters. All take the new values as a struct ceph_statx (or a
// struct stat for _setattr) plus an int mask.
// NOTE(review): `mask` presumably selects which fields to apply -- confirm.
1071 int _do_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
1072 const UserPerm
& perms
, InodeRef
*inp
);
// Converts between the two attribute structures: reads *st, writes *stx.
1073 void stat_to_statx(struct stat
*st
, struct ceph_statx
*stx
);
// `inp` is optional here (defaults to 0).
1074 int __setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
1075 const UserPerm
& perms
, InodeRef
*inp
= 0);
1076 int _setattrx(InodeRef
&in
, struct ceph_statx
*stx
, int mask
,
1077 const UserPerm
& perms
);
1078 int _setattr(InodeRef
&in
, struct stat
*attr
, int mask
,
1079 const UserPerm
& perms
);
1080 int _ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
1081 const UserPerm
& perms
, InodeRef
*inp
= 0);
// Attribute refresh; `force` defaults to false.
1082 int _getattr(Inode
*in
, int mask
, const UserPerm
& perms
, bool force
=false);
// InodeRef convenience overload: forwards in.get() and the remaining
// arguments to the Inode* overload.
1083 int _getattr(InodeRef
&in
, int mask
, const UserPerm
& perms
, bool force
=false) {
1084 return _getattr(in
.get(), mask
, perms
, force
);
// Writes into the caller's buffer `buf` of capacity `size`.
1086 int _readlink(Inode
*in
, char *buf
, size_t size
);
// Extended-attribute operations. Most come in two overloads, one taking a
// raw Inode* and one taking an InodeRef&, with otherwise matching parameters.
// The value is read into (`void *value`) or taken from (`const void *value`)
// a caller buffer of `len` bytes.
1087 int _getxattr(Inode
*in
, const char *name
, void *value
, size_t len
,
1088 const UserPerm
& perms
);
1089 int _getxattr(InodeRef
&in
, const char *name
, void *value
, size_t len
,
1090 const UserPerm
& perms
);
1091 int _listxattr(Inode
*in
, char *names
, size_t len
, const UserPerm
& perms
);
1092 int _do_setxattr(Inode
*in
, const char *name
, const void *value
, size_t len
,
1093 int flags
, const UserPerm
& perms
);
1094 int _setxattr(Inode
*in
, const char *name
, const void *value
, size_t len
,
1095 int flags
, const UserPerm
& perms
);
1096 int _setxattr(InodeRef
&in
, const char *name
, const void *value
, size_t len
,
1097 int flags
, const UserPerm
& perms
);
// Takes name and value as non-const string references plus the OSDMap to
// check against.
1098 int _setxattr_check_data_pool(string
& name
, string
& value
, const OSDMap
*osdmap
);
1099 void _setxattr_maybe_wait_for_osdmap(const char *name
, const void *value
, size_t len
);
1100 int _removexattr(Inode
*in
, const char *nm
, const UserPerm
& perms
);
1101 int _removexattr(InodeRef
&in
, const char *nm
, const UserPerm
& perms
);
// Open an inode; the new file handle is returned through `fhp`.
1102 int _open(Inode
*in
, int flags
, mode_t mode
, Fh
**fhp
,
1103 const UserPerm
& perms
);
1104 int _renew_caps(Inode
*in
);
// Create `name` under directory inode `in`. Results are returned through
// the `inp`, `fhp` and `created` out-parameters; the stripe/object/pool
// arguments describe the requested layout.
1105 int _create(Inode
*in
, const char *name
, int flags
, mode_t mode
, InodeRef
*inp
,
1106 Fh
**fhp
, int stripe_unit
, int stripe_count
, int object_size
,
1107 const char *data_pool
, bool *created
, const UserPerm
&perms
);
// Data-path primitives on an open file handle.
1109 loff_t
_lseek(Fh
*fh
, loff_t offset
, int whence
);
1110 int64_t _read(Fh
*fh
, int64_t offset
, uint64_t size
, bufferlist
*bl
);
// Accepts either a flat buffer (`buf`) or an iovec array (`iov`/`iovcnt`).
// NOTE(review): which one takes precedence is not visible here -- confirm.
1111 int64_t _write(Fh
*fh
, int64_t offset
, uint64_t size
, const char *buf
,
1112 const struct iovec
*iov
, int iovcnt
);
// Vectored I/O; `write` selects the direction. The _locked variant takes an
// Fh* and an extra clamp_to_int flag, the other takes an int fd.
1113 int64_t _preadv_pwritev_locked(Fh
*f
, const struct iovec
*iov
,
1114 unsigned iovcnt
, int64_t offset
, bool write
, bool clamp_to_int
);
1115 int _preadv_pwritev(int fd
, const struct iovec
*iov
, unsigned iovcnt
, int64_t offset
, bool write
);
// Two overloads: one keyed by file handle, one by inode. Both take a
// syncdataonly flag.
1117 int _fsync(Fh
*fh
, bool syncdataonly
);
1118 int _fsync(Inode
*in
, bool syncdataonly
);
1120 int _fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
);
// File locking on an open handle; `owner` is a 64-bit owner token supplied
// by the caller.
1121 int _getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
);
1122 int _setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
);
1123 int _flock(Fh
*fh
, int cmd
, uint64_t owner
);
1124 int _lazyio(Fh
*fh
, int enable
);
// The Dentry is returned through `pdn`; expect_null defaults to false.
1126 int get_or_create(Inode
*dir
, const char* name
,
1127 Dentry
**pdn
, bool expect_null
=false);
// Permission checks. Each takes the inode (or directory inode) concerned and
// the caller's UserPerm, and returns int.
// NOTE(review): presumably 0 on success and a negative errno on denial --
// confirm.
1129 int xattr_permission(Inode
*in
, const char *name
, unsigned want
,
1130 const UserPerm
& perms
);
1131 int may_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
1132 const UserPerm
& perms
);
1133 int may_open(Inode
*in
, int flags
, const UserPerm
& perms
);
1134 int may_lookup(Inode
*dir
, const UserPerm
& perms
);
1135 int may_create(Inode
*dir
, const UserPerm
& perms
);
1136 int may_delete(Inode
*dir
, const char *name
, const UserPerm
& perms
);
1137 int may_hardlink(Inode
*in
, const UserPerm
& perms
);
1139 int _getattr_for_perm(Inode
*in
, const UserPerm
& perms
);
// Returns the vinodeno_t for an Inode.
1141 vinodeno_t
_get_vino(Inode
*in
);
// Virtual-xattr callbacks. The size_t-returning members take
// (Inode*, char *val, size_t size) and the bool-returning *_exists members
// take (Inode*); these match the getxattr_cb / exists_cb member-pointer
// types declared for VXattr earlier in this class.
// NOTE(review): presumably each formats one value into val and returns its
// length -- confirm in the definitions.
1143 bool _vxattrcb_quota_exists(Inode
*in
);
1144 size_t _vxattrcb_quota(Inode
*in
, char *val
, size_t size
);
1145 size_t _vxattrcb_quota_max_bytes(Inode
*in
, char *val
, size_t size
);
1146 size_t _vxattrcb_quota_max_files(Inode
*in
, char *val
, size_t size
);
// Layout callbacks.
1148 bool _vxattrcb_layout_exists(Inode
*in
);
1149 size_t _vxattrcb_layout(Inode
*in
, char *val
, size_t size
);
1150 size_t _vxattrcb_layout_stripe_unit(Inode
*in
, char *val
, size_t size
);
1151 size_t _vxattrcb_layout_stripe_count(Inode
*in
, char *val
, size_t size
);
1152 size_t _vxattrcb_layout_object_size(Inode
*in
, char *val
, size_t size
);
1153 size_t _vxattrcb_layout_pool(Inode
*in
, char *val
, size_t size
);
1154 size_t _vxattrcb_layout_pool_namespace(Inode
*in
, char *val
, size_t size
);
// Directory statistics callbacks.
1155 size_t _vxattrcb_dir_entries(Inode
*in
, char *val
, size_t size
);
1156 size_t _vxattrcb_dir_files(Inode
*in
, char *val
, size_t size
);
1157 size_t _vxattrcb_dir_subdirs(Inode
*in
, char *val
, size_t size
);
1158 size_t _vxattrcb_dir_rentries(Inode
*in
, char *val
, size_t size
);
1159 size_t _vxattrcb_dir_rfiles(Inode
*in
, char *val
, size_t size
);
1160 size_t _vxattrcb_dir_rsubdirs(Inode
*in
, char *val
, size_t size
);
1161 size_t _vxattrcb_dir_rbytes(Inode
*in
, char *val
, size_t size
);
1162 size_t _vxattrcb_dir_rctime(Inode
*in
, char *val
, size_t size
);
// Directory pin callbacks.
1164 bool _vxattrcb_dir_pin_exists(Inode
*in
);
1165 size_t _vxattrcb_dir_pin(Inode
*in
, char *val
, size_t size
);
// Snapshot birth-time callbacks.
1167 bool _vxattrcb_snap_btime_exists(Inode
*in
);
1168 size_t _vxattrcb_snap_btime(Inode
*in
, char *val
, size_t size
);
// Static lookups returning a const VXattr*.
// NOTE(review): presumably these select between _dir_vxattrs and
// _file_vxattrs and search by name -- confirm.
1170 static const VXattr
*_get_vxattrs(Inode
*in
);
1171 static const VXattr
*_match_vxattr(Inode
*in
, const char *name
);
// File-lock machinery. _do_filelock() takes the inode, the file handle, a
// lock type, an op code, a sleep flag, the flock description and a 64-bit
// owner; `removing` defaults to false.
1173 int _do_filelock(Inode
*in
, Fh
*fh
, int lock_type
, int op
, int sleep
,
1174 struct flock
*fl
, uint64_t owner
, bool removing
=false);
1175 int _interrupt_filelock(MetaRequest
*req
);
// Writes into the bufferlist passed by reference.
1176 void _encode_filelocks(Inode
*in
, bufferlist
& bl
);
1177 void _release_filelocks(Fh
*fh
);
1178 void _update_lock_state(struct flock
*fl
, uint64_t owner
, ceph_lock_state_t
*lock_state
);
// POSIX ACL helpers. _posix_acl_create() takes `mode` by pointer and
// `xattrs_bl` by reference, so both can be modified by the callee.
1180 int _posix_acl_create(Inode
*dir
, mode_t
*mode
, bufferlist
& xattrs_bl
,
1181 const UserPerm
& perms
);
1182 int _posix_acl_chmod(Inode
*in
, mode_t mode
, const UserPerm
& perms
);
1183 int _posix_acl_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
);
// const member: does not modify the Client; returns an mds_rank_t.
1185 mds_rank_t
_get_random_up_mds() const;
1187 int _ll_getattr(Inode
*in
, int caps
, const UserPerm
& perms
);
// Lookup helpers. `parent` / `inode` are optional Inode** out-parameters
// defaulting to NULL.
// NOTE(review): other members in this class use nullptr; `=NULL` here could
// be spelled nullptr for consistency.
1188 int _lookup_parent(Inode
*in
, const UserPerm
& perms
, Inode
**parent
=NULL
);
1189 int _lookup_name(Inode
*in
, Inode
*parent
, const UserPerm
& perms
);
1190 int _lookup_ino(inodeno_t ino
, const UserPerm
& perms
, Inode
**inode
=NULL
);
// Takes a count and returns bool.
// NOTE(review): meaning of the return value is not visible here -- confirm.
1191 bool _ll_forget(Inode
*in
, int count
);
1194 uint32_t deleg_timeout
= 0;
// Optional callbacks and the opaque handle kept alongside them; all start
// out null.
1196 client_switch_interrupt_callback_t switch_interrupt_cb
= nullptr;
1197 client_remount_callback_t remount_cb
= nullptr;
1198 client_ino_callback_t ino_invalidate_cb
= nullptr;
1199 client_dentry_callback_t dentry_invalidate_cb
= nullptr;
1200 client_umask_callback_t umask_cb
= nullptr;
1201 void *callback_handle
= nullptr;
1202 bool can_invalidate_dentries
= false;
// One Finisher per kind of deferred work.
// NOTE(review): presumably each runs the matching callback above off the
// calling thread -- confirm.
1204 Finisher async_ino_invalidator
;
1205 Finisher async_dentry_invalidator
;
1206 Finisher interrupt_finisher
;
1207 Finisher remount_finisher
;
1208 Finisher objecter_finisher
;
1210 Context
*tick_event
= nullptr;
1211 utime_t last_cap_renew
;
1213 CommandHook m_command_hook
;
// NOTE(review): unlike their neighbours, user_id and group_id have no
// in-class initializer -- confirm the constructor sets them.
1215 int user_id
, group_id
;
1216 int acl_type
= NO_ACL
;
1218 epoch_t cap_epoch_barrier
= 0;
1221 map
<mds_rank_t
, MetaSession
> mds_sessions
; // mds rank -> MetaSession
// Condition variables queued as raw Cond pointers.
1222 list
<Cond
*> waiting_for_mdsmap
;
1224 // FSMap, for when using mds_command
1225 list
<Cond
*> waiting_for_fsmap
;
1226 std::unique_ptr
<FSMap
> fsmap
;
1227 std::unique_ptr
<FSMapUser
> fsmap_user
;
1229 // MDS command state
1230 CommandTable
<MDSCommandOp
> command_table
;
// NOTE(review): _use_faked_inos and fscid have no in-class initializer --
// confirm the constructor sets them.
1232 bool _use_faked_inos
;
1235 fs_cluster_id_t fscid
;
1237 // file handles, etc.
1238 interval_set
<int> free_fd_set
; // unused fds
// fd -> open file handle; get_filehandle() looks fds up here.
1239 ceph::unordered_map
<int, Fh
*> fd_map
;
1240 set
<Fh
*> ll_unclosed_fh_set
;
1241 ceph::unordered_set
<dir_result_t
*> opened_dirs
;
// Lifecycle / state flags, all false initially.
1243 bool initialized
= false;
1244 bool mounted
= false;
1245 bool unmounting
= false;
1246 bool blacklisted
= false;
// Inode tables: vinodeno_t -> Inode*, plus the faked-ino mapping
// (ino_t -> vinodeno_t) and the set of free faked numbers.
1248 ceph::unordered_map
<vinodeno_t
, Inode
*> inode_map
;
1249 ceph::unordered_map
<ino_t
, vinodeno_t
> faked_ino_map
;
1250 interval_set
<ino_t
> free_faked_inos
;
// NOTE(review): no in-class initializer on the two counters below --
// confirm they are set before first use.
1251 ino_t last_used_faked_ino
;
1252 ino_t last_used_faked_root
;
1254 // When an MDS has sent us a REJECT, remember that and don't
1255 // contact it again. Remember which inst rejected us, so that
1256 // when we talk to another inst with the same rank we can
1258 std::map
<mds_rank_t
, entity_addrvec_t
> rejected_by_mds
;
// Starts as -ENXIO.
// NOTE(review): presumably meaning "no local OSD known" -- confirm.
1260 int local_osd
= -ENXIO
;
1261 epoch_t local_osd_epoch
= 0;
1263 int unsafe_sync_write
= 0;
// MDS request bookkeeping: tid counters and the table of outstanding
// requests keyed by tid.
1266 ceph_tid_t last_tid
= 0;
1267 ceph_tid_t oldest_tid
= 0; // oldest incomplete mds request, excluding setfilelock requests
1268 map
<ceph_tid_t
, MetaRequest
*> mds_requests
;
// Starts at 1, unlike last_tid above.
1271 ceph_tid_t last_flush_tid
= 1;
1273 // dirty_list keeps all the dirty inodes before flushing.
1274 xlist
<Inode
*> delayed_list
, dirty_list
;
1275 int num_flushing_caps
= 0;
// Known snap realms keyed by inodeno_t, and the string key/value metadata
// map.
1276 ceph::unordered_map
<inodeno_t
,SnapRealm
*> snap_realms
;
1277 std::map
<std::string
, std::string
> metadata
;
1282 Cond mount_cond
, sync_cond
;
// Pool permissions keyed by (int64_t, std::string), with an int value.
// NOTE(review): presumably (pool id, namespace) -> permission bits; confirm
// against check_pool_perm().
1284 std::map
<std::pair
<int64_t,std::string
>, int> pool_perms
;
1285 list
<Cond
*> waiting_for_pool_perm
;
1287 uint64_t retries_on_invalidate
= 0;
// Reclaim state: waiters, last error, OSD epoch and target addresses.
1290 list
<Cond
*> waiting_for_reclaim
;
1291 int reclaim_errno
= 0;
1292 epoch_t reclaim_osd_epoch
= 0;
1293 entity_addrvec_t reclaim_target_addrs
;
1297 * Specialization of Client that manages its own Objecter instance
1298 * and handles init/shutdown of messenger/monclient
1300 class StandaloneClient
: public Client
1303 StandaloneClient(Messenger
*m
, MonClient
*mc
);
1305 ~StandaloneClient() override
;
1307 int init() override
;
1308 void shutdown() override
;