#include "common/CommandTable.h"
#include "common/Finisher.h"
-#include "common/Mutex.h"
#include "common/Timer.h"
+#include "common/ceph_mutex.h"
#include "common/cmdparse.h"
#include "common/compiler_extensions.h"
-#include "include/cephfs/ceph_statx.h"
+#include "include/common_fwd.h"
+#include "include/cephfs/ceph_ll_client.h"
#include "include/filepath.h"
#include "include/interval_set.h"
#include "include/lru.h"
#include "include/types.h"
#include "include/unordered_map.h"
#include "include/unordered_set.h"
+#include "include/cephfs/metrics/Types.h"
#include "mds/mdstypes.h"
#include "msg/Dispatcher.h"
#include "msg/MessageRef.h"
#include "msg/Messenger.h"
#include "osdc/ObjectCacher.h"
+#include "RWRef.h"
#include "InodeRef.h"
#include "MetaSession.h"
#include "UserPerm.h"
#include <memory>
#include <set>
#include <string>
+#include <thread>
using std::set;
using std::map;
class FSMapUser;
class MonClient;
-class CephContext;
struct DirStat;
struct LeaseStat;
class Objecter;
class WritebackHandler;
-class PerfCounters;
class MDSMap;
class Message;
+class destructive_lock_ref_t;
enum {  // Client perf-counter indices; NOTE(review): presumably used with the PerfCounters `logger` member -- confirm
l_c_first = 20000,  // first value of this index range
l_c_wrlat,
l_c_read,
l_c_fsync,
+ l_c_md_avg,  // md_*: presumably metadata-op latency average / sum of squares / op count -- confirm at registration site
+ l_c_md_sqsum,
+ l_c_md_ops,
+ l_c_rd_avg,  // rd_*: presumably the same three statistics for reads -- confirm
+ l_c_rd_sqsum,
+ l_c_rd_ops,
+ l_c_wr_avg,  // wr_*: presumably the same three statistics for writes -- confirm
+ l_c_wr_sqsum,
+ l_c_wr_ops,
l_c_last,  // end marker; new counters are inserted above this line
};
/* getdir result: an entry name plus a copy of its stat data and a mask value */
struct DirEntry {
- explicit DirEntry(const string &s) : d_name(s), stmask(0) {}
- DirEntry(const string &n, struct stat& s, int stm) : d_name(n), st(s), stmask(stm) {}
+ explicit DirEntry(const std::string &s) : d_name(s), stmask(0) {}  // name-only form: stmask is 0 and st is NOT initialized here
+ DirEntry(const std::string &n, struct stat& s, int stm)
+ : d_name(n), st(s), stmask(stm) {}  // copies the caller's stat buffer into st
- string d_name;
+ std::string d_name;
struct stat st;  // only meaningful when set through the three-argument constructor
int stmask;  // NOTE(review): presumably says which fields of st are valid -- confirm against the getdir implementation
};
struct MetaRequest;
class ceph_lock_state_t;
-
-typedef void (*client_ino_callback_t)(void *handle, vinodeno_t ino, int64_t off, int64_t len);
-
-typedef void (*client_dentry_callback_t)(void *handle, vinodeno_t dirino,
- vinodeno_t ino, string& name);
-typedef int (*client_remount_callback_t)(void *handle);
-
-typedef void(*client_switch_interrupt_callback_t)(void *handle, void *data);
-typedef mode_t (*client_umask_callback_t)(void *handle);
-
-/* Callback for delegation recalls */
-typedef void (*ceph_deleg_cb_t)(Fh *fh, void *priv);
-
-struct client_callback_args {
- void *handle;
- client_ino_callback_t ino_cb;
- client_dentry_callback_t dentry_cb;
- client_switch_interrupt_callback_t switch_intr_cb;
- client_remount_callback_t remount_cb;
- client_umask_callback_t umask_cb;
-};
-
// ========================================================
// client interface
struct dentry {  // element type of the std::vector<dentry> buffer declared further down in this header
int64_t offset;
- string name;
+ std::string name;
+ std::string alternate_name;
InodeRef inode;
explicit dentry(int64_t o) : offset(o) {}  // offset-only form: name, alternate_name and inode stay default-constructed
- dentry(int64_t o, const string& n, const InodeRef& in) :
- offset(o), name(n), inode(in) {}
+ dentry(int64_t o, std::string n, std::string an, InodeRef in) :
+ offset(o), name(std::move(n)), alternate_name(std::move(an)), inode(std::move(in)) {}  // by-value arguments are moved into the members
};
struct dentry_off_lt {
bool operator()(const dentry& d, int64_t off) const {
// ((frag value) << 28) | (the nth entry in frag);
unsigned next_offset; // offset of next chunk (last_name's + 1)
- string last_name; // last entry in previous chunk
+ std::string last_name; // last entry in previous chunk
uint64_t release_count;
uint64_t ordered_count;
frag_t buffer_frag;
- vector<dentry> buffer;
+ std::vector<dentry> buffer;
+ struct dirent de;
};
class Client : public Dispatcher, public md_config_obs_t {
friend class C_Client_Remount;
friend class C_Client_RequestInterrupt;
friend class C_Deleg_Timeout; // Asserts on client_lock, called when a delegation is unreturned
+ friend class C_Client_CacheRelease; // Asserts on client_lock
friend class SyntheticClient;
friend void intrusive_ptr_release(Inode *in);
+ template <typename T> friend struct RWRefState;
+ template <typename T> friend class RWRef;
using Dispatcher::cct;
+ using clock = ceph::coarse_mono_clock;
typedef int (*add_dirent_cb_t)(void *p, struct dirent *de, struct ceph_statx *stx, off_t off, Inode *in);
+ struct walk_dentry_result {  // passed by pointer as `result` to walk() and path_walk()
+ InodeRef in;  // NOTE(review): presumably the inode the walk ended on -- confirm
+ std::string alternate_name;  // NOTE(review): presumably the alternate name of the final dentry -- confirm
+ };
+
class CommandHook : public AdminSocketHook {  // AdminSocketHook subclass that keeps a pointer to a Client
public:
explicit CommandHook(Client *client);
- bool call(std::string_view command, const cmdmap_t& cmdmap,
- std::string_view format, bufferlist& out) override;
+ int call(std::string_view command, const cmdmap_t& cmdmap,
+ Formatter *f,
+ std::ostream& errss,
+ bufferlist& out) override;  // new signature: int result, Formatter and error stream instead of a format string
private:
Client *m_client;  // NOTE(review): presumably the constructor argument; ownership is not visible in this header -- confirm
};
+ // Snapshot info returned via get_snap_info(). This has nothing to do
+ // with the SnapInfo type on the MDS.
+ struct SnapInfo {
+ snapid_t id;
+ std::map<std::string, std::string> metadata;  // string key/value pairs; same map type that mksnap() accepts
+ };
+
Client(Messenger *m, MonClient *mc, Objecter *objecter_);
Client(const Client&) = delete;
Client(const Client&&) = delete;
int mount(const std::string &mount_root, const UserPerm& perms,
bool require_mds=false, const std::string &fs_name="");
void unmount();
+ bool is_unmounting() const {  // asks mount_state whether its current state is CLIENT_UNMOUNTING
+ return mount_state.check_current_state(CLIENT_UNMOUNTING);
+ }
+ bool is_mounted() const {  // asks mount_state whether its current state is CLIENT_MOUNTED
+ return mount_state.check_current_state(CLIENT_MOUNTED);
+ }
+ bool is_mounting() const {  // asks mount_state whether its current state is CLIENT_MOUNTING
+ return mount_state.check_current_state(CLIENT_MOUNTING);
+ }
+ bool is_initialized() const {  // asks initialize_state (not mount_state) whether its current state is CLIENT_INITIALIZED
+ return initialize_state.check_current_state(CLIENT_INITIALIZED);
+ }
void abort_conn();
void set_uuid(const std::string& uuid);
// namespace ops
int opendir(const char *name, dir_result_t **dirpp, const UserPerm& perms);
+ int fdopendir(int dirfd, dir_result_t **dirpp, const UserPerm& perms);
int closedir(dir_result_t *dirp);
/**
* If @a cb returns a negative error code, stop and return that.
*/
int readdir_r_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
- unsigned want=0, unsigned flags=AT_NO_ATTR_SYNC,
+ unsigned want=0, unsigned flags=AT_STATX_DONT_SYNC,
bool getref=false);
struct dirent * readdir(dir_result_t *d);
int readdir_r(dir_result_t *dirp, struct dirent *de);
int readdirplus_r(dir_result_t *dirp, struct dirent *de, struct ceph_statx *stx, unsigned want, unsigned flags, Inode **out);
- int getdir(const char *relpath, list<string>& names,
+ int getdir(const char *relpath, std::list<std::string>& names,
const UserPerm& perms); // get the whole dir at once.
/**
* Returns the length of the buffer that got filled in, or -errno.
- * If it returns -ERANGE you just need to increase the size of the
+ * If it returns -CEPHFS_ERANGE you just need to increase the size of the
* buffer and try again.
*/
int _getdents(dir_result_t *dirp, char *buf, int buflen, bool ful); // get a bunch of dentries at once
loff_t telldir(dir_result_t *dirp);
void seekdir(dir_result_t *dirp, loff_t offset);
- int link(const char *existing, const char *newname, const UserPerm& perm);
+ int may_delete(const char *relpath, const UserPerm& perms);
+ int link(const char *existing, const char *newname, const UserPerm& perm, std::string alternate_name="");
int unlink(const char *path, const UserPerm& perm);
- int rename(const char *from, const char *to, const UserPerm& perm);
+ int unlinkat(int dirfd, const char *relpath, int flags, const UserPerm& perm);
+ int rename(const char *from, const char *to, const UserPerm& perm, std::string alternate_name="");
// dirs
- int mkdir(const char *path, mode_t mode, const UserPerm& perm);
+ int mkdir(const char *path, mode_t mode, const UserPerm& perm, std::string alternate_name="");
+ int mkdirat(int dirfd, const char *relpath, mode_t mode, const UserPerm& perm,
+ std::string alternate_name="");
int mkdirs(const char *path, mode_t mode, const UserPerm& perms);
int rmdir(const char *path, const UserPerm& perms);
// symlinks
int readlink(const char *path, char *buf, loff_t size, const UserPerm& perms);
+ int readlinkat(int dirfd, const char *relpath, char *buf, loff_t size, const UserPerm& perms);
+
+ int symlink(const char *existing, const char *newname, const UserPerm& perms, std::string alternate_name="");
+ int symlinkat(const char *target, int dirfd, const char *relpath, const UserPerm& perms,
+ std::string alternate_name="");
- int symlink(const char *existing, const char *newname, const UserPerm& perms);
+ // path traversal for high-level interface
+ int walk(std::string_view path, struct walk_dentry_result* result, const UserPerm& perms, bool followsym=true);
// inode stuff
unsigned statx_to_mask(unsigned int flags, unsigned int want);
int fsetattrx(int fd, struct ceph_statx *stx, int mask, const UserPerm& perms);
int chmod(const char *path, mode_t mode, const UserPerm& perms);
int fchmod(int fd, mode_t mode, const UserPerm& perms);
+ int chmodat(int dirfd, const char *relpath, mode_t mode, int flags, const UserPerm& perms);
int lchmod(const char *path, mode_t mode, const UserPerm& perms);
int chown(const char *path, uid_t new_uid, gid_t new_gid,
const UserPerm& perms);
int fchown(int fd, uid_t new_uid, gid_t new_gid, const UserPerm& perms);
int lchown(const char *path, uid_t new_uid, gid_t new_gid,
const UserPerm& perms);
+ int chownat(int dirfd, const char *relpath, uid_t new_uid, gid_t new_gid,
+ int flags, const UserPerm& perms);
int utime(const char *path, struct utimbuf *buf, const UserPerm& perms);
int lutime(const char *path, struct utimbuf *buf, const UserPerm& perms);
int futime(int fd, struct utimbuf *buf, const UserPerm& perms);
int lutimes(const char *relpath, struct timeval times[2], const UserPerm& perms);
int futimes(int fd, struct timeval times[2], const UserPerm& perms);
int futimens(int fd, struct timespec times[2], const UserPerm& perms);
+ int utimensat(int dirfd, const char *relpath, struct timespec times[2], int flags,
+ const UserPerm& perms);
int flock(int fd, int operation, uint64_t owner);
int truncate(const char *path, loff_t size, const UserPerm& perms);
// file ops
int mknod(const char *path, mode_t mode, const UserPerm& perms, dev_t rdev=0);
- int open(const char *path, int flags, const UserPerm& perms, mode_t mode=0);
+
+ int create_and_open(int dirfd, const char *relpath, int flags, const UserPerm& perms,
+ mode_t mode, int stripe_unit, int stripe_count, int object_size,
+ const char *data_pool, std::string alternate_name);
+ // Convenience overload: forwards to the full open() with stripe_unit,
+ // stripe_count and object_size all 0 and a null data_pool.
+ // alternate_name is a by-value sink parameter, so it is moved (not copied)
+ // into the callee's own by-value parameter.
+ int open(const char *path, int flags, const UserPerm& perms, mode_t mode=0, std::string alternate_name="") {
+ return open(path, flags, perms, mode, 0, 0, 0, nullptr, std::move(alternate_name));
+ }
int open(const char *path, int flags, const UserPerm& perms,
mode_t mode, int stripe_unit, int stripe_count, int object_size,
- const char *data_pool);
+ const char *data_pool, std::string alternate_name="");
+ int openat(int dirfd, const char *relpath, int flags, const UserPerm& perms,
+ mode_t mode, int stripe_unit, int stripe_count,
+ int object_size, const char *data_pool, std::string alternate_name);
+ // Convenience overload: forwards to the full openat() with stripe_unit,
+ // stripe_count and object_size all 0 and a null data_pool.
+ // alternate_name is a by-value sink parameter, so it is moved (not copied)
+ // into the callee's own by-value parameter.
+ int openat(int dirfd, const char *path, int flags, const UserPerm& perms, mode_t mode=0,
+ std::string alternate_name="") {
+ return openat(dirfd, path, flags, perms, mode, 0, 0, 0, nullptr, std::move(alternate_name));
+ }
+
int lookup_hash(inodeno_t ino, inodeno_t dirino, const char *name,
const UserPerm& perms);
int lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
- int lookup_parent(Inode *in, const UserPerm& perms, Inode **parent=NULL);
int lookup_name(Inode *in, Inode *parent, const UserPerm& perms);
+ int _close(int fd);
int close(int fd);
loff_t lseek(int fd, loff_t offset, int whence);
int read(int fd, char *buf, loff_t size, loff_t offset=-1);
int mask=CEPH_STAT_CAP_INODE_ALL);
int fstatx(int fd, struct ceph_statx *stx, const UserPerm& perms,
unsigned int want, unsigned int flags);
+ int statxat(int dirfd, const char *relpath,
+ struct ceph_statx *stx, const UserPerm& perms,
+ unsigned int want, unsigned int flags);
int fallocate(int fd, int mode, loff_t offset, loff_t length);
// full path xattr ops
int sync_fs();
int64_t drop_caches();
+ int get_snap_info(const char *path, const UserPerm &perms, SnapInfo *snap_info);
+
// hpc lazyio
int lazyio(int fd, int enable);
- int lazyio_propogate(int fd, loff_t offset, size_t count);
+ int lazyio_propagate(int fd, loff_t offset, size_t count);
int lazyio_synchronize(int fd, loff_t offset, size_t count);
// expose file layout
int describe_layout(const char *path, file_layout_t* layout,
const UserPerm& perms);
int fdescribe_layout(int fd, file_layout_t* layout);
- int get_file_stripe_address(int fd, loff_t offset, vector<entity_addr_t>& address);
- int get_file_extent_osds(int fd, loff_t off, loff_t *len, vector<int>& osds);
+ int get_file_stripe_address(int fd, loff_t offset, std::vector<entity_addr_t>& address);
+ int get_file_extent_osds(int fd, loff_t off, loff_t *len, std::vector<int>& osds);
int get_osd_addr(int osd, entity_addr_t& addr);
// expose mdsmap
int get_local_osd();
int get_pool_replication(int64_t pool);
int64_t get_pool_id(const char *pool_name);
- string get_pool_name(int64_t pool);
- int get_osd_crush_location(int id, vector<pair<string, string> >& path);
+ std::string get_pool_name(int64_t pool);
+ int get_osd_crush_location(int id, std::vector<std::pair<std::string, std::string> >& path);
- int enumerate_layout(int fd, vector<ObjectExtent>& result,
+ int enumerate_layout(int fd, std::vector<ObjectExtent>& result,
loff_t length, loff_t offset);
- int mksnap(const char *path, const char *name, const UserPerm& perm);
- int rmsnap(const char *path, const char *name, const UserPerm& perm);
+ int mksnap(const char *path, const char *name, const UserPerm& perm,
+ mode_t mode=0, const std::map<std::string, std::string> &metadata={});
+ int rmsnap(const char *path, const char *name, const UserPerm& perm, bool check_perms=false);
// Inode permission checking
int inode_permission(Inode *in, const UserPerm& perms, unsigned want);
int ll_lookup(Inode *parent, const char *name, struct stat *attr,
Inode **out, const UserPerm& perms);
int ll_lookup_inode(struct inodeno_t ino, const UserPerm& perms, Inode **inode);
+ int ll_lookup_vino(vinodeno_t vino, const UserPerm& perms, Inode **inode);
int ll_lookupx(Inode *parent, const char *name, Inode **out,
struct ceph_statx *stx, unsigned want, unsigned flags,
const UserPerm& perms);
- bool ll_forget(Inode *in, int count);
+ bool ll_forget(Inode *in, uint64_t count);
bool ll_put(Inode *in);
int ll_get_snap_ref(snapid_t snap);
int ll_osdaddr(int osd, uint32_t *addr);
int ll_osdaddr(int osd, char* buf, size_t size);
- void ll_register_callbacks(struct client_callback_args *args);
- int test_dentry_handling(bool can_invalidate);
+ void _ll_register_callbacks(struct ceph_client_callback_args *args);
+ void ll_register_callbacks(struct ceph_client_callback_args *args); // deprecated
+ int ll_register_callbacks2(struct ceph_client_callback_args *args);
+ std::pair<int, bool> test_dentry_handling(bool can_invalidate);
const char** get_tracked_conf_keys() const override;
void handle_conf_change(const ConfigProxy& conf,
int ll_delegation(Fh *fh, unsigned cmd, ceph_deleg_cb_t cb, void *priv);
entity_name_t get_myname() { return messenger->get_myname(); }  // forwards to the messenger's get_myname()
- void wait_on_list(list<Cond*>& ls);
- void signal_cond_list(list<Cond*>& ls);
+ void wait_on_list(std::list<ceph::condition_variable*>& ls);
+ void signal_cond_list(std::list<ceph::condition_variable*>& ls);
void set_filer_flags(int flags);
void clear_filer_flags(int flags);
virtual void shutdown();
// messaging
+ void cancel_commands(const MDSMap& newmap);
void handle_mds_map(const MConstRef<MMDSMap>& m);
void handle_fs_map(const MConstRef<MFSMap>& m);
void handle_fs_map_user(const MConstRef<MFSMapUser>& m);
inodeno_t realm, int flags, const UserPerm& perms);
void remove_cap(Cap *cap, bool queue_release);
void remove_all_caps(Inode *in);
- void remove_session_caps(MetaSession *session);
+ void remove_session_caps(MetaSession *session, int err);
int mark_caps_flushing(Inode *in, ceph_tid_t *ptid);
void adjust_session_flushing_caps(Inode *in, MetaSession *old_s, MetaSession *new_s);
void flush_caps_sync();
- void flush_caps(Inode *in, MetaSession *session, bool sync=false);
+ void kick_flushing_caps(Inode *in, MetaSession *session);
void kick_flushing_caps(MetaSession *session);
void early_kick_flushing_caps(MetaSession *session);
- int get_caps(Inode *in, int need, int want, int *have, loff_t endoff);
+ int get_caps(Fh *fh, int need, int want, int *have, loff_t endoff);
int get_caps_used(Inode *in);
void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created, snapid_t snap_highwater,
- vector<snapid_t>& snaps);
+ std::vector<snapid_t>& snaps);
void handle_quota(const MConstRef<MClientQuota>& m);
void handle_snap(const MConstRef<MClientSnap>& m);
void handle_cap_flushsnap_ack(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
void handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, const MConstRef<MClientCaps>& m);
void cap_delay_requeue(Inode *in);
- void send_cap(Inode *in, MetaSession *session, Cap *cap, bool sync,
+
+ void send_cap(Inode *in, MetaSession *session, Cap *cap, int flags,
int used, int want, int retain, int flush,
ceph_tid_t flush_tid);
+ void send_flush_snap(Inode *in, MetaSession *session, snapid_t follows, CapSnap& capsnap);
+
+ void flush_snaps(Inode *in);
void get_cap_ref(Inode *in, int cap);
void put_cap_ref(Inode *in, int cap);
- void flush_snaps(Inode *in, bool all_again=false);
void wait_sync_caps(Inode *in, ceph_tid_t want);
void wait_sync_caps(ceph_tid_t want);
void queue_cap_snap(Inode *in, SnapContext &old_snapc);
void finish_cap_snap(Inode *in, CapSnap &capsnap, int used);
- void _flushed_cap_snap(Inode *in, snapid_t seq);
void _schedule_invalidate_dentry_callback(Dentry *dn, bool del);
- void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name);
+ void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, std::string& name);
void _try_to_trim_inode(Inode *in, bool sched_inval);
void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len);
void _invalidate_inode_cache(Inode *in);
void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len);
void _async_invalidate(vinodeno_t ino, int64_t off, int64_t len);
+
+ void _schedule_ino_release_callback(Inode *in);
+ void _async_inode_release(vinodeno_t ino);
+
bool _release(Inode *in);
/**
void unlock_fh_pos(Fh *f);
// metadata cache
- void update_dir_dist(Inode *in, DirStat *st);
+ void update_dir_dist(Inode *in, DirStat *st, mds_rank_t from);
void clear_dir_complete_and_ordered(Inode *diri, bool complete);
void insert_readdir_results(MetaRequest *request, MetaSession *session, Inode *diri);
Inode *add_update_inode(InodeStat *st, utime_t ttl, MetaSession *session,
const UserPerm& request_perms);
- Dentry *insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dlease,
+ Dentry *insert_dentry_inode(Dir *dir, const std::string& dname, LeaseStat *dlease,
Inode *in, utime_t from, MetaSession *session,
Dentry *old_dentry = NULL);
void update_dentry_lease(Dentry *dn, LeaseStat *dlease, utime_t from, MetaSession *session);
vinodeno_t map_faked_ino(ino_t ino);
//notify the mds to flush the mdlog
+ void flush_mdlog_sync(Inode *in);
void flush_mdlog_sync();
void flush_mdlog(MetaSession *session);
void renew_caps();
void renew_caps(MetaSession *session);
void flush_cap_releases();
+ void renew_and_flush_cap_releases();
void tick();
+ void start_tick_thread();
+
+ void update_read_io_size(size_t size) {  // counts one read op and adds `size` to the running read total
+ total_read_ops++;
+ total_read_size += size;  // NOTE(review): counter declarations are not visible here; atomicity depends on their types
+ }
+
+ void update_write_io_size(size_t size) {  // write-side counterpart: one more write op, `size` added to the write total
+ total_write_ops++;
+ total_write_size += size;
+ }
+
+ void inc_dentry_nr() {  // bumps the dentry_nr counter by one
+ ++dentry_nr;
+ }
+ void dec_dentry_nr() {  // lowers the dentry_nr counter by one
+ --dentry_nr;
+ }
+ void dlease_hit() {  // records one hit in dlease_hits
+ ++dlease_hits;
+ }
+ void dlease_miss() {  // records one miss in dlease_misses
+ ++dlease_misses;
+ }
+ std::tuple<uint64_t, uint64_t, uint64_t> get_dlease_hit_rates() {  // despite the name, returns raw counters: (dlease_hits, dlease_misses, dentry_nr)
+ return std::make_tuple(dlease_hits, dlease_misses, dentry_nr);
+ }
+
+ void cap_hit() {  // records one hit in cap_hits
+ ++cap_hits;
+ }
+ void cap_miss() {  // records one miss in cap_misses
+ ++cap_misses;
+ }
+ std::pair<uint64_t, uint64_t> get_cap_hit_rates() {  // despite the name, returns raw counters: (cap_hits, cap_misses)
+ return std::make_pair(cap_hits, cap_misses);
+ }
+
+ void inc_opened_files() {  // bumps the opened_files counter by one
+ ++opened_files;
+ }
+ void dec_opened_files() {  // lowers the opened_files counter by one
+ --opened_files;
+ }
+ std::pair<uint64_t, uint64_t> get_opened_files_rates() {  // returns (opened_files, current inode_map size); no ratio is computed here
+ return std::make_pair(opened_files, inode_map.size());
+ }
- xlist<Inode*> &get_dirty_list() { return dirty_list; }
+ void inc_pinned_icaps() {  // bumps the pinned_icaps counter by one
+ ++pinned_icaps;
+ }
+ void dec_pinned_icaps(uint64_t nr=1) {  // subtracts `nr` (default 1); no underflow check is performed here
+ pinned_icaps -= nr;
+ }
+ std::pair<uint64_t, uint64_t> get_pinned_icaps_rates() {  // returns (pinned_icaps, current inode_map size); no ratio is computed here
+ return std::make_pair(pinned_icaps, inode_map.size());
+ }
+ void inc_opened_inodes() {  // bumps the opened_inodes counter by one
+ ++opened_inodes;
+ }
+ void dec_opened_inodes() {  // lowers the opened_inodes counter by one
+ --opened_inodes;
+ }
+ std::pair<uint64_t, uint64_t> get_opened_inodes_rates() {  // returns (opened_inodes, current inode_map size); no ratio is computed here
+ return std::make_pair(opened_inodes, inode_map.size());
+ }
+
+ /* timer_lock for 'timer' */
+ ceph::mutex timer_lock = ceph::make_mutex("Client::timer_lock");
SafeTimer timer;
+ /* tick thread */
+ std::thread upkeeper;
+ ceph::condition_variable upkeep_cond;
+ bool tick_thread_stopped = false;
+
std::unique_ptr<PerfCounters> logger;
std::unique_ptr<MDSMap> mdsmap;
+ bool fuse_default_permissions;
+ bool _collect_and_send_global_metrics;
protected:
/* Flags for check_caps() */
static const unsigned CHECK_CAPS_NODELAY = 0x1;
static const unsigned CHECK_CAPS_SYNCHRONOUS = 0x2;
-
- bool is_initialized() const { return initialized; }
-
void check_caps(Inode *in, unsigned flags);
void set_cap_epoch_barrier(epoch_t e);
void get_session_metadata(std::map<std::string, std::string> *meta) const;
bool have_open_session(mds_rank_t mds);
void got_mds_push(MetaSession *s);
- MetaSession *_get_mds_session(mds_rank_t mds, Connection *con); ///< return session for mds *and* con; null otherwise
- MetaSession *_get_or_open_mds_session(mds_rank_t mds);
- MetaSession *_open_mds_session(mds_rank_t mds);
+ MetaSessionRef _get_mds_session(mds_rank_t mds, Connection *con); ///< return session for mds *and* con; null otherwise
+ MetaSessionRef _get_or_open_mds_session(mds_rank_t mds);
+ MetaSessionRef _open_mds_session(mds_rank_t mds);
void _close_mds_session(MetaSession *s);
- void _closed_mds_session(MetaSession *s);
+ void _closed_mds_session(MetaSession *s, int err=0, bool rejected=false);
bool _any_stale_sessions() const;
void _kick_stale_sessions();
void handle_client_session(const MConstRef<MClientSession>& m);
void resend_unsafe_requests(MetaSession *s);
void wait_unsafe_requests();
- void _sync_write_commit(Inode *in);
-
void dump_mds_requests(Formatter *f);
- void dump_mds_sessions(Formatter *f);
+ void dump_mds_sessions(Formatter *f, bool cap_dump=false);
int make_request(MetaRequest *req, const UserPerm& perms,
InodeRef *ptarget = 0, bool *pcreated = 0,
void put_request(MetaRequest *request);
void unregister_request(MetaRequest *request);
- int verify_reply_trace(int r, MetaRequest *request, const MConstRef<MClientReply>& reply,
+ int verify_reply_trace(int r, MetaSession *session, MetaRequest *request,
+ const MConstRef<MClientReply>& reply,
InodeRef *ptarget, bool *pcreated,
const UserPerm& perms);
void encode_cap_releases(MetaRequest *request, mds_rank_t mds);
void handle_client_reply(const MConstRef<MClientReply>& reply);
bool is_dir_operation(MetaRequest *request);
+ int path_walk(const filepath& fp, struct walk_dentry_result* result, const UserPerm& perms, bool followsym=true, int mask=0,
+ InodeRef dirinode=nullptr);
+ int path_walk(const filepath& fp, InodeRef *end, const UserPerm& perms,
+ bool followsym=true, int mask=0, InodeRef dirinode=nullptr);
+
// fake inode number for 32-bits ino_t
void _assign_faked_ino(Inode *in);
void _assign_faked_root(Inode *in);
* Resolve file descriptor, or return NULL.
*/
Fh *get_filehandle(int fd) {  // looks `fd` up in fd_map
- ceph::unordered_map<int, Fh*>::iterator p = fd_map.find(fd);
- if (p == fd_map.end())
+ auto it = fd_map.find(fd);
+ if (it == fd_map.end())
return NULL;  // fd has no entry in fd_map
- return p->second;
+ return it->second;  // the Fh pointer stored for this fd
}
+ int get_fd_inode(int fd, InodeRef *in);
// helpers
void wake_up_session_caps(MetaSession *s, bool reconnect);
- void wait_on_context_list(list<Context*>& ls);
- void signal_context_list(list<Context*>& ls);
+ void wait_on_context_list(std::list<Context*>& ls);
+ void signal_context_list(std::list<Context*>& ls);
// -- metadata cache stuff
// decrease inode ref. delete if dangling.
+ void _put_inode(Inode *in, int n);
+ void delay_put_inodes(bool wakeup=false);
void put_inode(Inode *in, int n=1);
void close_dir(Dir *dir);
* leave dn set to default NULL unless you're trying to add
* a new inode to a pre-created Dentry
*/
- Dentry* link(Dir *dir, const string& name, Inode *in, Dentry *dn);
+ Dentry* link(Dir *dir, const std::string& name, Inode *in, Dentry *dn);
void unlink(Dentry *dn, bool keepdir, bool keepdentry);
- // path traversal for high-level interface
- int path_walk(const filepath& fp, InodeRef *end, const UserPerm& perms,
- bool followsym=true, int mask=0);
-
int fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0);
int fill_stat(InodeRef& in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0) {
return fill_stat(in.get(), st, dirstat, rstat);
bool ms_handle_reset(Connection *con) override;
void ms_handle_remote_reset(Connection *con) override;
bool ms_handle_refused(Connection *con) override;
- bool ms_get_authorizer(int dest_type, AuthAuthorizer **authorizer) override;
int authenticate();
void _close_sessions();
+ void _pre_init();
+
/**
* The basic housekeeping parts of init (perf counters, admin socket)
* that is independent of how objecters/monclient/messengers are
// global client lock
// - protects Client and buffer cache both!
- Mutex client_lock;
+ ceph::mutex client_lock = ceph::make_mutex("Client::client_lock");
std::map<snapid_t, int> ll_snap_ref;
- Inode* root = nullptr;
+ InodeRef root = nullptr;
map<Inode*, InodeRef> root_parents;
Inode* root_ancestor = nullptr;
LRU lru; // lru list of Dentry's in our local metadata cache.
client_t whoami;
+ /* The state migration mechanism */
+ enum _state {
+ /* For the initialize_state (declared first; initialize_state_t compares against CLIENT_INITIALIZED with >=) */
+ CLIENT_NEW, // The initial state for the initialize_state or after Client::shutdown()
+ CLIENT_INITIALIZING, // At the beginning of the Client::init()
+ CLIENT_INITIALIZED, // At the end of Client::init()
+
+ /* For the mount_state */
+ CLIENT_UNMOUNTED, // The initial state for the mount_state or after unmounted
+ CLIENT_MOUNTING, // At the beginning of Client::mount()
+ CLIENT_MOUNTED, // At the end of Client::mount()
+ CLIENT_UNMOUNTING, // At the beginning of the Client::_unmount()
+ };
+
+ typedef enum _state state_t;
+ using RWRef_t = RWRef<state_t>;
+
+ struct mount_state_t : public RWRefState<state_t> {  // RWRefState subclass holding the rules for the mount-related states
+ public:
+ bool is_valid_state(state_t state) const override {  // true only for the four mount-related enumerators
+ switch (state) {
+ case Client::CLIENT_MOUNTING:
+ case Client::CLIENT_MOUNTED:
+ case Client::CLIENT_UNMOUNTING:
+ case Client::CLIENT_UNMOUNTED:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ int check_reader_state(state_t require) const override {  // int return carrying true/false; only `require == CLIENT_MOUNTING` can succeed
+ if (require == Client::CLIENT_MOUNTING &&
+ (state == Client::CLIENT_MOUNTING || state == Client::CLIENT_MOUNTED))  // `state` is not declared in this struct; presumably the RWRefState member
+ return true;
+ else
+ return false;
+ }
+
+ /*
+ * The state migration check. Accepted (require, state) pairs:
+ * (MOUNTING, UNMOUNTED), (MOUNTED, MOUNTING), (UNMOUNTING, MOUNTED),
+ * (UNMOUNTED, UNMOUNTING). Reading `require` as the target and `state`
+ * as the current value, that is the cycle
+ * UNMOUNTED -> MOUNTING -> MOUNTED -> UNMOUNTING -> UNMOUNTED.
+ * Every other pair returns false.
+ */
+ int check_writer_state(state_t require) const override {
+ if (require == Client::CLIENT_MOUNTING &&
+ state == Client::CLIENT_UNMOUNTED)
+ return true;
+ else if (require == Client::CLIENT_MOUNTED &&
+ state == Client::CLIENT_MOUNTING)
+ return true;
+ else if (require == Client::CLIENT_UNMOUNTING &&
+ state == Client::CLIENT_MOUNTED)
+ return true;
+ else if (require == Client::CLIENT_UNMOUNTED &&
+ state == Client::CLIENT_UNMOUNTING)
+ return true;
+ else
+ return false;
+ }
+
+ mount_state_t(state_t state, const char *lockname, uint64_t reader_cnt=0)
+ : RWRefState (state, lockname, reader_cnt) {}  // forwards all three arguments to the RWRefState base
+ ~mount_state_t() {}
+ };
+
+ struct initialize_state_t : public RWRefState<state_t> {  // RWRefState subclass holding the rules for the initialization states
+ public:
+ bool is_valid_state(state_t state) const override {  // true only for CLIENT_NEW, CLIENT_INITIALIZING and CLIENT_INITIALIZED
+ switch (state) {
+ case Client::CLIENT_NEW:
+ case Client::CLIENT_INITIALIZING:
+ case Client::CLIENT_INITIALIZED:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ int check_reader_state(state_t require) const override {  // int return carrying true/false; only `require == CLIENT_INITIALIZED` can succeed
+ if (require == Client::CLIENT_INITIALIZED &&
+ state >= Client::CLIENT_INITIALIZED)  // NOTE(review): >= also matches the mount enumerators declared later; is_valid_state() rejects those, so presumably this acts like == -- confirm
+ return true;
+ else
+ return false;
+ }
+
+ /*
+ * The state migration check. Accepted (require, state) pairs:
+ * (INITIALIZING, NEW), (INITIALIZED, INITIALIZING), (NEW, INITIALIZED).
+ * Reading `require` as the target and `state` as the current value,
+ * that is the cycle NEW -> INITIALIZING -> INITIALIZED -> NEW.
+ * Every other pair returns false.
+ */
+ int check_writer_state(state_t require) const override {
+ if (require == Client::CLIENT_INITIALIZING &&
+ (state == Client::CLIENT_NEW))
+ return true;
+ else if (require == Client::CLIENT_INITIALIZED &&
+ (state == Client::CLIENT_INITIALIZING))
+ return true;
+ else if (require == Client::CLIENT_NEW &&
+ (state == Client::CLIENT_INITIALIZED))
+ return true;
+ else
+ return false;
+ }
+
+ initialize_state_t(state_t state, const char *lockname, uint64_t reader_cnt=0)
+ : RWRefState (state, lockname, reader_cnt) {}  // forwards all three arguments to the RWRefState base
+ ~initialize_state_t() {}
+ };
+
+ struct mount_state_t mount_state;
+ struct initialize_state_t initialize_state;
private:
struct C_Readahead : public Context {
* statistics and layout metadata.
*/
struct VXattr {  // element type of the static _dir_vxattrs / _file_vxattrs / _common_vxattrs tables declared below
- const string name;
- size_t (Client::*getxattr_cb)(Inode *in, char *val, size_t size);
- bool readonly, hidden;
- bool (Client::*exists_cb)(Inode *in);
- unsigned int flags;
+ const std::string name;
+ size_t (Client::*getxattr_cb)(Inode *in, char *val, size_t size);  // pointer to a Client member function; presumably writes the value into val -- confirm
+ bool readonly;  // the former `hidden` field is dropped by this change
+ bool (Client::*exists_cb)(Inode *in);  // pointer to a Client member predicate; NOTE(review): presumably decides whether the xattr applies to `in` -- confirm
+ unsigned int flags;  // NOTE(review): presumably VXATTR_* bits (VXATTR_RSTAT, VXATTR_DIRSTAT) -- confirm
};
enum {
MAY_READ = 4,
};
+ std::unique_ptr<CephContext, std::function<void(CephContext*)>> cct_deleter;
/* Flags for VXattr */
static const unsigned VXATTR_RSTAT = 0x1;
+ static const unsigned VXATTR_DIRSTAT = 0x2;
static const VXattr _dir_vxattrs[];
static const VXattr _file_vxattrs[];
+ static const VXattr _common_vxattrs[];
+ bool is_reserved_vino(vinodeno_t &vino);
void fill_dirent(struct dirent *de, const char *name, int type, uint64_t ino, loff_t next_off);
void _fragmap_remove_stopped_mds(Inode *in, mds_rank_t mds);
void _ll_get(Inode *in);
- int _ll_put(Inode *in, int num);
+ int _ll_put(Inode *in, uint64_t num);
void _ll_drop_pins();
Fh *_create_fh(Inode *in, int flags, int cmode, const UserPerm& perms);
int _release_fh(Fh *fh);
void _put_fh(Fh *fh);
- int _do_remount(bool retry_on_error);
+ std::pair<int, bool> _do_remount(bool retry_on_error);
int _read_sync(Fh *f, uint64_t off, uint64_t len, bufferlist *bl, bool *checkeof);
int _read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl);
+ bool _dentry_valid(const Dentry *dn);
+
// internal interface
// call these with client_lock held!
- int _do_lookup(Inode *dir, const string& name, int mask, InodeRef *target,
+ int _do_lookup(Inode *dir, const std::string& name, int mask, InodeRef *target,
const UserPerm& perms);
- int _lookup(Inode *dir, const string& dname, int mask, InodeRef *target,
- const UserPerm& perm);
+ int _lookup(Inode *dir, const std::string& dname, int mask, InodeRef *target,
+ const UserPerm& perm, std::string* alternate_name=nullptr);
- int _link(Inode *in, Inode *dir, const char *name, const UserPerm& perm,
+ int _link(Inode *in, Inode *dir, const char *name, const UserPerm& perm, std::string alternate_name,
InodeRef *inp = 0);
int _unlink(Inode *dir, const char *name, const UserPerm& perm);
- int _rename(Inode *olddir, const char *oname, Inode *ndir, const char *nname, const UserPerm& perm);
+ int _rename(Inode *olddir, const char *oname, Inode *ndir, const char *nname, const UserPerm& perm, std::string alternate_name);
int _mkdir(Inode *dir, const char *name, mode_t mode, const UserPerm& perm,
- InodeRef *inp = 0);
+ InodeRef *inp = 0, const std::map<std::string, std::string> &metadata={},
+ std::string alternate_name="");
int _rmdir(Inode *dir, const char *name, const UserPerm& perms);
int _symlink(Inode *dir, const char *name, const char *target,
- const UserPerm& perms, InodeRef *inp = 0);
+ const UserPerm& perms, std::string alternate_name, InodeRef *inp = 0);
int _mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev,
const UserPerm& perms, InodeRef *inp = 0);
// Attribute getters/setters.
// _do_setattr takes a ceph_statx plus a mask argument.
// NOTE(review): presumably the mask selects which statx fields to apply —
// confirm against the definition.
int _do_setattr(Inode *in, struct ceph_statx *stx, int mask,
const UserPerm& perms);
// Copies the named xattr into the caller-supplied buffer (value, len).
int _getxattr(InodeRef &in, const char *name, void *value, size_t len,
const UserPerm& perms);
// Added declaration: takes an attribute name, an output buffer (value, size)
// and an explicit mds_rank_t. Note that size is ssize_t here, unlike the
// size_t len used by _getxattr.
+ int _getvxattr(Inode *in, const UserPerm& perms, const char *attr_name,
+ ssize_t size, void *value, mds_rank_t rank);
// Writes attribute names into the caller-supplied buffer (names, len).
int _listxattr(Inode *in, char *names, size_t len, const UserPerm& perms);
int _do_setxattr(Inode *in, const char *name, const void *value, size_t len,
int flags, const UserPerm& perms);
int flags, const UserPerm& perms);
int _setxattr(InodeRef &in, const char *name, const void *value, size_t len,
int flags, const UserPerm& perms);
// xattr support helpers and cap renewal.
// _setxattr_check_data_pool takes the attribute name and value by non-const
// reference together with a read-only OSDMap pointer; the replacement
// declaration only changes the spelling of string to std::string.
- int _setxattr_check_data_pool(string& name, string& value, const OSDMap *osdmap);
+ int _setxattr_check_data_pool(std::string& name, std::string& value, const OSDMap *osdmap);
// NOTE(review): name suggests this may block until a suitable OSD map is
// available — confirm against the definition.
void _setxattr_maybe_wait_for_osdmap(const char *name, const void *value, size_t len);
// Removal is offered for both a raw Inode* and an InodeRef&.
int _removexattr(Inode *in, const char *nm, const UserPerm& perms);
int _removexattr(InodeRef &in, const char *nm, const UserPerm& perms);
int _renew_caps(Inode *in);
// File creation and data-path helpers.
//
// _create takes layout parameters (stripe_unit, stripe_count, object_size,
// data_pool), hands results back through the inp / fhp / created
// out-parameters, and in the added form also requires an alternate_name.
int _create(Inode *in, const char *name, int flags, mode_t mode, InodeRef *inp,
Fh **fhp, int stripe_unit, int stripe_count, int object_size,
- const char *data_pool, bool *created, const UserPerm &perms);
+ const char *data_pool, bool *created, const UserPerm &perms,
+ std::string alternate_name);
loff_t _lseek(Fh *fh, loff_t offset, int whence);
// _read fills *bl; _write accepts either a flat buffer (buf) or an iovec
// array (iov, iovcnt). NOTE(review): how the two write sources are chosen
// between is not visible here — confirm against the definition.
int64_t _read(Fh *fh, int64_t offset, uint64_t size, bufferlist *bl);
int64_t _write(Fh *fh, int64_t offset, uint64_t size, const char *buf,
const struct iovec *iov, int iovcnt);
// Vectored I/O: one entry point keyed by fd and one by Fh*; the bool write
// argument selects the direction. The replacement lines only rename the Fh
// parameter (f -> fh) and re-wrap the argument list.
- int64_t _preadv_pwritev_locked(Fh *f, const struct iovec *iov,
- unsigned iovcnt, int64_t offset, bool write, bool clamp_to_int);
- int _preadv_pwritev(int fd, const struct iovec *iov, unsigned iovcnt, int64_t offset, bool write);
+ int64_t _preadv_pwritev_locked(Fh *fh, const struct iovec *iov,
+ unsigned iovcnt, int64_t offset,
+ bool write, bool clamp_to_int);
+ int _preadv_pwritev(int fd, const struct iovec *iov, unsigned iovcnt,
+ int64_t offset, bool write);
int _flush(Fh *fh);
// _fsync is overloaded on file handle and on inode; both take a
// syncdataonly flag.
int _fsync(Fh *fh, bool syncdataonly);
int _fsync(Inode *in, bool syncdataonly);
// Virtual-xattr callbacks.
// Each size_t callback receives the inode plus a caller buffer (val, size);
// the bool *_exists variants take only the inode.
// NOTE(review): presumably each callback formats one value into val and
// returns its length, and *_exists gates whether the attribute is reported —
// confirm against the definitions.
size_t _vxattrcb_dir_rentries(Inode *in, char *val, size_t size);
size_t _vxattrcb_dir_rfiles(Inode *in, char *val, size_t size);
size_t _vxattrcb_dir_rsubdirs(Inode *in, char *val, size_t size);
+ size_t _vxattrcb_dir_rsnaps(Inode *in, char *val, size_t size);
size_t _vxattrcb_dir_rbytes(Inode *in, char *val, size_t size);
size_t _vxattrcb_dir_rctime(Inode *in, char *val, size_t size);
bool _vxattrcb_dir_pin_exists(Inode *in);
size_t _vxattrcb_dir_pin(Inode *in, char *val, size_t size);
// _vxattrs_calcu_name_size is dropped; added callbacks cover snap btime,
// caps, mirror info, cluster fsid and client id.
- size_t _vxattrs_calcu_name_size(const VXattr *vxattrs);
+ bool _vxattrcb_snap_btime_exists(Inode *in);
+ size_t _vxattrcb_snap_btime(Inode *in, char *val, size_t size);
+
+ size_t _vxattrcb_caps(Inode *in, char *val, size_t size);
+
+ bool _vxattrcb_mirror_info_exists(Inode *in);
+ size_t _vxattrcb_mirror_info(Inode *in, char *val, size_t size);
+
+ size_t _vxattrcb_cluster_fsid(Inode *in, char *val, size_t size);
+ size_t _vxattrcb_client_id(Inode *in, char *val, size_t size);
// Static lookups: the VXattr table for an inode, and a single entry by name.
static const VXattr *_get_vxattrs(Inode *in);
static const VXattr *_match_vxattr(Inode *in, const char *name);
// Removed inline helper: returned the cached name-size for the directory or
// file vxattr table, or 0 for any other table pointer.
- size_t _vxattrs_name_size(const VXattr *vxattrs) {
- if (vxattrs == _dir_vxattrs)
- return _dir_vxattrs_name_size;
- else if (vxattrs == _file_vxattrs)
- return _file_vxattrs_name_size;
- return 0;
- }
-
// File locking, low-level lookups and metrics helpers.
//
// _do_filelock takes the lock type/op, a sleep flag, the struct flock and an
// owner id; removing defaults to false.
int _do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep,
struct flock *fl, uint64_t owner, bool removing=false);
int _interrupt_filelock(MetaRequest *req);
int _ll_getattr(Inode *in, int caps, const UserPerm& perms);
// The parent out-parameter is optional (defaults to NULL).
int _lookup_parent(Inode *in, const UserPerm& perms, Inode **parent=NULL);
int _lookup_name(Inode *in, Inode *parent, const UserPerm& perms);
// _lookup_ino (keyed by inodeno_t) gives way to _lookup_vino (keyed by
// vinodeno_t); _ll_forget's count widens from int to uint64_t.
- int _lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
- bool _ll_forget(Inode *in, int count);
+ int _lookup_vino(vinodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
+ bool _ll_forget(Inode *in, uint64_t count);
// Added metrics entry points. NOTE(review): destination and cadence of the
// "send" are not visible in this chunk.
+ void collect_and_send_metrics();
+ void collect_and_send_global_metrics();
+
// Added per-class I/O statistics updaters, each fed a utime_t latency.
+ void update_io_stat_metadata(utime_t latency);
+ void update_io_stat_read(utime_t latency);
+ void update_io_stat_write(utime_t latency);
// Delegation timeout; initialised to 0.
// NOTE(review): units and the meaning of 0 are not visible here.
uint32_t deleg_timeout = 0;
// The cached vxattr name sizes are removed together with the helper that
// read them.
- size_t _file_vxattrs_name_size;
- size_t _dir_vxattrs_name_size;
// Registered callbacks; every pointer starts out null. callback_handle is the
// opaque void* stored alongside them.
// NOTE(review): presumably callback_handle is passed back to each callback —
// confirm at the call sites.
client_switch_interrupt_callback_t switch_interrupt_cb = nullptr;
client_remount_callback_t remount_cb = nullptr;
client_ino_callback_t ino_invalidate_cb = nullptr;
client_dentry_callback_t dentry_invalidate_cb = nullptr;
client_umask_callback_t umask_cb = nullptr;
+ client_ino_release_t ino_release_cb = nullptr;
void *callback_handle = nullptr;
bool can_invalidate_dentries = false;
// One Finisher per kind of deferred work; async_ino_releasor is the added one.
Finisher async_dentry_invalidator;
Finisher interrupt_finisher;
Finisher remount_finisher;
+ Finisher async_ino_releasor;
Finisher objecter_finisher;
// tick_event is removed, and last_cap_renew switches from utime_t to
// ceph::coarse_mono_time.
- Context *tick_event = nullptr;
- utime_t last_cap_renew;
+ ceph::coarse_mono_time last_cap_renew;
CommandHook m_command_hook;
epoch_t cap_epoch_barrier = 0;
// mds sessions
// The session map now stores MetaSessionRef values instead of MetaSession
// objects, and the waiter lists hold ceph::condition_variable* instead of
// Cond*.
- map<mds_rank_t, MetaSession> mds_sessions; // mds -> push seq
- list<Cond*> waiting_for_mdsmap;
+ map<mds_rank_t, MetaSessionRef> mds_sessions; // mds -> push seq
+ std::set<mds_rank_t> mds_ranks_closing; // mds ranks currently tearing down sessions
+ std::list<ceph::condition_variable*> waiting_for_mdsmap;
// FSMap, for when using mds_command
- list<Cond*> waiting_for_fsmap;
+ std::list<ceph::condition_variable*> waiting_for_fsmap;
std::unique_ptr<FSMap> fsmap;
std::unique_ptr<FSMapUser> fsmap_user;
+ // This mutex only protects command_table
+ ceph::mutex command_lock = ceph::make_mutex("Client::command_lock");
// MDS command state
CommandTable<MDSCommandOp> command_table;
// Open-file bookkeeping: integer fd -> Fh*, a set of Fh*, and the set of
// open directory results.
// NOTE(review): "ll" presumably refers to the low-level API — confirm.
ceph::unordered_map<int, Fh*> fd_map;
set<Fh*> ll_unclosed_fh_set;
ceph::unordered_set<dir_result_t*> opened_dirs;
// Added counter, initialised to 1.
// NOTE(review): how it is consumed is not visible here.
+ uint64_t fd_gen = 1;
// The four lifecycle booleans are removed; mount_aborted and blocklisted are
// the flags declared after the change.
- bool initialized = false;
- bool mounted = false;
- bool unmounting = false;
- bool blacklisted = false;
+ bool mount_aborted = false;
+ bool blocklisted = false;
// Inode cache keyed by vinodeno_t, plus the mapping from faked ino_t values
// back to vinodeno_t and the last faked values handed out.
ceph::unordered_map<vinodeno_t, Inode*> inode_map;
ceph::unordered_map<ino_t, vinodeno_t> faked_ino_map;
ino_t last_used_faked_ino;
ino_t last_used_faked_root;
// Removed: the per-rank record of MDS instances that rejected this client.
- // When an MDS has sent us a REJECT, remember that and don't
- // contact it again. Remember which inst rejected us, so that
- // when we talk to another inst with the same rank we can
- // try again.
- std::map<mds_rank_t, entity_addrvec_t> rejected_by_mds;
-
// local_osd keeps a negative ENXIO as its initial value; only the constant's
// spelling changes (ENXIO -> CEPHFS_ENXIO).
- int local_osd = -ENXIO;
+ int local_osd = -CEPHFS_ENXIO;
epoch_t local_osd_epoch = 0;
- int unsafe_sync_write = 0;
-
// mds requests
ceph_tid_t last_tid = 0;
ceph_tid_t oldest_tid = 0; // oldest incomplete mds request, excluding setfilelock requests
// cap flushing
ceph_tid_t last_flush_tid = 1;
// dirty_list is dropped; only delayed_list remains declared here.
- // dirty_list keeps all the dirty inodes before flushing.
- xlist<Inode*> delayed_list, dirty_list;
+ xlist<Inode*> delayed_list;
int num_flushing_caps = 0;
ceph::unordered_map<inodeno_t,SnapRealm*> snap_realms;
std::map<std::string, std::string> metadata;
// Added timing state: a coarse monotonic timestamp and two durations held in
// whole seconds.
+ ceph::coarse_mono_time last_auto_reconnect;
+ std::chrono::seconds caps_release_delay, mount_timeout;
// trace generation
- ofstream traceout;
+ std::ofstream traceout;
// Cond is replaced by ceph::condition_variable, here and in the waiter lists
// below.
- Cond mount_cond, sync_cond;
+ ceph::condition_variable mount_cond, sync_cond;
// Cached result per (int64_t, string) key.
// NOTE(review): presumably keyed by pool id and namespace — confirm.
std::map<std::pair<int64_t,std::string>, int> pool_perms;
- list<Cond*> waiting_for_pool_perm;
+ std::list<ceph::condition_variable*> waiting_for_pool_perm;
uint64_t retries_on_invalidate = 0;
// state reclaim
- list<Cond*> waiting_for_reclaim;
+ std::list<ceph::condition_variable*> waiting_for_reclaim;
int reclaim_errno = 0;
epoch_t reclaim_osd_epoch = 0;
entity_addrvec_t reclaim_target_addrs;
+
// Added metrics state. Every counter below is a uint64_t initialised to 0.
// NOTE(review): where the counters are incremented and reported is not
// visible in this chunk.
+ // dentry lease metrics
+ uint64_t dentry_nr = 0;
+ uint64_t dlease_hits = 0;
+ uint64_t dlease_misses = 0;
+
// Capability hit/miss counters.
+ uint64_t cap_hits = 0;
+ uint64_t cap_misses = 0;
+
// Open-file / pinned-cap / open-inode gauges.
+ uint64_t opened_files = 0;
+ uint64_t pinned_icaps = 0;
+ uint64_t opened_inodes = 0;
+
// Cumulative read and write operation counts and sizes.
+ uint64_t total_read_ops = 0;
+ uint64_t total_read_size = 0;
+
+ uint64_t total_write_ops = 0;
+ uint64_t total_write_size = 0;
+
// NOTE(review): presumably delay_i_lock guards delay_i_release — confirm at
// the use sites; the pairing is inferred from adjacency and naming only.
+ ceph::spinlock delay_i_lock;
+ std::map<Inode*,int> delay_i_release;
+
// Per-class request counters (metadata / read / write).
+ uint64_t nr_metadata_request = 0;
+ uint64_t nr_read_request = 0;
+ uint64_t nr_write_request = 0;
};
/**
class StandaloneClient : public Client
{
public:
- StandaloneClient(Messenger *m, MonClient *mc);
+ StandaloneClient(Messenger *m, MonClient *mc, boost::asio::io_context& ictx);
~StandaloneClient() override;