]> git.proxmox.com Git - ceph.git/blob - ceph/src/client/Client.h
update source to Ceph Pacific 16.2.2
[ceph.git] / ceph / src / client / Client.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #ifndef CEPH_CLIENT_H
17 #define CEPH_CLIENT_H
18
19 #include "common/CommandTable.h"
20 #include "common/Finisher.h"
21 #include "common/Timer.h"
22 #include "common/ceph_mutex.h"
23 #include "common/cmdparse.h"
24 #include "common/compiler_extensions.h"
25 #include "include/common_fwd.h"
26 #include "include/cephfs/ceph_ll_client.h"
27 #include "include/filepath.h"
28 #include "include/interval_set.h"
29 #include "include/lru.h"
30 #include "include/types.h"
31 #include "include/unordered_map.h"
32 #include "include/unordered_set.h"
33 #include "include/cephfs/metrics/Types.h"
34 #include "mds/mdstypes.h"
35 #include "msg/Dispatcher.h"
36 #include "msg/MessageRef.h"
37 #include "msg/Messenger.h"
38 #include "osdc/ObjectCacher.h"
39
40 #include "RWRef.h"
41 #include "InodeRef.h"
42 #include "MetaSession.h"
43 #include "UserPerm.h"
44
45 #include <fstream>
46 #include <map>
47 #include <memory>
48 #include <set>
49 #include <string>
50 #include <thread>
51
52 using std::set;
53 using std::map;
54 using std::fstream;
55
56 class FSMap;
57 class FSMapUser;
58 class MonClient;
59
60
61 struct DirStat;
62 struct LeaseStat;
63 struct InodeStat;
64
65 class Filer;
66 class Objecter;
67 class WritebackHandler;
68
69 class MDSMap;
70 class Message;
71 class destructive_lock_ref_t;
72
// Client counter identifiers. The range is bracketed by l_c_first and
// l_c_last; the identifiers in between take consecutive values.
// NOTE(review): presumably indices for the PerfCounters `logger` --
// confirm against the code that builds it.
enum {
  l_c_first = 20000,  // lower sentinel (not a counter itself)
  l_c_reply,          // 20001
  l_c_lat,            // 20002
  l_c_wrlat,          // 20003
  l_c_read,           // 20004
  l_c_fsync,          // 20005
  l_c_last,           // upper sentinel (not a counter itself)
};
82
83
84 class MDSCommandOp : public CommandOp
85 {
86 public:
87 mds_gid_t mds_gid;
88
89 explicit MDSCommandOp(ceph_tid_t t) : CommandOp(t) {}
90 };
91
/*
 * Error codes for ceph_fuse.
 *
 * Each replacement list is fully parenthesized so the macro expands to a
 * single primary expression regardless of the surrounding operators.
 */
#define CEPH_FUSE_NO_MDS_UP    (-((1<<16)+0))  /* no mds up detected in ceph_fuse */
#define CEPH_FUSE_LAST         (-((1<<16)+1))  /* (unused) */
95
96 // ============================================
97 // types for my local metadata cache
/* basic structure:

 - Dentries live in an LRU loop.  They get expired based on last access.
      see include/lru.h.  Items can be bumped to "mid" or "top" of list, etc.
 - Inode has ref count for each Fh, Dir, or Dentry that points to it.
 - when Inode ref goes to 0, it's expired.
 - when Dir is empty, it's removed (and its Inode ref--)

*/
107
/* getdir result */
/**
 * One directory entry: its name plus an optional stat record.
 *
 * stmask accompanies st; the name-only constructor sets it to 0 and
 * zero-initializes st so the record never holds indeterminate bytes.
 */
struct DirEntry {
  /// Name-only entry: st is zeroed and stmask is 0.
  explicit DirEntry(const std::string &s) : d_name(s), st{}, stmask(0) {}

  /// Entry with attributes. @a s is only copied, so it is taken by const
  /// reference (existing callers passing a non-const lvalue still bind).
  DirEntry(const std::string &n, const struct stat& s, int stm)
    : d_name(n), st(s), stmask(stm) {}

  std::string d_name;  // entry name
  struct stat st;      // copy of the attributes supplied by the caller
  int stmask;          // mask supplied alongside st (0 when none given)
};
117
118 struct Cap;
119 class Dir;
120 class Dentry;
121 struct SnapRealm;
122 struct Fh;
123 struct CapSnap;
124
125 struct MetaRequest;
126 class ceph_lock_state_t;
127
128 // ========================================================
129 // client interface
130
131 struct dir_result_t {
132 static const int SHIFT = 28;
133 static const int64_t MASK = (1 << SHIFT) - 1;
134 static const int64_t HASH = 0xFFULL << (SHIFT + 24); // impossible frag bits
135 static const loff_t END = 1ULL << (SHIFT + 32);
136
137 struct dentry {
138 int64_t offset;
139 std::string name;
140 std::string alternate_name;
141 InodeRef inode;
142 explicit dentry(int64_t o) : offset(o) {}
143 dentry(int64_t o, std::string n, std::string an, InodeRef in) :
144 offset(o), name(std::move(n)), alternate_name(std::move(an)), inode(std::move(in)) {}
145 };
146 struct dentry_off_lt {
147 bool operator()(const dentry& d, int64_t off) const {
148 return dir_result_t::fpos_cmp(d.offset, off) < 0;
149 }
150 };
151
152
153 explicit dir_result_t(Inode *in, const UserPerm& perms);
154
155
156 static uint64_t make_fpos(unsigned h, unsigned l, bool hash) {
157 uint64_t v = ((uint64_t)h<< SHIFT) | (uint64_t)l;
158 if (hash)
159 v |= HASH;
160 else
161 ceph_assert((v & HASH) != HASH);
162 return v;
163 }
164 static unsigned fpos_high(uint64_t p) {
165 unsigned v = (p & (END-1)) >> SHIFT;
166 if ((p & HASH) == HASH)
167 return ceph_frag_value(v);
168 return v;
169 }
170 static unsigned fpos_low(uint64_t p) {
171 return p & MASK;
172 }
173 static int fpos_cmp(uint64_t l, uint64_t r) {
174 int c = ceph_frag_compare(fpos_high(l), fpos_high(r));
175 if (c)
176 return c;
177 if (fpos_low(l) == fpos_low(r))
178 return 0;
179 return fpos_low(l) < fpos_low(r) ? -1 : 1;
180 }
181
182 unsigned offset_high() { return fpos_high(offset); }
183 unsigned offset_low() { return fpos_low(offset); }
184
185 void set_end() { offset |= END; }
186 bool at_end() { return (offset & END); }
187
188 void set_hash_order() { offset |= HASH; }
189 bool hash_order() { return (offset & HASH) == HASH; }
190
191 bool is_cached() {
192 if (buffer.empty())
193 return false;
194 if (hash_order()) {
195 return buffer_frag.contains(offset_high());
196 } else {
197 return buffer_frag == frag_t(offset_high());
198 }
199 }
200
201 void reset() {
202 last_name.clear();
203 next_offset = 2;
204 offset = 0;
205 ordered_count = 0;
206 cache_index = 0;
207 buffer.clear();
208 }
209
210 InodeRef inode;
211 int64_t offset; // hash order:
212 // (0xff << 52) | ((24 bits hash) << 28) |
213 // (the nth entry has hash collision);
214 // frag+name order;
215 // ((frag value) << 28) | (the nth entry in frag);
216
217 unsigned next_offset; // offset of next chunk (last_name's + 1)
218 string last_name; // last entry in previous chunk
219
220 uint64_t release_count;
221 uint64_t ordered_count;
222 unsigned cache_index;
223 int start_shared_gen; // dir shared_gen at start of readdir
224 UserPerm perms;
225
226 frag_t buffer_frag;
227
228 vector<dentry> buffer;
229 struct dirent de;
230 };
231
232 class Client : public Dispatcher, public md_config_obs_t {
233 public:
234 friend class C_Block_Sync; // Calls block map and protected helpers
235 friend class C_Client_CacheInvalidate; // calls ino_invalidate_cb
236 friend class C_Client_DentryInvalidate; // calls dentry_invalidate_cb
237 friend class C_Client_FlushComplete; // calls put_inode()
238 friend class C_Client_Remount;
239 friend class C_Client_RequestInterrupt;
240 friend class C_Deleg_Timeout; // Asserts on client_lock, called when a delegation is unreturned
241 friend class C_Client_CacheRelease; // Asserts on client_lock
242 friend class SyntheticClient;
243 friend void intrusive_ptr_release(Inode *in);
244 template <typename T> friend struct RWRefState;
245 template <typename T> friend class RWRef;
246
247 using Dispatcher::cct;
248 using clock = ceph::coarse_mono_clock;
249
250 typedef int (*add_dirent_cb_t)(void *p, struct dirent *de, struct ceph_statx *stx, off_t off, Inode *in);
251
252 struct walk_dentry_result {
253 InodeRef in;
254 std::string alternate_name;
255 };
256
257 class CommandHook : public AdminSocketHook {
258 public:
259 explicit CommandHook(Client *client);
260 int call(std::string_view command, const cmdmap_t& cmdmap,
261 Formatter *f,
262 std::ostream& errss,
263 bufferlist& out) override;
264 private:
265 Client *m_client;
266 };
267
268 // snapshot info returned via get_snap_info(). nothing to do
269 // with SnapInfo on the MDS.
270 struct SnapInfo {
271 snapid_t id;
272 std::map<std::string, std::string> metadata;
273 };
274
275 Client(Messenger *m, MonClient *mc, Objecter *objecter_);
276 Client(const Client&) = delete;
277 Client(const Client&&) = delete;
278 virtual ~Client() override;
279
280 static UserPerm pick_my_perms(CephContext *c) {
281 uid_t uid = c->_conf->client_mount_uid >= 0 ? c->_conf->client_mount_uid : -1;
282 gid_t gid = c->_conf->client_mount_gid >= 0 ? c->_conf->client_mount_gid : -1;
283 return UserPerm(uid, gid);
284 }
285 UserPerm pick_my_perms() {
286 uid_t uid = user_id >= 0 ? user_id : -1;
287 gid_t gid = group_id >= 0 ? group_id : -1;
288 return UserPerm(uid, gid);
289 }
290
291 int mount(const std::string &mount_root, const UserPerm& perms,
292 bool require_mds=false, const std::string &fs_name="");
293 void unmount();
294 bool is_unmounting() const {
295 return mount_state.check_current_state(CLIENT_UNMOUNTING);
296 }
297 bool is_mounted() const {
298 return mount_state.check_current_state(CLIENT_MOUNTED);
299 }
300 bool is_mounting() const {
301 return mount_state.check_current_state(CLIENT_MOUNTING);
302 }
303 bool is_initialized() const {
304 return initialize_state.check_current_state(CLIENT_INITIALIZED);
305 }
306 void abort_conn();
307
308 void set_uuid(const std::string& uuid);
309 void set_session_timeout(unsigned timeout);
310 int start_reclaim(const std::string& uuid, unsigned flags,
311 const std::string& fs_name);
312 void finish_reclaim();
313
314 fs_cluster_id_t get_fs_cid() {
315 return fscid;
316 }
317
318 int mds_command(
319 const std::string &mds_spec,
320 const std::vector<std::string>& cmd,
321 const bufferlist& inbl,
322 bufferlist *poutbl, std::string *prs, Context *onfinish);
323
324 // these should (more or less) mirror the actual system calls.
325 int statfs(const char *path, struct statvfs *stbuf, const UserPerm& perms);
326
327 // crap
328 int chdir(const char *s, std::string &new_cwd, const UserPerm& perms);
329 void _getcwd(std::string& cwd, const UserPerm& perms);
330 void getcwd(std::string& cwd, const UserPerm& perms);
331
332 // namespace ops
333 int opendir(const char *name, dir_result_t **dirpp, const UserPerm& perms);
334 int closedir(dir_result_t *dirp);
335
336 /**
337 * Fill a directory listing from dirp, invoking cb for each entry
338 * with the given pointer, the dirent, the struct stat, the stmask,
339 * and the offset.
340 *
341 * Returns 0 if it reached the end of the directory.
342 * If @a cb returns a negative error code, stop and return that.
343 */
344 int readdir_r_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
345 unsigned want=0, unsigned flags=AT_NO_ATTR_SYNC,
346 bool getref=false);
347
348 struct dirent * readdir(dir_result_t *d);
349 int readdir_r(dir_result_t *dirp, struct dirent *de);
350 int readdirplus_r(dir_result_t *dirp, struct dirent *de, struct ceph_statx *stx, unsigned want, unsigned flags, Inode **out);
351
352 int getdir(const char *relpath, list<string>& names,
353 const UserPerm& perms); // get the whole dir at once.
354
355 /**
356 * Returns the length of the buffer that got filled in, or -errno.
357 * If it returns -CEPHFS_ERANGE you just need to increase the size of the
358 * buffer and try again.
359 */
360 int _getdents(dir_result_t *dirp, char *buf, int buflen, bool ful); // get a bunch of dentries at once
361 int getdents(dir_result_t *dirp, char *buf, int buflen) {
362 return _getdents(dirp, buf, buflen, true);
363 }
364 int getdnames(dir_result_t *dirp, char *buf, int buflen) {
365 return _getdents(dirp, buf, buflen, false);
366 }
367
368 void rewinddir(dir_result_t *dirp);
369 loff_t telldir(dir_result_t *dirp);
370 void seekdir(dir_result_t *dirp, loff_t offset);
371
372 int may_delete(const char *relpath, const UserPerm& perms);
373 int link(const char *existing, const char *newname, const UserPerm& perm, std::string alternate_name="");
374 int unlink(const char *path, const UserPerm& perm);
375 int rename(const char *from, const char *to, const UserPerm& perm, std::string alternate_name="");
376
377 // dirs
378 int mkdir(const char *path, mode_t mode, const UserPerm& perm, std::string alternate_name="");
379 int mkdirs(const char *path, mode_t mode, const UserPerm& perms);
380 int rmdir(const char *path, const UserPerm& perms);
381
382 // symlinks
383 int readlink(const char *path, char *buf, loff_t size, const UserPerm& perms);
384
385 int symlink(const char *existing, const char *newname, const UserPerm& perms, std::string alternate_name="");
386
387 // path traversal for high-level interface
388 int walk(std::string_view path, struct walk_dentry_result* result, const UserPerm& perms, bool followsym=true);
389
390 // inode stuff
391 unsigned statx_to_mask(unsigned int flags, unsigned int want);
392 int stat(const char *path, struct stat *stbuf, const UserPerm& perms,
393 frag_info_t *dirstat=0, int mask=CEPH_STAT_CAP_INODE_ALL);
394 int statx(const char *path, struct ceph_statx *stx,
395 const UserPerm& perms,
396 unsigned int want, unsigned int flags);
397 int lstat(const char *path, struct stat *stbuf, const UserPerm& perms,
398 frag_info_t *dirstat=0, int mask=CEPH_STAT_CAP_INODE_ALL);
399
400 int setattr(const char *relpath, struct stat *attr, int mask,
401 const UserPerm& perms);
402 int setattrx(const char *relpath, struct ceph_statx *stx, int mask,
403 const UserPerm& perms, int flags=0);
404 int fsetattr(int fd, struct stat *attr, int mask, const UserPerm& perms);
405 int fsetattrx(int fd, struct ceph_statx *stx, int mask, const UserPerm& perms);
406 int chmod(const char *path, mode_t mode, const UserPerm& perms);
407 int fchmod(int fd, mode_t mode, const UserPerm& perms);
408 int lchmod(const char *path, mode_t mode, const UserPerm& perms);
409 int chown(const char *path, uid_t new_uid, gid_t new_gid,
410 const UserPerm& perms);
411 int fchown(int fd, uid_t new_uid, gid_t new_gid, const UserPerm& perms);
412 int lchown(const char *path, uid_t new_uid, gid_t new_gid,
413 const UserPerm& perms);
414 int utime(const char *path, struct utimbuf *buf, const UserPerm& perms);
415 int lutime(const char *path, struct utimbuf *buf, const UserPerm& perms);
416 int futime(int fd, struct utimbuf *buf, const UserPerm& perms);
417 int utimes(const char *relpath, struct timeval times[2], const UserPerm& perms);
418 int lutimes(const char *relpath, struct timeval times[2], const UserPerm& perms);
419 int futimes(int fd, struct timeval times[2], const UserPerm& perms);
420 int futimens(int fd, struct timespec times[2], const UserPerm& perms);
421 int flock(int fd, int operation, uint64_t owner);
422 int truncate(const char *path, loff_t size, const UserPerm& perms);
423
424 // file ops
425 int mknod(const char *path, mode_t mode, const UserPerm& perms, dev_t rdev=0);
426 int open(const char *path, int flags, const UserPerm& perms, mode_t mode=0, std::string alternate_name="") {
427 return open(path, flags, perms, mode, 0, 0, 0, NULL, alternate_name);
428 }
429 int open(const char *path, int flags, const UserPerm& perms,
430 mode_t mode, int stripe_unit, int stripe_count, int object_size,
431 const char *data_pool, std::string alternate_name="");
432 int lookup_hash(inodeno_t ino, inodeno_t dirino, const char *name,
433 const UserPerm& perms);
434 int lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
435 int lookup_name(Inode *in, Inode *parent, const UserPerm& perms);
436 int close(int fd);
437 loff_t lseek(int fd, loff_t offset, int whence);
438 int read(int fd, char *buf, loff_t size, loff_t offset=-1);
439 int preadv(int fd, const struct iovec *iov, int iovcnt, loff_t offset=-1);
440 int write(int fd, const char *buf, loff_t size, loff_t offset=-1);
441 int pwritev(int fd, const struct iovec *iov, int iovcnt, loff_t offset=-1);
442 int fake_write_size(int fd, loff_t size);
443 int ftruncate(int fd, loff_t size, const UserPerm& perms);
444 int fsync(int fd, bool syncdataonly);
445 int fstat(int fd, struct stat *stbuf, const UserPerm& perms,
446 int mask=CEPH_STAT_CAP_INODE_ALL);
447 int fstatx(int fd, struct ceph_statx *stx, const UserPerm& perms,
448 unsigned int want, unsigned int flags);
449 int fallocate(int fd, int mode, loff_t offset, loff_t length);
450
451 // full path xattr ops
452 int getxattr(const char *path, const char *name, void *value, size_t size,
453 const UserPerm& perms);
454 int lgetxattr(const char *path, const char *name, void *value, size_t size,
455 const UserPerm& perms);
456 int fgetxattr(int fd, const char *name, void *value, size_t size,
457 const UserPerm& perms);
458 int listxattr(const char *path, char *list, size_t size, const UserPerm& perms);
459 int llistxattr(const char *path, char *list, size_t size, const UserPerm& perms);
460 int flistxattr(int fd, char *list, size_t size, const UserPerm& perms);
461 int removexattr(const char *path, const char *name, const UserPerm& perms);
462 int lremovexattr(const char *path, const char *name, const UserPerm& perms);
463 int fremovexattr(int fd, const char *name, const UserPerm& perms);
464 int setxattr(const char *path, const char *name, const void *value,
465 size_t size, int flags, const UserPerm& perms);
466 int lsetxattr(const char *path, const char *name, const void *value,
467 size_t size, int flags, const UserPerm& perms);
468 int fsetxattr(int fd, const char *name, const void *value, size_t size,
469 int flags, const UserPerm& perms);
470
471 int sync_fs();
472 int64_t drop_caches();
473
474 int get_snap_info(const char *path, const UserPerm &perms, SnapInfo *snap_info);
475
476 // hpc lazyio
477 int lazyio(int fd, int enable);
478 int lazyio_propagate(int fd, loff_t offset, size_t count);
479 int lazyio_synchronize(int fd, loff_t offset, size_t count);
480
481 // expose file layout
482 int describe_layout(const char *path, file_layout_t* layout,
483 const UserPerm& perms);
484 int fdescribe_layout(int fd, file_layout_t* layout);
485 int get_file_stripe_address(int fd, loff_t offset, vector<entity_addr_t>& address);
486 int get_file_extent_osds(int fd, loff_t off, loff_t *len, vector<int>& osds);
487 int get_osd_addr(int osd, entity_addr_t& addr);
488
489 // expose mdsmap
490 int64_t get_default_pool_id();
491
492 // expose osdmap
493 int get_local_osd();
494 int get_pool_replication(int64_t pool);
495 int64_t get_pool_id(const char *pool_name);
496 string get_pool_name(int64_t pool);
497 int get_osd_crush_location(int id, vector<pair<string, string> >& path);
498
499 int enumerate_layout(int fd, vector<ObjectExtent>& result,
500 loff_t length, loff_t offset);
501
502 int mksnap(const char *path, const char *name, const UserPerm& perm,
503 mode_t mode=0, const std::map<std::string, std::string> &metadata={});
504 int rmsnap(const char *path, const char *name, const UserPerm& perm, bool check_perms=false);
505
506 // Inode permission checking
507 int inode_permission(Inode *in, const UserPerm& perms, unsigned want);
508
509 // expose caps
510 int get_caps_issued(int fd);
511 int get_caps_issued(const char *path, const UserPerm& perms);
512
513 snapid_t ll_get_snapid(Inode *in);
514 vinodeno_t ll_get_vino(Inode *in) {
515 std::lock_guard lock(client_lock);
516 return _get_vino(in);
517 }
518 // get inode from faked ino
519 Inode *ll_get_inode(ino_t ino);
520 Inode *ll_get_inode(vinodeno_t vino);
521 int ll_lookup(Inode *parent, const char *name, struct stat *attr,
522 Inode **out, const UserPerm& perms);
523 int ll_lookup_inode(struct inodeno_t ino, const UserPerm& perms, Inode **inode);
524 int ll_lookup_vino(vinodeno_t vino, const UserPerm& perms, Inode **inode);
525 int ll_lookupx(Inode *parent, const char *name, Inode **out,
526 struct ceph_statx *stx, unsigned want, unsigned flags,
527 const UserPerm& perms);
528 bool ll_forget(Inode *in, uint64_t count);
529 bool ll_put(Inode *in);
530 int ll_get_snap_ref(snapid_t snap);
531
532 int ll_getattr(Inode *in, struct stat *st, const UserPerm& perms);
533 int ll_getattrx(Inode *in, struct ceph_statx *stx, unsigned int want,
534 unsigned int flags, const UserPerm& perms);
535 int ll_setattrx(Inode *in, struct ceph_statx *stx, int mask,
536 const UserPerm& perms);
537 int ll_setattr(Inode *in, struct stat *st, int mask,
538 const UserPerm& perms);
539 int ll_getxattr(Inode *in, const char *name, void *value, size_t size,
540 const UserPerm& perms);
541 int ll_setxattr(Inode *in, const char *name, const void *value, size_t size,
542 int flags, const UserPerm& perms);
543 int ll_removexattr(Inode *in, const char *name, const UserPerm& perms);
544 int ll_listxattr(Inode *in, char *list, size_t size, const UserPerm& perms);
545 int ll_opendir(Inode *in, int flags, dir_result_t **dirpp,
546 const UserPerm& perms);
547 int ll_releasedir(dir_result_t* dirp);
548 int ll_fsyncdir(dir_result_t* dirp);
549 int ll_readlink(Inode *in, char *buf, size_t bufsize, const UserPerm& perms);
550 int ll_mknod(Inode *in, const char *name, mode_t mode, dev_t rdev,
551 struct stat *attr, Inode **out, const UserPerm& perms);
552 int ll_mknodx(Inode *parent, const char *name, mode_t mode, dev_t rdev,
553 Inode **out, struct ceph_statx *stx, unsigned want,
554 unsigned flags, const UserPerm& perms);
555 int ll_mkdir(Inode *in, const char *name, mode_t mode, struct stat *attr,
556 Inode **out, const UserPerm& perm);
557 int ll_mkdirx(Inode *parent, const char *name, mode_t mode, Inode **out,
558 struct ceph_statx *stx, unsigned want, unsigned flags,
559 const UserPerm& perms);
560 int ll_symlink(Inode *in, const char *name, const char *value,
561 struct stat *attr, Inode **out, const UserPerm& perms);
562 int ll_symlinkx(Inode *parent, const char *name, const char *value,
563 Inode **out, struct ceph_statx *stx, unsigned want,
564 unsigned flags, const UserPerm& perms);
565 int ll_unlink(Inode *in, const char *name, const UserPerm& perm);
566 int ll_rmdir(Inode *in, const char *name, const UserPerm& perms);
567 int ll_rename(Inode *parent, const char *name, Inode *newparent,
568 const char *newname, const UserPerm& perm);
569 int ll_link(Inode *in, Inode *newparent, const char *newname,
570 const UserPerm& perm);
571 int ll_open(Inode *in, int flags, Fh **fh, const UserPerm& perms);
572 int _ll_create(Inode *parent, const char *name, mode_t mode,
573 int flags, InodeRef *in, int caps, Fh **fhp,
574 const UserPerm& perms);
575 int ll_create(Inode *parent, const char *name, mode_t mode, int flags,
576 struct stat *attr, Inode **out, Fh **fhp,
577 const UserPerm& perms);
578 int ll_createx(Inode *parent, const char *name, mode_t mode,
579 int oflags, Inode **outp, Fh **fhp,
580 struct ceph_statx *stx, unsigned want, unsigned lflags,
581 const UserPerm& perms);
582 int ll_read_block(Inode *in, uint64_t blockid, char *buf, uint64_t offset,
583 uint64_t length, file_layout_t* layout);
584
585 int ll_write_block(Inode *in, uint64_t blockid,
586 char* buf, uint64_t offset,
587 uint64_t length, file_layout_t* layout,
588 uint64_t snapseq, uint32_t sync);
589 int ll_commit_blocks(Inode *in, uint64_t offset, uint64_t length);
590
591 int ll_statfs(Inode *in, struct statvfs *stbuf, const UserPerm& perms);
592 int ll_walk(const char* name, Inode **i, struct ceph_statx *stx,
593 unsigned int want, unsigned int flags, const UserPerm& perms);
594 uint32_t ll_stripe_unit(Inode *in);
595 int ll_file_layout(Inode *in, file_layout_t *layout);
596 uint64_t ll_snap_seq(Inode *in);
597
598 int ll_read(Fh *fh, loff_t off, loff_t len, bufferlist *bl);
599 int ll_write(Fh *fh, loff_t off, loff_t len, const char *data);
600 int64_t ll_readv(struct Fh *fh, const struct iovec *iov, int iovcnt, int64_t off);
601 int64_t ll_writev(struct Fh *fh, const struct iovec *iov, int iovcnt, int64_t off);
602 loff_t ll_lseek(Fh *fh, loff_t offset, int whence);
603 int ll_flush(Fh *fh);
604 int ll_fsync(Fh *fh, bool syncdataonly);
605 int ll_sync_inode(Inode *in, bool syncdataonly);
606 int ll_fallocate(Fh *fh, int mode, int64_t offset, int64_t length);
607 int ll_release(Fh *fh);
608 int ll_getlk(Fh *fh, struct flock *fl, uint64_t owner);
609 int ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep);
610 int ll_flock(Fh *fh, int cmd, uint64_t owner);
611 int ll_lazyio(Fh *fh, int enable);
612 int ll_file_layout(Fh *fh, file_layout_t *layout);
613 void ll_interrupt(void *d);
614 bool ll_handle_umask() {
615 return acl_type != NO_ACL;
616 }
617
618 int ll_get_stripe_osd(struct Inode *in, uint64_t blockno,
619 file_layout_t* layout);
620 uint64_t ll_get_internal_offset(struct Inode *in, uint64_t blockno);
621
622 int ll_num_osds(void);
623 int ll_osdaddr(int osd, uint32_t *addr);
624 int ll_osdaddr(int osd, char* buf, size_t size);
625
626 void ll_register_callbacks(struct ceph_client_callback_args *args);
627 int test_dentry_handling(bool can_invalidate);
628
629 const char** get_tracked_conf_keys() const override;
630 void handle_conf_change(const ConfigProxy& conf,
631 const std::set <std::string> &changed) override;
632 uint32_t get_deleg_timeout() { return deleg_timeout; }
633 int set_deleg_timeout(uint32_t timeout);
634 int ll_delegation(Fh *fh, unsigned cmd, ceph_deleg_cb_t cb, void *priv);
635
636 entity_name_t get_myname() { return messenger->get_myname(); }
637 void wait_on_list(std::list<ceph::condition_variable*>& ls);
638 void signal_cond_list(std::list<ceph::condition_variable*>& ls);
639
640 void set_filer_flags(int flags);
641 void clear_filer_flags(int flags);
642
643 void tear_down_cache();
644
645 void update_metadata(std::string const &k, std::string const &v);
646
647 client_t get_nodeid() { return whoami; }
648
649 inodeno_t get_root_ino();
650 Inode *get_root();
651
652 virtual int init();
653 virtual void shutdown();
654
655 // messaging
656 void cancel_commands(const MDSMap& newmap);
657 void handle_mds_map(const MConstRef<MMDSMap>& m);
658 void handle_fs_map(const MConstRef<MFSMap>& m);
659 void handle_fs_map_user(const MConstRef<MFSMapUser>& m);
660 void handle_osd_map(const MConstRef<MOSDMap>& m);
661
662 void handle_lease(const MConstRef<MClientLease>& m);
663
664 // inline data
665 int uninline_data(Inode *in, Context *onfinish);
666
667 // file caps
668 void check_cap_issue(Inode *in, unsigned issued);
669 void add_update_cap(Inode *in, MetaSession *session, uint64_t cap_id,
670 unsigned issued, unsigned wanted, unsigned seq, unsigned mseq,
671 inodeno_t realm, int flags, const UserPerm& perms);
672 void remove_cap(Cap *cap, bool queue_release);
673 void remove_all_caps(Inode *in);
674 void remove_session_caps(MetaSession *session, int err);
675 int mark_caps_flushing(Inode *in, ceph_tid_t *ptid);
676 void adjust_session_flushing_caps(Inode *in, MetaSession *old_s, MetaSession *new_s);
677 void flush_caps_sync();
678 void kick_flushing_caps(Inode *in, MetaSession *session);
679 void kick_flushing_caps(MetaSession *session);
680 void early_kick_flushing_caps(MetaSession *session);
681 int get_caps(Fh *fh, int need, int want, int *have, loff_t endoff);
682 int get_caps_used(Inode *in);
683
684 void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created, snapid_t snap_highwater,
685 vector<snapid_t>& snaps);
686
687 void handle_quota(const MConstRef<MClientQuota>& m);
688 void handle_snap(const MConstRef<MClientSnap>& m);
689 void handle_caps(const MConstRef<MClientCaps>& m);
690 void handle_cap_import(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
691 void handle_cap_export(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
692 void handle_cap_trunc(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
693 void handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, const MConstRef<MClientCaps>& m);
694 void handle_cap_flushsnap_ack(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
695 void handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, const MConstRef<MClientCaps>& m);
696 void cap_delay_requeue(Inode *in);
697
698 void send_cap(Inode *in, MetaSession *session, Cap *cap, int flags,
699 int used, int want, int retain, int flush,
700 ceph_tid_t flush_tid);
701
702 void send_flush_snap(Inode *in, MetaSession *session, snapid_t follows, CapSnap& capsnap);
703
704 void flush_snaps(Inode *in);
705 void get_cap_ref(Inode *in, int cap);
706 void put_cap_ref(Inode *in, int cap);
707 void wait_sync_caps(Inode *in, ceph_tid_t want);
708 void wait_sync_caps(ceph_tid_t want);
709 void queue_cap_snap(Inode *in, SnapContext &old_snapc);
710 void finish_cap_snap(Inode *in, CapSnap &capsnap, int used);
711
712 void _schedule_invalidate_dentry_callback(Dentry *dn, bool del);
713 void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name);
714 void _try_to_trim_inode(Inode *in, bool sched_inval);
715
716 void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len);
717 void _invalidate_inode_cache(Inode *in);
718 void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len);
719 void _async_invalidate(vinodeno_t ino, int64_t off, int64_t len);
720
721 void _schedule_ino_release_callback(Inode *in);
722 void _async_inode_release(vinodeno_t ino);
723
724 bool _release(Inode *in);
725
726 /**
727 * Initiate a flush of the data associated with the given inode.
728 * If you specify a Context, you are responsible for holding an inode
729 * reference for the duration of the flush. If not, _flush() will
730 * take the reference for you.
731 * @param in The Inode whose data you wish to flush.
732 * @param c The Context you wish us to complete once the data is
733 * flushed. If already flushed, this will be called in-line.
734 *
735 * @returns true if the data was already flushed, false otherwise.
736 */
737 bool _flush(Inode *in, Context *c);
738 void _flush_range(Inode *in, int64_t off, uint64_t size);
739 void _flushed(Inode *in);
740 void flush_set_callback(ObjectCacher::ObjectSet *oset);
741
742 void close_release(Inode *in);
743 void close_safe(Inode *in);
744
745 void lock_fh_pos(Fh *f);
746 void unlock_fh_pos(Fh *f);
747
748 // metadata cache
749 void update_dir_dist(Inode *in, DirStat *st);
750
751 void clear_dir_complete_and_ordered(Inode *diri, bool complete);
752 void insert_readdir_results(MetaRequest *request, MetaSession *session, Inode *diri);
753 Inode* insert_trace(MetaRequest *request, MetaSession *session);
754 void update_inode_file_size(Inode *in, int issued, uint64_t size,
755 uint64_t truncate_seq, uint64_t truncate_size);
756 void update_inode_file_time(Inode *in, int issued, uint64_t time_warp_seq,
757 utime_t ctime, utime_t mtime, utime_t atime);
758
759 Inode *add_update_inode(InodeStat *st, utime_t ttl, MetaSession *session,
760 const UserPerm& request_perms);
761 Dentry *insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dlease,
762 Inode *in, utime_t from, MetaSession *session,
763 Dentry *old_dentry = NULL);
764 void update_dentry_lease(Dentry *dn, LeaseStat *dlease, utime_t from, MetaSession *session);
765
766 bool use_faked_inos() { return _use_faked_inos; }
767 vinodeno_t map_faked_ino(ino_t ino);
768
769 //notify the mds to flush the mdlog
770 void flush_mdlog_sync();
771 void flush_mdlog(MetaSession *session);
772
773 void renew_caps();
774 void renew_caps(MetaSession *session);
775 void flush_cap_releases();
776 void renew_and_flush_cap_releases();
777 void tick();
778 void start_tick_thread();
779
780 void inc_dentry_nr() {
781 ++dentry_nr;
782 }
783 void dec_dentry_nr() {
784 --dentry_nr;
785 }
786 void dlease_hit() {
787 ++dlease_hits;
788 }
789 void dlease_miss() {
790 ++dlease_misses;
791 }
792 std::tuple<uint64_t, uint64_t, uint64_t> get_dlease_hit_rates() {
793 return std::make_tuple(dlease_hits, dlease_misses, dentry_nr);
794 }
795
796 void cap_hit() {
797 ++cap_hits;
798 }
799 void cap_miss() {
800 ++cap_misses;
801 }
802 std::pair<uint64_t, uint64_t> get_cap_hit_rates() {
803 return std::make_pair(cap_hits, cap_misses);
804 }
805
806 void inc_opened_files() {
807 ++opened_files;
808 }
809 void dec_opened_files() {
810 --opened_files;
811 }
812 std::pair<uint64_t, uint64_t> get_opened_files_rates() {
813 return std::make_pair(opened_files, inode_map.size());
814 }
815
816 void inc_pinned_icaps() {
817 ++pinned_icaps;
818 }
819 void dec_pinned_icaps(uint64_t nr=1) {
820 pinned_icaps -= nr;
821 }
822 std::pair<uint64_t, uint64_t> get_pinned_icaps_rates() {
823 return std::make_pair(pinned_icaps, inode_map.size());
824 }
825
826 void inc_opened_inodes() {
827 ++opened_inodes;
828 }
829 void dec_opened_inodes() {
830 --opened_inodes;
831 }
832 std::pair<uint64_t, uint64_t> get_opened_inodes_rates() {
833 return std::make_pair(opened_inodes, inode_map.size());
834 }
835
836 xlist<Inode*> &get_dirty_list() { return dirty_list; }
837
838 /* timer_lock for 'timer' */
839 ceph::mutex timer_lock = ceph::make_mutex("Client::timer_lock");
840 SafeTimer timer;
841
842 /* tick thread */
// upkeeper is the thread object; upkeep_cond / tick_thread_stopped are
// presumably its wake-up and shutdown signals -- TODO confirm in Client.cc.
843 std::thread upkeeper;
844 ceph::condition_variable upkeep_cond;
845 bool tick_thread_stopped = false;
846
// Owned perf-counter instance and MDS map.
847 std::unique_ptr<PerfCounters> logger;
848 std::unique_ptr<MDSMap> mdsmap;
849
// No in-class initializer: must be set by a constructor / config path.
850 bool fuse_default_permissions;
851
852 protected:
853 /* Flags for check_caps() */
// Distinct bits (0x1, 0x2), so they can be OR-ed into the `flags` argument.
854 static const unsigned CHECK_CAPS_NODELAY = 0x1;
855 static const unsigned CHECK_CAPS_SYNCHRONOUS = 0x2;
856
857 void check_caps(Inode *in, unsigned flags);
858
859 void set_cap_epoch_barrier(epoch_t e);
860
// MDS command / FSMap plumbing. resolve_mds() writes the matching GIDs for
// `mds_spec` into *targets (output vector supplied by the caller).
861 void handle_command_reply(const MConstRef<MCommandReply>& m);
862 int fetch_fsmap(bool user);
863 int resolve_mds(
864 const std::string &mds_spec,
865 std::vector<mds_gid_t> *targets);
866
// MDS session management. Semantics below are inferred from names only;
// see Client.cc for the authoritative behavior.
867 void get_session_metadata(std::map<std::string, std::string> *meta) const;
868 bool have_open_session(mds_rank_t mds);
869 void got_mds_push(MetaSession *s);
870 MetaSession *_get_mds_session(mds_rank_t mds, Connection *con); ///< return session for mds *and* con; null otherwise
871 MetaSession *_get_or_open_mds_session(mds_rank_t mds);
872 MetaSession *_open_mds_session(mds_rank_t mds);
873 void _close_mds_session(MetaSession *s);
874 void _closed_mds_session(MetaSession *s, int err=0, bool rejected=false);
875 bool _any_stale_sessions() const;
876 void _kick_stale_sessions();
877 void handle_client_session(const MConstRef<MClientSession>& m);
878 void send_reconnect(MetaSession *s);
879 void resend_unsafe_requests(MetaSession *s);
880 void wait_unsafe_requests();
881
// Debug dumps of in-flight MDS requests / sessions into a Formatter.
882 void dump_mds_requests(Formatter *f);
883 void dump_mds_sessions(Formatter *f, bool cap_dump=false);
884
// MDS request lifecycle. make_request() takes optional out-pointers
// (ptarget, pcreated, pdirbl all default to null) and an optional explicit
// target rank (use_mds, default -1).
885 int make_request(MetaRequest *req, const UserPerm& perms,
886 InodeRef *ptarget = 0, bool *pcreated = 0,
887 mds_rank_t use_mds=-1, bufferlist *pdirbl=0);
888 void put_request(MetaRequest *request);
889 void unregister_request(MetaRequest *request);
890
891 int verify_reply_trace(int r, MetaSession *session, MetaRequest *request,
892 const MConstRef<MClientReply>& reply,
893 InodeRef *ptarget, bool *pcreated,
894 const UserPerm& perms);
// Cap-release encoding for an outgoing request. `drop` / `unless` are int
// masks; presumably cap bits -- TODO confirm.
895 void encode_cap_releases(MetaRequest *request, mds_rank_t mds);
896 int encode_inode_release(Inode *in, MetaRequest *req,
897 mds_rank_t mds, int drop,
898 int unless,int force=0);
899 void encode_dentry_release(Dentry *dn, MetaRequest *req,
900 mds_rank_t mds, int drop, int unless);
// Target selection and sending.
901 mds_rank_t choose_target_mds(MetaRequest *req, Inode** phash_diri=NULL);
902 void connect_mds_targets(mds_rank_t mds);
903 void send_request(MetaRequest *request, MetaSession *session,
904 bool drop_cap_releases=false);
905 MRef<MClientRequest> build_client_request(MetaRequest *request);
906 void kick_requests(MetaSession *session);
907 void kick_requests_closed(MetaSession *session);
908 void handle_client_request_forward(const MConstRef<MClientRequestForward>& reply);
909 void handle_client_reply(const MConstRef<MClientReply>& reply);
910 bool is_dir_operation(MetaRequest *request);
911
// Path resolution. Two overloads: one fills a walk_dentry_result, the other
// yields only the final InodeRef via *end. followsym defaults to true.
912 int path_walk(const filepath& fp, struct walk_dentry_result* result, const UserPerm& perms, bool followsym=true, int mask=0);
913 int path_walk(const filepath& fp, InodeRef *end, const UserPerm& perms,
914 bool followsym=true, int mask=0);
915
916 // fake inode number for 32-bits ino_t
// Underscore-prefixed counterparts of the faked-ino API; the leading
// underscore presumably marks "caller already holds client_lock" as it does
// for _unmount() in this class -- TODO confirm.
917 void _assign_faked_ino(Inode *in);
918 void _assign_faked_root(Inode *in);
919 void _release_faked_ino(Inode *in);
920 void _reset_faked_inos();
921 vinodeno_t _map_faked_ino(ino_t ino);
922
923 // Optional extra metadata about me to send to the MDS
924 void populate_metadata(const std::string &mount_root);
925
// Snap realm bookkeeping, keyed by inodeno_t. get_/put_ look like a
// reference-counted pair; the *_maybe variant presumably returns null
// instead of creating -- TODO confirm.
926 SnapRealm *get_snap_realm(inodeno_t r);
927 SnapRealm *get_snap_realm_maybe(inodeno_t r);
928 void put_snap_realm(SnapRealm *realm);
929 bool adjust_realm_parent(SnapRealm *realm, inodeno_t parent);
930 void update_snap_trace(const bufferlist& bl, SnapRealm **realm_ret, bool must_flush=true);
931 void invalidate_snaprealm_and_children(SnapRealm *realm);
932
933 Inode *open_snapdir(Inode *diri);
934
935 int get_fd() {
936 int fd = free_fd_set.range_start();
937 free_fd_set.erase(fd, 1);
938 return fd;
939 }
940 void put_fd(int fd) {
941 free_fd_set.insert(fd, 1);
942 }
943
944 /*
945 * Resolve file descriptor, or return NULL.
946 */
947 Fh *get_filehandle(int fd) {
948 auto it = fd_map.find(fd);
949 if (it == fd_map.end())
950 return NULL;
951 return it->second;
952 }
953
954 // helpers
955 void wake_up_session_caps(MetaSession *s, bool reconnect);
956
// Block on / wake a list of Context waiters (list is passed by reference).
957 void wait_on_context_list(list<Context*>& ls);
958 void signal_context_list(list<Context*>& ls);
959
960 // -- metadata cache stuff
961
962 // decrease inode ref. delete if dangling.
// `n` is the number of references to drop (put_inode defaults to 1).
963 void _put_inode(Inode *in, int n);
964 void delay_put_inodes(bool wakeup=false);
965 void put_inode(Inode *in, int n=1);
966 void close_dir(Dir *dir);
967
// fs_name defaults to the empty string.
968 int subscribe_mdsmap(const std::string &fs_name="");
969
970 void _abort_mds_sessions(int err);
971
972 // same as unmount() but for when the client_lock is already held
973 void _unmount(bool abort);
974
975 //int get_cache_size() { return lru.lru_get_size(); }
976
977 /**
978 * Don't call this with in==NULL, use get_or_create for that
979 * leave dn set to default NULL unless you're trying to add
980 * a new inode to a pre-created Dentry
981 */
// NOTE(review): the comment above mentions a "default NULL" for dn, but this
// declaration gives dn no default argument -- callers must pass it explicitly.
982 Dentry* link(Dir *dir, const string& name, Inode *in, Dentry *dn);
983 void unlink(Dentry *dn, bool keepdir, bool keepdentry);
984
985 int fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0);
986 int fill_stat(InodeRef& in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0) {
987 return fill_stat(in.get(), st, dirstat, rstat);
988 }
989
990 void fill_statx(Inode *in, unsigned int mask, struct ceph_statx *stx);
991 void fill_statx(InodeRef& in, unsigned int mask, struct ceph_statx *stx) {
992 return fill_statx(in.get(), mask, stx);
993 }
994
995 void touch_dn(Dentry *dn);
996
997 // trim cache.
998 void trim_cache(bool trim_kernel_dcache=false);
999 void trim_cache_for_reconnect(MetaSession *s);
1000 void trim_dentry(Dentry *dn);
1001 void trim_caps(MetaSession *s, uint64_t max);
1002 void _invalidate_kernel_dcache();
1003 void _trim_negative_child_dentries(InodeRef& in);
1004
// `did` is passed by reference; presumably the set of inodes already dumped,
// so each is emitted once -- TODO confirm.
1005 void dump_inode(Formatter *f, Inode *in, set<Inode*>& did, bool disconnected);
1006 void dump_cache(Formatter *f); // debug
1007
1008 // force read-only
1009 void force_session_readonly(MetaSession *s);
1010
1011 void dump_status(Formatter *f); // debug
1012
// Messenger callbacks: all marked `override`, i.e. they implement virtuals of
// a base class declared outside this chunk (msg/Dispatcher.h is included).
1013 bool ms_dispatch2(const MessageRef& m) override;
1014
1015 void ms_handle_connect(Connection *con) override;
1016 bool ms_handle_reset(Connection *con) override;
1017 void ms_handle_remote_reset(Connection *con) override;
1018 bool ms_handle_refused(Connection *con) override;
1019
1020 int authenticate();
1021
// Quota checks. check_quota_condition() takes a predicate over an Inode;
// the is_quota_* helpers are presumably thin wrappers around it -- confirm.
1022 Inode* get_quota_root(Inode *in, const UserPerm& perms);
1023 bool check_quota_condition(Inode *in, const UserPerm& perms,
1024 std::function<bool (const Inode &)> test);
1025 bool is_quota_files_exceeded(Inode *in, const UserPerm& perms);
1026 bool is_quota_bytes_exceeded(Inode *in, int64_t new_bytes,
1027 const UserPerm& perms);
1028 bool is_quota_bytes_approaching(Inode *in, const UserPerm& perms);
1029
1030 int check_pool_perm(Inode *in, int need);
1031
1032 void handle_client_reclaim_reply(const MConstRef<MClientReclaimReply>& reply);
1033
1034 /**
1035 * Call this when an OSDMap is seen with a full flag (global or per pool)
1036 * set.
1037 *
1038 * @param pool the pool ID affected, or -1 if all.
1039 */
1040 void _handle_full_flag(int64_t pool);
1041
1042 void _close_sessions();
1043
1044 void _pre_init();
1045
1046 /**
1047 * The basic housekeeping parts of init (perf counters, admin socket)
1048 * that is independent of how objecters/monclient/messengers are
1049 * being set up.
1050 */
1051 void _finish_init();
1052
1053 // global client lock
1054 // - protects Client and buffer cache both!
1055 ceph::mutex client_lock = ceph::make_mutex("Client::client_lock");
1056
// snapid -> int; presumably a per-snapshot low-level reference count.
1057 std::map<snapid_t, int> ll_snap_ref;
1058
// Root of the mounted tree plus tracking for its ancestors. `root` and
// `root_ancestor` are raw pointers; root_parents holds InodeRef values.
1059 Inode* root = nullptr;
1060 map<Inode*, InodeRef> root_parents;
1061 Inode* root_ancestor = nullptr;
1062 LRU lru; // lru list of Dentry's in our local metadata cache.
1063
// Current working directory inode.
1064 InodeRef cwd;
1065
// Owned I/O helpers (destroyed with the Client).
1066 std::unique_ptr<Filer> filer;
1067 std::unique_ptr<ObjectCacher> objectcacher;
1068 std::unique_ptr<WritebackHandler> writeback_handler;
1069
// Raw pointers with no in-class initializer. Ownership is not expressed in
// the type; StandaloneClient's description suggests it may vary by subclass.
1070 Messenger *messenger;
1071 MonClient *monclient;
1072 Objecter *objecter;
1073
1074 client_t whoami;
1075
1076 /* The state migration mechanism */
// One enum serves two independent state machines (initialize_state and
// mount_state). Enumerator order matters: initialize_state_t's reader check
// compares with `>= CLIENT_INITIALIZED`.
1077 enum _state {
1078 /* For the initialize_state */
1079 CLIENT_NEW, // The initial state for the initialize_state or after Client::shutdown()
1080 CLIENT_INITIALIZING, // At the beginning of the Client::init()
1081 CLIENT_INITIALIZED, // At the end of Client::init()
1082
1083 /* For the mount_state */
1084 CLIENT_UNMOUNTED, // The initial state for the mount_state or after unmounted
1085 CLIENT_MOUNTING, // At the beginning of Client::mount()
1086 CLIENT_MOUNTED, // At the end of Client::mount()
1087 CLIENT_UNMOUNTING, // At the beginning of the Client::_unmount()
1088 };
1089
// state_t names the enum; RWRef_t is the RWRef specialization over it.
1090 typedef enum _state state_t;
1091 using RWRef_t = RWRef<state_t>;
1092
1093 struct mount_state_t : public RWRefState<state_t> {
1094 public:
1095 bool is_valid_state(state_t state) const override {
1096 switch (state) {
1097 case Client::CLIENT_MOUNTING:
1098 case Client::CLIENT_MOUNTED:
1099 case Client::CLIENT_UNMOUNTING:
1100 case Client::CLIENT_UNMOUNTED:
1101 return true;
1102 default:
1103 return false;
1104 }
1105 }
1106
1107 int check_reader_state(state_t require) const override {
1108 if (require == Client::CLIENT_MOUNTING &&
1109 (state == Client::CLIENT_MOUNTING || state == Client::CLIENT_MOUNTED))
1110 return true;
1111 else
1112 return false;
1113 }
1114
1115 /* The state migration check */
1116 int check_writer_state(state_t require) const override {
1117 if (require == Client::CLIENT_MOUNTING &&
1118 state == Client::CLIENT_UNMOUNTED)
1119 return true;
1120 else if (require == Client::CLIENT_MOUNTED &&
1121 state == Client::CLIENT_MOUNTING)
1122 return true;
1123 else if (require == Client::CLIENT_UNMOUNTING &&
1124 state == Client::CLIENT_MOUNTED)
1125 return true;
1126 else if (require == Client::CLIENT_UNMOUNTED &&
1127 state == Client::CLIENT_UNMOUNTING)
1128 return true;
1129 else
1130 return false;
1131 }
1132
1133 mount_state_t(state_t state, const char *lockname, uint64_t reader_cnt=0)
1134 : RWRefState (state, lockname, reader_cnt) {}
1135 ~mount_state_t() {}
1136 };
1137
1138 struct initialize_state_t : public RWRefState<state_t> {
1139 public:
1140 bool is_valid_state(state_t state) const override {
1141 switch (state) {
1142 case Client::CLIENT_NEW:
1143 case Client::CLIENT_INITIALIZING:
1144 case Client::CLIENT_INITIALIZED:
1145 return true;
1146 default:
1147 return false;
1148 }
1149 }
1150
1151 int check_reader_state(state_t require) const override {
1152 if (require == Client::CLIENT_INITIALIZED &&
1153 state >= Client::CLIENT_INITIALIZED)
1154 return true;
1155 else
1156 return false;
1157 }
1158
1159 /* The state migration check */
1160 int check_writer_state(state_t require) const override {
1161 if (require == Client::CLIENT_INITIALIZING &&
1162 (state == Client::CLIENT_NEW))
1163 return true;
1164 else if (require == Client::CLIENT_INITIALIZED &&
1165 (state == Client::CLIENT_INITIALIZING))
1166 return true;
1167 else if (require == Client::CLIENT_NEW &&
1168 (state == Client::CLIENT_INITIALIZED))
1169 return true;
1170 else
1171 return false;
1172 }
1173
1174 initialize_state_t(state_t state, const char *lockname, uint64_t reader_cnt=0)
1175 : RWRefState (state, lockname, reader_cnt) {}
1176 ~initialize_state_t() {}
1177 };
1178
// The two state-machine instances of this client. Neither type has a default
// constructor, so both must be initialized in Client's constructor.
1179 struct mount_state_t mount_state;
1180 struct initialize_state_t initialize_state;
1181
1182 private:
// Completion Context bound to a Client and an Fh. Constructor, destructor and
// finish() are defined out of line; going by the name it completes a
// readahead operation -- see Client.cc.
1183 struct C_Readahead : public Context {
1184 C_Readahead(Client *c, Fh *f);
1185 ~C_Readahead() override;
1186 void finish(int r) override;
1187
// Raw, non-owning-looking pointers; lifetime rules are not visible here.
1188 Client *client;
1189 Fh *f;
1190 };
1191
1192 /*
1193 * These define virtual xattrs exposing the recursive directory
1194 * statistics and layout metadata.
1195 */
1196 struct VXattr {
// xattr name
1197 const string name;
// member-function callback producing the value: (inode, buffer, buffer size)
// -> size_t
1198 size_t (Client::*getxattr_cb)(Inode *in, char *val, size_t size);
1199 bool readonly;
// member-function predicate taking the inode; presumably decides whether the
// xattr is exposed for that inode -- TODO confirm null handling at call sites
1200 bool (Client::*exists_cb)(Inode *in);
// flag bits; see the VXATTR_* constants declared in this class
1201 unsigned int flags;
1202 };
1203
// ACL modes; NO_ACL is the in-class default of the acl_type member.
1204 enum {
1205 NO_ACL = 0,
1206 POSIX_ACL,
1207 };
1208
// Permission bits (1, 2, 4): distinct powers of two, so they can be OR-ed
// into a single mask.
1209 enum {
1210 MAY_EXEC = 1,
1211 MAY_WRITE = 2,
1212 MAY_READ = 4,
1213 };
1214
// unique_ptr over a CephContext with a type-erased custom deleter; what the
// deleter does is decided where this member is constructed.
1215 std::unique_ptr<CephContext, std::function<void(CephContext*)>> cct_deleter;
1216
1217 /* Flags for VXattr */
1218 static const unsigned VXATTR_RSTAT = 0x1;
1219 static const unsigned VXATTR_DIRSTAT = 0x2;
1220
// Static VXattr tables, defined out of line (array bounds are not given here).
1221 static const VXattr _dir_vxattrs[];
1222 static const VXattr _file_vxattrs[];
1223 static const VXattr _common_vxattrs[];
1224
1225
1226
// Fills a struct dirent from the given name, type, inode number and offset.
1227 void fill_dirent(struct dirent *de, const char *name, int type, uint64_t ino, loff_t next_off);
1228
// Directory-stream internals operating on a dir_result_t. _opendir() returns
// the new stream through *dirpp.
1229 int _opendir(Inode *in, dir_result_t **dirpp, const UserPerm& perms);
1230 void _readdir_drop_dirp_buffer(dir_result_t *dirp);
1231 bool _readdir_have_frag(dir_result_t *dirp);
1232 void _readdir_next_frag(dir_result_t *dirp);
1233 void _readdir_rechoose_frag(dir_result_t *dirp);
1234 int _readdir_get_frag(dir_result_t *dirp);
1235 int _readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p, int caps, bool getref);
1236 void _closedir(dir_result_t *dirp);
1237
1238 // other helpers
1239 void _fragmap_remove_non_leaves(Inode *in);
1240 void _fragmap_remove_stopped_mds(Inode *in, mds_rank_t mds);
1241
// Low-level ("ll") inode reference helpers; _ll_put() drops `num` references.
1242 void _ll_get(Inode *in);
1243 int _ll_put(Inode *in, uint64_t num);
1244 void _ll_drop_pins();
1245
// File-handle lifecycle.
1246 Fh *_create_fh(Inode *in, int flags, int cmode, const UserPerm& perms);
1247 int _release_fh(Fh *fh);
1248 void _put_fh(Fh *fh);
1249
1250 int _do_remount(bool retry_on_error);
1251
// Read helpers: data is returned through *bl; _read_sync additionally has a
// bool out-parameter `checkeof`.
1252 int _read_sync(Fh *f, uint64_t off, uint64_t len, bufferlist *bl, bool *checkeof);
1253 int _read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl);
1254
1255 bool _dentry_valid(const Dentry *dn);
1256
1257 // internal interface
1258 // call these with client_lock held!
// Common shape: operate on raw Inode* (or InodeRef&), take the caller's
// UserPerm, return int, and -- where present -- report a resulting inode
// through an optional InodeRef* out-parameter (`target` / `inp`).
1259 int _do_lookup(Inode *dir, const string& name, int mask, InodeRef *target,
1260 const UserPerm& perms);
1261
1262 int _lookup(Inode *dir, const string& dname, int mask, InodeRef *target,
1263 const UserPerm& perm, std::string* alternate_name=nullptr);
1264
// Namespace operations. NOTE(review): alternate_name is taken by value
// (std::string) in these signatures.
1265 int _link(Inode *in, Inode *dir, const char *name, const UserPerm& perm, std::string alternate_name,
1266 InodeRef *inp = 0);
1267 int _unlink(Inode *dir, const char *name, const UserPerm& perm);
1268 int _rename(Inode *olddir, const char *oname, Inode *ndir, const char *nname, const UserPerm& perm, std::string alternate_name);
1269 int _mkdir(Inode *dir, const char *name, mode_t mode, const UserPerm& perm,
1270 InodeRef *inp = 0, const std::map<std::string, std::string> &metadata={},
1271 std::string alternate_name="");
1272 int _rmdir(Inode *dir, const char *name, const UserPerm& perms);
1273 int _symlink(Inode *dir, const char *name, const char *target,
1274 const UserPerm& perms, std::string alternate_name, InodeRef *inp = 0);
1275 int _mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev,
1276 const UserPerm& perms, InodeRef *inp = 0);
// Attribute setters; `mask` selects which fields of *stx / *attr apply
// (presumed from the parameter name -- confirm).
1277 int _do_setattr(Inode *in, struct ceph_statx *stx, int mask,
1278 const UserPerm& perms, InodeRef *inp);
1279 void stat_to_statx(struct stat *st, struct ceph_statx *stx);
1280 int __setattrx(Inode *in, struct ceph_statx *stx, int mask,
1281 const UserPerm& perms, InodeRef *inp = 0);
1282 int _setattrx(InodeRef &in, struct ceph_statx *stx, int mask,
1283 const UserPerm& perms);
1284 int _setattr(InodeRef &in, struct stat *attr, int mask,
1285 const UserPerm& perms);
1286 int _ll_setattrx(Inode *in, struct ceph_statx *stx, int mask,
1287 const UserPerm& perms, InodeRef *inp = 0);
1288 int _getattr(Inode *in, int mask, const UserPerm& perms, bool force=false);
1289 int _getattr(InodeRef &in, int mask, const UserPerm& perms, bool force=false) {
1290 return _getattr(in.get(), mask, perms, force);
1291 }
// Copies the link target into buf (capacity `size`).
1292 int _readlink(Inode *in, char *buf, size_t size);
// Extended-attribute operations. Most come in an Inode* and an InodeRef&
// flavor with otherwise identical parameters.
1293 int _getxattr(Inode *in, const char *name, void *value, size_t len,
1294 const UserPerm& perms);
1295 int _getxattr(InodeRef &in, const char *name, void *value, size_t len,
1296 const UserPerm& perms);
1297 int _listxattr(Inode *in, char *names, size_t len, const UserPerm& perms);
1298 int _do_setxattr(Inode *in, const char *name, const void *value, size_t len,
1299 int flags, const UserPerm& perms);
1300 int _setxattr(Inode *in, const char *name, const void *value, size_t len,
1301 int flags, const UserPerm& perms);
1302 int _setxattr(InodeRef &in, const char *name, const void *value, size_t len,
1303 int flags, const UserPerm& perms);
1304 int _setxattr_check_data_pool(string& name, string& value, const OSDMap *osdmap);
1305 void _setxattr_maybe_wait_for_osdmap(const char *name, const void *value, size_t len);
1306 int _removexattr(Inode *in, const char *nm, const UserPerm& perms);
1307 int _removexattr(InodeRef &in, const char *nm, const UserPerm& perms);
// Open/create. The resulting handle is returned through *fhp; _create also
// reports through *inp and *created.
1308 int _open(Inode *in, int flags, mode_t mode, Fh **fhp,
1309 const UserPerm& perms);
1310 int _renew_caps(Inode *in);
1311 int _create(Inode *in, const char *name, int flags, mode_t mode, InodeRef *inp,
1312 Fh **fhp, int stripe_unit, int stripe_count, int object_size,
1313 const char *data_pool, bool *created, const UserPerm &perms,
1314 std::string alternate_name);
1315
// File I/O on an open handle. _write takes either a flat buffer or an iovec
// array (both parameters are present in the signature).
1316 loff_t _lseek(Fh *fh, loff_t offset, int whence);
1317 int64_t _read(Fh *fh, int64_t offset, uint64_t size, bufferlist *bl);
1318 int64_t _write(Fh *fh, int64_t offset, uint64_t size, const char *buf,
1319 const struct iovec *iov, int iovcnt);
// The *_locked variant receives the caller's unique_lock by reference.
1320 int64_t _preadv_pwritev_locked(Fh *fh, const struct iovec *iov,
1321 unsigned iovcnt, int64_t offset,
1322 bool write, bool clamp_to_int,
1323 std::unique_lock<ceph::mutex> &cl);
1324 int _preadv_pwritev(int fd, const struct iovec *iov, unsigned iovcnt, int64_t offset, bool write);
1325 int _flush(Fh *fh);
1326 int _fsync(Fh *fh, bool syncdataonly);
1327 int _fsync(Inode *in, bool syncdataonly);
1328 int _sync_fs();
1329 int _fallocate(Fh *fh, int mode, int64_t offset, int64_t length);
// Advisory locking on an open handle; `owner` identifies the lock owner.
1330 int _getlk(Fh *fh, struct flock *fl, uint64_t owner);
1331 int _setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep);
1332 int _flock(Fh *fh, int cmd, uint64_t owner);
1333 int _lazyio(Fh *fh, int enable);
1334
// Returns the dentry for `name` under `dir` through *pdn; expect_null
// defaults to false. Exact creation semantics live in Client.cc.
1335 int get_or_create(Inode *dir, const char* name,
1336 Dentry **pdn, bool expect_null=false);
1337
// Permission checks; all return int (presumably 0 or a negative error code,
// following the rest of this class -- TODO confirm).
1338 int xattr_permission(Inode *in, const char *name, unsigned want,
1339 const UserPerm& perms);
1340 int may_setattr(Inode *in, struct ceph_statx *stx, int mask,
1341 const UserPerm& perms);
1342 int may_open(Inode *in, int flags, const UserPerm& perms);
1343 int may_lookup(Inode *dir, const UserPerm& perms);
1344 int may_create(Inode *dir, const UserPerm& perms);
1345 int may_delete(Inode *dir, const char *name, const UserPerm& perms);
1346 int may_hardlink(Inode *in, const UserPerm& perms);
1347
1348 int _getattr_for_perm(Inode *in, const UserPerm& perms);
1349
1350 vinodeno_t _get_vino(Inode *in);
1351
// Virtual-xattr callbacks. The size_t-returning functions match the
// VXattr::getxattr_cb signature (inode, output buffer, buffer size); the
// bool-returning *_exists functions match VXattr::exists_cb.
1352 bool _vxattrcb_quota_exists(Inode *in);
1353 size_t _vxattrcb_quota(Inode *in, char *val, size_t size);
1354 size_t _vxattrcb_quota_max_bytes(Inode *in, char *val, size_t size);
1355 size_t _vxattrcb_quota_max_files(Inode *in, char *val, size_t size);
1356
1357 bool _vxattrcb_layout_exists(Inode *in);
1358 size_t _vxattrcb_layout(Inode *in, char *val, size_t size);
1359 size_t _vxattrcb_layout_stripe_unit(Inode *in, char *val, size_t size);
1360 size_t _vxattrcb_layout_stripe_count(Inode *in, char *val, size_t size);
1361 size_t _vxattrcb_layout_object_size(Inode *in, char *val, size_t size);
1362 size_t _vxattrcb_layout_pool(Inode *in, char *val, size_t size);
1363 size_t _vxattrcb_layout_pool_namespace(Inode *in, char *val, size_t size);
1364 size_t _vxattrcb_dir_entries(Inode *in, char *val, size_t size);
1365 size_t _vxattrcb_dir_files(Inode *in, char *val, size_t size);
1366 size_t _vxattrcb_dir_subdirs(Inode *in, char *val, size_t size);
1367 size_t _vxattrcb_dir_rentries(Inode *in, char *val, size_t size);
1368 size_t _vxattrcb_dir_rfiles(Inode *in, char *val, size_t size);
1369 size_t _vxattrcb_dir_rsubdirs(Inode *in, char *val, size_t size);
1370 size_t _vxattrcb_dir_rsnaps(Inode *in, char *val, size_t size);
1371 size_t _vxattrcb_dir_rbytes(Inode *in, char *val, size_t size);
1372 size_t _vxattrcb_dir_rctime(Inode *in, char *val, size_t size);
1373
1374 bool _vxattrcb_dir_pin_exists(Inode *in);
1375 size_t _vxattrcb_dir_pin(Inode *in, char *val, size_t size);
1376
1377 bool _vxattrcb_snap_btime_exists(Inode *in);
1378 size_t _vxattrcb_snap_btime(Inode *in, char *val, size_t size);
1379
1380 bool _vxattrcb_mirror_info_exists(Inode *in);
1381 size_t _vxattrcb_mirror_info(Inode *in, char *val, size_t size);
1382
1383 size_t _vxattrcb_cluster_fsid(Inode *in, char *val, size_t size);
1384 size_t _vxattrcb_client_id(Inode *in, char *val, size_t size);
1385
// Static lookups into the VXattr tables: the table for an inode, and the
// entry matching `name` (presumably null when nothing matches -- confirm).
1386 static const VXattr *_get_vxattrs(Inode *in);
1387 static const VXattr *_match_vxattr(Inode *in, const char *name);
1388
// File-lock internals shared by the _getlk/_setlk/_flock entry points
// (relationship inferred from names -- confirm in Client.cc).
1389 int _do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep,
1390 struct flock *fl, uint64_t owner, bool removing=false);
1391 int _interrupt_filelock(MetaRequest *req);
1392 void _encode_filelocks(Inode *in, bufferlist& bl);
1393 void _release_filelocks(Fh *fh);
1394 void _update_lock_state(struct flock *fl, uint64_t owner, ceph_lock_state_t *lock_state);
1395
// POSIX ACL helpers. _posix_acl_create takes `mode` by pointer, so it can
// adjust the caller's mode, and appends to xattrs_bl.
1396 int _posix_acl_create(Inode *dir, mode_t *mode, bufferlist& xattrs_bl,
1397 const UserPerm& perms);
1398 int _posix_acl_chmod(Inode *in, mode_t mode, const UserPerm& perms);
1399 int _posix_acl_permission(Inode *in, const UserPerm& perms, unsigned want);
1400
1401 mds_rank_t _get_random_up_mds() const;
1402
// Lookup helpers; the Inode** out-parameters are optional (default NULL).
1403 int _ll_getattr(Inode *in, int caps, const UserPerm& perms);
1404 int _lookup_parent(Inode *in, const UserPerm& perms, Inode **parent=NULL);
1405 int _lookup_name(Inode *in, Inode *parent, const UserPerm& perms);
1406 int _lookup_vino(vinodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
1407 bool _ll_forget(Inode *in, uint64_t count);
1408
// Metrics reporting; presumably consumes the hit/miss counters declared at
// the end of this class -- TODO confirm.
1409 void collect_and_send_metrics();
1410 void collect_and_send_global_metrics();
1411
1412 uint32_t deleg_timeout = 0;
1413
// Optional embedder callbacks, all null by default. callback_handle is the
// opaque pointer that accompanies them (presumably passed back to each
// callback -- confirm at the call sites).
1414 client_switch_interrupt_callback_t switch_interrupt_cb = nullptr;
1415 client_remount_callback_t remount_cb = nullptr;
1416 client_ino_callback_t ino_invalidate_cb = nullptr;
1417 client_dentry_callback_t dentry_invalidate_cb = nullptr;
1418 client_umask_callback_t umask_cb = nullptr;
1419 client_ino_release_t ino_release_cb = nullptr;
1420 void *callback_handle = nullptr;
1421 bool can_invalidate_dentries = false;
1422
// One Finisher per asynchronous callback family, plus one for the objecter.
1423 Finisher async_ino_invalidator;
1424 Finisher async_dentry_invalidator;
1425 Finisher interrupt_finisher;
1426 Finisher remount_finisher;
1427 Finisher async_ino_releasor;
1428 Finisher objecter_finisher;
1429
1430 utime_t last_cap_renew;
1431
1432 CommandHook m_command_hook;
1433
// user_id / group_id have no in-class initializer; acl_type defaults to NO_ACL.
1434 int user_id, group_id;
1435 int acl_type = NO_ACL;
1436
1437 epoch_t cap_epoch_barrier = 0;
1438
1439 // mds sessions
1440 map<mds_rank_t, MetaSession> mds_sessions; // mds rank -> session
1441 std::set<mds_rank_t> mds_ranks_closing; // mds ranks currently tearing down sessions
1442 std::list<ceph::condition_variable*> waiting_for_mdsmap;
1443
1444 // FSMap, for when using mds_command
1445 std::list<ceph::condition_variable*> waiting_for_fsmap;
1446 std::unique_ptr<FSMap> fsmap;
1447 std::unique_ptr<FSMapUser> fsmap_user;
1448
1449 // This mutex only protects command_table
1450 ceph::mutex command_lock = ceph::make_mutex("Client::command_lock");
1451 // MDS command state
1452 CommandTable<MDSCommandOp> command_table;
1453
// Backing flag for use_faked_inos(); no in-class initializer.
1454 bool _use_faked_inos;
1455
1456 // Cluster fsid
1457 fs_cluster_id_t fscid;
1458
1459 // file handles, etc.
// free_fd_set feeds get_fd()/put_fd(); fd_map backs get_filehandle().
1460 interval_set<int> free_fd_set; // unused fds
1461 ceph::unordered_map<int, Fh*> fd_map;
1462 set<Fh*> ll_unclosed_fh_set;
1463 ceph::unordered_set<dir_result_t*> opened_dirs;
1464 uint64_t fd_gen = 1;
1465
1466 bool mount_aborted = false;
1467 bool blocklisted = false;
1468
// Inode tables. inode_map.size() is the denominator reported by the
// get_opened_*_rates() / get_pinned_icaps_rates() accessors.
1469 ceph::unordered_map<vinodeno_t, Inode*> inode_map;
1470 ceph::unordered_map<ino_t, vinodeno_t> faked_ino_map;
1471 interval_set<ino_t> free_faked_inos;
// No in-class initializers for the two cursors below.
1472 ino_t last_used_faked_ino;
1473 ino_t last_used_faked_root;
1474
1475 int local_osd = -CEPHFS_ENXIO;
1476 epoch_t local_osd_epoch = 0;
1477
1478 // mds requests
1479 ceph_tid_t last_tid = 0;
1480 ceph_tid_t oldest_tid = 0; // oldest incomplete mds request, excluding setfilelock requests
1481 map<ceph_tid_t, MetaRequest*> mds_requests;
1482
1483 // cap flushing
1484 ceph_tid_t last_flush_tid = 1;
1485
1486 // dirty_list keeps all the dirty inodes before flushing.
1487 xlist<Inode*> delayed_list, dirty_list;
1488 int num_flushing_caps = 0;
1489 ceph::unordered_map<inodeno_t,SnapRealm*> snap_realms;
1490 std::map<std::string, std::string> metadata;
1491
1492 utime_t last_auto_reconnect;
1493
1494 // trace generation
1495 ofstream traceout;
1496
1497 ceph::condition_variable mount_cond, sync_cond;
1498
// Keyed by (int64_t, string); presumably (pool id, pool namespace) -> cached
// permission bits -- TODO confirm against check_pool_perm().
1499 std::map<std::pair<int64_t,std::string>, int> pool_perms;
1500 std::list<ceph::condition_variable*> waiting_for_pool_perm;
1501
1502 uint64_t retries_on_invalidate = 0;
1503
1504 // state reclaim
1505 std::list<ceph::condition_variable*> waiting_for_reclaim;
1506 int reclaim_errno = 0;
1507 epoch_t reclaim_osd_epoch = 0;
1508 entity_addrvec_t reclaim_target_addrs;
1509
1510 // dentry lease metrics
1511 uint64_t dentry_nr = 0;
1512 uint64_t dlease_hits = 0;
1513 uint64_t dlease_misses = 0;
1514
// cap hit/miss counters (see cap_hit() / cap_miss())
1515 uint64_t cap_hits = 0;
1516 uint64_t cap_misses = 0;
1517
// counters behind the inc_/dec_ opened_files, pinned_icaps and opened_inodes
// helpers
1518 uint64_t opened_files = 0;
1519 uint64_t pinned_icaps = 0;
1520 uint64_t opened_inodes = 0;
1521
// Inode -> int map with its own spinlock; presumably the deferred put_inode
// counts guarded by delay_i_lock -- TODO confirm in delay_put_inodes().
1522 ceph::spinlock delay_i_lock;
1523 std::map<Inode*,int> delay_i_release;
1524 };
1525
1526 /**
1527 * Specialization of Client that manages its own Objecter instance
1528 * and handles init/shutdown of messenger/monclient
1529 */
1530 class StandaloneClient : public Client
1531 {
1532 public:
// Takes the messenger, the monitor client and an asio io_context by reference.
1533 StandaloneClient(Messenger *m, MonClient *mc, boost::asio::io_context& ictx);
1534
1535 ~StandaloneClient() override;
1536
// Both override virtuals of Client (declared outside this chunk).
1537 int init() override;
1538 void shutdown() override;
1539 };
1540
1541 #endif