]> git.proxmox.com Git - ceph.git/blob - ceph/src/client/Client.h
update ceph source to reef 18.2.1
[ceph.git] / ceph / src / client / Client.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #ifndef CEPH_CLIENT_H
17 #define CEPH_CLIENT_H
18
19 #include "common/CommandTable.h"
20 #include "common/Finisher.h"
21 #include "common/Timer.h"
22 #include "common/ceph_mutex.h"
23 #include "common/cmdparse.h"
24 #include "common/compiler_extensions.h"
25 #include "include/common_fwd.h"
26 #include "include/cephfs/ceph_ll_client.h"
27 #include "include/filepath.h"
28 #include "include/interval_set.h"
29 #include "include/lru.h"
30 #include "include/types.h"
31 #include "include/unordered_map.h"
32 #include "include/unordered_set.h"
33 #include "include/cephfs/metrics/Types.h"
34 #include "mds/mdstypes.h"
35 #include "include/cephfs/types.h"
36 #include "msg/Dispatcher.h"
37 #include "msg/MessageRef.h"
38 #include "msg/Messenger.h"
39 #include "osdc/ObjectCacher.h"
40
41 #include "RWRef.h"
42 #include "InodeRef.h"
43 #include "MetaSession.h"
44 #include "UserPerm.h"
45
46 #include <fstream>
47 #include <map>
48 #include <memory>
49 #include <set>
50 #include <string>
51 #include <thread>
52
53 using std::set;
54 using std::map;
55 using std::fstream;
56
57 class FSMap;
58 class FSMapUser;
59 class MonClient;
60
61
62 struct DirStat;
63 struct LeaseStat;
64 struct InodeStat;
65
66 class Filer;
67 class Objecter;
68 class WritebackHandler;
69
70 class MDSMap;
71 class Message;
72 class destructive_lock_ref_t;
73
// Indices of the client perf counters. l_c_first and l_c_last bracket the
// range; every other enumerator takes the next consecutive value.
// NOTE(review): the groupings below are read off the enumerator names only --
// confirm against the code that registers these counters.
enum {
  l_c_first = 20000,

  l_c_reply, l_c_lat, l_c_wrlat, l_c_read, l_c_fsync,

  l_c_md_avg, l_c_md_sqsum, l_c_md_ops,  // "md": presumably metadata ops
  l_c_rd_avg, l_c_rd_sqsum, l_c_rd_ops,  // "rd": presumably reads
  l_c_wr_avg, l_c_wr_sqsum, l_c_wr_ops,  // "wr": presumably writes

  l_c_last,
};
92
93
94 class MDSCommandOp : public CommandOp
95 {
96 public:
97 mds_gid_t mds_gid;
98
99 explicit MDSCommandOp(ceph_tid_t t) : CommandOp(t) {}
100 };
101
/* error codes for ceph_fuse */
/* Each replacement list is fully parenthesized so the leading unary minus
 * always stays attached to its operand wherever the macro is expanded. */
#define CEPH_FUSE_NO_MDS_UP    (-((1<<16)+0))  /* no mds up detected in ceph_fuse */
#define CEPH_FUSE_LAST         (-((1<<16)+1))  /* (unused) */
105
106 // ============================================
107 // types for my local metadata cache
108 /* basic structure:
109
110 - Dentries live in an LRU loop. they get expired based on last access.
111 see include/lru.h. items can be bumped to "mid" or "top" of list, etc.
112 - Inode has ref count for each Fh, Dir, or Dentry that points to it.
113 - when Inode ref goes to 0, it's expired.
114 - when Dir is empty, it's removed (and its Inode ref--)
115
116 */
117
118 /* getdir result */
// One entry of a getdir result: the entry name plus a stat record and an
// accompanying mask (presumably naming which stat fields are valid -- confirm
// with the code that fills stmask).
struct DirEntry {
  // Name-only entry. st is zero-initialized so that reading it is always
  // well-defined; stmask is 0.
  explicit DirEntry(const std::string &s) : d_name(s), st{}, stmask(0) {}

  // Entry with stat data. s is only copied from, so it is taken by const
  // reference; callers passing a non-const struct stat keep working.
  DirEntry(const std::string &n, const struct stat& s, int stm)
    : d_name(n), st(s), stmask(stm) {}

  std::string d_name;
  struct stat st;
  int stmask;
};
128
129 struct Cap;
130 class Dir;
131 class Dentry;
132 struct SnapRealm;
133 struct Fh;
134 struct CapSnap;
135
136 struct MetaRequest;
137 class ceph_lock_state_t;
138
139 // ========================================================
140 // client interface
141
142 struct dir_result_t {
143 static const int SHIFT = 28;
144 static const int64_t MASK = (1 << SHIFT) - 1;
145 static const int64_t HASH = 0xFFULL << (SHIFT + 24); // impossible frag bits
146 static const loff_t END = 1ULL << (SHIFT + 32);
147
148 struct dentry {
149 int64_t offset;
150 std::string name;
151 std::string alternate_name;
152 InodeRef inode;
153 explicit dentry(int64_t o) : offset(o) {}
154 dentry(int64_t o, std::string n, std::string an, InodeRef in) :
155 offset(o), name(std::move(n)), alternate_name(std::move(an)), inode(std::move(in)) {}
156 };
157 struct dentry_off_lt {
158 bool operator()(const dentry& d, int64_t off) const {
159 return dir_result_t::fpos_cmp(d.offset, off) < 0;
160 }
161 };
162
163
164 explicit dir_result_t(Inode *in, const UserPerm& perms);
165
166
167 static uint64_t make_fpos(unsigned h, unsigned l, bool hash) {
168 uint64_t v = ((uint64_t)h<< SHIFT) | (uint64_t)l;
169 if (hash)
170 v |= HASH;
171 else
172 ceph_assert((v & HASH) != HASH);
173 return v;
174 }
175 static unsigned fpos_high(uint64_t p) {
176 unsigned v = (p & (END-1)) >> SHIFT;
177 if ((p & HASH) == HASH)
178 return ceph_frag_value(v);
179 return v;
180 }
181 static unsigned fpos_low(uint64_t p) {
182 return p & MASK;
183 }
184 static int fpos_cmp(uint64_t l, uint64_t r) {
185 int c = ceph_frag_compare(fpos_high(l), fpos_high(r));
186 if (c)
187 return c;
188 if (fpos_low(l) == fpos_low(r))
189 return 0;
190 return fpos_low(l) < fpos_low(r) ? -1 : 1;
191 }
192
193 unsigned offset_high() { return fpos_high(offset); }
194 unsigned offset_low() { return fpos_low(offset); }
195
196 void set_end() { offset |= END; }
197 bool at_end() { return (offset & END); }
198
199 void set_hash_order() { offset |= HASH; }
200 bool hash_order() { return (offset & HASH) == HASH; }
201
202 bool is_cached() {
203 if (buffer.empty())
204 return false;
205 if (hash_order()) {
206 return buffer_frag.contains(offset_high());
207 } else {
208 return buffer_frag == frag_t(offset_high());
209 }
210 }
211
212 void reset() {
213 last_name.clear();
214 next_offset = 2;
215 offset = 0;
216 ordered_count = 0;
217 cache_index = 0;
218 buffer.clear();
219 }
220
221 InodeRef inode;
222 int64_t offset; // hash order:
223 // (0xff << 52) | ((24 bits hash) << 28) |
224 // (the nth entry has hash collision);
225 // frag+name order;
226 // ((frag value) << 28) | (the nth entry in frag);
227
228 unsigned next_offset; // offset of next chunk (last_name's + 1)
229 std::string last_name; // last entry in previous chunk
230
231 uint64_t release_count;
232 uint64_t ordered_count;
233 unsigned cache_index;
234 int start_shared_gen; // dir shared_gen at start of readdir
235 UserPerm perms;
236
237 frag_t buffer_frag;
238
239 std::vector<dentry> buffer;
240 struct dirent de;
241 };
242
243 class Client : public Dispatcher, public md_config_obs_t {
244 public:
245 friend class C_Block_Sync; // Calls block map and protected helpers
246 friend class C_Client_CacheInvalidate; // calls ino_invalidate_cb
247 friend class C_Client_DentryInvalidate; // calls dentry_invalidate_cb
248 friend class C_Client_FlushComplete; // calls put_inode()
249 friend class C_Client_Remount;
250 friend class C_Client_RequestInterrupt;
251 friend class C_Deleg_Timeout; // Asserts on client_lock, called when a delegation is unreturned
252 friend class C_Client_CacheRelease; // Asserts on client_lock
253 friend class SyntheticClient;
254 friend void intrusive_ptr_release(Inode *in);
255 template <typename T> friend struct RWRefState;
256 template <typename T> friend class RWRef;
257
258 using Dispatcher::cct;
259 using clock = ceph::coarse_mono_clock;
260
261 typedef int (*add_dirent_cb_t)(void *p, struct dirent *de, struct ceph_statx *stx, off_t off, Inode *in);
262
263 struct walk_dentry_result {
264 InodeRef in;
265 std::string alternate_name;
266 };
267
268 class CommandHook : public AdminSocketHook {
269 public:
270 explicit CommandHook(Client *client);
271 int call(std::string_view command, const cmdmap_t& cmdmap,
272 const bufferlist&,
273 Formatter *f,
274 std::ostream& errss,
275 bufferlist& out) override;
276 private:
277 Client *m_client;
278 };
279
280 // snapshot info returned via get_snap_info(). nothing to do
281 // with SnapInfo on the MDS.
282 struct SnapInfo {
283 snapid_t id;
284 std::map<std::string, std::string> metadata;
285 };
286
287 Client(Messenger *m, MonClient *mc, Objecter *objecter_);
288 Client(const Client&) = delete;
289 Client(const Client&&) = delete;
290 virtual ~Client() override;
291
292 static UserPerm pick_my_perms(CephContext *c) {
293 uid_t uid = c->_conf->client_mount_uid >= 0 ? c->_conf->client_mount_uid : -1;
294 gid_t gid = c->_conf->client_mount_gid >= 0 ? c->_conf->client_mount_gid : -1;
295 return UserPerm(uid, gid);
296 }
297 UserPerm pick_my_perms() {
298 uid_t uid = user_id >= 0 ? user_id : -1;
299 gid_t gid = group_id >= 0 ? group_id : -1;
300 return UserPerm(uid, gid);
301 }
302
303 int mount(const std::string &mount_root, const UserPerm& perms,
304 bool require_mds=false, const std::string &fs_name="");
305 void unmount();
306 bool is_unmounting() const {
307 return mount_state.check_current_state(CLIENT_UNMOUNTING);
308 }
309 bool is_mounted() const {
310 return mount_state.check_current_state(CLIENT_MOUNTED);
311 }
312 bool is_mounting() const {
313 return mount_state.check_current_state(CLIENT_MOUNTING);
314 }
315 bool is_initialized() const {
316 return initialize_state.check_current_state(CLIENT_INITIALIZED);
317 }
318 void abort_conn();
319
320 void set_uuid(const std::string& uuid);
321 void set_session_timeout(unsigned timeout);
322 int start_reclaim(const std::string& uuid, unsigned flags,
323 const std::string& fs_name);
324 void finish_reclaim();
325
326 fs_cluster_id_t get_fs_cid() {
327 return fscid;
328 }
329
330 int mds_command(
331 const std::string &mds_spec,
332 const std::vector<std::string>& cmd,
333 const bufferlist& inbl,
334 bufferlist *poutbl, std::string *prs, Context *onfinish);
335
336 // these should (more or less) mirror the actual system calls.
337 int statfs(const char *path, struct statvfs *stbuf, const UserPerm& perms);
338
339 // crap
340 int chdir(const char *s, std::string &new_cwd, const UserPerm& perms);
341 void _getcwd(std::string& cwd, const UserPerm& perms);
342 void getcwd(std::string& cwd, const UserPerm& perms);
343
344 // namespace ops
345 int opendir(const char *name, dir_result_t **dirpp, const UserPerm& perms);
346 int fdopendir(int dirfd, dir_result_t **dirpp, const UserPerm& perms);
347 int closedir(dir_result_t *dirp);
348
349 /**
350 * Fill a directory listing from dirp, invoking cb for each entry
351 * with the given pointer, the dirent, the struct stat, the stmask,
352 * and the offset.
353 *
354 * Returns 0 if it reached the end of the directory.
355 * If @a cb returns a negative error code, stop and return that.
356 */
357 int readdir_r_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
358 unsigned want=0, unsigned flags=AT_STATX_DONT_SYNC,
359 bool getref=false);
360
361 struct dirent * readdir(dir_result_t *d);
362 int readdir_r(dir_result_t *dirp, struct dirent *de);
363 int readdirplus_r(dir_result_t *dirp, struct dirent *de, struct ceph_statx *stx, unsigned want, unsigned flags, Inode **out);
364
365 /*
366 * Get the next snapshot delta entry.
367 *
368 */
369 int readdir_snapdiff(dir_result_t* dir1, snapid_t snap2,
370 struct dirent* out_de, snapid_t* out_snap);
371
372 int getdir(const char *relpath, std::list<std::string>& names,
373 const UserPerm& perms); // get the whole dir at once.
374
375 /**
376 * Returns the length of the buffer that got filled in, or -errno.
377 * If it returns -CEPHFS_ERANGE you just need to increase the size of the
378 * buffer and try again.
379 */
380 int _getdents(dir_result_t *dirp, char *buf, int buflen, bool ful); // get a bunch of dentries at once
381 int getdents(dir_result_t *dirp, char *buf, int buflen) {
382 return _getdents(dirp, buf, buflen, true);
383 }
384 int getdnames(dir_result_t *dirp, char *buf, int buflen) {
385 return _getdents(dirp, buf, buflen, false);
386 }
387
388 void rewinddir(dir_result_t *dirp);
389 loff_t telldir(dir_result_t *dirp);
390 void seekdir(dir_result_t *dirp, loff_t offset);
391
392 int may_delete(const char *relpath, const UserPerm& perms);
393 int link(const char *existing, const char *newname, const UserPerm& perm, std::string alternate_name="");
394 int unlink(const char *path, const UserPerm& perm);
395 int unlinkat(int dirfd, const char *relpath, int flags, const UserPerm& perm);
396 int rename(const char *from, const char *to, const UserPerm& perm, std::string alternate_name="");
397
398 // dirs
399 int mkdir(const char *path, mode_t mode, const UserPerm& perm, std::string alternate_name="");
400 int mkdirat(int dirfd, const char *relpath, mode_t mode, const UserPerm& perm,
401 std::string alternate_name="");
402 int mkdirs(const char *path, mode_t mode, const UserPerm& perms);
403 int rmdir(const char *path, const UserPerm& perms);
404
405 // symlinks
406 int readlink(const char *path, char *buf, loff_t size, const UserPerm& perms);
407 int readlinkat(int dirfd, const char *relpath, char *buf, loff_t size, const UserPerm& perms);
408
409 int symlink(const char *existing, const char *newname, const UserPerm& perms, std::string alternate_name="");
410 int symlinkat(const char *target, int dirfd, const char *relpath, const UserPerm& perms,
411 std::string alternate_name="");
412
413 // path traversal for high-level interface
414 int walk(std::string_view path, struct walk_dentry_result* result, const UserPerm& perms, bool followsym=true);
415
416 // inode stuff
417 unsigned statx_to_mask(unsigned int flags, unsigned int want);
418 int stat(const char *path, struct stat *stbuf, const UserPerm& perms,
419 frag_info_t *dirstat=0, int mask=CEPH_STAT_CAP_INODE_ALL);
420 int statx(const char *path, struct ceph_statx *stx,
421 const UserPerm& perms,
422 unsigned int want, unsigned int flags);
423 int lstat(const char *path, struct stat *stbuf, const UserPerm& perms,
424 frag_info_t *dirstat=0, int mask=CEPH_STAT_CAP_INODE_ALL);
425
426 int setattr(const char *relpath, struct stat *attr, int mask,
427 const UserPerm& perms);
428 int setattrx(const char *relpath, struct ceph_statx *stx, int mask,
429 const UserPerm& perms, int flags=0);
430 int fsetattr(int fd, struct stat *attr, int mask, const UserPerm& perms);
431 int fsetattrx(int fd, struct ceph_statx *stx, int mask, const UserPerm& perms);
432 int chmod(const char *path, mode_t mode, const UserPerm& perms);
433 int fchmod(int fd, mode_t mode, const UserPerm& perms);
434 int chmodat(int dirfd, const char *relpath, mode_t mode, int flags, const UserPerm& perms);
435 int lchmod(const char *path, mode_t mode, const UserPerm& perms);
436 int chown(const char *path, uid_t new_uid, gid_t new_gid,
437 const UserPerm& perms);
438 int fchown(int fd, uid_t new_uid, gid_t new_gid, const UserPerm& perms);
439 int lchown(const char *path, uid_t new_uid, gid_t new_gid,
440 const UserPerm& perms);
441 int chownat(int dirfd, const char *relpath, uid_t new_uid, gid_t new_gid,
442 int flags, const UserPerm& perms);
443 int utime(const char *path, struct utimbuf *buf, const UserPerm& perms);
444 int lutime(const char *path, struct utimbuf *buf, const UserPerm& perms);
445 int futime(int fd, struct utimbuf *buf, const UserPerm& perms);
446 int utimes(const char *relpath, struct timeval times[2], const UserPerm& perms);
447 int lutimes(const char *relpath, struct timeval times[2], const UserPerm& perms);
448 int futimes(int fd, struct timeval times[2], const UserPerm& perms);
449 int futimens(int fd, struct timespec times[2], const UserPerm& perms);
450 int utimensat(int dirfd, const char *relpath, struct timespec times[2], int flags,
451 const UserPerm& perms);
452 int flock(int fd, int operation, uint64_t owner);
453 int truncate(const char *path, loff_t size, const UserPerm& perms);
454
455 // file ops
456 int mknod(const char *path, mode_t mode, const UserPerm& perms, dev_t rdev=0);
457
458 int create_and_open(int dirfd, const char *relpath, int flags, const UserPerm& perms,
459 mode_t mode, int stripe_unit, int stripe_count, int object_size,
460 const char *data_pool, std::string alternate_name);
461 int open(const char *path, int flags, const UserPerm& perms, mode_t mode=0, std::string alternate_name="") {
462 return open(path, flags, perms, mode, 0, 0, 0, NULL, alternate_name);
463 }
464 int open(const char *path, int flags, const UserPerm& perms,
465 mode_t mode, int stripe_unit, int stripe_count, int object_size,
466 const char *data_pool, std::string alternate_name="");
467 int openat(int dirfd, const char *relpath, int flags, const UserPerm& perms,
468 mode_t mode, int stripe_unit, int stripe_count,
469 int object_size, const char *data_pool, std::string alternate_name);
470 int openat(int dirfd, const char *path, int flags, const UserPerm& perms, mode_t mode=0,
471 std::string alternate_name="") {
472 return openat(dirfd, path, flags, perms, mode, 0, 0, 0, NULL, alternate_name);
473 }
474
475 int lookup_hash(inodeno_t ino, inodeno_t dirino, const char *name,
476 const UserPerm& perms);
477 int lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
478 int lookup_name(Inode *in, Inode *parent, const UserPerm& perms);
479 int _close(int fd);
480 int close(int fd);
481 loff_t lseek(int fd, loff_t offset, int whence);
482 int read(int fd, char *buf, loff_t size, loff_t offset=-1);
483 int preadv(int fd, const struct iovec *iov, int iovcnt, loff_t offset=-1);
484 int write(int fd, const char *buf, loff_t size, loff_t offset=-1);
485 int pwritev(int fd, const struct iovec *iov, int iovcnt, loff_t offset=-1);
486 int fake_write_size(int fd, loff_t size);
487 int ftruncate(int fd, loff_t size, const UserPerm& perms);
488 int fsync(int fd, bool syncdataonly);
489 int fstat(int fd, struct stat *stbuf, const UserPerm& perms,
490 int mask=CEPH_STAT_CAP_INODE_ALL);
491 int fstatx(int fd, struct ceph_statx *stx, const UserPerm& perms,
492 unsigned int want, unsigned int flags);
493 int statxat(int dirfd, const char *relpath,
494 struct ceph_statx *stx, const UserPerm& perms,
495 unsigned int want, unsigned int flags);
496 int fallocate(int fd, int mode, loff_t offset, loff_t length);
497
498 // full path xattr ops
499 int getxattr(const char *path, const char *name, void *value, size_t size,
500 const UserPerm& perms);
501 int lgetxattr(const char *path, const char *name, void *value, size_t size,
502 const UserPerm& perms);
503 int fgetxattr(int fd, const char *name, void *value, size_t size,
504 const UserPerm& perms);
505 int listxattr(const char *path, char *list, size_t size, const UserPerm& perms);
506 int llistxattr(const char *path, char *list, size_t size, const UserPerm& perms);
507 int flistxattr(int fd, char *list, size_t size, const UserPerm& perms);
508 int removexattr(const char *path, const char *name, const UserPerm& perms);
509 int lremovexattr(const char *path, const char *name, const UserPerm& perms);
510 int fremovexattr(int fd, const char *name, const UserPerm& perms);
511 int setxattr(const char *path, const char *name, const void *value,
512 size_t size, int flags, const UserPerm& perms);
513 int lsetxattr(const char *path, const char *name, const void *value,
514 size_t size, int flags, const UserPerm& perms);
515 int fsetxattr(int fd, const char *name, const void *value, size_t size,
516 int flags, const UserPerm& perms);
517
518 int sync_fs();
519 int64_t drop_caches();
520
521 int get_snap_info(const char *path, const UserPerm &perms, SnapInfo *snap_info);
522
523 // hpc lazyio
524 int lazyio(int fd, int enable);
525 int lazyio_propagate(int fd, loff_t offset, size_t count);
526 int lazyio_synchronize(int fd, loff_t offset, size_t count);
527
528 // expose file layout
529 int describe_layout(const char *path, file_layout_t* layout,
530 const UserPerm& perms);
531 int fdescribe_layout(int fd, file_layout_t* layout);
532 int get_file_stripe_address(int fd, loff_t offset, std::vector<entity_addr_t>& address);
533 int get_file_extent_osds(int fd, loff_t off, loff_t *len, std::vector<int>& osds);
534 int get_osd_addr(int osd, entity_addr_t& addr);
535
536 // expose mdsmap
537 int64_t get_default_pool_id();
538
539 // expose osdmap
540 int get_local_osd();
541 int get_pool_replication(int64_t pool);
542 int64_t get_pool_id(const char *pool_name);
543 std::string get_pool_name(int64_t pool);
544 int get_osd_crush_location(int id, std::vector<std::pair<std::string, std::string> >& path);
545
546 int enumerate_layout(int fd, std::vector<ObjectExtent>& result,
547 loff_t length, loff_t offset);
548
549 int mksnap(const char *path, const char *name, const UserPerm& perm,
550 mode_t mode=0, const std::map<std::string, std::string> &metadata={});
551 int rmsnap(const char *path, const char *name, const UserPerm& perm, bool check_perms=false);
552
553 // Inode permission checking
554 int inode_permission(Inode *in, const UserPerm& perms, unsigned want);
555
556 // expose caps
557 int get_caps_issued(int fd);
558 int get_caps_issued(const char *path, const UserPerm& perms);
559
560 snapid_t ll_get_snapid(Inode *in);
561 vinodeno_t ll_get_vino(Inode *in) {
562 std::lock_guard lock(client_lock);
563 return _get_vino(in);
564 }
565 // get inode from faked ino
566 Inode *ll_get_inode(ino_t ino);
567 Inode *ll_get_inode(vinodeno_t vino);
568 int ll_lookup(Inode *parent, const char *name, struct stat *attr,
569 Inode **out, const UserPerm& perms);
570 int ll_lookup_inode(struct inodeno_t ino, const UserPerm& perms, Inode **inode);
571 int ll_lookup_vino(vinodeno_t vino, const UserPerm& perms, Inode **inode);
572 int ll_lookupx(Inode *parent, const char *name, Inode **out,
573 struct ceph_statx *stx, unsigned want, unsigned flags,
574 const UserPerm& perms);
575 bool ll_forget(Inode *in, uint64_t count);
576 bool ll_put(Inode *in);
577 int ll_get_snap_ref(snapid_t snap);
578
579 int ll_getattr(Inode *in, struct stat *st, const UserPerm& perms);
580 int ll_getattrx(Inode *in, struct ceph_statx *stx, unsigned int want,
581 unsigned int flags, const UserPerm& perms);
582 int ll_setattrx(Inode *in, struct ceph_statx *stx, int mask,
583 const UserPerm& perms);
584 int ll_setattr(Inode *in, struct stat *st, int mask,
585 const UserPerm& perms);
586 int ll_getxattr(Inode *in, const char *name, void *value, size_t size,
587 const UserPerm& perms);
588 int ll_setxattr(Inode *in, const char *name, const void *value, size_t size,
589 int flags, const UserPerm& perms);
590 int ll_removexattr(Inode *in, const char *name, const UserPerm& perms);
591 int ll_listxattr(Inode *in, char *list, size_t size, const UserPerm& perms);
592 int ll_opendir(Inode *in, int flags, dir_result_t **dirpp,
593 const UserPerm& perms);
594 int ll_releasedir(dir_result_t* dirp);
595 int ll_fsyncdir(dir_result_t* dirp);
596 int ll_readlink(Inode *in, char *buf, size_t bufsize, const UserPerm& perms);
597 int ll_mknod(Inode *in, const char *name, mode_t mode, dev_t rdev,
598 struct stat *attr, Inode **out, const UserPerm& perms);
599 int ll_mknodx(Inode *parent, const char *name, mode_t mode, dev_t rdev,
600 Inode **out, struct ceph_statx *stx, unsigned want,
601 unsigned flags, const UserPerm& perms);
602 int ll_mkdir(Inode *in, const char *name, mode_t mode, struct stat *attr,
603 Inode **out, const UserPerm& perm);
604 int ll_mkdirx(Inode *parent, const char *name, mode_t mode, Inode **out,
605 struct ceph_statx *stx, unsigned want, unsigned flags,
606 const UserPerm& perms);
607 int ll_symlink(Inode *in, const char *name, const char *value,
608 struct stat *attr, Inode **out, const UserPerm& perms);
609 int ll_symlinkx(Inode *parent, const char *name, const char *value,
610 Inode **out, struct ceph_statx *stx, unsigned want,
611 unsigned flags, const UserPerm& perms);
612 int ll_unlink(Inode *in, const char *name, const UserPerm& perm);
613 int ll_rmdir(Inode *in, const char *name, const UserPerm& perms);
614 int ll_rename(Inode *parent, const char *name, Inode *newparent,
615 const char *newname, const UserPerm& perm);
616 int ll_link(Inode *in, Inode *newparent, const char *newname,
617 const UserPerm& perm);
618 int ll_open(Inode *in, int flags, Fh **fh, const UserPerm& perms);
619 int _ll_create(Inode *parent, const char *name, mode_t mode,
620 int flags, InodeRef *in, int caps, Fh **fhp,
621 const UserPerm& perms);
622 int ll_create(Inode *parent, const char *name, mode_t mode, int flags,
623 struct stat *attr, Inode **out, Fh **fhp,
624 const UserPerm& perms);
625 int ll_createx(Inode *parent, const char *name, mode_t mode,
626 int oflags, Inode **outp, Fh **fhp,
627 struct ceph_statx *stx, unsigned want, unsigned lflags,
628 const UserPerm& perms);
629 int ll_read_block(Inode *in, uint64_t blockid, char *buf, uint64_t offset,
630 uint64_t length, file_layout_t* layout);
631
632 int ll_write_block(Inode *in, uint64_t blockid,
633 char* buf, uint64_t offset,
634 uint64_t length, file_layout_t* layout,
635 uint64_t snapseq, uint32_t sync);
636 int ll_commit_blocks(Inode *in, uint64_t offset, uint64_t length);
637
638 int ll_statfs(Inode *in, struct statvfs *stbuf, const UserPerm& perms);
639 int ll_walk(const char* name, Inode **i, struct ceph_statx *stx,
640 unsigned int want, unsigned int flags, const UserPerm& perms);
641 uint32_t ll_stripe_unit(Inode *in);
642 int ll_file_layout(Inode *in, file_layout_t *layout);
643 uint64_t ll_snap_seq(Inode *in);
644
645 int ll_read(Fh *fh, loff_t off, loff_t len, bufferlist *bl);
646 int ll_write(Fh *fh, loff_t off, loff_t len, const char *data);
647 int64_t ll_readv(struct Fh *fh, const struct iovec *iov, int iovcnt, int64_t off);
648 int64_t ll_writev(struct Fh *fh, const struct iovec *iov, int iovcnt, int64_t off);
649 loff_t ll_lseek(Fh *fh, loff_t offset, int whence);
650 int ll_flush(Fh *fh);
651 int ll_fsync(Fh *fh, bool syncdataonly);
652 int ll_sync_inode(Inode *in, bool syncdataonly);
653 int ll_fallocate(Fh *fh, int mode, int64_t offset, int64_t length);
654 int ll_release(Fh *fh);
655 int ll_getlk(Fh *fh, struct flock *fl, uint64_t owner);
656 int ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep);
657 int ll_flock(Fh *fh, int cmd, uint64_t owner);
658 int ll_lazyio(Fh *fh, int enable);
659 int ll_file_layout(Fh *fh, file_layout_t *layout);
660 void ll_interrupt(void *d);
661 bool ll_handle_umask() {
662 return acl_type != NO_ACL;
663 }
664
665 int ll_get_stripe_osd(struct Inode *in, uint64_t blockno,
666 file_layout_t* layout);
667 uint64_t ll_get_internal_offset(struct Inode *in, uint64_t blockno);
668
669 int ll_num_osds(void);
670 int ll_osdaddr(int osd, uint32_t *addr);
671 int ll_osdaddr(int osd, char* buf, size_t size);
672
673 void _ll_register_callbacks(struct ceph_client_callback_args *args);
674 void ll_register_callbacks(struct ceph_client_callback_args *args); // deprecated
675 int ll_register_callbacks2(struct ceph_client_callback_args *args);
676 std::pair<int, bool> test_dentry_handling(bool can_invalidate);
677
678 const char** get_tracked_conf_keys() const override;
679 void handle_conf_change(const ConfigProxy& conf,
680 const std::set <std::string> &changed) override;
681 uint32_t get_deleg_timeout() { return deleg_timeout; }
682 int set_deleg_timeout(uint32_t timeout);
683 int ll_delegation(Fh *fh, unsigned cmd, ceph_deleg_cb_t cb, void *priv);
684
685 entity_name_t get_myname() { return messenger->get_myname(); }
686 void wait_on_list(std::list<ceph::condition_variable*>& ls);
687 void signal_cond_list(std::list<ceph::condition_variable*>& ls);
688
689 void set_filer_flags(int flags);
690 void clear_filer_flags(int flags);
691
692 void tear_down_cache();
693
694 void update_metadata(std::string const &k, std::string const &v);
695
696 client_t get_nodeid() { return whoami; }
697
698 inodeno_t get_root_ino();
699 Inode *get_root();
700
701 virtual int init();
702 virtual void shutdown();
703
704 // messaging
705 void cancel_commands(const MDSMap& newmap);
706 void handle_mds_map(const MConstRef<MMDSMap>& m);
707 void handle_fs_map(const MConstRef<MFSMap>& m);
708 void handle_fs_map_user(const MConstRef<MFSMapUser>& m);
709 void handle_osd_map(const MConstRef<MOSDMap>& m);
710
711 void handle_lease(const MConstRef<MClientLease>& m);
712
713 // inline data
714 int uninline_data(Inode *in, Context *onfinish);
715
716 // file caps
717 void check_cap_issue(Inode *in, unsigned issued);
718 void add_update_cap(Inode *in, MetaSession *session, uint64_t cap_id,
719 unsigned issued, unsigned wanted, unsigned seq, unsigned mseq,
720 inodeno_t realm, int flags, const UserPerm& perms);
721 void remove_cap(Cap *cap, bool queue_release);
722 void remove_all_caps(Inode *in);
723 void remove_session_caps(MetaSession *session, int err);
724 int mark_caps_flushing(Inode *in, ceph_tid_t *ptid);
725 void adjust_session_flushing_caps(Inode *in, MetaSession *old_s, MetaSession *new_s);
726 void flush_caps_sync();
727 void kick_flushing_caps(Inode *in, MetaSession *session);
728 void kick_flushing_caps(MetaSession *session);
729 void early_kick_flushing_caps(MetaSession *session);
730 int get_caps(Fh *fh, int need, int want, int *have, loff_t endoff);
731 int get_caps_used(Inode *in);
732
733 void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created, snapid_t snap_highwater,
734 std::vector<snapid_t>& snaps);
735
736 void handle_quota(const MConstRef<MClientQuota>& m);
737 void handle_snap(const MConstRef<MClientSnap>& m);
738 void handle_caps(const MConstRef<MClientCaps>& m);
739 void handle_cap_import(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
740 void handle_cap_export(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
741 void handle_cap_trunc(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
742 void handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, const MConstRef<MClientCaps>& m);
743 void handle_cap_flushsnap_ack(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
744 void handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, const MConstRef<MClientCaps>& m);
745 void cap_delay_requeue(Inode *in);
746
747 void send_cap(Inode *in, MetaSession *session, Cap *cap, int flags,
748 int used, int want, int retain, int flush,
749 ceph_tid_t flush_tid);
750
751 void send_flush_snap(Inode *in, MetaSession *session, snapid_t follows, CapSnap& capsnap);
752
753 void flush_snaps(Inode *in);
754 void get_cap_ref(Inode *in, int cap);
755 void put_cap_ref(Inode *in, int cap);
756 void wait_sync_caps(Inode *in, ceph_tid_t want);
757 void wait_sync_caps(ceph_tid_t want);
758 void queue_cap_snap(Inode *in, SnapContext &old_snapc);
759 void finish_cap_snap(Inode *in, CapSnap &capsnap, int used);
760
761 void _schedule_invalidate_dentry_callback(Dentry *dn, bool del);
762 void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, std::string& name);
763 void _try_to_trim_inode(Inode *in, bool sched_inval);
764
765 void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len);
766 void _invalidate_inode_cache(Inode *in);
767 void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len);
768 void _async_invalidate(vinodeno_t ino, int64_t off, int64_t len);
769
770 void _schedule_ino_release_callback(Inode *in);
771 void _async_inode_release(vinodeno_t ino);
772
773 bool _release(Inode *in);
774
775 /**
776 * Initiate a flush of the data associated with the given inode.
777 * If you specify a Context, you are responsible for holding an inode
778 * reference for the duration of the flush. If not, _flush() will
779 * take the reference for you.
780 * @param in The Inode whose data you wish to flush.
781 * @param c The Context you wish us to complete once the data is
782 * flushed. If already flushed, this will be called in-line.
783 *
784 * @returns true if the data was already flushed, false otherwise.
785 */
786 bool _flush(Inode *in, Context *c);
787 void _flush_range(Inode *in, int64_t off, uint64_t size);
788 void _flushed(Inode *in);
789 void flush_set_callback(ObjectCacher::ObjectSet *oset);
790
791 void close_release(Inode *in);
792 void close_safe(Inode *in);
793
794 void lock_fh_pos(Fh *f);
795 void unlock_fh_pos(Fh *f);
796
797 // metadata cache
798 void update_dir_dist(Inode *in, DirStat *st, mds_rank_t from);
799
800 void clear_dir_complete_and_ordered(Inode *diri, bool complete);
801 void insert_readdir_results(MetaRequest *request, MetaSession *session,
802 Inode *diri, Inode *diri_other);
803 Inode* insert_trace(MetaRequest *request, MetaSession *session);
804 void update_inode_file_size(Inode *in, int issued, uint64_t size,
805 uint64_t truncate_seq, uint64_t truncate_size);
806 void update_inode_file_time(Inode *in, int issued, uint64_t time_warp_seq,
807 utime_t ctime, utime_t mtime, utime_t atime);
808
809 Inode *add_update_inode(InodeStat *st, utime_t ttl, MetaSession *session,
810 const UserPerm& request_perms);
811 Dentry *insert_dentry_inode(Dir *dir, const std::string& dname, LeaseStat *dlease,
812 Inode *in, utime_t from, MetaSession *session,
813 Dentry *old_dentry = NULL);
814 void update_dentry_lease(Dentry *dn, LeaseStat *dlease, utime_t from, MetaSession *session);
815
816 bool use_faked_inos() { return _use_faked_inos; }
817 vinodeno_t map_faked_ino(ino_t ino);
818
819 //notify the mds to flush the mdlog
820 void flush_mdlog_sync(Inode *in);
821 void flush_mdlog_sync();
822 void flush_mdlog(MetaSession *session);
823
824 void renew_caps();
825 void renew_caps(MetaSession *session);
826 void flush_cap_releases();
827 void renew_and_flush_cap_releases();
828 void tick();
829 void start_tick_thread();
830
831 void update_read_io_size(size_t size) {
832 total_read_ops++;
833 total_read_size += size;
834 }
835
836 void update_write_io_size(size_t size) {
837 total_write_ops++;
838 total_write_size += size;
839 }
840
841 void inc_dentry_nr() {
842 ++dentry_nr;
843 }
844 void dec_dentry_nr() {
845 --dentry_nr;
846 }
847 void dlease_hit() {
848 ++dlease_hits;
849 }
850 void dlease_miss() {
851 ++dlease_misses;
852 }
853 std::tuple<uint64_t, uint64_t, uint64_t> get_dlease_hit_rates() {
854 return std::make_tuple(dlease_hits, dlease_misses, dentry_nr);
855 }
856
857 void cap_hit() {
858 ++cap_hits;
859 }
860 void cap_miss() {
861 ++cap_misses;
862 }
863 std::pair<uint64_t, uint64_t> get_cap_hit_rates() {
864 return std::make_pair(cap_hits, cap_misses);
865 }
866
867 void inc_opened_files() {
868 ++opened_files;
869 }
870 void dec_opened_files() {
871 --opened_files;
872 }
873 std::pair<uint64_t, uint64_t> get_opened_files_rates() {
874 return std::make_pair(opened_files, inode_map.size());
875 }
876
877 void inc_pinned_icaps() {
878 ++pinned_icaps;
879 }
880 void dec_pinned_icaps(uint64_t nr=1) {
881 pinned_icaps -= nr;
882 }
883 std::pair<uint64_t, uint64_t> get_pinned_icaps_rates() {
884 return std::make_pair(pinned_icaps, inode_map.size());
885 }
886
887 void inc_opened_inodes() {
888 ++opened_inodes;
889 }
890 void dec_opened_inodes() {
891 --opened_inodes;
892 }
893 std::pair<uint64_t, uint64_t> get_opened_inodes_rates() {
894 return std::make_pair(opened_inodes, inode_map.size());
895 }
896
897 /* timer_lock for 'timer' */
898 ceph::mutex timer_lock = ceph::make_mutex("Client::timer_lock");
899 SafeTimer timer;
900
901 /* tick thread */
902 std::thread upkeeper;
903 ceph::condition_variable upkeep_cond;
904 bool tick_thread_stopped = false;
905
906 std::unique_ptr<PerfCounters> logger;
907 std::unique_ptr<MDSMap> mdsmap;
908
909 bool fuse_default_permissions;
910 bool _collect_and_send_global_metrics;
911
912 protected:
913 std::list<ceph::condition_variable*> waiting_for_reclaim;
914 /* Flags for check_caps() */
915 static const unsigned CHECK_CAPS_NODELAY = 0x1;
916 static const unsigned CHECK_CAPS_SYNCHRONOUS = 0x2;
917
918 void check_caps(Inode *in, unsigned flags);
919
920 void set_cap_epoch_barrier(epoch_t e);
921
922 void handle_command_reply(const MConstRef<MCommandReply>& m);
923 int fetch_fsmap(bool user);
924 int resolve_mds(
925 const std::string &mds_spec,
926 std::vector<mds_gid_t> *targets);
927
928 void get_session_metadata(std::map<std::string, std::string> *meta) const;
929 bool have_open_session(mds_rank_t mds);
930 void got_mds_push(MetaSession *s);
931 MetaSessionRef _get_mds_session(mds_rank_t mds, Connection *con); ///< return session for mds *and* con; null otherwise
932 MetaSessionRef _get_or_open_mds_session(mds_rank_t mds);
933 MetaSessionRef _open_mds_session(mds_rank_t mds);
934 void _close_mds_session(MetaSession *s);
935 void _closed_mds_session(MetaSession *s, int err=0, bool rejected=false);
936 bool _any_stale_sessions() const;
937 void _kick_stale_sessions();
938 void handle_client_session(const MConstRef<MClientSession>& m);
939 void send_reconnect(MetaSession *s);
940 void resend_unsafe_requests(MetaSession *s);
941 void wait_unsafe_requests();
942
943 void dump_mds_requests(Formatter *f);
944 void dump_mds_sessions(Formatter *f, bool cap_dump=false);
945
946 int make_request(MetaRequest *req, const UserPerm& perms,
947 InodeRef *ptarget = 0, bool *pcreated = 0,
948 mds_rank_t use_mds=-1, bufferlist *pdirbl=0,
949 size_t feature_needed=ULONG_MAX);
950 void put_request(MetaRequest *request);
951 void unregister_request(MetaRequest *request);
952
953 int verify_reply_trace(int r, MetaSession *session, MetaRequest *request,
954 const MConstRef<MClientReply>& reply,
955 InodeRef *ptarget, bool *pcreated,
956 const UserPerm& perms);
957 void encode_cap_releases(MetaRequest *request, mds_rank_t mds);
958 int encode_inode_release(Inode *in, MetaRequest *req,
959 mds_rank_t mds, int drop,
960 int unless,int force=0);
961 void encode_dentry_release(Dentry *dn, MetaRequest *req,
962 mds_rank_t mds, int drop, int unless);
963 mds_rank_t choose_target_mds(MetaRequest *req, Inode** phash_diri=NULL);
964 void connect_mds_targets(mds_rank_t mds);
965 void send_request(MetaRequest *request, MetaSession *session,
966 bool drop_cap_releases=false);
967 MRef<MClientRequest> build_client_request(MetaRequest *request, mds_rank_t mds);
968 void kick_requests(MetaSession *session);
969 void kick_requests_closed(MetaSession *session);
970 void handle_client_request_forward(const MConstRef<MClientRequestForward>& reply);
971 void handle_client_reply(const MConstRef<MClientReply>& reply);
972 bool is_dir_operation(MetaRequest *request);
973
974 int path_walk(const filepath& fp, struct walk_dentry_result* result, const UserPerm& perms, bool followsym=true, int mask=0,
975 InodeRef dirinode=nullptr);
976 int path_walk(const filepath& fp, InodeRef *end, const UserPerm& perms,
977 bool followsym=true, int mask=0, InodeRef dirinode=nullptr);
978
979 // fake inode number for 32-bits ino_t
980 void _assign_faked_ino(Inode *in);
981 void _assign_faked_root(Inode *in);
982 void _release_faked_ino(Inode *in);
983 void _reset_faked_inos();
984 vinodeno_t _map_faked_ino(ino_t ino);
985
986 // Optional extra metadata about me to send to the MDS
987 void populate_metadata(const std::string &mount_root);
988
989 SnapRealm *get_snap_realm(inodeno_t r);
990 SnapRealm *get_snap_realm_maybe(inodeno_t r);
991 void put_snap_realm(SnapRealm *realm);
992 bool adjust_realm_parent(SnapRealm *realm, inodeno_t parent);
993 void update_snap_trace(MetaSession *session, const bufferlist& bl, SnapRealm **realm_ret, bool must_flush=true);
994 void invalidate_snaprealm_and_children(SnapRealm *realm);
995
996 void refresh_snapdir_attrs(Inode *in, Inode *diri);
997 Inode *open_snapdir(Inode *diri);
998
999 int get_fd() {
1000 int fd = free_fd_set.range_start();
1001 free_fd_set.erase(fd, 1);
1002 return fd;
1003 }
1004 void put_fd(int fd) {
1005 free_fd_set.insert(fd, 1);
1006 }
1007
1008 /*
1009 * Resolve file descriptor, or return NULL.
1010 */
1011 Fh *get_filehandle(int fd) {
1012 auto it = fd_map.find(fd);
1013 if (it == fd_map.end())
1014 return NULL;
1015 return it->second;
1016 }
1017 int get_fd_inode(int fd, InodeRef *in);
1018
1019 // helpers
1020 void wake_up_session_caps(MetaSession *s, bool reconnect);
1021
1022 void wait_on_context_list(std::list<Context*>& ls);
1023 void signal_context_list(std::list<Context*>& ls);
1024
1025 // -- metadata cache stuff
1026
1027 // decrease inode ref. delete if dangling.
1028 void _put_inode(Inode *in, int n);
1029 void delay_put_inodes(bool wakeup=false);
1030 void put_inode(Inode *in, int n=1);
1031 void close_dir(Dir *dir);
1032
1033 int subscribe_mdsmap(const std::string &fs_name="");
1034
1035 void _abort_mds_sessions(int err);
1036
1037 // same as unmount() but for when the client_lock is already held
1038 void _unmount(bool abort);
1039
1040 //int get_cache_size() { return lru.lru_get_size(); }
1041
1042 /**
1043 * Don't call this with in==NULL, use get_or_create for that
1044 * leave dn set to default NULL unless you're trying to add
1045 * a new inode to a pre-created Dentry
1046 */
1047 Dentry* link(Dir *dir, const std::string& name, Inode *in, Dentry *dn);
1048 void unlink(Dentry *dn, bool keepdir, bool keepdentry);
1049
1050 int fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0);
1051 int fill_stat(InodeRef& in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0) {
1052 return fill_stat(in.get(), st, dirstat, rstat);
1053 }
1054
1055 void fill_statx(Inode *in, unsigned int mask, struct ceph_statx *stx);
1056 void fill_statx(InodeRef& in, unsigned int mask, struct ceph_statx *stx) {
1057 return fill_statx(in.get(), mask, stx);
1058 }
1059
1060 void touch_dn(Dentry *dn);
1061
1062 // trim cache.
1063 void trim_cache(bool trim_kernel_dcache=false);
1064 void trim_cache_for_reconnect(MetaSession *s);
1065 void trim_dentry(Dentry *dn);
1066 void trim_caps(MetaSession *s, uint64_t max);
1067 void _invalidate_kernel_dcache();
1068 void _trim_negative_child_dentries(InodeRef& in);
1069
1070 void dump_inode(Formatter *f, Inode *in, set<Inode*>& did, bool disconnected);
1071 void dump_cache(Formatter *f); // debug
1072
1073 // force read-only
1074 void force_session_readonly(MetaSession *s);
1075
1076 void dump_status(Formatter *f); // debug
1077
1078 bool ms_dispatch2(const MessageRef& m) override;
1079
1080 void ms_handle_connect(Connection *con) override;
1081 bool ms_handle_reset(Connection *con) override;
1082 void ms_handle_remote_reset(Connection *con) override;
1083 bool ms_handle_refused(Connection *con) override;
1084
1085 int authenticate();
1086
1087 Inode* get_quota_root(Inode *in, const UserPerm& perms, quota_max_t type=QUOTA_ANY);
1088 bool check_quota_condition(Inode *in, const UserPerm& perms,
1089 std::function<bool (const Inode &)> test);
1090 bool is_quota_files_exceeded(Inode *in, const UserPerm& perms);
1091 bool is_quota_bytes_exceeded(Inode *in, int64_t new_bytes,
1092 const UserPerm& perms);
1093 bool is_quota_bytes_approaching(Inode *in, const UserPerm& perms);
1094
1095 int check_pool_perm(Inode *in, int need);
1096
1097 void handle_client_reclaim_reply(const MConstRef<MClientReclaimReply>& reply);
1098
1099 /**
1100 * Call this when an OSDMap is seen with a full flag (global or per pool)
1101 * set.
1102 *
1103 * @param pool the pool ID affected, or -1 if all.
1104 */
1105 void _handle_full_flag(int64_t pool);
1106
1107 void _close_sessions();
1108
1109 void _pre_init();
1110
1111 /**
1112 * The basic housekeeping parts of init (perf counters, admin socket)
1113 * that is independent of how objecters/monclient/messengers are
1114 * being set up.
1115 */
1116 void _finish_init();
1117
1118 // global client lock
1119 // - protects Client and buffer cache both!
1120 ceph::mutex client_lock = ceph::make_mutex("Client::client_lock");
1121
1122 std::map<snapid_t, int> ll_snap_ref;
1123
1124 InodeRef root = nullptr;
1125 map<Inode*, InodeRef> root_parents;
1126 Inode* root_ancestor = nullptr;
1127 LRU lru; // lru list of Dentry's in our local metadata cache.
1128
1129 InodeRef cwd;
1130
1131 std::unique_ptr<Filer> filer;
1132 std::unique_ptr<ObjectCacher> objectcacher;
1133 std::unique_ptr<WritebackHandler> writeback_handler;
1134
1135 Messenger *messenger;
1136 MonClient *monclient;
1137 Objecter *objecter;
1138
1139 client_t whoami;
1140
1141 /* The state migration mechanism */
// States shared by the two state machines declared in this class:
// the first three values are accepted by initialize_state_t, the
// remaining four by mount_state_t.
1142 enum _state {
1143 /* For the initialize_state */
1144 CLIENT_NEW, // The initial state for the initialize_state or after Client::shutdown()
1145 CLIENT_INITIALIZING, // At the beginning of the Client::init()
1146 CLIENT_INITIALIZED, // At the end of Client::init()
1147
1148 /* For the mount_state */
1149 CLIENT_UNMOUNTED, // The initial state for the mount_state or after unmounted
1150 CLIENT_MOUNTING, // At the beginning of Client::mount()
1151 CLIENT_MOUNTED, // At the end of Client::mount()
1152 CLIENT_UNMOUNTING, // At the beginning of the Client::_unmount()
1153 };
1154
1155 typedef enum _state state_t;
1156 using RWRef_t = RWRef<state_t>;
1157
1158 struct mount_state_t : public RWRefState<state_t> {
1159 public:
1160 bool is_valid_state(state_t state) const override {
1161 switch (state) {
1162 case Client::CLIENT_MOUNTING:
1163 case Client::CLIENT_MOUNTED:
1164 case Client::CLIENT_UNMOUNTING:
1165 case Client::CLIENT_UNMOUNTED:
1166 return true;
1167 default:
1168 return false;
1169 }
1170 }
1171
1172 int check_reader_state(state_t require) const override {
1173 if (require == Client::CLIENT_MOUNTING &&
1174 (state == Client::CLIENT_MOUNTING || state == Client::CLIENT_MOUNTED))
1175 return true;
1176 else
1177 return false;
1178 }
1179
1180 /* The state migration check */
1181 int check_writer_state(state_t require) const override {
1182 if (require == Client::CLIENT_MOUNTING &&
1183 state == Client::CLIENT_UNMOUNTED)
1184 return true;
1185 else if (require == Client::CLIENT_MOUNTED &&
1186 state == Client::CLIENT_MOUNTING)
1187 return true;
1188 else if (require == Client::CLIENT_UNMOUNTING &&
1189 state == Client::CLIENT_MOUNTED)
1190 return true;
1191 else if (require == Client::CLIENT_UNMOUNTED &&
1192 state == Client::CLIENT_UNMOUNTING)
1193 return true;
1194 else
1195 return false;
1196 }
1197
1198 mount_state_t(state_t state, const char *lockname, uint64_t reader_cnt=0)
1199 : RWRefState (state, lockname, reader_cnt) {}
1200 ~mount_state_t() {}
1201 };
1202
1203 struct initialize_state_t : public RWRefState<state_t> {
1204 public:
1205 bool is_valid_state(state_t state) const override {
1206 switch (state) {
1207 case Client::CLIENT_NEW:
1208 case Client::CLIENT_INITIALIZING:
1209 case Client::CLIENT_INITIALIZED:
1210 return true;
1211 default:
1212 return false;
1213 }
1214 }
1215
1216 int check_reader_state(state_t require) const override {
1217 if (require == Client::CLIENT_INITIALIZED &&
1218 state >= Client::CLIENT_INITIALIZED)
1219 return true;
1220 else
1221 return false;
1222 }
1223
1224 /* The state migration check */
1225 int check_writer_state(state_t require) const override {
1226 if (require == Client::CLIENT_INITIALIZING &&
1227 (state == Client::CLIENT_NEW))
1228 return true;
1229 else if (require == Client::CLIENT_INITIALIZED &&
1230 (state == Client::CLIENT_INITIALIZING))
1231 return true;
1232 else if (require == Client::CLIENT_NEW &&
1233 (state == Client::CLIENT_INITIALIZED))
1234 return true;
1235 else
1236 return false;
1237 }
1238
1239 initialize_state_t(state_t state, const char *lockname, uint64_t reader_cnt=0)
1240 : RWRefState (state, lockname, reader_cnt) {}
1241 ~initialize_state_t() {}
1242 };
1243
1244 struct mount_state_t mount_state;
1245 struct initialize_state_t initialize_state;
1246
1247 private:
// Context subclass that carries a Client pointer and an Fh pointer.
// The constructor, destructor and finish() are only declared here;
// their definitions live outside this header section.
1248 struct C_Readahead : public Context {
1249 C_Readahead(Client *c, Fh *f);
1250 ~C_Readahead() override;
// Overrides Context::finish(); `r` is the completion result code
// (NOTE(review): exact meaning of `r` not visible here).
1251 void finish(int r) override;
1252
// Client passed to the constructor (presumably; confirm in the .cc).
1253 Client *client;
// File handle passed to the constructor (presumably; confirm in the .cc).
1254 Fh *f;
1255 };
1256
1257 /*
1258 * These define virtual xattrs exposing the recursive directory
1259 * statistics and layout metadata.
1260 */
// One virtual-xattr description: a name plus pointers to Client member
// functions with getter, setter and existence-check signatures.
1261 struct VXattr {
// Attribute name.
1262 const std::string name;
// Pointer to a Client member function taking (Inode*, char *val, size_t size)
// and returning size_t.
1263 size_t (Client::*getxattr_cb)(Inode *in, char *val, size_t size);
// Pointer to a Client member function taking
// (Inode*, const void *val, size_t size, const UserPerm&) and returning int.
1264 int (Client::*setxattr_cb)(Inode *in, const void *val, size_t size,
1265 const UserPerm& perms);
// NOTE(review): presumably marks attributes that cannot be set; confirm
// against the users of this struct.
1266 bool readonly;
// Pointer to a Client member function taking (Inode*) and returning bool.
1267 bool (Client::*exists_cb)(Inode *in);
// NOTE(review): presumably a mask of the VXATTR_* constants declared in
// this class; confirm.
1268 unsigned int flags;
1269 };
1270
1271 enum {
1272 NO_ACL = 0,
1273 POSIX_ACL,
1274 };
1275
1276 enum {
1277 MAY_EXEC = 1,
1278 MAY_WRITE = 2,
1279 MAY_READ = 4,
1280 };
1281
1282 typedef std::function<void(dir_result_t*, MetaRequest*, InodeRef&, frag_t)> fill_readdir_args_cb_t;
1283
1284 std::unique_ptr<CephContext, std::function<void(CephContext*)>> cct_deleter;
1285
1286 /* Flags for VXattr */
1287 static const unsigned VXATTR_RSTAT = 0x1;
1288 static const unsigned VXATTR_DIRSTAT = 0x2;
1289
1290 static const VXattr _dir_vxattrs[];
1291 static const VXattr _file_vxattrs[];
1292 static const VXattr _common_vxattrs[];
1293
1294
1295 bool is_reserved_vino(vinodeno_t &vino);
1296
1297 void fill_dirent(struct dirent *de, const char *name, int type, uint64_t ino, loff_t next_off);
1298
1299 int _opendir(Inode *in, dir_result_t **dirpp, const UserPerm& perms);
1300 void _readdir_drop_dirp_buffer(dir_result_t *dirp);
1301 bool _readdir_have_frag(dir_result_t *dirp);
1302 void _readdir_next_frag(dir_result_t *dirp);
1303 void _readdir_rechoose_frag(dir_result_t *dirp);
1304 int _readdir_get_frag(int op, dir_result_t *dirp,
1305 fill_readdir_args_cb_t fill_req_cb);
1306 int _readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p, int caps, bool getref);
1307 int _readdir_r_cb(int op,
1308 dir_result_t* d,
1309 add_dirent_cb_t cb,
1310 fill_readdir_args_cb_t fill_cb,
1311 void* p,
1312 unsigned want,
1313 unsigned flags,
1314 bool getref,
1315 bool bypass_cache);
1316
1317 void _closedir(dir_result_t *dirp);
1318
1319 // other helpers
1320 void _fragmap_remove_non_leaves(Inode *in);
1321 void _fragmap_remove_stopped_mds(Inode *in, mds_rank_t mds);
1322
1323 void _ll_get(Inode *in);
1324 int _ll_put(Inode *in, uint64_t num);
1325 void _ll_drop_pins();
1326
1327 Fh *_create_fh(Inode *in, int flags, int cmode, const UserPerm& perms);
1328 int _release_fh(Fh *fh);
1329 void _put_fh(Fh *fh);
1330
1331 std::pair<int, bool> _do_remount(bool retry_on_error);
1332
1333 int _read_sync(Fh *f, uint64_t off, uint64_t len, bufferlist *bl, bool *checkeof);
1334 int _read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl);
1335
1336 bool _dentry_valid(const Dentry *dn);
1337
1338 // internal interface
1339 // call these with client_lock held!
1340 int _do_lookup(Inode *dir, const std::string& name, int mask, InodeRef *target,
1341 const UserPerm& perms);
1342
1343 int _lookup(Inode *dir, const std::string& dname, int mask, InodeRef *target,
1344 const UserPerm& perm, std::string* alternate_name=nullptr,
1345 bool is_rename=false);
1346
1347 int _link(Inode *in, Inode *dir, const char *name, const UserPerm& perm, std::string alternate_name,
1348 InodeRef *inp = 0);
1349 int _unlink(Inode *dir, const char *name, const UserPerm& perm);
1350 int _rename(Inode *olddir, const char *oname, Inode *ndir, const char *nname, const UserPerm& perm, std::string alternate_name);
1351 int _mkdir(Inode *dir, const char *name, mode_t mode, const UserPerm& perm,
1352 InodeRef *inp = 0, const std::map<std::string, std::string> &metadata={},
1353 std::string alternate_name="");
1354 int _rmdir(Inode *dir, const char *name, const UserPerm& perms);
1355 int _symlink(Inode *dir, const char *name, const char *target,
1356 const UserPerm& perms, std::string alternate_name, InodeRef *inp = 0);
1357 int _mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev,
1358 const UserPerm& perms, InodeRef *inp = 0);
1359 int _do_setattr(Inode *in, struct ceph_statx *stx, int mask,
1360 const UserPerm& perms, InodeRef *inp,
1361 std::vector<uint8_t>* aux=nullptr);
1362 void stat_to_statx(struct stat *st, struct ceph_statx *stx);
1363 int __setattrx(Inode *in, struct ceph_statx *stx, int mask,
1364 const UserPerm& perms, InodeRef *inp = 0);
1365 int _setattrx(InodeRef &in, struct ceph_statx *stx, int mask,
1366 const UserPerm& perms);
1367 int _setattr(InodeRef &in, struct stat *attr, int mask,
1368 const UserPerm& perms);
1369 int _ll_setattrx(Inode *in, struct ceph_statx *stx, int mask,
1370 const UserPerm& perms, InodeRef *inp = 0);
1371 int _getattr(Inode *in, int mask, const UserPerm& perms, bool force=false);
1372 int _getattr(InodeRef &in, int mask, const UserPerm& perms, bool force=false) {
1373 return _getattr(in.get(), mask, perms, force);
1374 }
1375 int _readlink(Inode *in, char *buf, size_t size);
1376 int _getxattr(Inode *in, const char *name, void *value, size_t len,
1377 const UserPerm& perms);
1378 int _getxattr(InodeRef &in, const char *name, void *value, size_t len,
1379 const UserPerm& perms);
1380 int _getvxattr(Inode *in, const UserPerm& perms, const char *attr_name,
1381 ssize_t size, void *value, mds_rank_t rank);
1382 int _listxattr(Inode *in, char *names, size_t len, const UserPerm& perms);
1383 int _do_setxattr(Inode *in, const char *name, const void *value, size_t len,
1384 int flags, const UserPerm& perms);
1385 int _setxattr(Inode *in, const char *name, const void *value, size_t len,
1386 int flags, const UserPerm& perms);
1387 int _setxattr(InodeRef &in, const char *name, const void *value, size_t len,
1388 int flags, const UserPerm& perms);
1389 int _setxattr_check_data_pool(std::string& name, std::string& value, const OSDMap *osdmap);
1390 void _setxattr_maybe_wait_for_osdmap(const char *name, const void *value, size_t len);
1391 int _removexattr(Inode *in, const char *nm, const UserPerm& perms);
1392 int _removexattr(InodeRef &in, const char *nm, const UserPerm& perms);
1393 int _open(Inode *in, int flags, mode_t mode, Fh **fhp,
1394 const UserPerm& perms);
1395 int _renew_caps(Inode *in);
1396 int _create(Inode *in, const char *name, int flags, mode_t mode, InodeRef *inp,
1397 Fh **fhp, int stripe_unit, int stripe_count, int object_size,
1398 const char *data_pool, bool *created, const UserPerm &perms,
1399 std::string alternate_name);
1400
1401 loff_t _lseek(Fh *fh, loff_t offset, int whence);
1402 int64_t _read(Fh *fh, int64_t offset, uint64_t size, bufferlist *bl);
1403 int64_t _write(Fh *fh, int64_t offset, uint64_t size, const char *buf,
1404 const struct iovec *iov, int iovcnt);
1405 int64_t _preadv_pwritev_locked(Fh *fh, const struct iovec *iov,
1406 unsigned iovcnt, int64_t offset,
1407 bool write, bool clamp_to_int);
1408 int _preadv_pwritev(int fd, const struct iovec *iov, unsigned iovcnt,
1409 int64_t offset, bool write);
1410 int _flush(Fh *fh);
1411 int _fsync(Fh *fh, bool syncdataonly);
1412 int _fsync(Inode *in, bool syncdataonly);
1413 int _sync_fs();
1414 int clear_suid_sgid(Inode *in, const UserPerm& perms, bool defer=false);
1415 int _fallocate(Fh *fh, int mode, int64_t offset, int64_t length);
1416 int _getlk(Fh *fh, struct flock *fl, uint64_t owner);
1417 int _setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep);
1418 int _flock(Fh *fh, int cmd, uint64_t owner);
1419 int _lazyio(Fh *fh, int enable);
1420
1421 Dentry *get_or_create(Inode *dir, const char* name);
1422
1423 int xattr_permission(Inode *in, const char *name, unsigned want,
1424 const UserPerm& perms);
1425 int may_setattr(Inode *in, struct ceph_statx *stx, int mask,
1426 const UserPerm& perms);
1427 int may_open(Inode *in, int flags, const UserPerm& perms);
1428 int may_lookup(Inode *dir, const UserPerm& perms);
1429 int may_create(Inode *dir, const UserPerm& perms);
1430 int may_delete(Inode *dir, const char *name, const UserPerm& perms);
1431 int may_hardlink(Inode *in, const UserPerm& perms);
1432
1433 int _getattr_for_perm(Inode *in, const UserPerm& perms);
1434
1435 vinodeno_t _get_vino(Inode *in);
1436
1437 bool _vxattrcb_fscrypt_auth_exists(Inode *in);
1438 size_t _vxattrcb_fscrypt_auth(Inode *in, char *val, size_t size);
1439 int _vxattrcb_fscrypt_auth_set(Inode *in, const void *val, size_t size, const UserPerm& perms);
1440 bool _vxattrcb_fscrypt_file_exists(Inode *in);
1441 size_t _vxattrcb_fscrypt_file(Inode *in, char *val, size_t size);
1442 int _vxattrcb_fscrypt_file_set(Inode *in, const void *val, size_t size, const UserPerm& perms);
1443 bool _vxattrcb_quota_exists(Inode *in);
1444 size_t _vxattrcb_quota(Inode *in, char *val, size_t size);
1445 size_t _vxattrcb_quota_max_bytes(Inode *in, char *val, size_t size);
1446 size_t _vxattrcb_quota_max_files(Inode *in, char *val, size_t size);
1447
1448 bool _vxattrcb_layout_exists(Inode *in);
1449 size_t _vxattrcb_layout(Inode *in, char *val, size_t size);
1450 size_t _vxattrcb_layout_stripe_unit(Inode *in, char *val, size_t size);
1451 size_t _vxattrcb_layout_stripe_count(Inode *in, char *val, size_t size);
1452 size_t _vxattrcb_layout_object_size(Inode *in, char *val, size_t size);
1453 size_t _vxattrcb_layout_pool(Inode *in, char *val, size_t size);
1454 size_t _vxattrcb_layout_pool_namespace(Inode *in, char *val, size_t size);
1455 size_t _vxattrcb_dir_entries(Inode *in, char *val, size_t size);
1456 size_t _vxattrcb_dir_files(Inode *in, char *val, size_t size);
1457 size_t _vxattrcb_dir_subdirs(Inode *in, char *val, size_t size);
1458 size_t _vxattrcb_dir_rentries(Inode *in, char *val, size_t size);
1459 size_t _vxattrcb_dir_rfiles(Inode *in, char *val, size_t size);
1460 size_t _vxattrcb_dir_rsubdirs(Inode *in, char *val, size_t size);
1461 size_t _vxattrcb_dir_rsnaps(Inode *in, char *val, size_t size);
1462 size_t _vxattrcb_dir_rbytes(Inode *in, char *val, size_t size);
1463 size_t _vxattrcb_dir_rctime(Inode *in, char *val, size_t size);
1464
1465 bool _vxattrcb_dir_pin_exists(Inode *in);
1466 size_t _vxattrcb_dir_pin(Inode *in, char *val, size_t size);
1467
1468 bool _vxattrcb_snap_btime_exists(Inode *in);
1469 size_t _vxattrcb_snap_btime(Inode *in, char *val, size_t size);
1470
1471 size_t _vxattrcb_caps(Inode *in, char *val, size_t size);
1472
1473 bool _vxattrcb_mirror_info_exists(Inode *in);
1474 size_t _vxattrcb_mirror_info(Inode *in, char *val, size_t size);
1475
1476 size_t _vxattrcb_cluster_fsid(Inode *in, char *val, size_t size);
1477 size_t _vxattrcb_client_id(Inode *in, char *val, size_t size);
1478
1479 static const VXattr *_get_vxattrs(Inode *in);
1480 static const VXattr *_match_vxattr(Inode *in, const char *name);
1481
1482 int _do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep,
1483 struct flock *fl, uint64_t owner, bool removing=false);
1484 int _interrupt_filelock(MetaRequest *req);
1485 void _encode_filelocks(Inode *in, bufferlist& bl);
1486 void _release_filelocks(Fh *fh);
1487 void _update_lock_state(struct flock *fl, uint64_t owner, ceph_lock_state_t *lock_state);
1488
1489 int _posix_acl_create(Inode *dir, mode_t *mode, bufferlist& xattrs_bl,
1490 const UserPerm& perms);
1491 int _posix_acl_chmod(Inode *in, mode_t mode, const UserPerm& perms);
1492 int _posix_acl_permission(Inode *in, const UserPerm& perms, unsigned want);
1493
1494 mds_rank_t _get_random_up_mds() const;
1495
1496 int _ll_getattr(Inode *in, int caps, const UserPerm& perms);
1497 int _lookup_parent(Inode *in, const UserPerm& perms, Inode **parent=NULL);
1498 int _lookup_name(Inode *in, Inode *parent, const UserPerm& perms);
1499 int _lookup_vino(vinodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
1500 bool _ll_forget(Inode *in, uint64_t count);
1501
1502 void collect_and_send_metrics();
1503 void collect_and_send_global_metrics();
1504
1505 void update_io_stat_metadata(utime_t latency);
1506 void update_io_stat_read(utime_t latency);
1507 void update_io_stat_write(utime_t latency);
1508
1509 uint32_t deleg_timeout = 0;
1510
1511 client_switch_interrupt_callback_t switch_interrupt_cb = nullptr;
1512 client_remount_callback_t remount_cb = nullptr;
1513 client_ino_callback_t ino_invalidate_cb = nullptr;
1514 client_dentry_callback_t dentry_invalidate_cb = nullptr;
1515 client_umask_callback_t umask_cb = nullptr;
1516 client_ino_release_t ino_release_cb = nullptr;
1517 void *callback_handle = nullptr;
1518 bool can_invalidate_dentries = false;
1519
1520 Finisher async_ino_invalidator;
1521 Finisher async_dentry_invalidator;
1522 Finisher interrupt_finisher;
1523 Finisher remount_finisher;
1524 Finisher async_ino_releasor;
1525 Finisher objecter_finisher;
1526
1527 ceph::coarse_mono_time last_cap_renew;
1528
1529 CommandHook m_command_hook;
1530
1531 int user_id, group_id;
1532 int acl_type = NO_ACL;
1533
1534 epoch_t cap_epoch_barrier = 0;
1535
1536 // mds sessions
1537 map<mds_rank_t, MetaSessionRef> mds_sessions; // mds -> push seq
1538 std::set<mds_rank_t> mds_ranks_closing; // mds ranks currently tearing down sessions
1539 std::list<ceph::condition_variable*> waiting_for_mdsmap;
1540
1541 // FSMap, for when using mds_command
1542 std::list<ceph::condition_variable*> waiting_for_fsmap;
1543 std::unique_ptr<FSMap> fsmap;
1544 std::unique_ptr<FSMapUser> fsmap_user;
1545
1546 // This mutex only protects command_table
1547 ceph::mutex command_lock = ceph::make_mutex("Client::command_lock");
1548 // MDS command state
1549 CommandTable<MDSCommandOp> command_table;
1550
1551 bool _use_faked_inos;
1552
1553 // Cluster fsid
1554 fs_cluster_id_t fscid;
1555
1556 // file handles, etc.
1557 interval_set<int> free_fd_set; // unused fds
1558 ceph::unordered_map<int, Fh*> fd_map;
1559 set<Fh*> ll_unclosed_fh_set;
1560 ceph::unordered_set<dir_result_t*> opened_dirs;
1561 uint64_t fd_gen = 1;
1562
1563 bool mount_aborted = false;
1564 bool blocklisted = false;
1565
1566 ceph::unordered_map<vinodeno_t, Inode*> inode_map;
1567 ceph::unordered_map<ino_t, vinodeno_t> faked_ino_map;
1568 interval_set<ino_t> free_faked_inos;
1569 ino_t last_used_faked_ino;
1570 ino_t last_used_faked_root;
1571
1572 int local_osd = -CEPHFS_ENXIO;
1573 epoch_t local_osd_epoch = 0;
1574
1575 // mds requests
1576 ceph_tid_t last_tid = 0;
1577 ceph_tid_t oldest_tid = 0; // oldest incomplete mds request, excluding setfilelock requests
1578 map<ceph_tid_t, MetaRequest*> mds_requests;
1579
1580 // cap flushing
1581 ceph_tid_t last_flush_tid = 1;
1582
1583 xlist<Inode*> delayed_list;
1584 int num_flushing_caps = 0;
1585 ceph::unordered_map<inodeno_t,SnapRealm*> snap_realms;
1586 std::map<std::string, std::string> metadata;
1587
1588 ceph::coarse_mono_time last_auto_reconnect;
1589 std::chrono::seconds caps_release_delay, mount_timeout;
1590 // trace generation
1591 std::ofstream traceout;
1592
1593 ceph::condition_variable mount_cond, sync_cond;
1594
1595 std::map<std::pair<int64_t,std::string>, int> pool_perms;
1596 std::list<ceph::condition_variable*> waiting_for_pool_perm;
1597
1598 std::list<ceph::condition_variable*> waiting_for_rename;
1599
1600 uint64_t retries_on_invalidate = 0;
1601
1602 // state reclaim
1603 int reclaim_errno = 0;
1604 epoch_t reclaim_osd_epoch = 0;
1605 entity_addrvec_t reclaim_target_addrs;
1606
1607 // dentry lease metrics
1608 uint64_t dentry_nr = 0;
1609 uint64_t dlease_hits = 0;
1610 uint64_t dlease_misses = 0;
1611
1612 uint64_t cap_hits = 0;
1613 uint64_t cap_misses = 0;
1614
1615 uint64_t opened_files = 0;
1616 uint64_t pinned_icaps = 0;
1617 uint64_t opened_inodes = 0;
1618
1619 uint64_t total_read_ops = 0;
1620 uint64_t total_read_size = 0;
1621
1622 uint64_t total_write_ops = 0;
1623 uint64_t total_write_size = 0;
1624
1625 ceph::spinlock delay_i_lock;
1626 std::map<Inode*,int> delay_i_release;
1627
1628 uint64_t nr_metadata_request = 0;
1629 uint64_t nr_read_request = 0;
1630 uint64_t nr_write_request = 0;
1631 };
1632
1633 /**
1634 * Specialization of Client that manages its own Objecter instance
1635 * and handles init/shutdown of messenger/monclient
1636 */
// Client subclass; overrides the destructor, init() and shutdown().
// All members are only declared here.
1637 class StandaloneClient : public Client
1638 {
1639 public:
// Takes the Messenger, the MonClient and a boost::asio io_context.
1640 StandaloneClient(Messenger *m, MonClient *mc, boost::asio::io_context& ictx);
1641
1642 ~StandaloneClient() override;
1643
// Overrides of the corresponding Client virtuals.
1644 int init() override;
1645 void shutdown() override;
1646 };
1647
1648 #endif