]> git.proxmox.com Git - ceph.git/blob - ceph/src/client/Client.h
import ceph 15.2.14
[ceph.git] / ceph / src / client / Client.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3 /*
4 * Ceph - scalable distributed file system
5 *
6 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
7 *
8 * This is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License version 2.1, as published by the Free Software
11 * Foundation. See file COPYING.
12 *
13 */
14
15
16 #ifndef CEPH_CLIENT_H
17 #define CEPH_CLIENT_H
18
19 #include "common/CommandTable.h"
20 #include "common/Finisher.h"
21 #include "common/Timer.h"
22 #include "common/ceph_mutex.h"
23 #include "common/cmdparse.h"
24 #include "common/compiler_extensions.h"
25 #include "include/common_fwd.h"
26 #include "include/cephfs/ceph_ll_client.h"
27 #include "include/filepath.h"
28 #include "include/interval_set.h"
29 #include "include/lru.h"
30 #include "include/types.h"
31 #include "include/unordered_map.h"
32 #include "include/unordered_set.h"
33 #include "mds/mdstypes.h"
34 #include "msg/Dispatcher.h"
35 #include "msg/MessageRef.h"
36 #include "msg/Messenger.h"
37 #include "osdc/ObjectCacher.h"
38
39 #include "InodeRef.h"
40 #include "MetaSession.h"
41 #include "UserPerm.h"
42
43 #include <fstream>
44 #include <map>
45 #include <memory>
46 #include <set>
47 #include <string>
48
49 using std::set;
50 using std::map;
51 using std::fstream;
52
53 class FSMap;
54 class FSMapUser;
55 class MonClient;
56
57
58 struct DirStat;
59 struct LeaseStat;
60 struct InodeStat;
61
62 class Filer;
63 class Objecter;
64 class WritebackHandler;
65
66 class MDSMap;
67 class Message;
68
// Client counter ids.  The block starts at l_c_first and every following
// id is a fixed offset from it; l_c_last is the closing sentinel.
// NOTE(review): presumably these index the PerfCounters held in
// Client::logger -- confirm against the .cc file.
enum {
  l_c_first = 20000,
  l_c_reply = l_c_first + 1,
  l_c_lat   = l_c_first + 2,
  l_c_wrlat = l_c_first + 3,
  l_c_read  = l_c_first + 4,
  l_c_fsync = l_c_first + 5,
  l_c_last  = l_c_first + 6,
};
78
79
80 class MDSCommandOp : public CommandOp
81 {
82 public:
83 mds_gid_t mds_gid;
84
85 explicit MDSCommandOp(ceph_tid_t t) : CommandOp(t) {}
86 };
87
88 /* error codes for ceph_fuse (negative values built from 1<<16) */
89 #define CEPH_FUSE_NO_MDS_UP -((1<<16)+0) /* no mds up detected in ceph_fuse */
90 #define CEPH_FUSE_LAST -((1<<16)+1) /* (unused) */
91
92 // ============================================
93 // types for my local metadata cache
94 /* basic structure:
95
96 - Dentries live in an LRU loop. they get expired based on last access.
97 see include/lru.h. items can be bumped to "mid" or "top" of list, etc.
98 - Inode has ref count for each Fh, Dir, or Dentry that points to it.
99 - when Inode ref goes to 0, it's expired.
100 - when Dir is empty, it's removed (and its Inode ref--)
101
102 */
103
104 /* getdir result */
/**
 * One entry of a getdir result: an entry name plus optional stat data.
 *
 * d_name  name of the entry
 * st      stat data copied from the caller; all-zero when the entry is
 *         built from a name only
 * stmask  mask value supplied together with st; 0 when the entry is
 *         built from a name only
 */
struct DirEntry {
  /// Build an entry that only carries a name; st is zeroed and stmask is 0.
  explicit DirEntry(const std::string &s) : d_name(s) {}

  /// Build an entry from a name, a copy of @a s, and the mask @a stm.
  DirEntry(const std::string &n, const struct stat& s, int stm)
    : d_name(n), st(s), stmask(stm) {}

  std::string d_name;
  // Value-initialised so the name-only constructor never exposes
  // indeterminate stat fields to a reader.
  struct stat st{};
  int stmask = 0;
};
113
114 struct Cap;
115 class Dir;
116 class Dentry;
117 struct SnapRealm;
118 struct Fh;
119 struct CapSnap;
120
121 struct MetaRequest;
122 class ceph_lock_state_t;
123
124 // ========================================================
125 // client interface
126
127 struct dir_result_t {
128 static const int SHIFT = 28;
129 static const int64_t MASK = (1 << SHIFT) - 1;
130 static const int64_t HASH = 0xFFULL << (SHIFT + 24); // impossible frag bits
131 static const loff_t END = 1ULL << (SHIFT + 32);
132
133 struct dentry {
134 int64_t offset;
135 string name;
136 InodeRef inode;
137 explicit dentry(int64_t o) : offset(o) {}
138 dentry(int64_t o, const string& n, const InodeRef& in) :
139 offset(o), name(n), inode(in) {}
140 };
141 struct dentry_off_lt {
142 bool operator()(const dentry& d, int64_t off) const {
143 return dir_result_t::fpos_cmp(d.offset, off) < 0;
144 }
145 };
146
147
148 explicit dir_result_t(Inode *in, const UserPerm& perms);
149
150
151 static uint64_t make_fpos(unsigned h, unsigned l, bool hash) {
152 uint64_t v = ((uint64_t)h<< SHIFT) | (uint64_t)l;
153 if (hash)
154 v |= HASH;
155 else
156 ceph_assert((v & HASH) != HASH);
157 return v;
158 }
159 static unsigned fpos_high(uint64_t p) {
160 unsigned v = (p & (END-1)) >> SHIFT;
161 if ((p & HASH) == HASH)
162 return ceph_frag_value(v);
163 return v;
164 }
165 static unsigned fpos_low(uint64_t p) {
166 return p & MASK;
167 }
168 static int fpos_cmp(uint64_t l, uint64_t r) {
169 int c = ceph_frag_compare(fpos_high(l), fpos_high(r));
170 if (c)
171 return c;
172 if (fpos_low(l) == fpos_low(r))
173 return 0;
174 return fpos_low(l) < fpos_low(r) ? -1 : 1;
175 }
176
177 unsigned offset_high() { return fpos_high(offset); }
178 unsigned offset_low() { return fpos_low(offset); }
179
180 void set_end() { offset |= END; }
181 bool at_end() { return (offset & END); }
182
183 void set_hash_order() { offset |= HASH; }
184 bool hash_order() { return (offset & HASH) == HASH; }
185
186 bool is_cached() {
187 if (buffer.empty())
188 return false;
189 if (hash_order()) {
190 return buffer_frag.contains(offset_high());
191 } else {
192 return buffer_frag == frag_t(offset_high());
193 }
194 }
195
196 void reset() {
197 last_name.clear();
198 next_offset = 2;
199 offset = 0;
200 ordered_count = 0;
201 cache_index = 0;
202 buffer.clear();
203 }
204
205 InodeRef inode;
206 int64_t offset; // hash order:
207 // (0xff << 52) | ((24 bits hash) << 28) |
208 // (the nth entry has hash collision);
209 // frag+name order;
210 // ((frag value) << 28) | (the nth entry in frag);
211
212 unsigned next_offset; // offset of next chunk (last_name's + 1)
213 string last_name; // last entry in previous chunk
214
215 uint64_t release_count;
216 uint64_t ordered_count;
217 unsigned cache_index;
218 int start_shared_gen; // dir shared_gen at start of readdir
219 UserPerm perms;
220
221 frag_t buffer_frag;
222
223 vector<dentry> buffer;
224 struct dirent de;
225 };
226
227 class Client : public Dispatcher, public md_config_obs_t {
228 public:
229 friend class C_Block_Sync; // Calls block map and protected helpers
230 friend class C_Client_CacheInvalidate; // calls ino_invalidate_cb
231 friend class C_Client_DentryInvalidate; // calls dentry_invalidate_cb
232 friend class C_Client_FlushComplete; // calls put_inode()
233 friend class C_Client_Remount;
234 friend class C_Client_RequestInterrupt;
235 friend class C_Deleg_Timeout; // Asserts on client_lock, called when a delegation is unreturned
236 friend class C_Client_CacheRelease; // Asserts on client_lock
237 friend class SyntheticClient;
238 friend void intrusive_ptr_release(Inode *in);
239
240 using Dispatcher::cct;
241
242 typedef int (*add_dirent_cb_t)(void *p, struct dirent *de, struct ceph_statx *stx, off_t off, Inode *in);
243
244 class CommandHook : public AdminSocketHook {
245 public:
246 explicit CommandHook(Client *client);
247 int call(std::string_view command, const cmdmap_t& cmdmap,
248 Formatter *f,
249 std::ostream& errss,
250 bufferlist& out) override;
251 private:
252 Client *m_client;
253 };
254
255 Client(Messenger *m, MonClient *mc, Objecter *objecter_);
256 Client(const Client&) = delete;
257 Client(const Client&&) = delete;
258 virtual ~Client() override;
259
260 static UserPerm pick_my_perms(CephContext *c) {
261 uid_t uid = c->_conf->client_mount_uid >= 0 ? c->_conf->client_mount_uid : -1;
262 gid_t gid = c->_conf->client_mount_gid >= 0 ? c->_conf->client_mount_gid : -1;
263 return UserPerm(uid, gid);
264 }
265 UserPerm pick_my_perms() {
266 uid_t uid = user_id >= 0 ? user_id : -1;
267 gid_t gid = group_id >= 0 ? group_id : -1;
268 return UserPerm(uid, gid);
269 }
270
271 int mount(const std::string &mount_root, const UserPerm& perms,
272 bool require_mds=false, const std::string &fs_name="");
273 void unmount();
274 void abort_conn();
275
276 void set_uuid(const std::string& uuid);
277 void set_session_timeout(unsigned timeout);
278 int start_reclaim(const std::string& uuid, unsigned flags,
279 const std::string& fs_name);
280 void finish_reclaim();
281
282 fs_cluster_id_t get_fs_cid() {
283 return fscid;
284 }
285
286 int mds_command(
287 const std::string &mds_spec,
288 const std::vector<std::string>& cmd,
289 const bufferlist& inbl,
290 bufferlist *poutbl, std::string *prs, Context *onfinish);
291
292 // these should (more or less) mirror the actual system calls.
293 int statfs(const char *path, struct statvfs *stbuf, const UserPerm& perms);
294
295 // crap
296 int chdir(const char *s, std::string &new_cwd, const UserPerm& perms);
297 void _getcwd(std::string& cwd, const UserPerm& perms);
298 void getcwd(std::string& cwd, const UserPerm& perms);
299
300 // namespace ops
301 int opendir(const char *name, dir_result_t **dirpp, const UserPerm& perms);
302 int closedir(dir_result_t *dirp);
303
304 /**
305 * Fill a directory listing from dirp, invoking cb for each entry
306 * with the given pointer, the dirent, the struct ceph_statx, the
307 * offset, and the Inode.
308 *
309 * Returns 0 if it reached the end of the directory.
310 * If @a cb returns a negative error code, stop and return that.
311 */
312 int readdir_r_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p,
313 unsigned want=0, unsigned flags=AT_NO_ATTR_SYNC,
314 bool getref=false);
315
316 struct dirent * readdir(dir_result_t *d);
317 int readdir_r(dir_result_t *dirp, struct dirent *de);
318 int readdirplus_r(dir_result_t *dirp, struct dirent *de, struct ceph_statx *stx, unsigned want, unsigned flags, Inode **out);
319
320 int getdir(const char *relpath, list<string>& names,
321 const UserPerm& perms); // get the whole dir at once.
322
323 /**
324 * Returns the length of the buffer that got filled in, or -errno.
325 * If it returns -ERANGE you just need to increase the size of the
326 * buffer and try again.
327 */
328 int _getdents(dir_result_t *dirp, char *buf, int buflen, bool ful); // get a bunch of dentries at once
329 int getdents(dir_result_t *dirp, char *buf, int buflen) {
330 return _getdents(dirp, buf, buflen, true);
331 }
332 int getdnames(dir_result_t *dirp, char *buf, int buflen) {
333 return _getdents(dirp, buf, buflen, false);
334 }
335
336 void rewinddir(dir_result_t *dirp);
337 loff_t telldir(dir_result_t *dirp);
338 void seekdir(dir_result_t *dirp, loff_t offset);
339
340 int link(const char *existing, const char *newname, const UserPerm& perm);
341 int unlink(const char *path, const UserPerm& perm);
342 int rename(const char *from, const char *to, const UserPerm& perm);
343
344 // dirs
345 int mkdir(const char *path, mode_t mode, const UserPerm& perm);
346 int mkdirs(const char *path, mode_t mode, const UserPerm& perms);
347 int rmdir(const char *path, const UserPerm& perms);
348
349 // symlinks
350 int readlink(const char *path, char *buf, loff_t size, const UserPerm& perms);
351
352 int symlink(const char *existing, const char *newname, const UserPerm& perms);
353
354 // inode stuff
355 unsigned statx_to_mask(unsigned int flags, unsigned int want);
356 int stat(const char *path, struct stat *stbuf, const UserPerm& perms,
357 frag_info_t *dirstat=0, int mask=CEPH_STAT_CAP_INODE_ALL);
358 int statx(const char *path, struct ceph_statx *stx,
359 const UserPerm& perms,
360 unsigned int want, unsigned int flags);
361 int lstat(const char *path, struct stat *stbuf, const UserPerm& perms,
362 frag_info_t *dirstat=0, int mask=CEPH_STAT_CAP_INODE_ALL);
363
364 int setattr(const char *relpath, struct stat *attr, int mask,
365 const UserPerm& perms);
366 int setattrx(const char *relpath, struct ceph_statx *stx, int mask,
367 const UserPerm& perms, int flags=0);
368 int fsetattr(int fd, struct stat *attr, int mask, const UserPerm& perms);
369 int fsetattrx(int fd, struct ceph_statx *stx, int mask, const UserPerm& perms);
370 int chmod(const char *path, mode_t mode, const UserPerm& perms);
371 int fchmod(int fd, mode_t mode, const UserPerm& perms);
372 int lchmod(const char *path, mode_t mode, const UserPerm& perms);
373 int chown(const char *path, uid_t new_uid, gid_t new_gid,
374 const UserPerm& perms);
375 int fchown(int fd, uid_t new_uid, gid_t new_gid, const UserPerm& perms);
376 int lchown(const char *path, uid_t new_uid, gid_t new_gid,
377 const UserPerm& perms);
378 int utime(const char *path, struct utimbuf *buf, const UserPerm& perms);
379 int lutime(const char *path, struct utimbuf *buf, const UserPerm& perms);
380 int futime(int fd, struct utimbuf *buf, const UserPerm& perms);
381 int utimes(const char *relpath, struct timeval times[2], const UserPerm& perms);
382 int lutimes(const char *relpath, struct timeval times[2], const UserPerm& perms);
383 int futimes(int fd, struct timeval times[2], const UserPerm& perms);
384 int futimens(int fd, struct timespec times[2], const UserPerm& perms);
385 int flock(int fd, int operation, uint64_t owner);
386 int truncate(const char *path, loff_t size, const UserPerm& perms);
387
388 // file ops
389 int mknod(const char *path, mode_t mode, const UserPerm& perms, dev_t rdev=0);
390 int open(const char *path, int flags, const UserPerm& perms, mode_t mode=0);
391 int open(const char *path, int flags, const UserPerm& perms,
392 mode_t mode, int stripe_unit, int stripe_count, int object_size,
393 const char *data_pool);
394 int lookup_hash(inodeno_t ino, inodeno_t dirino, const char *name,
395 const UserPerm& perms);
396 int lookup_ino(inodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
397 int lookup_name(Inode *in, Inode *parent, const UserPerm& perms);
398 int close(int fd);
399 loff_t lseek(int fd, loff_t offset, int whence);
400 int read(int fd, char *buf, loff_t size, loff_t offset=-1);
401 int preadv(int fd, const struct iovec *iov, int iovcnt, loff_t offset=-1);
402 int write(int fd, const char *buf, loff_t size, loff_t offset=-1);
403 int pwritev(int fd, const struct iovec *iov, int iovcnt, loff_t offset=-1);
404 int fake_write_size(int fd, loff_t size);
405 int ftruncate(int fd, loff_t size, const UserPerm& perms);
406 int fsync(int fd, bool syncdataonly);
407 int fstat(int fd, struct stat *stbuf, const UserPerm& perms,
408 int mask=CEPH_STAT_CAP_INODE_ALL);
409 int fstatx(int fd, struct ceph_statx *stx, const UserPerm& perms,
410 unsigned int want, unsigned int flags);
411 int fallocate(int fd, int mode, loff_t offset, loff_t length);
412
413 // full path xattr ops
414 int getxattr(const char *path, const char *name, void *value, size_t size,
415 const UserPerm& perms);
416 int lgetxattr(const char *path, const char *name, void *value, size_t size,
417 const UserPerm& perms);
418 int fgetxattr(int fd, const char *name, void *value, size_t size,
419 const UserPerm& perms);
420 int listxattr(const char *path, char *list, size_t size, const UserPerm& perms);
421 int llistxattr(const char *path, char *list, size_t size, const UserPerm& perms);
422 int flistxattr(int fd, char *list, size_t size, const UserPerm& perms);
423 int removexattr(const char *path, const char *name, const UserPerm& perms);
424 int lremovexattr(const char *path, const char *name, const UserPerm& perms);
425 int fremovexattr(int fd, const char *name, const UserPerm& perms);
426 int setxattr(const char *path, const char *name, const void *value,
427 size_t size, int flags, const UserPerm& perms);
428 int lsetxattr(const char *path, const char *name, const void *value,
429 size_t size, int flags, const UserPerm& perms);
430 int fsetxattr(int fd, const char *name, const void *value, size_t size,
431 int flags, const UserPerm& perms);
432
433 int sync_fs();
434 int64_t drop_caches();
435
436 // hpc lazyio
437 int lazyio(int fd, int enable);
438 int lazyio_propagate(int fd, loff_t offset, size_t count);
439 int lazyio_synchronize(int fd, loff_t offset, size_t count);
440
441 // expose file layout
442 int describe_layout(const char *path, file_layout_t* layout,
443 const UserPerm& perms);
444 int fdescribe_layout(int fd, file_layout_t* layout);
445 int get_file_stripe_address(int fd, loff_t offset, vector<entity_addr_t>& address);
446 int get_file_extent_osds(int fd, loff_t off, loff_t *len, vector<int>& osds);
447 int get_osd_addr(int osd, entity_addr_t& addr);
448
449 // expose mdsmap
450 int64_t get_default_pool_id();
451
452 // expose osdmap
453 int get_local_osd();
454 int get_pool_replication(int64_t pool);
455 int64_t get_pool_id(const char *pool_name);
456 string get_pool_name(int64_t pool);
457 int get_osd_crush_location(int id, vector<pair<string, string> >& path);
458
459 int enumerate_layout(int fd, vector<ObjectExtent>& result,
460 loff_t length, loff_t offset);
461
462 int mksnap(const char *path, const char *name, const UserPerm& perm);
463 int rmsnap(const char *path, const char *name, const UserPerm& perm);
464
465 // Inode permission checking
466 int inode_permission(Inode *in, const UserPerm& perms, unsigned want);
467
468 // expose caps
469 int get_caps_issued(int fd);
470 int get_caps_issued(const char *path, const UserPerm& perms);
471
472 snapid_t ll_get_snapid(Inode *in);
473 vinodeno_t ll_get_vino(Inode *in) {
474 std::lock_guard lock(client_lock);
475 return _get_vino(in);
476 }
477 // get inode from faked ino
478 Inode *ll_get_inode(ino_t ino);
479 Inode *ll_get_inode(vinodeno_t vino);
480 int ll_lookup(Inode *parent, const char *name, struct stat *attr,
481 Inode **out, const UserPerm& perms);
482 int ll_lookup_inode(struct inodeno_t ino, const UserPerm& perms, Inode **inode);
483 int ll_lookup_vino(vinodeno_t vino, const UserPerm& perms, Inode **inode);
484 int ll_lookupx(Inode *parent, const char *name, Inode **out,
485 struct ceph_statx *stx, unsigned want, unsigned flags,
486 const UserPerm& perms);
487 bool ll_forget(Inode *in, uint64_t count);
488 bool ll_put(Inode *in);
489 int ll_get_snap_ref(snapid_t snap);
490
491 int ll_getattr(Inode *in, struct stat *st, const UserPerm& perms);
492 int ll_getattrx(Inode *in, struct ceph_statx *stx, unsigned int want,
493 unsigned int flags, const UserPerm& perms);
494 int ll_setattrx(Inode *in, struct ceph_statx *stx, int mask,
495 const UserPerm& perms);
496 int ll_setattr(Inode *in, struct stat *st, int mask,
497 const UserPerm& perms);
498 int ll_getxattr(Inode *in, const char *name, void *value, size_t size,
499 const UserPerm& perms);
500 int ll_setxattr(Inode *in, const char *name, const void *value, size_t size,
501 int flags, const UserPerm& perms);
502 int ll_removexattr(Inode *in, const char *name, const UserPerm& perms);
503 int ll_listxattr(Inode *in, char *list, size_t size, const UserPerm& perms);
504 int ll_opendir(Inode *in, int flags, dir_result_t **dirpp,
505 const UserPerm& perms);
506 int ll_releasedir(dir_result_t* dirp);
507 int ll_fsyncdir(dir_result_t* dirp);
508 int ll_readlink(Inode *in, char *buf, size_t bufsize, const UserPerm& perms);
509 int ll_mknod(Inode *in, const char *name, mode_t mode, dev_t rdev,
510 struct stat *attr, Inode **out, const UserPerm& perms);
511 int ll_mknodx(Inode *parent, const char *name, mode_t mode, dev_t rdev,
512 Inode **out, struct ceph_statx *stx, unsigned want,
513 unsigned flags, const UserPerm& perms);
514 int ll_mkdir(Inode *in, const char *name, mode_t mode, struct stat *attr,
515 Inode **out, const UserPerm& perm);
516 int ll_mkdirx(Inode *parent, const char *name, mode_t mode, Inode **out,
517 struct ceph_statx *stx, unsigned want, unsigned flags,
518 const UserPerm& perms);
519 int ll_symlink(Inode *in, const char *name, const char *value,
520 struct stat *attr, Inode **out, const UserPerm& perms);
521 int ll_symlinkx(Inode *parent, const char *name, const char *value,
522 Inode **out, struct ceph_statx *stx, unsigned want,
523 unsigned flags, const UserPerm& perms);
524 int ll_unlink(Inode *in, const char *name, const UserPerm& perm);
525 int ll_rmdir(Inode *in, const char *name, const UserPerm& perms);
526 int ll_rename(Inode *parent, const char *name, Inode *newparent,
527 const char *newname, const UserPerm& perm);
528 int ll_link(Inode *in, Inode *newparent, const char *newname,
529 const UserPerm& perm);
530 int ll_open(Inode *in, int flags, Fh **fh, const UserPerm& perms);
531 int _ll_create(Inode *parent, const char *name, mode_t mode,
532 int flags, InodeRef *in, int caps, Fh **fhp,
533 const UserPerm& perms);
534 int ll_create(Inode *parent, const char *name, mode_t mode, int flags,
535 struct stat *attr, Inode **out, Fh **fhp,
536 const UserPerm& perms);
537 int ll_createx(Inode *parent, const char *name, mode_t mode,
538 int oflags, Inode **outp, Fh **fhp,
539 struct ceph_statx *stx, unsigned want, unsigned lflags,
540 const UserPerm& perms);
541 int ll_read_block(Inode *in, uint64_t blockid, char *buf, uint64_t offset,
542 uint64_t length, file_layout_t* layout);
543
544 int ll_write_block(Inode *in, uint64_t blockid,
545 char* buf, uint64_t offset,
546 uint64_t length, file_layout_t* layout,
547 uint64_t snapseq, uint32_t sync);
548 int ll_commit_blocks(Inode *in, uint64_t offset, uint64_t length);
549
550 int ll_statfs(Inode *in, struct statvfs *stbuf, const UserPerm& perms);
551 int ll_walk(const char* name, Inode **i, struct ceph_statx *stx,
552 unsigned int want, unsigned int flags, const UserPerm& perms);
553 uint32_t ll_stripe_unit(Inode *in);
554 int ll_file_layout(Inode *in, file_layout_t *layout);
555 uint64_t ll_snap_seq(Inode *in);
556
557 int ll_read(Fh *fh, loff_t off, loff_t len, bufferlist *bl);
558 int ll_write(Fh *fh, loff_t off, loff_t len, const char *data);
559 int64_t ll_readv(struct Fh *fh, const struct iovec *iov, int iovcnt, int64_t off);
560 int64_t ll_writev(struct Fh *fh, const struct iovec *iov, int iovcnt, int64_t off);
561 loff_t ll_lseek(Fh *fh, loff_t offset, int whence);
562 int ll_flush(Fh *fh);
563 int ll_fsync(Fh *fh, bool syncdataonly);
564 int ll_sync_inode(Inode *in, bool syncdataonly);
565 int ll_fallocate(Fh *fh, int mode, int64_t offset, int64_t length);
566 int ll_release(Fh *fh);
567 int ll_getlk(Fh *fh, struct flock *fl, uint64_t owner);
568 int ll_setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep);
569 int ll_flock(Fh *fh, int cmd, uint64_t owner);
570 int ll_lazyio(Fh *fh, int enable);
571 int ll_file_layout(Fh *fh, file_layout_t *layout);
572 void ll_interrupt(void *d);
573 bool ll_handle_umask() {
574 return acl_type != NO_ACL;
575 }
576
577 int ll_get_stripe_osd(struct Inode *in, uint64_t blockno,
578 file_layout_t* layout);
579 uint64_t ll_get_internal_offset(struct Inode *in, uint64_t blockno);
580
581 int ll_num_osds(void);
582 int ll_osdaddr(int osd, uint32_t *addr);
583 int ll_osdaddr(int osd, char* buf, size_t size);
584
585 void ll_register_callbacks(struct ceph_client_callback_args *args);
586 int test_dentry_handling(bool can_invalidate);
587
588 const char** get_tracked_conf_keys() const override;
589 void handle_conf_change(const ConfigProxy& conf,
590 const std::set <std::string> &changed) override;
591 uint32_t get_deleg_timeout() { return deleg_timeout; }
592 int set_deleg_timeout(uint32_t timeout);
593 int ll_delegation(Fh *fh, unsigned cmd, ceph_deleg_cb_t cb, void *priv);
594
595 entity_name_t get_myname() { return messenger->get_myname(); }
596 void wait_on_list(std::list<ceph::condition_variable*>& ls);
597 void signal_cond_list(std::list<ceph::condition_variable*>& ls);
598
599 void set_filer_flags(int flags);
600 void clear_filer_flags(int flags);
601
602 void tear_down_cache();
603
604 void update_metadata(std::string const &k, std::string const &v);
605
606 client_t get_nodeid() { return whoami; }
607
608 inodeno_t get_root_ino();
609 Inode *get_root();
610
611 virtual int init();
612 virtual void shutdown();
613
614 // messaging
615 void handle_mds_map(const MConstRef<MMDSMap>& m);
616 void handle_fs_map(const MConstRef<MFSMap>& m);
617 void handle_fs_map_user(const MConstRef<MFSMapUser>& m);
618 void handle_osd_map(const MConstRef<MOSDMap>& m);
619
620 void handle_lease(const MConstRef<MClientLease>& m);
621
622 // inline data
623 int uninline_data(Inode *in, Context *onfinish);
624
625 // file caps
626 void check_cap_issue(Inode *in, unsigned issued);
627 void add_update_cap(Inode *in, MetaSession *session, uint64_t cap_id,
628 unsigned issued, unsigned wanted, unsigned seq, unsigned mseq,
629 inodeno_t realm, int flags, const UserPerm& perms);
630 void remove_cap(Cap *cap, bool queue_release);
631 void remove_all_caps(Inode *in);
632 void remove_session_caps(MetaSession *session, int err);
633 int mark_caps_flushing(Inode *in, ceph_tid_t *ptid);
634 void adjust_session_flushing_caps(Inode *in, MetaSession *old_s, MetaSession *new_s);
635 void flush_caps_sync();
636 void kick_flushing_caps(Inode *in, MetaSession *session);
637 void kick_flushing_caps(MetaSession *session);
638 void early_kick_flushing_caps(MetaSession *session);
639 int get_caps(Fh *fh, int need, int want, int *have, loff_t endoff);
640 int get_caps_used(Inode *in);
641
642 void maybe_update_snaprealm(SnapRealm *realm, snapid_t snap_created, snapid_t snap_highwater,
643 vector<snapid_t>& snaps);
644
645 void handle_quota(const MConstRef<MClientQuota>& m);
646 void handle_snap(const MConstRef<MClientSnap>& m);
647 void handle_caps(const MConstRef<MClientCaps>& m);
648 void handle_cap_import(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
649 void handle_cap_export(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
650 void handle_cap_trunc(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
651 void handle_cap_flush_ack(MetaSession *session, Inode *in, Cap *cap, const MConstRef<MClientCaps>& m);
652 void handle_cap_flushsnap_ack(MetaSession *session, Inode *in, const MConstRef<MClientCaps>& m);
653 void handle_cap_grant(MetaSession *session, Inode *in, Cap *cap, const MConstRef<MClientCaps>& m);
654 void cap_delay_requeue(Inode *in);
655
656 void send_cap(Inode *in, MetaSession *session, Cap *cap, int flags,
657 int used, int want, int retain, int flush,
658 ceph_tid_t flush_tid);
659
660 void send_flush_snap(Inode *in, MetaSession *session, snapid_t follows, CapSnap& capsnap);
661
662 void flush_snaps(Inode *in);
663 void get_cap_ref(Inode *in, int cap);
664 void put_cap_ref(Inode *in, int cap);
665 void wait_sync_caps(Inode *in, ceph_tid_t want);
666 void wait_sync_caps(ceph_tid_t want);
667 void queue_cap_snap(Inode *in, SnapContext &old_snapc);
668 void finish_cap_snap(Inode *in, CapSnap &capsnap, int used);
669
670 void _schedule_invalidate_dentry_callback(Dentry *dn, bool del);
671 void _async_dentry_invalidate(vinodeno_t dirino, vinodeno_t ino, string& name);
672 void _try_to_trim_inode(Inode *in, bool sched_inval);
673
674 void _schedule_invalidate_callback(Inode *in, int64_t off, int64_t len);
675 void _invalidate_inode_cache(Inode *in);
676 void _invalidate_inode_cache(Inode *in, int64_t off, int64_t len);
677 void _async_invalidate(vinodeno_t ino, int64_t off, int64_t len);
678
679 void _schedule_ino_release_callback(Inode *in);
680 void _async_inode_release(vinodeno_t ino);
681
682 bool _release(Inode *in);
683
684 /**
685 * Initiate a flush of the data associated with the given inode.
686 * If you specify a Context, you are responsible for holding an inode
687 * reference for the duration of the flush. If not, _flush() will
688 * take the reference for you.
689 * @param in The Inode whose data you wish to flush.
690 * @param c The Context you wish us to complete once the data is
691 * flushed. If already flushed, this will be called in-line.
692 *
693 * @returns true if the data was already flushed, false otherwise.
694 */
695 bool _flush(Inode *in, Context *c);
696 void _flush_range(Inode *in, int64_t off, uint64_t size);
697 void _flushed(Inode *in);
698 void flush_set_callback(ObjectCacher::ObjectSet *oset);
699
700 void close_release(Inode *in);
701 void close_safe(Inode *in);
702
703 void lock_fh_pos(Fh *f);
704 void unlock_fh_pos(Fh *f);
705
706 // metadata cache
707 void update_dir_dist(Inode *in, DirStat *st);
708
709 void clear_dir_complete_and_ordered(Inode *diri, bool complete);
710 void insert_readdir_results(MetaRequest *request, MetaSession *session, Inode *diri);
711 Inode* insert_trace(MetaRequest *request, MetaSession *session);
712 void update_inode_file_size(Inode *in, int issued, uint64_t size,
713 uint64_t truncate_seq, uint64_t truncate_size);
714 void update_inode_file_time(Inode *in, int issued, uint64_t time_warp_seq,
715 utime_t ctime, utime_t mtime, utime_t atime);
716
717 Inode *add_update_inode(InodeStat *st, utime_t ttl, MetaSession *session,
718 const UserPerm& request_perms);
719 Dentry *insert_dentry_inode(Dir *dir, const string& dname, LeaseStat *dlease,
720 Inode *in, utime_t from, MetaSession *session,
721 Dentry *old_dentry = NULL);
722 void update_dentry_lease(Dentry *dn, LeaseStat *dlease, utime_t from, MetaSession *session);
723
724 bool use_faked_inos() { return _use_faked_inos; }
725 vinodeno_t map_faked_ino(ino_t ino);
726
727 //notify the mds to flush the mdlog
728 void flush_mdlog_sync();
729 void flush_mdlog(MetaSession *session);
730
731 void renew_caps();
732 void renew_caps(MetaSession *session);
733 void flush_cap_releases();
734 void tick();
735
736 xlist<Inode*> &get_dirty_list() { return dirty_list; }
737
738 SafeTimer timer;
739
740 std::unique_ptr<PerfCounters> logger;
741 std::unique_ptr<MDSMap> mdsmap;
742
743 bool fuse_default_permissions;
744
745 protected:
746 /* Flags for check_caps() */
747 static const unsigned CHECK_CAPS_NODELAY = 0x1;
748 static const unsigned CHECK_CAPS_SYNCHRONOUS = 0x2;
749
750
751 bool is_initialized() const { return initialized; }
752
753 void check_caps(Inode *in, unsigned flags);
754
755 void set_cap_epoch_barrier(epoch_t e);
756
757 void handle_command_reply(const MConstRef<MCommandReply>& m);
758 int fetch_fsmap(bool user);
759 int resolve_mds(
760 const std::string &mds_spec,
761 std::vector<mds_gid_t> *targets);
762
763 void get_session_metadata(std::map<std::string, std::string> *meta) const;
764 bool have_open_session(mds_rank_t mds);
765 void got_mds_push(MetaSession *s);
766 MetaSession *_get_mds_session(mds_rank_t mds, Connection *con); ///< return session for mds *and* con; null otherwise
767 MetaSession *_get_or_open_mds_session(mds_rank_t mds);
768 MetaSession *_open_mds_session(mds_rank_t mds);
769 void _close_mds_session(MetaSession *s);
770 void _closed_mds_session(MetaSession *s, int err=0, bool rejected=false);
771 bool _any_stale_sessions() const;
772 void _kick_stale_sessions();
773 void handle_client_session(const MConstRef<MClientSession>& m);
774 void send_reconnect(MetaSession *s);
775 void resend_unsafe_requests(MetaSession *s);
776 void wait_unsafe_requests();
777
778 void _sync_write_commit(Inode *in);
779
780 void dump_mds_requests(Formatter *f);
781 void dump_mds_sessions(Formatter *f, bool cap_dump=false);
782
783 int make_request(MetaRequest *req, const UserPerm& perms,
784 InodeRef *ptarget = 0, bool *pcreated = 0,
785 mds_rank_t use_mds=-1, bufferlist *pdirbl=0);
786 void put_request(MetaRequest *request);
787 void unregister_request(MetaRequest *request);
788
789 int verify_reply_trace(int r, MetaSession *session, MetaRequest *request,
790 const MConstRef<MClientReply>& reply,
791 InodeRef *ptarget, bool *pcreated,
792 const UserPerm& perms);
793 void encode_cap_releases(MetaRequest *request, mds_rank_t mds);
794 int encode_inode_release(Inode *in, MetaRequest *req,
795 mds_rank_t mds, int drop,
796 int unless,int force=0);
797 void encode_dentry_release(Dentry *dn, MetaRequest *req,
798 mds_rank_t mds, int drop, int unless);
799 mds_rank_t choose_target_mds(MetaRequest *req, Inode** phash_diri=NULL);
800 void connect_mds_targets(mds_rank_t mds);
801 void send_request(MetaRequest *request, MetaSession *session,
802 bool drop_cap_releases=false);
803 MRef<MClientRequest> build_client_request(MetaRequest *request);
804 void kick_requests(MetaSession *session);
805 void kick_requests_closed(MetaSession *session);
806 void handle_client_request_forward(const MConstRef<MClientRequestForward>& reply);
807 void handle_client_reply(const MConstRef<MClientReply>& reply);
808 bool is_dir_operation(MetaRequest *request);
809
810 // Fake inode numbers, for use where ino_t is only 32 bits wide.
// (Declarations only; see free_faked_inos / faked_ino_map further down
// in this class for the state these presumably operate on.)
811 void _assign_faked_ino(Inode *in);
812 void _assign_faked_root(Inode *in);
813 void _release_faked_ino(Inode *in);
814 void _reset_faked_inos();
// Takes a (faked) ino_t and returns a vinodeno_t.
815 vinodeno_t _map_faked_ino(ino_t ino);
816
817 // Optional extra metadata about me (this client) to send to the MDS
818 void populate_metadata(const std::string &mount_root);
819
// SnapRealm helpers, keyed by inodeno_t (declarations only).
// NOTE(review): the "_maybe" variant presumably may return null when no
// realm exists -- confirm against the definition.
820 SnapRealm *get_snap_realm(inodeno_t r);
821 SnapRealm *get_snap_realm_maybe(inodeno_t r);
822 void put_snap_realm(SnapRealm *realm);
823 bool adjust_realm_parent(SnapRealm *realm, inodeno_t parent);
// realm_ret is a pointer-to-pointer argument; must_flush defaults to true.
824 void update_snap_trace(const bufferlist& bl, SnapRealm **realm_ret, bool must_flush=true);
825 void invalidate_snaprealm_and_children(SnapRealm *realm);
826
827 Inode *open_snapdir(Inode *diri);
828
829 int get_fd() {
830 int fd = free_fd_set.range_start();
831 free_fd_set.erase(fd, 1);
832 return fd;
833 }
834 void put_fd(int fd) {
835 free_fd_set.insert(fd, 1);
836 }
837
838 /*
839 * Resolve file descriptor, or return NULL.
840 */
841 Fh *get_filehandle(int fd) {
842 ceph::unordered_map<int, Fh*>::iterator p = fd_map.find(fd);
843 if (p == fd_map.end())
844 return NULL;
845 return p->second;
846 }
847
848 // helpers (declarations only; defined elsewhere)
849 void wake_up_session_caps(MetaSession *s, bool reconnect);
850
// Wait on / signal a list of Context pointers.
851 void wait_on_context_list(list<Context*>& ls);
852 void signal_context_list(list<Context*>& ls);
853
854 // -- metadata cache stuff
855
856 // decrease inode ref (n defaults to 1). delete if dangling.
857 void put_inode(Inode *in, int n=1);
858 void close_dir(Dir *dir);
859
// fs_name defaults to the empty string.
860 int subscribe_mdsmap(const std::string &fs_name="");
861
862 void _abort_mds_sessions(int err);
863
864 // same as unmount() but for when the client_lock is already held
865 void _unmount(bool abort);
866
// Disabled accessor, kept commented out in the original source.
867 //int get_cache_size() { return lru.lru_get_size(); }
868
869 /**
870 * Don't call this with in==NULL, use get_or_create for that
871 * leave dn set to default NULL unless you're trying to add
872 * a new inode to a pre-created Dentry
873 *
 * NOTE(review): the text above speaks of a "default NULL" for dn, but the
 * declaration below gives dn no default argument -- callers must pass it.
 */
874 Dentry* link(Dir *dir, const string& name, Inode *in, Dentry *dn);
875 void unlink(Dentry *dn, bool keepdir, bool keepdentry);
876
877 // path traversal for high-level interface
// followsym defaults to true and mask to 0; the result is delivered
// through the InodeRef pointer `end`.
878 int path_walk(const filepath& fp, InodeRef *end, const UserPerm& perms,
879 bool followsym=true, int mask=0);
880
881 int fill_stat(Inode *in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0);
882 int fill_stat(InodeRef& in, struct stat *st, frag_info_t *dirstat=0, nest_info_t *rstat=0) {
883 return fill_stat(in.get(), st, dirstat, rstat);
884 }
885
886 void fill_statx(Inode *in, unsigned int mask, struct ceph_statx *stx);
887 void fill_statx(InodeRef& in, unsigned int mask, struct ceph_statx *stx) {
888 return fill_statx(in.get(), mask, stx);
889 }
890
891 void touch_dn(Dentry *dn);
892
893 // trim cache (declarations only).
// trim_kernel_dcache defaults to false.
894 void trim_cache(bool trim_kernel_dcache=false);
895 void trim_cache_for_reconnect(MetaSession *s);
896 void trim_dentry(Dentry *dn);
897 void trim_caps(MetaSession *s, uint64_t max);
898 void _invalidate_kernel_dcache();
899 void _trim_negative_child_dentries(InodeRef& in);
900
// `did` is a caller-supplied set of Inode pointers, passed by reference.
901 void dump_inode(Formatter *f, Inode *in, set<Inode*>& did, bool disconnected);
902 void dump_cache(Formatter *f); // debug
903
904 // force read-only
905 void force_session_readonly(MetaSession *s);
906
907 void dump_status(Formatter *f); // debug
908
// Overrides of virtual hooks inherited from a base class.
// NOTE(review): presumably Dispatcher (msg/Dispatcher.h is included) --
// the class header is outside this chunk, confirm there.
909 bool ms_dispatch2(const MessageRef& m) override;
910
911 void ms_handle_connect(Connection *con) override;
912 bool ms_handle_reset(Connection *con) override;
913 void ms_handle_remote_reset(Connection *con) override;
914 bool ms_handle_refused(Connection *con) override;
915
916 int authenticate();
917
// Quota helpers (declarations only).
918 Inode* get_quota_root(Inode *in, const UserPerm& perms);
// `test` is a caller-supplied predicate over a const Inode&.
919 bool check_quota_condition(Inode *in, const UserPerm& perms,
920 std::function<bool (const Inode &)> test);
921 bool is_quota_files_exceeded(Inode *in, const UserPerm& perms);
922 bool is_quota_bytes_exceeded(Inode *in, int64_t new_bytes,
923 const UserPerm& perms);
924 bool is_quota_bytes_approaching(Inode *in, const UserPerm& perms);
925
926 int check_pool_perm(Inode *in, int need);
927
// Handler for an incoming MClientReclaimReply message.
928 void handle_client_reclaim_reply(const MConstRef<MClientReclaimReply>& reply);
929
930 /**
931 * Call this when an OSDMap is seen with a full flag (global or per pool)
932 * set.
933 *
934 * @param pool the pool ID affected, or -1 if all.
935 */
936 void _handle_full_flag(int64_t pool);
937
938 void _close_sessions();
939
940 void _pre_init();
941
942 /**
943 * The basic housekeeping parts of init (perf counters, admin socket)
944 * that are independent of how objecters/monclient/messengers are
945 * being set up.
946 */
947 void _finish_init();
948
949 // global client lock
950 // - protects Client and buffer cache both!
951 ceph::mutex client_lock = ceph::make_mutex("Client::client_lock");
952 ;
953
// Map from snapid_t to an int.
// NOTE(review): presumably a per-snapshot reference count -- confirm.
954 std::map<snapid_t, int> ll_snap_ref;
955
// Root-related inode pointers; both raw pointers start out null.
956 Inode* root = nullptr;
957 map<Inode*, InodeRef> root_parents;
958 Inode* root_ancestor = nullptr;
959 LRU lru; // lru list of Dentry's in our local metadata cache.
960
961 InodeRef cwd;
962
// Held through unique_ptr.
963 std::unique_ptr<Filer> filer;
964 std::unique_ptr<ObjectCacher> objectcacher;
965 std::unique_ptr<WritebackHandler> writeback_handler;
966
// Raw pointers with no in-class initializer.
// NOTE(review): presumably set by a constructor outside this chunk.
967 Messenger *messenger;
968 MonClient *monclient;
969 Objecter *objecter;
970
971 client_t whoami;
972
973
974 private:
// Context subclass carrying a Client pointer and an Fh pointer. The
// constructor, destructor and finish(int) are only declared here.
// NOTE(review): presumably the completion for readahead I/O -- confirm
// against the definitions.
975 struct C_Readahead : public Context {
976 C_Readahead(Client *c, Fh *f);
977 ~C_Readahead() override;
978 void finish(int r) override;
979
980 Client *client;
981 Fh *f;
982 };
983
984 /*
985 * These define virtual xattrs exposing the recursive directory
986 * statistics and layout metadata.
987 *
 * Each entry pairs a name with pointer-to-member callbacks on Client.
 */
988 struct VXattr {
989 const string name;
// Value callback: takes the inode plus a char buffer and a size, and
// returns a size_t.
990 size_t (Client::*getxattr_cb)(Inode *in, char *val, size_t size);
991 bool readonly;
// Existence callback for the given inode.
// NOTE(review): may presumably be null for always-present entries.
992 bool (Client::*exists_cb)(Inode *in);
// See the VXATTR_* constants declared after this struct ("Flags for VXattr").
993 unsigned int flags;
994 };
995
// ACL type selector; the acl_type member in this class is initialized
// to NO_ACL.
996 enum {
997 NO_ACL = 0,
998 POSIX_ACL,
999 };
1000
// Access-request bits (distinct powers of two, so they can be OR-ed).
1001 enum {
1002 MAY_EXEC = 1,
1003 MAY_WRITE = 2,
1004 MAY_READ = 4,
1005 };
1006
1007
1008 /* Flags for VXattr (bit values for VXattr::flags) */
1009 static const unsigned VXATTR_RSTAT = 0x1;
1010 static const unsigned VXATTR_DIRSTAT = 0x2;
1011
// Static VXattr tables; their contents are defined outside this header.
1012 static const VXattr _dir_vxattrs[];
1013 static const VXattr _file_vxattrs[];
1014 static const VXattr _common_vxattrs[];
1015
1016
1017 bool is_reserved_vino(vinodeno_t &vino);
1018
1019 void fill_dirent(struct dirent *de, const char *name, int type, uint64_t ino, loff_t next_off);
1020
// Directory-reading helpers operating on a dir_result_t (declarations only).
// _opendir hands the new dir_result_t back through dirpp.
1021 int _opendir(Inode *in, dir_result_t **dirpp, const UserPerm& perms);
1022 void _readdir_drop_dirp_buffer(dir_result_t *dirp);
1023 bool _readdir_have_frag(dir_result_t *dirp);
1024 void _readdir_next_frag(dir_result_t *dirp);
1025 void _readdir_rechoose_frag(dir_result_t *dirp);
1026 int _readdir_get_frag(dir_result_t *dirp);
// cb is an add_dirent_cb_t callback; p is an opaque pointer argument
// (presumably handed to cb -- confirm against the definition).
1027 int _readdir_cache_cb(dir_result_t *dirp, add_dirent_cb_t cb, void *p, int caps, bool getref);
1028 void _closedir(dir_result_t *dirp);
1029
1030 // other helpers
1031 void _fragmap_remove_non_leaves(Inode *in);
1032 void _fragmap_remove_stopped_mds(Inode *in, mds_rank_t mds);
1033
1034 void _ll_get(Inode *in);
1035 int _ll_put(Inode *in, uint64_t num);
1036 void _ll_drop_pins();
1037
// Fh (file handle) lifecycle helpers.
1038 Fh *_create_fh(Inode *in, int flags, int cmode, const UserPerm& perms);
1039 int _release_fh(Fh *fh);
1040 void _put_fh(Fh *fh);
1041
1042 int _do_remount(bool retry_on_error);
1043
// Read helpers that fill a bufferlist; _read_sync additionally takes a
// bool pointer checkeof.
1044 int _read_sync(Fh *f, uint64_t off, uint64_t len, bufferlist *bl, bool *checkeof);
1045 int _read_async(Fh *f, uint64_t off, uint64_t len, bufferlist *bl);
1046
1047 // internal interface
1048 // call these with client_lock held!
// (Declarations only. Where present, the trailing InodeRef *inp argument
// defaults to 0, except on _do_setattr where it is required.)
1049 int _do_lookup(Inode *dir, const string& name, int mask, InodeRef *target,
1050 const UserPerm& perms);
1051
1052 int _lookup(Inode *dir, const string& dname, int mask, InodeRef *target,
1053 const UserPerm& perm);
1054
1055 int _link(Inode *in, Inode *dir, const char *name, const UserPerm& perm,
1056 InodeRef *inp = 0);
1057 int _unlink(Inode *dir, const char *name, const UserPerm& perm);
1058 int _rename(Inode *olddir, const char *oname, Inode *ndir, const char *nname, const UserPerm& perm);
1059 int _mkdir(Inode *dir, const char *name, mode_t mode, const UserPerm& perm,
1060 InodeRef *inp = 0);
1061 int _rmdir(Inode *dir, const char *name, const UserPerm& perms);
1062 int _symlink(Inode *dir, const char *name, const char *target,
1063 const UserPerm& perms, InodeRef *inp = 0);
1064 int _mknod(Inode *dir, const char *name, mode_t mode, dev_t rdev,
1065 const UserPerm& perms, InodeRef *inp = 0);
// setattr family: variants taking ceph_statx or struct stat, and Inode*
// or InodeRef&.
1066 int _do_setattr(Inode *in, struct ceph_statx *stx, int mask,
1067 const UserPerm& perms, InodeRef *inp);
1068 void stat_to_statx(struct stat *st, struct ceph_statx *stx);
1069 int __setattrx(Inode *in, struct ceph_statx *stx, int mask,
1070 const UserPerm& perms, InodeRef *inp = 0);
1071 int _setattrx(InodeRef &in, struct ceph_statx *stx, int mask,
1072 const UserPerm& perms);
1073 int _setattr(InodeRef &in, struct stat *attr, int mask,
1074 const UserPerm& perms);
1075 int _ll_setattrx(Inode *in, struct ceph_statx *stx, int mask,
1076 const UserPerm& perms, InodeRef *inp = 0);
1077 int _getattr(Inode *in, int mask, const UserPerm& perms, bool force=false);
1078 int _getattr(InodeRef &in, int mask, const UserPerm& perms, bool force=false) {
1079 return _getattr(in.get(), mask, perms, force);
1080 }
1081 int _readlink(Inode *in, char *buf, size_t size);
// xattr operations (declarations only); several come in both Inode* and
// InodeRef& flavours.
1082 int _getxattr(Inode *in, const char *name, void *value, size_t len,
1083 const UserPerm& perms);
1084 int _getxattr(InodeRef &in, const char *name, void *value, size_t len,
1085 const UserPerm& perms);
1086 int _listxattr(Inode *in, char *names, size_t len, const UserPerm& perms);
1087 int _do_setxattr(Inode *in, const char *name, const void *value, size_t len,
1088 int flags, const UserPerm& perms);
1089 int _setxattr(Inode *in, const char *name, const void *value, size_t len,
1090 int flags, const UserPerm& perms);
1091 int _setxattr(InodeRef &in, const char *name, const void *value, size_t len,
1092 int flags, const UserPerm& perms);
// Note: name and value are taken by non-const reference here.
1093 int _setxattr_check_data_pool(string& name, string& value, const OSDMap *osdmap);
1094 void _setxattr_maybe_wait_for_osdmap(const char *name, const void *value, size_t len);
1095 int _removexattr(Inode *in, const char *nm, const UserPerm& perms);
1096 int _removexattr(InodeRef &in, const char *nm, const UserPerm& perms);
// _open hands the resulting Fh back through fhp.
1097 int _open(Inode *in, int flags, mode_t mode, Fh **fhp,
1098 const UserPerm& perms);
1099 int _renew_caps(Inode *in);
// _create: inp, fhp and created are pointer arguments; the stripe/object
// size and data_pool arguments are layout-related going by their names.
1100 int _create(Inode *in, const char *name, int flags, mode_t mode, InodeRef *inp,
1101 Fh **fhp, int stripe_unit, int stripe_count, int object_size,
1102 const char *data_pool, bool *created, const UserPerm &perms);
1103
// File-handle level I/O and locking helpers (declarations only).
1104 loff_t _lseek(Fh *fh, loff_t offset, int whence);
1105 int64_t _read(Fh *fh, int64_t offset, uint64_t size, bufferlist *bl);
// _write accepts both a flat buffer (buf) and an iovec array (iov/iovcnt).
// NOTE(review): presumably only one of the two is used per call -- confirm.
1106 int64_t _write(Fh *fh, int64_t offset, uint64_t size, const char *buf,
1107 const struct iovec *iov, int iovcnt);
// `write` selects the direction; the "_locked" suffix suggests client_lock
// must already be held -- TODO confirm.
1108 int64_t _preadv_pwritev_locked(Fh *f, const struct iovec *iov,
1109 unsigned iovcnt, int64_t offset, bool write, bool clamp_to_int);
1110 int _preadv_pwritev(int fd, const struct iovec *iov, unsigned iovcnt, int64_t offset, bool write);
1111 int _flush(Fh *fh);
// _fsync is overloaded on Fh* and Inode*.
1112 int _fsync(Fh *fh, bool syncdataonly);
1113 int _fsync(Inode *in, bool syncdataonly);
1114 int _sync_fs();
1115 int _fallocate(Fh *fh, int mode, int64_t offset, int64_t length);
// Lock operations; each takes a 64-bit owner value.
1116 int _getlk(Fh *fh, struct flock *fl, uint64_t owner);
1117 int _setlk(Fh *fh, struct flock *fl, uint64_t owner, int sleep);
1118 int _flock(Fh *fh, int cmd, uint64_t owner);
1119 int _lazyio(Fh *fh, int enable);
1120
// Hands a Dentry back through pdn; expect_null defaults to false.
1121 int get_or_create(Inode *dir, const char* name,
1122 Dentry **pdn, bool expect_null=false);
1123
// Permission checks (declarations only); each returns an int.
// NOTE(review): presumably 0 on success / negative errno -- confirm
// against the definitions.
1124 int xattr_permission(Inode *in, const char *name, unsigned want,
1125 const UserPerm& perms);
1126 int may_setattr(Inode *in, struct ceph_statx *stx, int mask,
1127 const UserPerm& perms);
1128 int may_open(Inode *in, int flags, const UserPerm& perms);
1129 int may_lookup(Inode *dir, const UserPerm& perms);
1130 int may_create(Inode *dir, const UserPerm& perms);
1131 int may_delete(Inode *dir, const char *name, const UserPerm& perms);
1132 int may_hardlink(Inode *in, const UserPerm& perms);
1133
1134 int _getattr_for_perm(Inode *in, const UserPerm& perms);
1135
1136 vinodeno_t _get_vino(Inode *in);
1137
// Virtual-xattr callbacks. The size_t-returning members share the
// signature of VXattr::getxattr_cb and the bool-returning *_exists members
// share the signature of VXattr::exists_cb (declarations only).
1138 bool _vxattrcb_quota_exists(Inode *in);
1139 size_t _vxattrcb_quota(Inode *in, char *val, size_t size);
1140 size_t _vxattrcb_quota_max_bytes(Inode *in, char *val, size_t size);
1141 size_t _vxattrcb_quota_max_files(Inode *in, char *val, size_t size);
1142
1143 bool _vxattrcb_layout_exists(Inode *in);
1144 size_t _vxattrcb_layout(Inode *in, char *val, size_t size);
1145 size_t _vxattrcb_layout_stripe_unit(Inode *in, char *val, size_t size);
1146 size_t _vxattrcb_layout_stripe_count(Inode *in, char *val, size_t size);
1147 size_t _vxattrcb_layout_object_size(Inode *in, char *val, size_t size);
1148 size_t _vxattrcb_layout_pool(Inode *in, char *val, size_t size);
1149 size_t _vxattrcb_layout_pool_namespace(Inode *in, char *val, size_t size);
1150 size_t _vxattrcb_dir_entries(Inode *in, char *val, size_t size);
1151 size_t _vxattrcb_dir_files(Inode *in, char *val, size_t size);
1152 size_t _vxattrcb_dir_subdirs(Inode *in, char *val, size_t size);
1153 size_t _vxattrcb_dir_rentries(Inode *in, char *val, size_t size);
1154 size_t _vxattrcb_dir_rfiles(Inode *in, char *val, size_t size);
1155 size_t _vxattrcb_dir_rsubdirs(Inode *in, char *val, size_t size);
1156 size_t _vxattrcb_dir_rbytes(Inode *in, char *val, size_t size);
1157 size_t _vxattrcb_dir_rctime(Inode *in, char *val, size_t size);
1158
1159 bool _vxattrcb_dir_pin_exists(Inode *in);
1160 size_t _vxattrcb_dir_pin(Inode *in, char *val, size_t size);
1161
1162 bool _vxattrcb_snap_btime_exists(Inode *in);
1163 size_t _vxattrcb_snap_btime(Inode *in, char *val, size_t size);
1164
1165 size_t _vxattrcb_cluster_fsid(Inode *in, char *val, size_t size);
1166 size_t _vxattrcb_client_id(Inode *in, char *val, size_t size);
1167
// Static lookups returning pointers to const VXattr entries.
1168 static const VXattr *_get_vxattrs(Inode *in);
1169 static const VXattr *_match_vxattr(Inode *in, const char *name);
1170
// File-lock helpers (declarations only); `removing` defaults to false.
1171 int _do_filelock(Inode *in, Fh *fh, int lock_type, int op, int sleep,
1172 struct flock *fl, uint64_t owner, bool removing=false);
1173 int _interrupt_filelock(MetaRequest *req);
1174 void _encode_filelocks(Inode *in, bufferlist& bl);
1175 void _release_filelocks(Fh *fh);
1176 void _update_lock_state(struct flock *fl, uint64_t owner, ceph_lock_state_t *lock_state);
1177
// POSIX ACL helpers. _posix_acl_create takes mode by pointer and xattrs_bl
// by non-const reference.
1178 int _posix_acl_create(Inode *dir, mode_t *mode, bufferlist& xattrs_bl,
1179 const UserPerm& perms);
1180 int _posix_acl_chmod(Inode *in, mode_t mode, const UserPerm& perms);
1181 int _posix_acl_permission(Inode *in, const UserPerm& perms, unsigned want);
1182
1183 mds_rank_t _get_random_up_mds() const;
1184
// Low-level lookups; the optional Inode** arguments default to NULL.
1185 int _ll_getattr(Inode *in, int caps, const UserPerm& perms);
1186 int _lookup_parent(Inode *in, const UserPerm& perms, Inode **parent=NULL);
1187 int _lookup_name(Inode *in, Inode *parent, const UserPerm& perms);
1188 int _lookup_vino(vinodeno_t ino, const UserPerm& perms, Inode **inode=NULL);
1189 bool _ll_forget(Inode *in, uint64_t count);
1190
1191
1192 uint32_t deleg_timeout = 0;
1193
// Callback pointers, all null until set, plus the handle stored with them
// (presumably the opaque argument handed back to the callbacks -- confirm).
1194 client_switch_interrupt_callback_t switch_interrupt_cb = nullptr;
1195 client_remount_callback_t remount_cb = nullptr;
1196 client_ino_callback_t ino_invalidate_cb = nullptr;
1197 client_dentry_callback_t dentry_invalidate_cb = nullptr;
1198 client_umask_callback_t umask_cb = nullptr;
1199 client_ino_release_t ino_release_cb = nullptr;
1200 void *callback_handle = nullptr;
1201 bool can_invalidate_dentries = false;
1202
// One Finisher per kind of deferred work, going by the member names.
1203 Finisher async_ino_invalidator;
1204 Finisher async_dentry_invalidator;
1205 Finisher interrupt_finisher;
1206 Finisher remount_finisher;
1207 Finisher async_ino_releasor;
1208 Finisher objecter_finisher;
1209
1210 Context *tick_event = nullptr;
1211 utime_t last_cap_renew;
1212
1213 CommandHook m_command_hook;
1214
// user_id/group_id have no in-class initializer.
1215 int user_id, group_id;
// One of NO_ACL / POSIX_ACL (see the enum declared earlier in this class).
1216 int acl_type = NO_ACL;
1217
1218 epoch_t cap_epoch_barrier = 0;
1219
1220 // mds sessions
// NOTE(review): the trailing "push seq" remark does not match the mapped
// type (MetaSession); it looks stale.
1221 map<mds_rank_t, MetaSession> mds_sessions; // mds -> push seq
1222 std::set<mds_rank_t> mds_ranks_closing; // mds ranks currently tearing down sessions
1223 std::list<ceph::condition_variable*> waiting_for_mdsmap;
1224
1225 // FSMap, for when using mds_command
1226 std::list<ceph::condition_variable*> waiting_for_fsmap;
1227 std::unique_ptr<FSMap> fsmap;
1228 std::unique_ptr<FSMapUser> fsmap_user;
1229
1230 // MDS command state
1231 CommandTable<MDSCommandOp> command_table;
1232
// No in-class initializer.
1233 bool _use_faked_inos;
1234
1235 // Cluster fsid
1236 fs_cluster_id_t fscid;
1237
1238 // file handles, etc.
1239 interval_set<int> free_fd_set; // unused fds
// fd -> Fh lookup table; get_filehandle() searches it.
1240 ceph::unordered_map<int, Fh*> fd_map;
1241 set<Fh*> ll_unclosed_fh_set;
1242 ceph::unordered_set<dir_result_t*> opened_dirs;
1243 uint64_t fd_gen = 1;
1244
// Lifecycle flags; all start false.
1245 bool initialized = false;
1246 bool mounted = false;
1247 bool unmounting = false;
1248 bool blacklisted = false;
1249
// vinodeno_t -> Inode table, plus the faked-ino bookkeeping
// (ino_t -> vinodeno_t map and the set of free faked inos).
1250 ceph::unordered_map<vinodeno_t, Inode*> inode_map;
1251 ceph::unordered_map<ino_t, vinodeno_t> faked_ino_map;
1252 interval_set<ino_t> free_faked_inos;
1253 ino_t last_used_faked_ino;
1254 ino_t last_used_faked_root;
1255
// Initialized to -ENXIO.
1256 int local_osd = -ENXIO;
1257 epoch_t local_osd_epoch = 0;
1258
1259 int unsafe_sync_write = 0;
1260
1261 // mds requests
1262 ceph_tid_t last_tid = 0;
1263 ceph_tid_t oldest_tid = 0; // oldest incomplete mds request, excluding setfilelock requests
// tid -> request table.
1264 map<ceph_tid_t, MetaRequest*> mds_requests;
1265
1266 // cap flushing
// Starts at 1 (unlike last_tid, which starts at 0).
1267 ceph_tid_t last_flush_tid = 1;
1268
1269 // dirty_list keeps all the dirty inodes before flushing.
1270 xlist<Inode*> delayed_list, dirty_list;
1271 int num_flushing_caps = 0;
1272 ceph::unordered_map<inodeno_t,SnapRealm*> snap_realms;
// String key/value metadata.
// NOTE(review): presumably what populate_metadata() fills in -- confirm.
1273 std::map<std::string, std::string> metadata;
1274
1275 utime_t last_auto_reconnect;
1276
1277 // trace generation
1278 ofstream traceout;
1279
1280 ceph::condition_variable mount_cond, sync_cond;
1281
// Keyed by an (int64_t, string) pair.
// NOTE(review): presumably (pool id, namespace) -- confirm.
1282 std::map<std::pair<int64_t,std::string>, int> pool_perms;
1283 std::list<ceph::condition_variable*> waiting_for_pool_perm;
1284
1285 uint64_t retries_on_invalidate = 0;
1286
1287 // state reclaim
1288 std::list<ceph::condition_variable*> waiting_for_reclaim;
1289 int reclaim_errno = 0;
1290 epoch_t reclaim_osd_epoch = 0;
1291 entity_addrvec_t reclaim_target_addrs;
1292 };
1293
1294 /**
1295 * Specialization of Client that manages its own Objecter instance
1296 * and handles init/shutdown of messenger/monclient
1297 *
 * The constructor takes the Messenger and MonClient pointers; init() and
 * shutdown() override virtuals of Client. All four members are only
 * declared here.
 */
1298 class StandaloneClient : public Client
1299 {
1300 public:
1301 StandaloneClient(Messenger *m, MonClient *mc);
1302
1303 ~StandaloneClient() override;
1304
1305 int init() override;
1306 void shutdown() override;
1307 };
1308
1309 #endif