]> git.proxmox.com Git - ceph.git/blob - ceph/src/client/Inode.h
4fa9c6938558ac86f3f3753d387fc0d8fef5e101
[ceph.git] / ceph / src / client / Inode.h
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
3
4 #ifndef CEPH_CLIENT_INODE_H
5 #define CEPH_CLIENT_INODE_H
6
7 #include <numeric>
8
9 #include "include/compat.h"
10 #include "include/ceph_assert.h"
11 #include "include/types.h"
12 #include "include/xlist.h"
13
14 #include "mds/flock.h"
15 #include "mds/mdstypes.h" // hrm
16
17 #include "osdc/ObjectCacher.h"
18
19 #include "InodeRef.h"
20 #include "MetaSession.h"
21 #include "UserPerm.h"
22 #include "Delegation.h"
23
// Forward declarations: the types below are only named (pointers,
// references, template arguments) in this header, so their full
// definitions are not needed here.
class Client;
class Dentry;
class Dir;
class Fh;
class MetaRequest;
class filepath;

struct Inode;
struct SnapRealm;
32
33 class Cap {
34 public:
35 Cap() = delete;
36 Cap(Inode &i, MetaSession *s) : inode(i),
37 session(s),
38 gen(s->cap_gen),
39 cap_item(this)
40 {
41 s->caps.push_back(&cap_item);
42 }
43 ~Cap() {
44 cap_item.remove_myself();
45 }
46
47 void touch(void) {
48 // move to back of LRU
49 session->caps.push_back(&cap_item);
50 }
51
52 void dump(Formatter *f) const;
53
54 Inode &inode;
55 MetaSession *session;
56 uint64_t cap_id = 0;
57 unsigned issued = 0;
58 unsigned implemented = 0;
59 unsigned wanted = 0; // as known to mds.
60 uint64_t seq = 0;
61 uint64_t issue_seq = 0;
62 __u32 mseq = 0; // migration seq
63 __u32 gen;
64 UserPerm latest_perms;
65
66 private:
67 /* Note that this Cap will not move (see Inode::caps):
68 *
69 * Section 23.1.2#8
70 * The insert members shall not affect the validity of iterators and
71 * references to the container, and the erase members shall invalidate only
72 * iterators and references to the erased elements.
73 */
74 xlist<Cap *>::item cap_item;
75 };
76
77 struct CapSnap {
78 //snapid_t follows; // map key
79 InodeRef in;
80 SnapContext context;
81 int issued, dirty;
82
83 uint64_t size;
84 utime_t ctime, btime, mtime, atime;
85 version_t time_warp_seq;
86 uint64_t change_attr;
87 uint32_t mode;
88 uid_t uid;
89 gid_t gid;
90 map<string,bufferptr> xattrs;
91 version_t xattr_version;
92
93 bufferlist inline_data;
94 version_t inline_version;
95
96 bool writing, dirty_data;
97 uint64_t flush_tid;
98
99 int64_t cap_dirtier_uid;
100 int64_t cap_dirtier_gid;
101
102 explicit CapSnap(Inode *i)
103 : in(i), issued(0), dirty(0), size(0), time_warp_seq(0), change_attr(0),
104 mode(0), uid(0), gid(0), xattr_version(0), inline_version(0),
105 writing(false), dirty_data(false), flush_tid(0), cap_dirtier_uid(-1),
106 cap_dirtier_gid(-1)
107 {}
108
109 void dump(Formatter *f) const;
110 };
111
// inode flags: single-bit masks combined in Inode::flags
#define I_COMPLETE        0x01
#define I_DIR_ORDERED     0x02
#define I_SNAPDIR_OPEN    0x04
#define I_KICK_FLUSH      0x08
#define I_CAP_DROPPED     0x10
#define I_ERROR_FILELOCK  0x20
119
120 struct Inode {
121 Client *client;
122
123 // -- the actual inode --
124 inodeno_t ino; // ORDER DEPENDENCY: oset
125 snapid_t snapid;
126 ino_t faked_ino;
127
128 uint32_t rdev; // if special file
129
130 // affected by any inode change...
131 utime_t ctime; // inode change time
132 utime_t btime; // birth time
133
134 // perm (namespace permissions)
135 uint32_t mode;
136 uid_t uid;
137 gid_t gid;
138
139 // nlink
140 int32_t nlink;
141
142 // file (data access)
143 ceph_dir_layout dir_layout;
144 file_layout_t layout;
145 uint64_t size; // on directory, # dentries
146 uint32_t truncate_seq;
147 uint64_t truncate_size;
148 utime_t mtime; // file data modify time.
149 utime_t atime; // file data access time.
150 uint32_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes())
151 uint64_t change_attr;
152
153 uint64_t max_size; // max size we can write to
154
155 // dirfrag, recursive accountin
156 frag_info_t dirstat;
157 nest_info_t rstat;
158
159 // special stuff
160 version_t version; // auth only
161 version_t xattr_version;
162 utime_t snap_btime; // snapshot creation (birth) time
163 std::map<std::string, std::string> snap_metadata;
164
165 // inline data
166 version_t inline_version;
167 bufferlist inline_data;
168
169 bool fscrypt = false; // fscrypt enabled ?
170
171 bool is_root() const { return ino == MDS_INO_ROOT; }
172 bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; }
173 bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; }
174 bool is_file() const { return (mode & S_IFMT) == S_IFREG; }
175
176 bool has_dir_layout() const {
177 return layout != file_layout_t();
178 }
179
180 __u32 hash_dentry_name(const string &dn) {
181 int which = dir_layout.dl_dir_hash;
182 if (!which)
183 which = CEPH_STR_HASH_LINUX;
184 ceph_assert(ceph_str_hash_valid(which));
185 return ceph_str_hash(which, dn.data(), dn.length());
186 }
187
188 unsigned flags;
189
190 quota_info_t quota;
191
192 bool is_complete_and_ordered() {
193 static const unsigned wants = I_COMPLETE | I_DIR_ORDERED;
194 return (flags & wants) == wants;
195 }
196
197 // about the dir (if this is one!)
198 Dir *dir; // if i'm a dir.
199 fragtree_t dirfragtree;
200 uint64_t dir_release_count, dir_ordered_count;
201 bool dir_hashed, dir_replicated;
202
203 // per-mds caps
204 std::map<mds_rank_t, Cap> caps; // mds -> Cap
205 Cap *auth_cap;
206 int64_t cap_dirtier_uid;
207 int64_t cap_dirtier_gid;
208 unsigned dirty_caps, flushing_caps;
209 std::map<ceph_tid_t, int> flushing_cap_tids;
210 int shared_gen, cache_gen;
211 int snap_caps, snap_cap_refs;
212 utime_t hold_caps_until;
213 xlist<Inode*>::item delay_cap_item, dirty_cap_item, flushing_cap_item;
214
215 SnapRealm *snaprealm;
216 xlist<Inode*>::item snaprealm_item;
217 InodeRef snapdir_parent; // only if we are a snapdir inode
218 map<snapid_t,CapSnap> cap_snaps; // pending flush to mds
219
220 //int open_by_mode[CEPH_FILE_MODE_NUM];
221 map<int,int> open_by_mode;
222 map<int,int> cap_refs;
223
224 ObjectCacher::ObjectSet oset; // ORDER DEPENDENCY: ino
225
226 uint64_t reported_size, wanted_max_size, requested_max_size;
227
228 int _ref; // ref count. 1 for each dentry, fh that links to me.
229 uint64_t ll_ref; // separate ref count for ll client
230 xlist<Dentry *> dentries; // if i'm linked to a dentry.
231 string symlink; // symlink content, if it's a symlink
232 map<string,bufferptr> xattrs;
233 map<frag_t,int> fragmap; // known frag -> mds mappings
234 map<frag_t, std::vector<mds_rank_t>> frag_repmap; // non-auth mds mappings
235
236 std::list<ceph::condition_variable*> waitfor_caps;
237 std::list<ceph::condition_variable*> waitfor_commit;
238 std::list<ceph::condition_variable*> waitfor_deleg;
239
240 Dentry *get_first_parent() {
241 ceph_assert(!dentries.empty());
242 return *dentries.begin();
243 }
244
245 void make_long_path(filepath& p);
246 void make_short_path(filepath& p);
247 void make_nosnap_relative_path(filepath& p);
248
249 void get();
250 int _put(int n=1);
251
252 int get_num_ref() {
253 return _ref;
254 }
255
256 void ll_get() {
257 ll_ref++;
258 }
259 void ll_put(uint64_t n=1) {
260 ceph_assert(ll_ref >= n);
261 ll_ref -= n;
262 }
263
264 // file locks
265 std::unique_ptr<ceph_lock_state_t> fcntl_locks;
266 std::unique_ptr<ceph_lock_state_t> flock_locks;
267
268 bool has_any_filelocks() {
269 return
270 (fcntl_locks && !fcntl_locks->empty()) ||
271 (flock_locks && !flock_locks->empty());
272 }
273
274 list<Delegation> delegations;
275
276 xlist<MetaRequest*> unsafe_ops;
277
278 std::set<Fh*> fhs;
279
280 mds_rank_t dir_pin;
281
282 Inode(Client *c, vinodeno_t vino, file_layout_t *newlayout)
283 : client(c), ino(vino.ino), snapid(vino.snapid), faked_ino(0),
284 rdev(0), mode(0), uid(0), gid(0), nlink(0),
285 size(0), truncate_seq(1), truncate_size(-1),
286 time_warp_seq(0), change_attr(0), max_size(0), version(0),
287 xattr_version(0), inline_version(0), flags(0),
288 dir(0), dir_release_count(1), dir_ordered_count(1),
289 dir_hashed(false), dir_replicated(false), auth_cap(NULL),
290 cap_dirtier_uid(-1), cap_dirtier_gid(-1),
291 dirty_caps(0), flushing_caps(0), shared_gen(0), cache_gen(0),
292 snap_caps(0), snap_cap_refs(0),
293 delay_cap_item(this), dirty_cap_item(this), flushing_cap_item(this),
294 snaprealm(0), snaprealm_item(this),
295 oset((void *)this, newlayout->pool_id, this->ino),
296 reported_size(0), wanted_max_size(0), requested_max_size(0),
297 _ref(0), ll_ref(0), dir_pin(MDS_RANK_NONE)
298 {
299 memset(&dir_layout, 0, sizeof(dir_layout));
300 }
301 ~Inode();
302
303 vinodeno_t vino() const { return vinodeno_t(ino, snapid); }
304
305 struct Compare {
306 bool operator() (Inode* const & left, Inode* const & right) {
307 if (left->ino.val < right->ino.val) {
308 return (left->snapid.val < right->snapid.val);
309 }
310 return false;
311 }
312 };
313
314 bool check_mode(const UserPerm& perms, unsigned want);
315
316 // CAPS --------
317 void get_open_ref(int mode);
318 bool put_open_ref(int mode);
319
320 void get_cap_ref(int cap);
321 int put_cap_ref(int cap);
322 bool is_any_caps();
323 bool cap_is_valid(const Cap &cap) const;
324 int caps_issued(int *implemented = 0) const;
325 void try_touch_cap(mds_rank_t mds);
326 bool caps_issued_mask(unsigned mask, bool allow_impl=false);
327 int caps_used();
328 int caps_file_wanted();
329 int caps_wanted();
330 int caps_mds_wanted();
331 int caps_dirty();
332 const UserPerm *get_best_perms();
333
334 bool have_valid_size();
335 Dir *open_dir();
336
337 void add_fh(Fh *f) {fhs.insert(f);}
338 void rm_fh(Fh *f) {fhs.erase(f);}
339 void set_async_err(int r);
340 void dump(Formatter *f) const;
341
342 void break_all_delegs() { break_deleg(false); };
343
344 void recall_deleg(bool skip_read);
345 bool has_recalled_deleg();
346 int set_deleg(Fh *fh, unsigned type, ceph_deleg_cb_t cb, void *priv);
347 void unset_deleg(Fh *fh);
348
349 void mark_caps_dirty(int caps);
350 void mark_caps_clean();
351 private:
352 // how many opens for write on this Inode?
353 long open_count_for_write()
354 {
355 return (long)(open_by_mode[CEPH_FILE_MODE_RDWR] +
356 open_by_mode[CEPH_FILE_MODE_WR]);
357 };
358
359 // how many opens of any sort on this inode?
360 long open_count()
361 {
362 return (long) std::accumulate(open_by_mode.begin(), open_by_mode.end(), 0,
363 [] (int value, const std::map<int, int>::value_type& p)
364 { return value + p.second; });
365 };
366
367 void break_deleg(bool skip_read);
368 bool delegations_broken(bool skip_read);
369
370 };
371
372 ostream& operator<<(ostream &out, const Inode &in);
373
374 #endif