]> git.proxmox.com Git - ceph.git/blame - ceph/src/client/Inode.h
import new upstream nautilus stable release 14.2.8
[ceph.git] / ceph / src / client / Inode.h
CommitLineData
7c673cae
FG
1// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2// vim: ts=8 sw=2 smarttab
3
4#ifndef CEPH_CLIENT_INODE_H
5#define CEPH_CLIENT_INODE_H
6
b32b8144
FG
7#include <numeric>
8
11fdf7f2 9#include "include/ceph_assert.h"
7c673cae
FG
10#include "include/types.h"
11#include "include/xlist.h"
12
11fdf7f2 13#include "mds/flock.h"
7c673cae
FG
14#include "mds/mdstypes.h" // hrm
15
16#include "osdc/ObjectCacher.h"
7c673cae
FG
17
18#include "InodeRef.h"
11fdf7f2 19#include "MetaSession.h"
7c673cae 20#include "UserPerm.h"
b32b8144 21#include "Delegation.h"
7c673cae
FG
22
// Forward declarations. This header only uses these types through
// pointers, references, or pointer-typed container elements, so the
// complete definitions are not needed here.
class Client;
class Dentry;
class Dir;
struct SnapRealm;
struct Inode;
class MetaRequest;
class filepath;
class Fh;
31
11fdf7f2
TL
32class Cap {
33public:
34 Cap() = delete;
35 Cap(Inode &i, MetaSession *s) : inode(i),
36 session(s),
37 gen(s->cap_gen),
38 cap_item(this)
39 {
40 s->caps.push_back(&cap_item);
41 }
42 ~Cap() {
43 cap_item.remove_myself();
44 }
45
46 void touch(void) {
47 // move to back of LRU
48 session->caps.push_back(&cap_item);
49 }
50
51 void dump(Formatter *f) const;
52
53 Inode &inode;
7c673cae 54 MetaSession *session;
11fdf7f2
TL
55 uint64_t cap_id = 0;
56 unsigned issued = 0;
57 unsigned implemented = 0;
58 unsigned wanted = 0; // as known to mds.
59 uint64_t seq = 0;
60 uint64_t issue_seq = 0;
61 __u32 mseq = 0; // migration seq
7c673cae
FG
62 __u32 gen;
63 UserPerm latest_perms;
64
11fdf7f2
TL
65private:
66 /* Note that this Cap will not move (see Inode::caps):
67 *
68 * Section 23.1.2#8
69 * The insert members shall not affect the validity of iterators and
70 * references to the container, and the erase members shall invalidate only
71 * iterators and references to the erased elements.
72 */
73 xlist<Cap *>::item cap_item;
7c673cae
FG
74};
75
76struct CapSnap {
77 //snapid_t follows; // map key
78 InodeRef in;
79 SnapContext context;
80 int issued, dirty;
81
82 uint64_t size;
83 utime_t ctime, btime, mtime, atime;
84 version_t time_warp_seq;
85 uint64_t change_attr;
86 uint32_t mode;
87 uid_t uid;
88 gid_t gid;
89 map<string,bufferptr> xattrs;
90 version_t xattr_version;
91
92 bufferlist inline_data;
93 version_t inline_version;
94
95 bool writing, dirty_data;
96 uint64_t flush_tid;
97
11fdf7f2
TL
98 int64_t cap_dirtier_uid;
99 int64_t cap_dirtier_gid;
100
7c673cae
FG
101 explicit CapSnap(Inode *i)
102 : in(i), issued(0), dirty(0), size(0), time_warp_seq(0), change_attr(0),
103 mode(0), uid(0), gid(0), xattr_version(0), inline_version(0),
11fdf7f2
TL
104 writing(false), dirty_data(false), flush_tid(0), cap_dirtier_uid(-1),
105 cap_dirtier_gid(-1)
7c673cae
FG
106 {}
107
108 void dump(Formatter *f) const;
109};
110
// inode flags
//
// Each value is a distinct single bit so that flags can be combined with |
// and tested with & in Inode::flags (see Inode::is_complete_and_ordered()).
#define I_COMPLETE      1
#define I_DIR_ORDERED   2
#define I_CAP_DROPPED   4
#define I_SNAPDIR_OPEN  8
#define I_KICK_FLUSH   16
7c673cae
FG
117
118struct Inode {
119 Client *client;
120
121 // -- the actual inode --
122 inodeno_t ino; // ORDER DEPENDENCY: oset
123 snapid_t snapid;
124 ino_t faked_ino;
125
126 uint32_t rdev; // if special file
127
128 // affected by any inode change...
129 utime_t ctime; // inode change time
130 utime_t btime; // birth time
131
132 // perm (namespace permissions)
133 uint32_t mode;
134 uid_t uid;
135 gid_t gid;
136
137 // nlink
138 int32_t nlink;
139
140 // file (data access)
141 ceph_dir_layout dir_layout;
142 file_layout_t layout;
143 uint64_t size; // on directory, # dentries
144 uint32_t truncate_seq;
145 uint64_t truncate_size;
146 utime_t mtime; // file data modify time.
147 utime_t atime; // file data access time.
148 uint32_t time_warp_seq; // count of (potential) mtime/atime timewarps (i.e., utimes())
149 uint64_t change_attr;
150
151 uint64_t max_size; // max size we can write to
152
153 // dirfrag, recursive accountin
154 frag_info_t dirstat;
155 nest_info_t rstat;
156
157 // special stuff
158 version_t version; // auth only
159 version_t xattr_version;
81eedcae 160 utime_t snap_btime; // snapshot creation (birth) time
7c673cae
FG
161
162 // inline data
163 version_t inline_version;
164 bufferlist inline_data;
165
166 bool is_root() const { return ino == MDS_INO_ROOT; }
167 bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; }
168 bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; }
169 bool is_file() const { return (mode & S_IFMT) == S_IFREG; }
170
171 bool has_dir_layout() const {
172 return layout != file_layout_t();
173 }
174
175 __u32 hash_dentry_name(const string &dn) {
176 int which = dir_layout.dl_dir_hash;
177 if (!which)
178 which = CEPH_STR_HASH_LINUX;
11fdf7f2 179 ceph_assert(ceph_str_hash_valid(which));
7c673cae
FG
180 return ceph_str_hash(which, dn.data(), dn.length());
181 }
182
183 unsigned flags;
184
185 quota_info_t quota;
186
187 bool is_complete_and_ordered() {
188 static const unsigned wants = I_COMPLETE | I_DIR_ORDERED;
189 return (flags & wants) == wants;
190 }
191
192 // about the dir (if this is one!)
193 Dir *dir; // if i'm a dir.
194 fragtree_t dirfragtree;
7c673cae
FG
195 uint64_t dir_release_count, dir_ordered_count;
196 bool dir_hashed, dir_replicated;
197
198 // per-mds caps
11fdf7f2 199 std::map<mds_rank_t, Cap> caps; // mds -> Cap
7c673cae
FG
200 Cap *auth_cap;
201 int64_t cap_dirtier_uid;
202 int64_t cap_dirtier_gid;
203 unsigned dirty_caps, flushing_caps;
204 std::map<ceph_tid_t, int> flushing_cap_tids;
205 int shared_gen, cache_gen;
206 int snap_caps, snap_cap_refs;
207 utime_t hold_caps_until;
28e407b8 208 xlist<Inode*>::item delay_cap_item, dirty_cap_item, flushing_cap_item;
7c673cae
FG
209
210 SnapRealm *snaprealm;
211 xlist<Inode*>::item snaprealm_item;
212 InodeRef snapdir_parent; // only if we are a snapdir inode
213 map<snapid_t,CapSnap> cap_snaps; // pending flush to mds
214
215 //int open_by_mode[CEPH_FILE_MODE_NUM];
216 map<int,int> open_by_mode;
217 map<int,int> cap_refs;
218
219 ObjectCacher::ObjectSet oset; // ORDER DEPENDENCY: ino
220
221 uint64_t reported_size, wanted_max_size, requested_max_size;
222
223 int _ref; // ref count. 1 for each dentry, fh that links to me.
494da23a 224 uint64_t ll_ref; // separate ref count for ll client
11fdf7f2 225 xlist<Dentry *> dentries; // if i'm linked to a dentry.
7c673cae
FG
226 string symlink; // symlink content, if it's a symlink
227 map<string,bufferptr> xattrs;
228 map<frag_t,int> fragmap; // known frag -> mds mappings
229
230 list<Cond*> waitfor_caps;
231 list<Cond*> waitfor_commit;
b32b8144 232 list<Cond*> waitfor_deleg;
7c673cae
FG
233
234 Dentry *get_first_parent() {
11fdf7f2
TL
235 ceph_assert(!dentries.empty());
236 return *dentries.begin();
7c673cae
FG
237 }
238
239 void make_long_path(filepath& p);
240 void make_nosnap_relative_path(filepath& p);
241
242 void get();
243 int _put(int n=1);
244
245 int get_num_ref() {
246 return _ref;
247 }
248
249 void ll_get() {
250 ll_ref++;
251 }
494da23a 252 void ll_put(uint64_t n=1) {
11fdf7f2 253 ceph_assert(ll_ref >= n);
7c673cae
FG
254 ll_ref -= n;
255 }
256
257 // file locks
11fdf7f2
TL
258 std::unique_ptr<ceph_lock_state_t> fcntl_locks;
259 std::unique_ptr<ceph_lock_state_t> flock_locks;
7c673cae 260
b32b8144
FG
261 list<Delegation> delegations;
262
7c673cae
FG
263 xlist<MetaRequest*> unsafe_ops;
264
265 std::set<Fh*> fhs;
266
11fdf7f2
TL
267 mds_rank_t dir_pin;
268
7c673cae
FG
269 Inode(Client *c, vinodeno_t vino, file_layout_t *newlayout)
270 : client(c), ino(vino.ino), snapid(vino.snapid), faked_ino(0),
271 rdev(0), mode(0), uid(0), gid(0), nlink(0),
272 size(0), truncate_seq(1), truncate_size(-1),
273 time_warp_seq(0), change_attr(0), max_size(0), version(0),
274 xattr_version(0), inline_version(0), flags(0),
275 dir(0), dir_release_count(1), dir_ordered_count(1),
276 dir_hashed(false), dir_replicated(false), auth_cap(NULL),
277 cap_dirtier_uid(-1), cap_dirtier_gid(-1),
278 dirty_caps(0), flushing_caps(0), shared_gen(0), cache_gen(0),
279 snap_caps(0), snap_cap_refs(0),
28e407b8 280 delay_cap_item(this), dirty_cap_item(this), flushing_cap_item(this),
7c673cae
FG
281 snaprealm(0), snaprealm_item(this),
282 oset((void *)this, newlayout->pool_id, this->ino),
283 reported_size(0), wanted_max_size(0), requested_max_size(0),
11fdf7f2 284 _ref(0), ll_ref(0), dir_pin(MDS_RANK_NONE)
7c673cae
FG
285 {
286 memset(&dir_layout, 0, sizeof(dir_layout));
7c673cae
FG
287 }
288 ~Inode();
289
290 vinodeno_t vino() const { return vinodeno_t(ino, snapid); }
291
292 struct Compare {
293 bool operator() (Inode* const & left, Inode* const & right) {
294 if (left->ino.val < right->ino.val) {
295 return (left->snapid.val < right->snapid.val);
296 }
297 return false;
298 }
299 };
300
301 bool check_mode(const UserPerm& perms, unsigned want);
302
303 // CAPS --------
304 void get_open_ref(int mode);
305 bool put_open_ref(int mode);
306
307 void get_cap_ref(int cap);
308 int put_cap_ref(int cap);
309 bool is_any_caps();
11fdf7f2 310 bool cap_is_valid(const Cap &cap) const;
7c673cae 311 int caps_issued(int *implemented = 0) const;
7c673cae 312 void try_touch_cap(mds_rank_t mds);
94b18763 313 bool caps_issued_mask(unsigned mask, bool allow_impl=false);
7c673cae
FG
314 int caps_used();
315 int caps_file_wanted();
316 int caps_wanted();
317 int caps_mds_wanted();
318 int caps_dirty();
319 const UserPerm *get_best_perms();
320
321 bool have_valid_size();
322 Dir *open_dir();
323
324 void add_fh(Fh *f) {fhs.insert(f);}
325 void rm_fh(Fh *f) {fhs.erase(f);}
326 void set_async_err(int r);
327 void dump(Formatter *f) const;
b32b8144
FG
328
329 void break_all_delegs() { break_deleg(false); };
330
331 void recall_deleg(bool skip_read);
332 bool has_recalled_deleg();
333 int set_deleg(Fh *fh, unsigned type, ceph_deleg_cb_t cb, void *priv);
334 void unset_deleg(Fh *fh);
335
28e407b8
AA
336 void mark_caps_dirty(int caps);
337 void mark_caps_clean();
b32b8144
FG
338private:
339 // how many opens for write on this Inode?
340 long open_count_for_write()
341 {
342 return (long)(open_by_mode[CEPH_FILE_MODE_RDWR] +
343 open_by_mode[CEPH_FILE_MODE_WR]);
344 };
345
346 // how many opens of any sort on this inode?
347 long open_count()
348 {
349 return (long) std::accumulate(open_by_mode.begin(), open_by_mode.end(), 0,
350 [] (int value, const std::map<int, int>::value_type& p)
351 { return value + p.second; });
352 };
353
354 void break_deleg(bool skip_read);
355 bool delegations_broken(bool skip_read);
356
7c673cae
FG
357};
358
359ostream& operator<<(ostream &out, const Inode &in);
360
361#endif