1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
4 #ifndef CEPH_CLIENT_INODE_H
5 #define CEPH_CLIENT_INODE_H
9 #include "include/ceph_assert.h"
10 #include "include/types.h"
11 #include "include/xlist.h"
13 #include "mds/flock.h"
14 #include "mds/mdstypes.h" // hrm
16 #include "osdc/ObjectCacher.h"
19 #include "MetaSession.h"
21 #include "Delegation.h"
35 Cap(Inode
&i
, MetaSession
*s
) : inode(i
),
40 s
->caps
.push_back(&cap_item
);
43 cap_item
.remove_myself();
47 // move to back of LRU
48 session
->caps
.push_back(&cap_item
);
51 void dump(Formatter
*f
) const;
57 unsigned implemented
= 0;
58 unsigned wanted
= 0; // as known to mds.
60 uint64_t issue_seq
= 0;
61 __u32 mseq
= 0; // migration seq
63 UserPerm latest_perms
;
66 /* Note that this Cap will not move (see Inode::caps):
69 * The insert members shall not affect the validity of iterators and
70 * references to the container, and the erase members shall invalidate only
71 * iterators and references to the erased elements.
73 xlist
<Cap
*>::item cap_item
;
77 //snapid_t follows; // map key
83 utime_t ctime
, btime
, mtime
, atime
;
84 version_t time_warp_seq
;
89 map
<string
,bufferptr
> xattrs
;
90 version_t xattr_version
;
92 bufferlist inline_data
;
93 version_t inline_version
;
95 bool writing
, dirty_data
;
98 int64_t cap_dirtier_uid
;
99 int64_t cap_dirtier_gid
;
101 explicit CapSnap(Inode
*i
)
102 : in(i
), issued(0), dirty(0), size(0), time_warp_seq(0), change_attr(0),
103 mode(0), uid(0), gid(0), xattr_version(0), inline_version(0),
104 writing(false), dirty_data(false), flush_tid(0), cap_dirtier_uid(-1),
108 void dump(Formatter
*f
) const;
112 #define I_COMPLETE (1 << 0)
113 #define I_DIR_ORDERED (1 << 1)
114 #define I_SNAPDIR_OPEN (1 << 2)
115 #define I_KICK_FLUSH (1 << 3)
116 #define I_CAP_DROPPED (1 << 4)
117 #define I_ERROR_FILELOCK (1 << 5)
122 // -- the actual inode --
123 inodeno_t ino
; // ORDER DEPENDENCY: oset
127 uint32_t rdev
; // if special file
129 // affected by any inode change...
130 utime_t ctime
; // inode change time
131 utime_t btime
; // birth time
133 // perm (namespace permissions)
141 // file (data access)
142 ceph_dir_layout dir_layout
;
143 file_layout_t layout
;
144 uint64_t size
; // on directory, # dentries
145 uint32_t truncate_seq
;
146 uint64_t truncate_size
;
147 utime_t mtime
; // file data modify time.
148 utime_t atime
; // file data access time.
149 uint32_t time_warp_seq
; // count of (potential) mtime/atime timewarps (i.e., utimes())
150 uint64_t change_attr
;
152 uint64_t max_size
; // max size we can write to
154 // dirfrag, recursive accounting
159 version_t version
; // auth only
160 version_t xattr_version
;
161 utime_t snap_btime
; // snapshot creation (birth) time
164 version_t inline_version
;
165 bufferlist inline_data
;
167 bool is_root() const { return ino
== CEPH_INO_ROOT
; }
168 bool is_symlink() const { return (mode
& S_IFMT
) == S_IFLNK
; }
169 bool is_dir() const { return (mode
& S_IFMT
) == S_IFDIR
; }
170 bool is_file() const { return (mode
& S_IFMT
) == S_IFREG
; }
172 bool has_dir_layout() const {
173 return layout
!= file_layout_t();
176 __u32
hash_dentry_name(const string
&dn
) {
177 int which
= dir_layout
.dl_dir_hash
;
179 which
= CEPH_STR_HASH_LINUX
;
180 ceph_assert(ceph_str_hash_valid(which
));
181 return ceph_str_hash(which
, dn
.data(), dn
.length());
188 bool is_complete_and_ordered() {
189 static const unsigned wants
= I_COMPLETE
| I_DIR_ORDERED
;
190 return (flags
& wants
) == wants
;
193 // about the dir (if this is one!)
194 Dir
*dir
; // if i'm a dir.
195 fragtree_t dirfragtree
;
196 uint64_t dir_release_count
, dir_ordered_count
;
197 bool dir_hashed
, dir_replicated
;
200 std::map
<mds_rank_t
, Cap
> caps
; // mds -> Cap
202 int64_t cap_dirtier_uid
;
203 int64_t cap_dirtier_gid
;
204 unsigned dirty_caps
, flushing_caps
;
205 std::map
<ceph_tid_t
, int> flushing_cap_tids
;
206 int shared_gen
, cache_gen
;
207 int snap_caps
, snap_cap_refs
;
208 utime_t hold_caps_until
;
209 xlist
<Inode
*>::item delay_cap_item
, dirty_cap_item
, flushing_cap_item
;
211 SnapRealm
*snaprealm
;
212 xlist
<Inode
*>::item snaprealm_item
;
213 InodeRef snapdir_parent
; // only if we are a snapdir inode
214 map
<snapid_t
,CapSnap
> cap_snaps
; // pending flush to mds
216 //int open_by_mode[CEPH_FILE_MODE_NUM];
217 map
<int,int> open_by_mode
;
218 map
<int,int> cap_refs
;
220 ObjectCacher::ObjectSet oset
; // ORDER DEPENDENCY: ino
222 uint64_t reported_size
, wanted_max_size
, requested_max_size
;
224 int _ref
; // ref count. 1 for each dentry, fh that links to me.
225 uint64_t ll_ref
; // separate ref count for ll client
226 xlist
<Dentry
*> dentries
; // if i'm linked to a dentry.
227 string symlink
; // symlink content, if it's a symlink
228 map
<string
,bufferptr
> xattrs
;
229 map
<frag_t
,int> fragmap
; // known frag -> mds mappings
231 std::list
<ceph::condition_variable
*> waitfor_caps
;
232 std::list
<ceph::condition_variable
*> waitfor_commit
;
233 std::list
<ceph::condition_variable
*> waitfor_deleg
;
235 Dentry
*get_first_parent() {
236 ceph_assert(!dentries
.empty());
237 return *dentries
.begin();
240 void make_long_path(filepath
& p
);
241 void make_short_path(filepath
& p
);
242 void make_nosnap_relative_path(filepath
& p
);
254 void ll_put(uint64_t n
=1) {
255 ceph_assert(ll_ref
>= n
);
260 std::unique_ptr
<ceph_lock_state_t
> fcntl_locks
;
261 std::unique_ptr
<ceph_lock_state_t
> flock_locks
;
263 bool has_any_filelocks() {
265 (fcntl_locks
&& !fcntl_locks
->empty()) ||
266 (flock_locks
&& !flock_locks
->empty());
269 list
<Delegation
> delegations
;
271 xlist
<MetaRequest
*> unsafe_ops
;
277 Inode(Client
*c
, vinodeno_t vino
, file_layout_t
*newlayout
)
278 : client(c
), ino(vino
.ino
), snapid(vino
.snapid
), faked_ino(0),
279 rdev(0), mode(0), uid(0), gid(0), nlink(0),
280 size(0), truncate_seq(1), truncate_size(-1),
281 time_warp_seq(0), change_attr(0), max_size(0), version(0),
282 xattr_version(0), inline_version(0), flags(0),
283 dir(0), dir_release_count(1), dir_ordered_count(1),
284 dir_hashed(false), dir_replicated(false), auth_cap(NULL
),
285 cap_dirtier_uid(-1), cap_dirtier_gid(-1),
286 dirty_caps(0), flushing_caps(0), shared_gen(0), cache_gen(0),
287 snap_caps(0), snap_cap_refs(0),
288 delay_cap_item(this), dirty_cap_item(this), flushing_cap_item(this),
289 snaprealm(0), snaprealm_item(this),
290 oset((void *)this, newlayout
->pool_id
, this->ino
),
291 reported_size(0), wanted_max_size(0), requested_max_size(0),
292 _ref(0), ll_ref(0), dir_pin(MDS_RANK_NONE
)
294 memset(&dir_layout
, 0, sizeof(dir_layout
));
298 vinodeno_t
vino() const { return vinodeno_t(ino
, snapid
); }
301 bool operator() (Inode
* const & left
, Inode
* const & right
) {
302 if (left
->ino
.val
< right
->ino
.val
) {
303 return (left
->snapid
.val
< right
->snapid
.val
);
309 bool check_mode(const UserPerm
& perms
, unsigned want
);
312 void get_open_ref(int mode
);
313 bool put_open_ref(int mode
);
315 void get_cap_ref(int cap
);
316 int put_cap_ref(int cap
);
318 bool cap_is_valid(const Cap
&cap
) const;
319 int caps_issued(int *implemented
= 0) const;
320 void try_touch_cap(mds_rank_t mds
);
321 bool caps_issued_mask(unsigned mask
, bool allow_impl
=false);
323 int caps_file_wanted();
325 int caps_mds_wanted();
327 const UserPerm
*get_best_perms();
329 bool have_valid_size();
332 void add_fh(Fh
*f
) {fhs
.insert(f
);}
333 void rm_fh(Fh
*f
) {fhs
.erase(f
);}
334 void set_async_err(int r
);
335 void dump(Formatter
*f
) const;
337 void break_all_delegs() { break_deleg(false); };
339 void recall_deleg(bool skip_read
);
340 bool has_recalled_deleg();
341 int set_deleg(Fh
*fh
, unsigned type
, ceph_deleg_cb_t cb
, void *priv
);
342 void unset_deleg(Fh
*fh
);
344 void mark_caps_dirty(int caps
);
345 void mark_caps_clean();
347 // how many opens for write on this Inode?
348 long open_count_for_write()
350 return (long)(open_by_mode
[CEPH_FILE_MODE_RDWR
] +
351 open_by_mode
[CEPH_FILE_MODE_WR
]);
354 // how many opens of any sort on this inode?
357 return (long) std::accumulate(open_by_mode
.begin(), open_by_mode
.end(), 0,
358 [] (int value
, const std::map
<int, int>::value_type
& p
)
359 { return value
+ p
.second
; });
362 void break_deleg(bool skip_read
);
363 bool delegations_broken(bool skip_read
);
367 ostream
& operator<<(ostream
&out
, const Inode
&in
);