]>
git.proxmox.com Git - ceph.git/blob - ceph/src/client/Inode.cc
1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
9 #include "MetaSession.h"
10 #include "ClientSnapRealm.h"
11 #include "Delegation.h"
13 #include "mds/flock.h"
17 cap_item
.remove_myself();
18 snaprealm_item
.remove_myself();
21 snapdir_parent
->flags
&= ~I_SNAPDIR_OPEN
;
22 snapdir_parent
.reset();
25 if (!oset
.objects
.empty()) {
26 lsubdout(client
->cct
, client
, 0) << __func__
<< ": leftover objects on inode 0x"
27 << std::hex
<< ino
<< std::dec
<< dendl
;
28 assert(oset
.objects
.empty());
31 if (!delegations
.empty()) {
32 lsubdout(client
->cct
, client
, 0) << __func__
<< ": leftover delegations on inode 0x"
33 << std::hex
<< ino
<< std::dec
<< dendl
;
34 assert(delegations
.empty());
41 ostream
& operator<<(ostream
&out
, const Inode
&in
)
43 out
<< in
.vino() << "("
44 << "faked_ino=" << in
.faked_ino
46 << " ll_ref=" << in
.ll_ref
47 << " cap_refs=" << in
.cap_refs
48 << " open=" << in
.open_by_mode
49 << " mode=" << oct
<< in
.mode
<< dec
50 << " size=" << in
.size
<< "/" << in
.max_size
51 << " mtime=" << in
.mtime
52 << " caps=" << ccap_string(in
.caps_issued());
53 if (!in
.caps
.empty()) {
55 for (auto p
= in
.caps
.begin(); p
!= in
.caps
.end(); ++p
) {
56 if (p
!= in
.caps
.begin())
58 out
<< p
->first
<< '=' << ccap_string(p
->second
->issued
);
63 out
<< " dirty_caps=" << ccap_string(in
.dirty_caps
);
65 out
<< " flushing_caps=" << ccap_string(in
.flushing_caps
);
67 if (in
.flags
& I_COMPLETE
)
71 out
<< " " << in
.oset
;
73 if (!in
.dn_set
.empty())
74 out
<< " parents=" << in
.dn_set
;
76 if (in
.is_dir() && in
.has_dir_layout())
77 out
<< " has_dir_layout";
79 if (in
.quota
.is_enable())
80 out
<< " " << in
.quota
;
82 out
<< ' ' << &in
<< ")";
87 void Inode::make_long_path(filepath
& p
)
89 if (!dn_set
.empty()) {
90 assert((*dn_set
.begin())->dir
&& (*dn_set
.begin())->dir
->parent_inode
);
91 (*dn_set
.begin())->dir
->parent_inode
->make_long_path(p
);
92 p
.push_dentry((*dn_set
.begin())->name
);
93 } else if (snapdir_parent
) {
94 snapdir_parent
->make_nosnap_relative_path(p
);
102 * make a filepath suitable for an mds request:
103 * - if we are non-snapped/live, the ino is sufficient, e.g. #1234
104 * - if we are snapped, make filepath relative to first non-snapped parent.
106 void Inode::make_nosnap_relative_path(filepath
& p
)
108 if (snapid
== CEPH_NOSNAP
) {
110 } else if (snapdir_parent
) {
111 snapdir_parent
->make_nosnap_relative_path(p
);
113 p
.push_dentry(empty
);
114 } else if (!dn_set
.empty()) {
115 assert((*dn_set
.begin())->dir
&& (*dn_set
.begin())->dir
->parent_inode
);
116 (*dn_set
.begin())->dir
->parent_inode
->make_nosnap_relative_path(p
);
117 p
.push_dentry((*dn_set
.begin())->name
);
123 void Inode::get_open_ref(int mode
)
125 open_by_mode
[mode
]++;
126 break_deleg(!(mode
& CEPH_FILE_MODE_WR
));
129 bool Inode::put_open_ref(int mode
)
131 //cout << "open_by_mode[" << mode << "] " << open_by_mode[mode] << " -> " << (open_by_mode[mode]-1) << std::endl;
132 if (--open_by_mode
[mode
] == 0)
137 void Inode::get_cap_ref(int cap
)
144 //cout << "inode " << *this << " get " << cap_string(c) << " " << (cap_refs[c]-1) << " -> " << cap_refs[c] << std::endl;
151 int Inode::put_cap_ref(int cap
)
158 if (cap_refs
[c
] <= 0) {
159 lderr(client
->cct
) << "put_cap_ref " << ccap_string(c
) << " went negative on " << *this << dendl
;
160 assert(cap_refs
[c
] > 0);
162 if (--cap_refs
[c
] == 0)
164 //cout << "inode " << *this << " put " << cap_string(c) << " " << (cap_refs[c]+1) << " -> " << cap_refs[c] << std::endl;
172 bool Inode::is_any_caps()
174 return !caps
.empty() || snap_caps
;
177 bool Inode::cap_is_valid(Cap
* cap
) const
179 /*cout << "cap_gen " << cap->session-> cap_gen << std::endl
180 << "session gen " << cap->gen << std::endl
181 << "cap expire " << cap->session->cap_ttl << std::endl
182 << "cur time " << ceph_clock_now(cct) << std::endl;*/
183 if ((cap
->session
->cap_gen
<= cap
->gen
)
184 && (ceph_clock_now() < cap
->session
->cap_ttl
)) {
190 int Inode::caps_issued(int *implemented
) const
194 for (map
<mds_rank_t
,Cap
*>::const_iterator it
= caps
.begin();
197 if (cap_is_valid(it
->second
)) {
198 c
|= it
->second
->issued
;
199 i
|= it
->second
->implemented
;
206 void Inode::touch_cap(Cap
*cap
)
208 // move to back of LRU
209 cap
->session
->caps
.push_back(&cap
->cap_item
);
212 void Inode::try_touch_cap(mds_rank_t mds
)
215 touch_cap(caps
[mds
]);
218 bool Inode::caps_issued_mask(unsigned mask
)
221 if ((c
& mask
) == mask
)
225 cap_is_valid(auth_cap
) &&
226 (auth_cap
->issued
& mask
) == mask
) {
231 for (map
<mds_rank_t
,Cap
*>::iterator it
= caps
.begin();
234 if (cap_is_valid(it
->second
)) {
235 if ((it
->second
->issued
& mask
) == mask
) {
236 touch_cap(it
->second
);
239 c
|= it
->second
->issued
;
242 if ((c
& mask
) == mask
) {
243 // bah.. touch them all
244 for (map
<mds_rank_t
,Cap
*>::iterator it
= caps
.begin();
247 touch_cap(it
->second
);
253 int Inode::caps_used()
256 for (map
<int,int>::iterator p
= cap_refs
.begin();
264 int Inode::caps_file_wanted()
267 for (map
<int,int>::iterator p
= open_by_mode
.begin();
268 p
!= open_by_mode
.end();
271 want
|= ceph_caps_for_mode(p
->first
);
275 int Inode::caps_wanted()
277 int want
= caps_file_wanted() | caps_used();
278 if (want
& CEPH_CAP_FILE_BUFFER
)
279 want
|= CEPH_CAP_FILE_EXCL
;
283 int Inode::caps_mds_wanted()
286 for (auto it
= caps
.begin(); it
!= caps
.end(); ++it
)
287 want
|= it
->second
->wanted
;
291 int Inode::caps_dirty()
293 return dirty_caps
| flushing_caps
;
296 const UserPerm
* Inode::get_best_perms()
298 const UserPerm
*perms
= NULL
;
299 for (const auto ci
: caps
) {
300 const UserPerm
& iperm
= ci
.second
->latest_perms
;
301 if (!perms
) { // we don't have any, take what's present
303 } else if (iperm
.uid() == uid
) {
304 if (iperm
.gid() == gid
) { // we have the best possible, return
307 if (perms
->uid() != uid
) { // take uid > gid every time
310 } else if (perms
->uid() != uid
&& iperm
.gid() == gid
) {
311 perms
= &iperm
; // a matching gid is better than nothing
317 bool Inode::have_valid_size()
319 // RD+RDCACHE or WR+WRBUFFER => valid size
320 if (caps_issued() & (CEPH_CAP_FILE_SHARED
| CEPH_CAP_FILE_EXCL
))
325 // open Dir for an inode. if it's not open, allocate it (and pin dentry in memory).
326 Dir
*Inode::open_dir()
330 lsubdout(client
->cct
, client
, 15) << "open_dir " << dir
<< " on " << this << dendl
;
331 assert(dn_set
.size() < 2); // dirs can't be hard-linked
333 (*dn_set
.begin())->get(); // pin dentry
339 bool Inode::check_mode(const UserPerm
& perms
, unsigned want
)
341 if (uid
== perms
.uid()) {
342 // if uid is owner, owner entry determines access
344 } else if (perms
.gid_in_groups(gid
)) {
345 // if a gid or sgid matches the owning group, group entry determines access
349 return (mode
& want
) == want
;
354 lsubdout(client
->cct
, client
, 15) << "inode.get on " << this << " " << ino
<< '.' << snapid
355 << " now " << _ref
<< dendl
;
358 //private method to put a reference; see Client::put_inode()
359 int Inode::_put(int n
) {
361 lsubdout(client
->cct
, client
, 15) << "inode.put on " << this << " " << ino
<< '.' << snapid
362 << " now " << _ref
<< dendl
;
368 void Inode::dump(Formatter
*f
) const
370 f
->dump_stream("ino") << ino
;
371 f
->dump_stream("snapid") << snapid
;
373 f
->dump_unsigned("rdev", rdev
);
374 f
->dump_stream("ctime") << ctime
;
375 f
->dump_stream("btime") << btime
;
376 f
->dump_stream("mode") << '0' << std::oct
<< mode
<< std::dec
;
377 f
->dump_unsigned("uid", uid
);
378 f
->dump_unsigned("gid", gid
);
379 f
->dump_int("nlink", nlink
);
381 f
->dump_unsigned("size", size
);
382 f
->dump_unsigned("max_size", max_size
);
383 f
->dump_unsigned("truncate_seq", truncate_seq
);
384 f
->dump_unsigned("truncate_size", truncate_size
);
385 f
->dump_stream("mtime") << mtime
;
386 f
->dump_stream("atime") << atime
;
387 f
->dump_unsigned("time_warp_seq", time_warp_seq
);
388 f
->dump_unsigned("change_attr", change_attr
);
390 f
->dump_object("layout", layout
);
392 f
->open_object_section("dir_layout");
393 ::dump(dir_layout
, f
);
396 f
->dump_bool("complete", flags
& I_COMPLETE
);
397 f
->dump_bool("ordered", flags
& I_DIR_ORDERED
);
399 /* FIXME when wip-mds-encoding is merged ***
400 f->open_object_section("dir_stat");
404 f->open_object_section("rstat");
410 f
->dump_unsigned("version", version
);
411 f
->dump_unsigned("xattr_version", xattr_version
);
412 f
->dump_unsigned("flags", flags
);
415 if (!dir_contacts
.empty()) {
416 f
->open_object_section("dir_contants");
417 for (set
<int>::iterator p
= dir_contacts
.begin(); p
!= dir_contacts
.end(); ++p
)
418 f
->dump_int("mds", *p
);
421 f
->dump_int("dir_hashed", (int)dir_hashed
);
422 f
->dump_int("dir_replicated", (int)dir_replicated
);
425 f
->open_array_section("caps");
426 for (map
<mds_rank_t
,Cap
*>::const_iterator p
= caps
.begin(); p
!= caps
.end(); ++p
) {
427 f
->open_object_section("cap");
428 f
->dump_int("mds", p
->first
);
429 if (p
->second
== auth_cap
)
430 f
->dump_int("auth", 1);
436 f
->dump_int("auth_cap", auth_cap
->session
->mds_num
);
438 f
->dump_stream("dirty_caps") << ccap_string(dirty_caps
);
440 f
->dump_stream("flushings_caps") << ccap_string(flushing_caps
);
441 f
->open_object_section("flushing_cap_tid");
442 for (map
<ceph_tid_t
, int>::const_iterator p
= flushing_cap_tids
.begin();
443 p
!= flushing_cap_tids
.end();
445 string
n(ccap_string(p
->second
));
446 f
->dump_unsigned(n
.c_str(), p
->first
);
450 f
->dump_int("shared_gen", shared_gen
);
451 f
->dump_int("cache_gen", cache_gen
);
453 f
->dump_int("snap_caps", snap_caps
);
454 f
->dump_int("snap_cap_refs", snap_cap_refs
);
457 f
->dump_stream("hold_caps_until") << hold_caps_until
;
460 f
->open_object_section("snaprealm");
464 if (!cap_snaps
.empty()) {
465 for (const auto &p
: cap_snaps
) {
466 f
->open_object_section("cap_snap");
467 f
->dump_stream("follows") << p
.first
;
474 if (!open_by_mode
.empty()) {
475 f
->open_array_section("open_by_mode");
476 for (map
<int,int>::const_iterator p
= open_by_mode
.begin(); p
!= open_by_mode
.end(); ++p
) {
477 f
->open_object_section("ref");
478 f
->dump_int("mode", p
->first
);
479 f
->dump_int("refs", p
->second
);
484 if (!cap_refs
.empty()) {
485 f
->open_array_section("cap_refs");
486 for (map
<int,int>::const_iterator p
= cap_refs
.begin(); p
!= cap_refs
.end(); ++p
) {
487 f
->open_object_section("cap_ref");
488 f
->dump_stream("cap") << ccap_string(p
->first
);
489 f
->dump_int("refs", p
->second
);
495 f
->dump_unsigned("reported_size", reported_size
);
496 if (wanted_max_size
!= max_size
)
497 f
->dump_unsigned("wanted_max_size", wanted_max_size
);
498 if (requested_max_size
!= max_size
)
499 f
->dump_unsigned("requested_max_size", requested_max_size
);
501 f
->dump_int("ref", _ref
);
502 f
->dump_int("ll_ref", ll_ref
);
504 if (!dn_set
.empty()) {
505 f
->open_array_section("parents");
506 for (set
<Dentry
*>::const_iterator p
= dn_set
.begin(); p
!= dn_set
.end(); ++p
) {
507 f
->open_object_section("dentry");
508 f
->dump_stream("dir_ino") << (*p
)->dir
->parent_inode
->ino
;
509 f
->dump_string("name", (*p
)->name
);
516 void Cap::dump(Formatter
*f
) const
518 f
->dump_int("mds", session
->mds_num
);
519 f
->dump_stream("ino") << inode
->ino
;
520 f
->dump_unsigned("cap_id", cap_id
);
521 f
->dump_stream("issued") << ccap_string(issued
);
522 if (implemented
!= issued
)
523 f
->dump_stream("implemented") << ccap_string(implemented
);
524 f
->dump_stream("wanted") << ccap_string(wanted
);
525 f
->dump_unsigned("seq", seq
);
526 f
->dump_unsigned("issue_seq", issue_seq
);
527 f
->dump_unsigned("mseq", mseq
);
528 f
->dump_unsigned("gen", gen
);
531 void CapSnap::dump(Formatter
*f
) const
533 f
->dump_stream("ino") << in
->ino
;
534 f
->dump_stream("issued") << ccap_string(issued
);
535 f
->dump_stream("dirty") << ccap_string(dirty
);
536 f
->dump_unsigned("size", size
);
537 f
->dump_stream("ctime") << ctime
;
538 f
->dump_stream("mtime") << mtime
;
539 f
->dump_stream("atime") << atime
;
540 f
->dump_int("time_warp_seq", time_warp_seq
);
541 f
->dump_stream("mode") << '0' << std::oct
<< mode
<< std::dec
;
542 f
->dump_unsigned("uid", uid
);
543 f
->dump_unsigned("gid", gid
);
544 if (!xattrs
.empty()) {
545 f
->open_object_section("xattr_lens");
546 for (map
<string
,bufferptr
>::const_iterator p
= xattrs
.begin(); p
!= xattrs
.end(); ++p
)
547 f
->dump_int(p
->first
.c_str(), p
->second
.length());
550 f
->dump_unsigned("xattr_version", xattr_version
);
551 f
->dump_int("writing", (int)writing
);
552 f
->dump_int("dirty_data", (int)dirty_data
);
553 f
->dump_unsigned("flush_tid", flush_tid
);
556 void Inode::set_async_err(int r
)
558 for (const auto &fh
: fhs
) {
563 bool Inode::has_recalled_deleg()
565 if (delegations
.empty())
568 // Either all delegations are recalled or none are. Just check the first.
569 Delegation
& deleg
= delegations
.front();
570 return deleg
.is_recalled();
573 void Inode::recall_deleg(bool skip_read
)
575 if (delegations
.empty())
579 for (list
<Delegation
>::iterator d
= delegations
.begin();
580 d
!= delegations
.end(); ++d
) {
582 Delegation
& deleg
= *d
;
583 deleg
.recall(skip_read
);
587 bool Inode::delegations_broken(bool skip_read
)
589 if (delegations
.empty()) {
590 lsubdout(client
->cct
, client
, 10) <<
591 __func__
<< ": delegations empty on " << *this << dendl
;
596 Delegation
& deleg
= delegations
.front();
597 lsubdout(client
->cct
, client
, 10) <<
598 __func__
<< ": read delegs only on " << *this << dendl
;
599 if (deleg
.get_type() == CEPH_FILE_MODE_RD
) {
603 lsubdout(client
->cct
, client
, 10) <<
604 __func__
<< ": not broken" << *this << dendl
;
608 void Inode::break_deleg(bool skip_read
)
610 lsubdout(client
->cct
, client
, 10) <<
611 __func__
<< ": breaking delegs on " << *this << dendl
;
613 recall_deleg(skip_read
);
615 while (!delegations_broken(skip_read
))
616 client
->wait_on_list(waitfor_deleg
);
620 * set_deleg: request a delegation on an open Fh
621 * @fh: filehandle on which to acquire it
622 * @type: delegation request type
623 * @cb: delegation recall callback function
624 * @priv: private pointer to be passed to callback
626 * Attempt to acquire a delegation on an open file handle. If there are no
627 * conflicts and we have the right caps, allocate a new delegation, fill it
628 * out and return 0. Return an error if we can't get one for any reason.
630 int Inode::set_deleg(Fh
*fh
, unsigned type
, ceph_deleg_cb_t cb
, void *priv
)
632 lsubdout(client
->cct
, client
, 10) <<
633 __func__
<< ": inode " << *this << dendl
;
636 * 0 deleg timeout means that they haven't been explicitly enabled. Don't
637 * allow it, with an unusual error to make it clear.
639 if (!client
->get_deleg_timeout())
642 // Just say no if we have any recalled delegs still outstanding
643 if (has_recalled_deleg()) {
644 lsubdout(client
->cct
, client
, 10) << __func__
<<
645 ": has_recalled_deleg" << dendl
;
649 // check vs. currently open files on this inode
651 case CEPH_DELEGATION_RD
:
652 if (open_count_for_write()) {
653 lsubdout(client
->cct
, client
, 10) << __func__
<<
654 ": open for write" << dendl
;
658 case CEPH_DELEGATION_WR
:
659 if (open_count() > 1) {
660 lsubdout(client
->cct
, client
, 10) << __func__
<< ": open" << dendl
;
669 * A delegation is essentially a long-held container for cap references that
670 * we delegate to the client until recalled. The caps required depend on the
671 * type of delegation (read vs. rw). This is entirely an opportunistic thing.
672 * If we don't have the necessary caps for the delegation, then we just don't grant one.
675 * In principle we could request the caps from the MDS, but a delegation is
676 * usually requested just after an open. If we don't have the necessary caps
677 * already, then it's likely that there is some sort of conflicting access.
679 * In the future, we may need to add a way to have this request caps more
680 * aggressively -- for instance, to handle WANT_DELEGATION for NFSv4.1+.
682 int need
= ceph_deleg_caps_for_type(type
);
683 if (!caps_issued_mask(need
)) {
684 lsubdout(client
->cct
, client
, 10) << __func__
<< ": cap mismatch, have="
685 << ccap_string(caps_issued()) << " need=" << ccap_string(need
) << dendl
;
689 for (list
<Delegation
>::iterator d
= delegations
.begin();
690 d
!= delegations
.end(); ++d
) {
691 Delegation
& deleg
= *d
;
692 if (deleg
.get_fh() == fh
) {
693 deleg
.reinit(type
, cb
, priv
);
698 delegations
.emplace_back(fh
, type
, cb
, priv
);
703 * unset_deleg - remove a delegation that was previously set
704 * @fh: file handle to clear delegation of
706 * Unlink delegation from the Inode (if there is one), put caps and free it.
708 void Inode::unset_deleg(Fh
*fh
)
710 for (list
<Delegation
>::iterator d
= delegations
.begin();
711 d
!= delegations
.end(); ++d
) {
712 Delegation
& deleg
= *d
;
713 if (deleg
.get_fh() == fh
) {
714 delegations
.erase(d
);
715 client
->signal_cond_list(waitfor_deleg
);