1 // -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
2 // vim: ts=8 sw=2 smarttab
/*
 * Ceph - scalable distributed file system
 *
 * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
 *
 * This is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License version 2.1, as published by the Free Software
 * Foundation.  See file COPYING.
 */
18 #include <sys/types.h>
22 #include <sys/param.h>
25 #include <sys/utsname.h>
28 #include <boost/lexical_cast.hpp>
29 #include <boost/fusion/include/std_pair.hpp>
31 #if defined(__FreeBSD__)
32 #define XATTR_CREATE 0x1
33 #define XATTR_REPLACE 0x2
35 #include <sys/xattr.h>
38 #if defined(__linux__)
39 #include <linux/falloc.h>
42 #include <sys/statvfs.h>
44 #include "common/config.h"
45 #include "common/version.h"
48 #include "messages/MClientSession.h"
49 #include "messages/MClientReconnect.h"
50 #include "messages/MClientRequest.h"
51 #include "messages/MClientRequestForward.h"
52 #include "messages/MClientReply.h"
53 #include "messages/MClientCaps.h"
54 #include "messages/MClientLease.h"
55 #include "messages/MClientSnap.h"
56 #include "messages/MCommandReply.h"
57 #include "messages/MOSDMap.h"
58 #include "messages/MClientQuota.h"
59 #include "messages/MClientCapRelease.h"
60 #include "messages/MMDSMap.h"
61 #include "messages/MFSMap.h"
62 #include "messages/MFSMapUser.h"
64 #include "mon/MonClient.h"
66 #include "mds/flock.h"
67 #include "osd/OSDMap.h"
68 #include "osdc/Filer.h"
70 #include "common/Cond.h"
71 #include "common/Mutex.h"
72 #include "common/perf_counters.h"
73 #include "common/admin_socket.h"
74 #include "common/errno.h"
75 #include "include/str_list.h"
77 #define dout_subsys ceph_subsys_client
79 #include "include/lru.h"
80 #include "include/compat.h"
81 #include "include/stringify.h"
87 #include "ClientSnapRealm.h"
89 #include "MetaSession.h"
90 #include "MetaRequest.h"
91 #include "ObjecterWriteback.h"
92 #include "posix_acl.h"
94 #include "include/assert.h"
95 #include "include/stat.h"
97 #include "include/cephfs/ceph_statx.h"
106 #define dout_prefix *_dout << "client." << whoami << " "
108 #define tout(cct) if (!cct->_conf->client_trace.empty()) traceout
110 // FreeBSD fails to define this
114 // Darwin fails to define this
123 #define DEBUG_GETATTR_CAPS (CEPH_CAP_XATTR_SHARED)
125 void client_flush_set_callback(void *p
, ObjectCacher::ObjectSet
*oset
)
127 Client
*client
= static_cast<Client
*>(p
);
128 client
->flush_set_callback(oset
);
134 Client::CommandHook::CommandHook(Client
*client
) :
139 bool Client::CommandHook::call(std::string command
, cmdmap_t
& cmdmap
,
140 std::string format
, bufferlist
& out
)
142 Formatter
*f
= Formatter::create(format
);
143 f
->open_object_section("result");
144 m_client
->client_lock
.Lock();
145 if (command
== "mds_requests")
146 m_client
->dump_mds_requests(f
);
147 else if (command
== "mds_sessions")
148 m_client
->dump_mds_sessions(f
);
149 else if (command
== "dump_cache")
150 m_client
->dump_cache(f
);
151 else if (command
== "kick_stale_sessions")
152 m_client
->_kick_stale_sessions();
153 else if (command
== "status")
154 m_client
->dump_status(f
);
156 assert(0 == "bad command registered");
157 m_client
->client_lock
.Unlock();
167 dir_result_t::dir_result_t(Inode
*in
, const UserPerm
& perms
)
168 : inode(in
), offset(0), next_offset(2),
169 release_count(0), ordered_count(0), cache_index(0), start_shared_gen(0),
173 void Client::_reset_faked_inos()
176 free_faked_inos
.clear();
177 free_faked_inos
.insert(start
, (uint32_t)-1 - start
+ 1);
178 last_used_faked_ino
= 0;
179 _use_faked_inos
= sizeof(ino_t
) < 8 || cct
->_conf
->client_use_faked_inos
;
182 void Client::_assign_faked_ino(Inode
*in
)
184 interval_set
<ino_t
>::const_iterator it
= free_faked_inos
.lower_bound(last_used_faked_ino
+ 1);
185 if (it
== free_faked_inos
.end() && last_used_faked_ino
> 0) {
186 last_used_faked_ino
= 0;
187 it
= free_faked_inos
.lower_bound(last_used_faked_ino
+ 1);
189 assert(it
!= free_faked_inos
.end());
190 if (last_used_faked_ino
< it
.get_start()) {
191 assert(it
.get_len() > 0);
192 last_used_faked_ino
= it
.get_start();
194 ++last_used_faked_ino
;
195 assert(it
.get_start() + it
.get_len() > last_used_faked_ino
);
197 in
->faked_ino
= last_used_faked_ino
;
198 free_faked_inos
.erase(in
->faked_ino
);
199 faked_ino_map
[in
->faked_ino
] = in
->vino();
202 void Client::_release_faked_ino(Inode
*in
)
204 free_faked_inos
.insert(in
->faked_ino
);
205 faked_ino_map
.erase(in
->faked_ino
);
208 vinodeno_t
Client::_map_faked_ino(ino_t ino
)
213 else if (faked_ino_map
.count(ino
))
214 vino
= faked_ino_map
[ino
];
216 vino
= vinodeno_t(0, CEPH_NOSNAP
);
217 ldout(cct
, 10) << "map_faked_ino " << ino
<< " -> " << vino
<< dendl
;
221 vinodeno_t
Client::map_faked_ino(ino_t ino
)
223 Mutex::Locker
lock(client_lock
);
224 return _map_faked_ino(ino
);
229 Client::Client(Messenger
*m
, MonClient
*mc
, Objecter
*objecter_
)
230 : Dispatcher(m
->cct
),
231 m_command_hook(this),
232 timer(m
->cct
, client_lock
),
233 callback_handle(NULL
),
234 switch_interrupt_cb(NULL
),
236 ino_invalidate_cb(NULL
),
237 dentry_invalidate_cb(NULL
),
240 can_invalidate_dentries(false),
241 require_remount(false),
242 async_ino_invalidator(m
->cct
),
243 async_dentry_invalidator(m
->cct
),
244 interrupt_finisher(m
->cct
),
245 remount_finisher(m
->cct
),
246 objecter_finisher(m
->cct
),
248 messenger(m
), monclient(mc
),
250 whoami(mc
->get_global_id()), cap_epoch_barrier(0),
251 last_tid(0), oldest_tid(0), last_flush_tid(1),
253 mounted(false), unmounting(false), blacklisted(false),
254 local_osd(-1), local_osd_epoch(0),
255 unsafe_sync_write(0),
256 client_lock("Client::client_lock")
262 num_flushing_caps
= 0;
264 _dir_vxattrs_name_size
= _vxattrs_calcu_name_size(_dir_vxattrs
);
265 _file_vxattrs_name_size
= _vxattrs_calcu_name_size(_file_vxattrs
);
267 user_id
= cct
->_conf
->client_mount_uid
;
268 group_id
= cct
->_conf
->client_mount_gid
;
271 if (cct
->_conf
->client_acl_type
== "posix_acl")
272 acl_type
= POSIX_ACL
;
274 lru
.lru_set_max(cct
->_conf
->client_cache_size
);
275 lru
.lru_set_midpoint(cct
->_conf
->client_cache_mid
);
278 free_fd_set
.insert(10, 1<<30);
280 mdsmap
.reset(new MDSMap
);
283 writeback_handler
.reset(new ObjecterWriteback(objecter
, &objecter_finisher
,
285 objectcacher
.reset(new ObjectCacher(cct
, "libcephfs", *writeback_handler
, client_lock
,
286 client_flush_set_callback
, // all commit callback
288 cct
->_conf
->client_oc_size
,
289 cct
->_conf
->client_oc_max_objects
,
290 cct
->_conf
->client_oc_max_dirty
,
291 cct
->_conf
->client_oc_target_dirty
,
292 cct
->_conf
->client_oc_max_dirty_age
,
294 objecter_finisher
.start();
295 filer
.reset(new Filer(objecter
, &objecter_finisher
));
296 objecter
->enable_blacklist_events();
302 assert(!client_lock
.is_locked());
304 // It is necessary to hold client_lock, because any inode destruction
305 // may call into ObjectCacher, which asserts that it's lock (which is
306 // client_lock) is held.
309 client_lock
.Unlock();
312 void Client::tear_down_cache()
315 for (ceph::unordered_map
<int, Fh
*>::iterator it
= fd_map
.begin();
319 ldout(cct
, 1) << "tear_down_cache forcing close of fh " << it
->first
<< " ino " << fh
->inode
->ino
<< dendl
;
324 while (!opened_dirs
.empty()) {
325 dir_result_t
*dirp
= *opened_dirs
.begin();
326 ldout(cct
, 1) << "tear_down_cache forcing close of dir " << dirp
<< " ino " << dirp
->inode
->ino
<< dendl
;
336 assert(lru
.lru_get_size() == 0);
339 assert(inode_map
.size() <= 1 + root_parents
.size());
340 if (root
&& inode_map
.size() == 1 + root_parents
.size()) {
344 while (!root_parents
.empty())
345 root_parents
.erase(root_parents
.begin());
350 assert(inode_map
.empty());
353 inodeno_t
Client::get_root_ino()
355 Mutex::Locker
l(client_lock
);
356 if (use_faked_inos())
357 return root
->faked_ino
;
362 Inode
*Client::get_root()
364 Mutex::Locker
l(client_lock
);
372 void Client::dump_inode(Formatter
*f
, Inode
*in
, set
<Inode
*>& did
, bool disconnected
)
375 in
->make_long_path(path
);
376 ldout(cct
, 1) << "dump_inode: "
377 << (disconnected
? "DISCONNECTED ":"")
378 << "inode " << in
->ino
380 << " ref " << in
->get_num_ref()
384 f
->open_object_section("inode");
385 f
->dump_stream("path") << path
;
387 f
->dump_int("disconnected", 1);
394 ldout(cct
, 1) << " dir " << in
->dir
<< " size " << in
->dir
->dentries
.size() << dendl
;
395 for (ceph::unordered_map
<string
, Dentry
*>::iterator it
= in
->dir
->dentries
.begin();
396 it
!= in
->dir
->dentries
.end();
398 ldout(cct
, 1) << " " << in
->ino
<< " dn " << it
->first
<< " " << it
->second
<< " ref " << it
->second
->ref
<< dendl
;
400 f
->open_object_section("dentry");
404 if (it
->second
->inode
)
405 dump_inode(f
, it
->second
->inode
.get(), did
, false);
410 void Client::dump_cache(Formatter
*f
)
414 ldout(cct
, 1) << "dump_cache" << dendl
;
417 f
->open_array_section("cache");
420 dump_inode(f
, root
, did
, true);
422 // make a second pass to catch anything disconnected
423 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator it
= inode_map
.begin();
424 it
!= inode_map
.end();
426 if (did
.count(it
->second
))
428 dump_inode(f
, it
->second
, did
, true);
435 void Client::dump_status(Formatter
*f
)
437 assert(client_lock
.is_locked_by_me());
439 ldout(cct
, 1) << __func__
<< dendl
;
441 const epoch_t osd_epoch
442 = objecter
->with_osdmap(std::mem_fn(&OSDMap::get_epoch
));
445 f
->open_object_section("metadata");
446 for (const auto& kv
: metadata
)
447 f
->dump_string(kv
.first
.c_str(), kv
.second
);
450 f
->dump_int("dentry_count", lru
.lru_get_size());
451 f
->dump_int("dentry_pinned_count", lru
.lru_get_num_pinned());
452 f
->dump_int("id", get_nodeid().v
);
453 f
->dump_int("inode_count", inode_map
.size());
454 f
->dump_int("mds_epoch", mdsmap
->get_epoch());
455 f
->dump_int("osd_epoch", osd_epoch
);
456 f
->dump_int("osd_epoch_barrier", cap_epoch_barrier
);
463 objectcacher
->start();
466 assert(!initialized
);
468 messenger
->add_dispatcher_tail(this);
469 client_lock
.Unlock();
475 void Client::_finish_init()
479 PerfCountersBuilder
plb(cct
, "client", l_c_first
, l_c_last
);
480 plb
.add_time_avg(l_c_reply
, "reply", "Latency of receiving a reply on metadata request");
481 plb
.add_time_avg(l_c_lat
, "lat", "Latency of processing a metadata request");
482 plb
.add_time_avg(l_c_wrlat
, "wrlat", "Latency of a file data write operation");
483 logger
.reset(plb
.create_perf_counters());
484 cct
->get_perfcounters_collection()->add(logger
.get());
486 client_lock
.Unlock();
488 cct
->_conf
->add_observer(this);
490 AdminSocket
* admin_socket
= cct
->get_admin_socket();
491 int ret
= admin_socket
->register_command("mds_requests",
494 "show in-progress mds requests");
496 lderr(cct
) << "error registering admin socket command: "
497 << cpp_strerror(-ret
) << dendl
;
499 ret
= admin_socket
->register_command("mds_sessions",
502 "show mds session state");
504 lderr(cct
) << "error registering admin socket command: "
505 << cpp_strerror(-ret
) << dendl
;
507 ret
= admin_socket
->register_command("dump_cache",
510 "show in-memory metadata cache contents");
512 lderr(cct
) << "error registering admin socket command: "
513 << cpp_strerror(-ret
) << dendl
;
515 ret
= admin_socket
->register_command("kick_stale_sessions",
516 "kick_stale_sessions",
518 "kick sessions that were remote reset");
520 lderr(cct
) << "error registering admin socket command: "
521 << cpp_strerror(-ret
) << dendl
;
523 ret
= admin_socket
->register_command("status",
526 "show overall client status");
528 lderr(cct
) << "error registering admin socket command: "
529 << cpp_strerror(-ret
) << dendl
;
534 client_lock
.Unlock();
537 void Client::shutdown()
539 ldout(cct
, 1) << "shutdown" << dendl
;
541 // If we were not mounted, but were being used for sending
542 // MDS commands, we may have sessions that need closing.
545 client_lock
.Unlock();
547 cct
->_conf
->remove_observer(this);
549 AdminSocket
* admin_socket
= cct
->get_admin_socket();
550 admin_socket
->unregister_command("mds_requests");
551 admin_socket
->unregister_command("mds_sessions");
552 admin_socket
->unregister_command("dump_cache");
553 admin_socket
->unregister_command("kick_stale_sessions");
554 admin_socket
->unregister_command("status");
556 if (ino_invalidate_cb
) {
557 ldout(cct
, 10) << "shutdown stopping cache invalidator finisher" << dendl
;
558 async_ino_invalidator
.wait_for_empty();
559 async_ino_invalidator
.stop();
562 if (dentry_invalidate_cb
) {
563 ldout(cct
, 10) << "shutdown stopping dentry invalidator finisher" << dendl
;
564 async_dentry_invalidator
.wait_for_empty();
565 async_dentry_invalidator
.stop();
568 if (switch_interrupt_cb
) {
569 ldout(cct
, 10) << "shutdown stopping interrupt finisher" << dendl
;
570 interrupt_finisher
.wait_for_empty();
571 interrupt_finisher
.stop();
575 ldout(cct
, 10) << "shutdown stopping remount finisher" << dendl
;
576 remount_finisher
.wait_for_empty();
577 remount_finisher
.stop();
580 objectcacher
->stop(); // outside of client_lock! this does a join.
586 client_lock
.Unlock();
588 objecter_finisher
.wait_for_empty();
589 objecter_finisher
.stop();
592 cct
->get_perfcounters_collection()->remove(logger
.get());
598 // ===================
599 // metadata cache stuff
601 void Client::trim_cache(bool trim_kernel_dcache
)
603 ldout(cct
, 20) << "trim_cache size " << lru
.lru_get_size() << " max " << lru
.lru_get_max() << dendl
;
605 while (lru
.lru_get_size() != last
) {
606 last
= lru
.lru_get_size();
608 if (lru
.lru_get_size() <= lru
.lru_get_max()) break;
611 Dentry
*dn
= static_cast<Dentry
*>(lru
.lru_get_next_expire());
618 if (trim_kernel_dcache
&& lru
.lru_get_size() > lru
.lru_get_max())
619 _invalidate_kernel_dcache();
622 if (lru
.lru_get_size() == 0 && root
&& root
->get_num_ref() == 0 && inode_map
.size() == 1 + root_parents
.size()) {
623 ldout(cct
, 15) << "trim_cache trimmed root " << root
<< dendl
;
627 while (!root_parents
.empty())
628 root_parents
.erase(root_parents
.begin());
634 void Client::trim_cache_for_reconnect(MetaSession
*s
)
636 mds_rank_t mds
= s
->mds_num
;
637 ldout(cct
, 20) << "trim_cache_for_reconnect mds." << mds
<< dendl
;
640 list
<Dentry
*> skipped
;
641 while (lru
.lru_get_size() > 0) {
642 Dentry
*dn
= static_cast<Dentry
*>(lru
.lru_expire());
646 if ((dn
->inode
&& dn
->inode
->caps
.count(mds
)) ||
647 dn
->dir
->parent_inode
->caps
.count(mds
)) {
651 skipped
.push_back(dn
);
654 for(list
<Dentry
*>::iterator p
= skipped
.begin(); p
!= skipped
.end(); ++p
)
655 lru
.lru_insert_mid(*p
);
657 ldout(cct
, 20) << "trim_cache_for_reconnect mds." << mds
658 << " trimmed " << trimmed
<< " dentries" << dendl
;
660 if (s
->caps
.size() > 0)
661 _invalidate_kernel_dcache();
664 void Client::trim_dentry(Dentry
*dn
)
666 ldout(cct
, 15) << "trim_dentry unlinking dn " << dn
->name
667 << " in dir " << hex
<< dn
->dir
->parent_inode
->ino
670 Inode
*diri
= dn
->dir
->parent_inode
;
671 diri
->dir_release_count
++;
672 clear_dir_complete_and_ordered(diri
, true);
674 unlink(dn
, false, false); // drop dir, drop dentry
678 void Client::update_inode_file_bits(Inode
*in
,
679 uint64_t truncate_seq
, uint64_t truncate_size
,
680 uint64_t size
, uint64_t change_attr
,
681 uint64_t time_warp_seq
, utime_t ctime
,
684 version_t inline_version
,
685 bufferlist
& inline_data
,
689 ldout(cct
, 10) << "update_inode_file_bits " << *in
<< " " << ccap_string(issued
)
690 << " mtime " << mtime
<< dendl
;
691 ldout(cct
, 25) << "truncate_seq: mds " << truncate_seq
<< " local "
692 << in
->truncate_seq
<< " time_warp_seq: mds " << time_warp_seq
693 << " local " << in
->time_warp_seq
<< dendl
;
694 uint64_t prior_size
= in
->size
;
696 if (inline_version
> in
->inline_version
) {
697 in
->inline_data
= inline_data
;
698 in
->inline_version
= inline_version
;
701 /* always take a newer change attr */
702 if (change_attr
> in
->change_attr
)
703 in
->change_attr
= change_attr
;
705 if (truncate_seq
> in
->truncate_seq
||
706 (truncate_seq
== in
->truncate_seq
&& size
> in
->size
)) {
707 ldout(cct
, 10) << "size " << in
->size
<< " -> " << size
<< dendl
;
709 in
->reported_size
= size
;
710 if (truncate_seq
!= in
->truncate_seq
) {
711 ldout(cct
, 10) << "truncate_seq " << in
->truncate_seq
<< " -> "
712 << truncate_seq
<< dendl
;
713 in
->truncate_seq
= truncate_seq
;
714 in
->oset
.truncate_seq
= truncate_seq
;
716 // truncate cached file data
717 if (prior_size
> size
) {
718 _invalidate_inode_cache(in
, truncate_size
, prior_size
- truncate_size
);
722 // truncate inline data
723 if (in
->inline_version
< CEPH_INLINE_NONE
) {
724 uint32_t len
= in
->inline_data
.length();
726 in
->inline_data
.splice(size
, len
- size
);
729 if (truncate_seq
>= in
->truncate_seq
&&
730 in
->truncate_size
!= truncate_size
) {
732 ldout(cct
, 10) << "truncate_size " << in
->truncate_size
<< " -> "
733 << truncate_size
<< dendl
;
734 in
->truncate_size
= truncate_size
;
735 in
->oset
.truncate_size
= truncate_size
;
737 ldout(cct
, 0) << "Hmmm, truncate_seq && truncate_size changed on non-file inode!" << dendl
;
741 // be careful with size, mtime, atime
742 if (issued
& (CEPH_CAP_FILE_EXCL
|
744 CEPH_CAP_FILE_BUFFER
|
746 CEPH_CAP_XATTR_EXCL
)) {
747 ldout(cct
, 30) << "Yay have enough caps to look at our times" << dendl
;
748 if (ctime
> in
->ctime
)
750 if (time_warp_seq
> in
->time_warp_seq
) {
751 ldout(cct
, 10) << "mds time_warp_seq " << time_warp_seq
<< " on inode " << *in
752 << " is higher than local time_warp_seq "
753 << in
->time_warp_seq
<< dendl
;
754 //the mds updated times, so take those!
757 in
->time_warp_seq
= time_warp_seq
;
758 } else if (time_warp_seq
== in
->time_warp_seq
) {
760 if (mtime
> in
->mtime
)
762 if (atime
> in
->atime
)
764 } else if (issued
& CEPH_CAP_FILE_EXCL
) {
765 //ignore mds values as we have a higher seq
768 ldout(cct
, 30) << "Don't have enough caps, just taking mds' time values" << dendl
;
769 if (time_warp_seq
>= in
->time_warp_seq
) {
773 in
->time_warp_seq
= time_warp_seq
;
777 ldout(cct
, 0) << "WARNING: " << *in
<< " mds time_warp_seq "
778 << time_warp_seq
<< " is lower than local time_warp_seq "
784 void Client::_fragmap_remove_non_leaves(Inode
*in
)
786 for (map
<frag_t
,int>::iterator p
= in
->fragmap
.begin(); p
!= in
->fragmap
.end(); )
787 if (!in
->dirfragtree
.is_leaf(p
->first
))
788 in
->fragmap
.erase(p
++);
793 void Client::_fragmap_remove_stopped_mds(Inode
*in
, mds_rank_t mds
)
795 for (auto p
= in
->fragmap
.begin(); p
!= in
->fragmap
.end(); )
796 if (p
->second
== mds
)
797 in
->fragmap
.erase(p
++);
802 Inode
* Client::add_update_inode(InodeStat
*st
, utime_t from
,
803 MetaSession
*session
,
804 const UserPerm
& request_perms
)
807 bool was_new
= false;
808 if (inode_map
.count(st
->vino
)) {
809 in
= inode_map
[st
->vino
];
810 ldout(cct
, 12) << "add_update_inode had " << *in
<< " caps " << ccap_string(st
->cap
.caps
) << dendl
;
812 in
= new Inode(this, st
->vino
, &st
->layout
);
813 inode_map
[st
->vino
] = in
;
815 if (use_faked_inos())
816 _assign_faked_ino(in
);
822 } else if (!mounted
) {
823 root_parents
[root_ancestor
] = in
;
828 in
->ino
= st
->vino
.ino
;
829 in
->snapid
= st
->vino
.snapid
;
830 in
->mode
= st
->mode
& S_IFMT
;
835 if (in
->is_symlink())
836 in
->symlink
= st
->symlink
;
839 ldout(cct
, 12) << "add_update_inode adding " << *in
<< " caps " << ccap_string(st
->cap
.caps
) << dendl
;
842 return in
; // as with readdir returning indoes in different snaprealms (no caps!)
844 // only update inode if mds info is strictly newer, or it is the same and projected (odd).
845 bool updating_inode
= false;
847 if (st
->version
== 0 ||
848 (in
->version
& ~1) < st
->version
) {
849 updating_inode
= true;
852 issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
853 issued
|= implemented
;
855 in
->version
= st
->version
;
857 if ((issued
& CEPH_CAP_AUTH_EXCL
) == 0) {
861 in
->btime
= st
->btime
;
864 if ((issued
& CEPH_CAP_LINK_EXCL
) == 0) {
865 in
->nlink
= st
->nlink
;
868 in
->dirstat
= st
->dirstat
;
869 in
->rstat
= st
->rstat
;
870 in
->quota
= st
->quota
;
871 in
->layout
= st
->layout
;
874 in
->dir_layout
= st
->dir_layout
;
875 ldout(cct
, 20) << " dir hash is " << (int)in
->dir_layout
.dl_dir_hash
<< dendl
;
878 update_inode_file_bits(in
, st
->truncate_seq
, st
->truncate_size
, st
->size
,
879 st
->change_attr
, st
->time_warp_seq
, st
->ctime
,
880 st
->mtime
, st
->atime
, st
->inline_version
,
881 st
->inline_data
, issued
);
882 } else if (st
->inline_version
> in
->inline_version
) {
883 in
->inline_data
= st
->inline_data
;
884 in
->inline_version
= st
->inline_version
;
887 if ((in
->xattr_version
== 0 || !(issued
& CEPH_CAP_XATTR_EXCL
)) &&
888 st
->xattrbl
.length() &&
889 st
->xattr_version
> in
->xattr_version
) {
890 bufferlist::iterator p
= st
->xattrbl
.begin();
891 ::decode(in
->xattrs
, p
);
892 in
->xattr_version
= st
->xattr_version
;
895 // move me if/when version reflects fragtree changes.
896 if (in
->dirfragtree
!= st
->dirfragtree
) {
897 in
->dirfragtree
= st
->dirfragtree
;
898 _fragmap_remove_non_leaves(in
);
901 if (in
->snapid
== CEPH_NOSNAP
) {
902 add_update_cap(in
, session
, st
->cap
.cap_id
, st
->cap
.caps
, st
->cap
.seq
,
903 st
->cap
.mseq
, inodeno_t(st
->cap
.realm
), st
->cap
.flags
,
905 if (in
->auth_cap
&& in
->auth_cap
->session
== session
)
906 in
->max_size
= st
->max_size
;
908 in
->snap_caps
|= st
->cap
.caps
;
910 // setting I_COMPLETE needs to happen after adding the cap
911 if (updating_inode
&&
913 (st
->cap
.caps
& CEPH_CAP_FILE_SHARED
) &&
914 (issued
& CEPH_CAP_FILE_EXCL
) == 0 &&
915 in
->dirstat
.nfiles
== 0 &&
916 in
->dirstat
.nsubdirs
== 0) {
917 ldout(cct
, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on empty dir " << *in
<< dendl
;
918 in
->flags
|= I_COMPLETE
| I_DIR_ORDERED
;
920 ldout(cct
, 10) << " dir is open on empty dir " << in
->ino
<< " with "
921 << in
->dir
->dentries
.size() << " entries, marking all dentries null" << dendl
;
922 in
->dir
->readdir_cache
.clear();
923 for (auto p
= in
->dir
->dentries
.begin();
924 p
!= in
->dir
->dentries
.end();
926 unlink(p
->second
, true, true); // keep dir, keep dentry
928 if (in
->dir
->dentries
.empty())
938 * insert_dentry_inode - insert + link a single dentry + inode into the metadata cache.
940 Dentry
*Client::insert_dentry_inode(Dir
*dir
, const string
& dname
, LeaseStat
*dlease
,
941 Inode
*in
, utime_t from
, MetaSession
*session
,
945 if (dir
->dentries
.count(dname
))
946 dn
= dir
->dentries
[dname
];
948 ldout(cct
, 12) << "insert_dentry_inode '" << dname
<< "' vino " << in
->vino()
949 << " in dir " << dir
->parent_inode
->vino() << " dn " << dn
952 if (dn
&& dn
->inode
) {
953 if (dn
->inode
->vino() == in
->vino()) {
955 ldout(cct
, 12) << " had dentry " << dname
956 << " with correct vino " << dn
->inode
->vino()
959 ldout(cct
, 12) << " had dentry " << dname
960 << " with WRONG vino " << dn
->inode
->vino()
962 unlink(dn
, true, true); // keep dir, keep dentry
966 if (!dn
|| !dn
->inode
) {
967 InodeRef
tmp_ref(in
);
969 if (old_dentry
->dir
!= dir
) {
970 Inode
*old_diri
= old_dentry
->dir
->parent_inode
;
971 old_diri
->dir_ordered_count
++;
972 clear_dir_complete_and_ordered(old_diri
, false);
974 unlink(old_dentry
, dir
== old_dentry
->dir
, false); // drop dentry, keep dir open if its the same dir
976 Inode
*diri
= dir
->parent_inode
;
977 diri
->dir_ordered_count
++;
978 clear_dir_complete_and_ordered(diri
, false);
979 dn
= link(dir
, dname
, in
, dn
);
982 update_dentry_lease(dn
, dlease
, from
, session
);
986 void Client::update_dentry_lease(Dentry
*dn
, LeaseStat
*dlease
, utime_t from
, MetaSession
*session
)
989 dttl
+= (float)dlease
->duration_ms
/ 1000.0;
993 if (dlease
->mask
& CEPH_LOCK_DN
) {
994 if (dttl
> dn
->lease_ttl
) {
995 ldout(cct
, 10) << "got dentry lease on " << dn
->name
996 << " dur " << dlease
->duration_ms
<< "ms ttl " << dttl
<< dendl
;
997 dn
->lease_ttl
= dttl
;
998 dn
->lease_mds
= session
->mds_num
;
999 dn
->lease_seq
= dlease
->seq
;
1000 dn
->lease_gen
= session
->cap_gen
;
1003 dn
->cap_shared_gen
= dn
->dir
->parent_inode
->shared_gen
;
1008 * update MDS location cache for a single inode
1010 void Client::update_dir_dist(Inode
*in
, DirStat
*dst
)
1013 ldout(cct
, 20) << "got dirfrag map for " << in
->ino
<< " frag " << dst
->frag
<< " to mds " << dst
->auth
<< dendl
;
1014 if (dst
->auth
>= 0) {
1015 in
->fragmap
[dst
->frag
] = dst
->auth
;
1017 in
->fragmap
.erase(dst
->frag
);
1019 if (!in
->dirfragtree
.is_leaf(dst
->frag
)) {
1020 in
->dirfragtree
.force_to_leaf(cct
, dst
->frag
);
1021 _fragmap_remove_non_leaves(in
);
1025 in
->dir_replicated
= !dst
->dist
.empty(); // FIXME that's just one frag!
1029 if (!st->dirfrag_dist.empty()) { // FIXME
1030 set<int> dist = st->dirfrag_dist.begin()->second;
1031 if (dist.empty() && !in->dir_contacts.empty())
1032 ldout(cct, 9) << "lost dist spec for " << in->ino
1033 << " " << dist << dendl;
1034 if (!dist.empty() && in->dir_contacts.empty())
1035 ldout(cct, 9) << "got dist spec for " << in->ino
1036 << " " << dist << dendl;
1037 in->dir_contacts = dist;
1042 void Client::clear_dir_complete_and_ordered(Inode
*diri
, bool complete
)
1044 if (diri
->flags
& I_COMPLETE
) {
1046 ldout(cct
, 10) << " clearing (I_COMPLETE|I_DIR_ORDERED) on " << *diri
<< dendl
;
1047 diri
->flags
&= ~(I_COMPLETE
| I_DIR_ORDERED
);
1049 if (diri
->flags
& I_DIR_ORDERED
) {
1050 ldout(cct
, 10) << " clearing I_DIR_ORDERED on " << *diri
<< dendl
;
1051 diri
->flags
&= ~I_DIR_ORDERED
;
1055 diri
->dir
->readdir_cache
.clear();
1060 * insert results from readdir or lssnap into the metadata cache.
1062 void Client::insert_readdir_results(MetaRequest
*request
, MetaSession
*session
, Inode
*diri
) {
1064 MClientReply
*reply
= request
->reply
;
1065 ConnectionRef con
= request
->reply
->get_connection();
1066 uint64_t features
= con
->get_features();
1068 dir_result_t
*dirp
= request
->dirp
;
1071 // the extra buffer list is only set for readdir and lssnap replies
1072 bufferlist::iterator p
= reply
->get_extra_bl().begin();
1075 if (request
->head
.op
== CEPH_MDS_OP_LSSNAP
) {
1077 diri
= open_snapdir(diri
);
1080 // only open dir if we're actually adding stuff to it!
1081 Dir
*dir
= diri
->open_dir();
1091 bool end
= ((unsigned)flags
& CEPH_READDIR_FRAG_END
);
1092 bool hash_order
= ((unsigned)flags
& CEPH_READDIR_HASH_ORDER
);
1094 frag_t fg
= (unsigned)request
->head
.args
.readdir
.frag
;
1095 unsigned readdir_offset
= dirp
->next_offset
;
1096 string readdir_start
= dirp
->last_name
;
1097 assert(!readdir_start
.empty() || readdir_offset
== 2);
1099 unsigned last_hash
= 0;
1101 if (!readdir_start
.empty()) {
1102 last_hash
= ceph_frag_value(diri
->hash_dentry_name(readdir_start
));
1103 } else if (flags
& CEPH_READDIR_OFFSET_HASH
) {
1104 /* mds understands offset_hash */
1105 last_hash
= (unsigned)request
->head
.args
.readdir
.offset_hash
;
1109 if (fg
!= dst
.frag
) {
1110 ldout(cct
, 10) << "insert_trace got new frag " << fg
<< " -> " << dst
.frag
<< dendl
;
1114 readdir_start
.clear();
1115 dirp
->offset
= dir_result_t::make_fpos(fg
, readdir_offset
, false);
1119 ldout(cct
, 10) << __func__
<< " " << numdn
<< " readdir items, end=" << end
1120 << ", hash_order=" << hash_order
1121 << ", readdir_start " << readdir_start
1122 << ", last_hash " << last_hash
1123 << ", next_offset " << readdir_offset
<< dendl
;
1125 if (diri
->snapid
!= CEPH_SNAPDIR
&&
1126 fg
.is_leftmost() && readdir_offset
== 2 &&
1127 !(hash_order
&& last_hash
)) {
1128 dirp
->release_count
= diri
->dir_release_count
;
1129 dirp
->ordered_count
= diri
->dir_ordered_count
;
1130 dirp
->start_shared_gen
= diri
->shared_gen
;
1131 dirp
->cache_index
= 0;
1134 dirp
->buffer_frag
= fg
;
1136 _readdir_drop_dirp_buffer(dirp
);
1137 dirp
->buffer
.reserve(numdn
);
1141 for (unsigned i
=0; i
<numdn
; i
++) {
1143 ::decode(dlease
, p
);
1144 InodeStat
ist(p
, features
);
1146 ldout(cct
, 15) << "" << i
<< ": '" << dname
<< "'" << dendl
;
1148 Inode
*in
= add_update_inode(&ist
, request
->sent_stamp
, session
,
1151 if (diri
->dir
->dentries
.count(dname
)) {
1152 Dentry
*olddn
= diri
->dir
->dentries
[dname
];
1153 if (olddn
->inode
!= in
) {
1154 // replace incorrect dentry
1155 unlink(olddn
, true, true); // keep dir, dentry
1156 dn
= link(dir
, dname
, in
, olddn
);
1157 assert(dn
== olddn
);
1165 dn
= link(dir
, dname
, in
, NULL
);
1168 update_dentry_lease(dn
, &dlease
, request
->sent_stamp
, session
);
1170 unsigned hash
= ceph_frag_value(diri
->hash_dentry_name(dname
));
1171 if (hash
!= last_hash
)
1174 dn
->offset
= dir_result_t::make_fpos(hash
, readdir_offset
++, true);
1176 dn
->offset
= dir_result_t::make_fpos(fg
, readdir_offset
++, false);
1178 // add to readdir cache
1179 if (dirp
->release_count
== diri
->dir_release_count
&&
1180 dirp
->ordered_count
== diri
->dir_ordered_count
&&
1181 dirp
->start_shared_gen
== diri
->shared_gen
) {
1182 if (dirp
->cache_index
== dir
->readdir_cache
.size()) {
1184 assert(!dirp
->inode
->is_complete_and_ordered());
1185 dir
->readdir_cache
.reserve(dirp
->cache_index
+ numdn
);
1187 dir
->readdir_cache
.push_back(dn
);
1188 } else if (dirp
->cache_index
< dir
->readdir_cache
.size()) {
1189 if (dirp
->inode
->is_complete_and_ordered())
1190 assert(dir
->readdir_cache
[dirp
->cache_index
] == dn
);
1192 dir
->readdir_cache
[dirp
->cache_index
] = dn
;
1194 assert(0 == "unexpected readdir buffer idx");
1196 dirp
->cache_index
++;
1198 // add to cached result list
1199 dirp
->buffer
.push_back(dir_result_t::dentry(dn
->offset
, dname
, in
));
1200 ldout(cct
, 15) << __func__
<< " " << hex
<< dn
->offset
<< dec
<< ": '" << dname
<< "' -> " << in
->ino
<< dendl
;
1204 dirp
->last_name
= dname
;
1206 dirp
->next_offset
= 2;
1208 dirp
->next_offset
= readdir_offset
;
1210 if (dir
->is_empty())
1217 * insert a trace from a MDS reply into the cache.
1219 Inode
* Client::insert_trace(MetaRequest
*request
, MetaSession
*session
)
1221 MClientReply
*reply
= request
->reply
;
1222 int op
= request
->get_op();
1224 ldout(cct
, 10) << "insert_trace from " << request
->sent_stamp
<< " mds." << session
->mds_num
1225 << " is_target=" << (int)reply
->head
.is_target
1226 << " is_dentry=" << (int)reply
->head
.is_dentry
1229 bufferlist::iterator p
= reply
->get_trace_bl().begin();
1230 if (request
->got_unsafe
) {
1231 ldout(cct
, 10) << "insert_trace -- already got unsafe; ignoring" << dendl
;
1237 ldout(cct
, 10) << "insert_trace -- no trace" << dendl
;
1239 Dentry
*d
= request
->dentry();
1241 Inode
*diri
= d
->dir
->parent_inode
;
1242 diri
->dir_release_count
++;
1243 clear_dir_complete_and_ordered(diri
, true);
1246 if (d
&& reply
->get_result() == 0) {
1247 if (op
== CEPH_MDS_OP_RENAME
) {
1249 Dentry
*od
= request
->old_dentry();
1250 ldout(cct
, 10) << " unlinking rename src dn " << od
<< " for traceless reply" << dendl
;
1252 unlink(od
, true, true); // keep dir, dentry
1253 } else if (op
== CEPH_MDS_OP_RMDIR
||
1254 op
== CEPH_MDS_OP_UNLINK
) {
1256 ldout(cct
, 10) << " unlinking unlink/rmdir dn " << d
<< " for traceless reply" << dendl
;
1257 unlink(d
, true, true); // keep dir, dentry
1263 ConnectionRef con
= request
->reply
->get_connection();
1264 uint64_t features
= con
->get_features();
1265 ldout(cct
, 10) << " features 0x" << hex
<< features
<< dec
<< dendl
;
1268 SnapRealm
*realm
= NULL
;
1269 if (reply
->snapbl
.length())
1270 update_snap_trace(reply
->snapbl
, &realm
);
1272 ldout(cct
, 10) << " hrm "
1273 << " is_target=" << (int)reply
->head
.is_target
1274 << " is_dentry=" << (int)reply
->head
.is_dentry
1283 if (reply
->head
.is_dentry
) {
1284 dirst
.decode(p
, features
);
1287 ::decode(dlease
, p
);
1291 if (reply
->head
.is_target
) {
1292 ist
.decode(p
, features
);
1293 if (cct
->_conf
->client_debug_getattr_caps
) {
1294 unsigned wanted
= 0;
1295 if (op
== CEPH_MDS_OP_GETATTR
|| op
== CEPH_MDS_OP_LOOKUP
)
1296 wanted
= request
->head
.args
.getattr
.mask
;
1297 else if (op
== CEPH_MDS_OP_OPEN
|| op
== CEPH_MDS_OP_CREATE
)
1298 wanted
= request
->head
.args
.open
.mask
;
1300 if ((wanted
& CEPH_CAP_XATTR_SHARED
) &&
1301 !(ist
.xattr_version
> 0 && ist
.xattrbl
.length() > 0))
1302 assert(0 == "MDS reply does not contain xattrs");
1305 in
= add_update_inode(&ist
, request
->sent_stamp
, session
,
1310 if (reply
->head
.is_dentry
) {
1311 diri
= add_update_inode(&dirst
, request
->sent_stamp
, session
,
1313 update_dir_dist(diri
, &dst
); // dir stat info is attached to ..
1316 Dir
*dir
= diri
->open_dir();
1317 insert_dentry_inode(dir
, dname
, &dlease
, in
, request
->sent_stamp
, session
,
1318 (op
== CEPH_MDS_OP_RENAME
) ? request
->old_dentry() : NULL
);
1321 if (diri
->dir
&& diri
->dir
->dentries
.count(dname
)) {
1322 dn
= diri
->dir
->dentries
[dname
];
1324 diri
->dir_ordered_count
++;
1325 clear_dir_complete_and_ordered(diri
, false);
1326 unlink(dn
, true, true); // keep dir, dentry
1329 if (dlease
.duration_ms
> 0) {
1331 Dir
*dir
= diri
->open_dir();
1332 dn
= link(dir
, dname
, NULL
, NULL
);
1334 update_dentry_lease(dn
, &dlease
, request
->sent_stamp
, session
);
1337 } else if (op
== CEPH_MDS_OP_LOOKUPSNAP
||
1338 op
== CEPH_MDS_OP_MKSNAP
) {
1339 ldout(cct
, 10) << " faking snap lookup weirdness" << dendl
;
1340 // fake it for snap lookup
1341 vinodeno_t vino
= ist
.vino
;
1342 vino
.snapid
= CEPH_SNAPDIR
;
1343 assert(inode_map
.count(vino
));
1344 diri
= inode_map
[vino
];
1346 string dname
= request
->path
.last_dentry();
1349 dlease
.duration_ms
= 0;
1352 Dir
*dir
= diri
->open_dir();
1353 insert_dentry_inode(dir
, dname
, &dlease
, in
, request
->sent_stamp
, session
);
1355 if (diri
->dir
&& diri
->dir
->dentries
.count(dname
)) {
1356 Dentry
*dn
= diri
->dir
->dentries
[dname
];
1358 unlink(dn
, true, true); // keep dir, dentry
1364 if (op
== CEPH_MDS_OP_READDIR
||
1365 op
== CEPH_MDS_OP_LSSNAP
) {
1366 insert_readdir_results(request
, session
, in
);
1367 } else if (op
== CEPH_MDS_OP_LOOKUPNAME
) {
1368 // hack: return parent inode instead
1372 if (request
->dentry() == NULL
&& in
!= request
->inode()) {
1373 // pin the target inode if its parent dentry is not pinned
1374 request
->set_other_inode(in
);
1379 put_snap_realm(realm
);
1381 request
->target
= in
;
1387 mds_rank_t
Client::choose_target_mds(MetaRequest
*req
, Inode
** phash_diri
)
1389 mds_rank_t mds
= MDS_RANK_NONE
;
1391 bool is_hash
= false;
1397 if (req
->resend_mds
>= 0) {
1398 mds
= req
->resend_mds
;
1399 req
->resend_mds
= -1;
1400 ldout(cct
, 10) << "choose_target_mds resend_mds specified as mds." << mds
<< dendl
;
1404 if (cct
->_conf
->client_use_random_mds
)
1410 ldout(cct
, 20) << "choose_target_mds starting with req->inode " << *in
<< dendl
;
1411 if (req
->path
.depth()) {
1412 hash
= in
->hash_dentry_name(req
->path
[0]);
1413 ldout(cct
, 20) << "choose_target_mds inode dir hash is " << (int)in
->dir_layout
.dl_dir_hash
1414 << " on " << req
->path
[0]
1415 << " => " << hash
<< dendl
;
1420 in
= de
->inode
.get();
1421 ldout(cct
, 20) << "choose_target_mds starting with req->dentry inode " << *in
<< dendl
;
1423 in
= de
->dir
->parent_inode
;
1424 hash
= in
->hash_dentry_name(de
->name
);
1425 ldout(cct
, 20) << "choose_target_mds dentry dir hash is " << (int)in
->dir_layout
.dl_dir_hash
1426 << " on " << de
->name
1427 << " => " << hash
<< dendl
;
1432 if (in
->snapid
!= CEPH_NOSNAP
) {
1433 ldout(cct
, 10) << "choose_target_mds " << *in
<< " is snapped, using nonsnap parent" << dendl
;
1434 while (in
->snapid
!= CEPH_NOSNAP
) {
1435 if (in
->snapid
== CEPH_SNAPDIR
)
1436 in
= in
->snapdir_parent
.get();
1437 else if (!in
->dn_set
.empty())
1438 /* In most cases there will only be one dentry, so getting it
1439 * will be the correct action. If there are multiple hard links,
1440 * I think the MDS should be able to redirect as needed*/
1441 in
= in
->get_first_parent()->dir
->parent_inode
;
1443 ldout(cct
, 10) << "got unlinked inode, can't look at parent" << dendl
;
1450 ldout(cct
, 20) << "choose_target_mds " << *in
<< " is_hash=" << is_hash
1451 << " hash=" << hash
<< dendl
;
1453 if (is_hash
&& S_ISDIR(in
->mode
) && !in
->fragmap
.empty()) {
1454 frag_t fg
= in
->dirfragtree
[hash
];
1455 if (in
->fragmap
.count(fg
)) {
1456 mds
= in
->fragmap
[fg
];
1459 ldout(cct
, 10) << "choose_target_mds from dirfragtree hash" << dendl
;
1464 if (req
->auth_is_best())
1466 if (!cap
&& !in
->caps
.empty())
1467 cap
= in
->caps
.begin()->second
;
1470 mds
= cap
->session
->mds_num
;
1471 ldout(cct
, 10) << "choose_target_mds from caps on inode " << *in
<< dendl
;
1478 mds
= _get_random_up_mds();
1479 ldout(cct
, 10) << "did not get mds through better means, so chose random mds " << mds
<< dendl
;
1483 ldout(cct
, 20) << "mds is " << mds
<< dendl
;
1488 void Client::connect_mds_targets(mds_rank_t mds
)
1490 ldout(cct
, 10) << "connect_mds_targets for mds." << mds
<< dendl
;
1491 assert(mds_sessions
.count(mds
));
1492 const MDSMap::mds_info_t
& info
= mdsmap
->get_mds_info(mds
);
1493 for (set
<mds_rank_t
>::const_iterator q
= info
.export_targets
.begin();
1494 q
!= info
.export_targets
.end();
1496 if (mds_sessions
.count(*q
) == 0 &&
1497 mdsmap
->is_clientreplay_or_active_or_stopping(*q
)) {
1498 ldout(cct
, 10) << "check_mds_sessions opening mds." << mds
1499 << " export target mds." << *q
<< dendl
;
1500 _open_mds_session(*q
);
1505 void Client::dump_mds_sessions(Formatter
*f
)
1507 f
->dump_int("id", get_nodeid().v
);
1508 f
->open_array_section("sessions");
1509 for (map
<mds_rank_t
,MetaSession
*>::const_iterator p
= mds_sessions
.begin(); p
!= mds_sessions
.end(); ++p
) {
1510 f
->open_object_section("session");
1515 f
->dump_int("mdsmap_epoch", mdsmap
->get_epoch());
1517 void Client::dump_mds_requests(Formatter
*f
)
1519 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
1520 p
!= mds_requests
.end();
1522 f
->open_object_section("request");
1528 int Client::verify_reply_trace(int r
,
1529 MetaRequest
*request
, MClientReply
*reply
,
1530 InodeRef
*ptarget
, bool *pcreated
,
1531 const UserPerm
& perms
)
1533 // check whether this request actually did the create, and set created flag
1534 bufferlist extra_bl
;
1535 inodeno_t created_ino
;
1536 bool got_created_ino
= false;
1537 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
;
1539 extra_bl
.claim(reply
->get_extra_bl());
1540 if (extra_bl
.length() >= 8) {
1541 // if the extra bufferlist has a buffer, we assume its the created inode
1542 // and that this request to create succeeded in actually creating
1543 // the inode (won the race with other create requests)
1544 ::decode(created_ino
, extra_bl
);
1545 got_created_ino
= true;
1546 ldout(cct
, 10) << "make_request created ino " << created_ino
<< dendl
;
1550 *pcreated
= got_created_ino
;
1552 if (request
->target
) {
1553 *ptarget
= request
->target
;
1554 ldout(cct
, 20) << "make_request target is " << *ptarget
->get() << dendl
;
1556 if (got_created_ino
&& (p
= inode_map
.find(vinodeno_t(created_ino
, CEPH_NOSNAP
))) != inode_map
.end()) {
1557 (*ptarget
) = p
->second
;
1558 ldout(cct
, 20) << "make_request created, target is " << *ptarget
->get() << dendl
;
1560 // we got a traceless reply, and need to look up what we just
1561 // created. for now, do this by name. someday, do this by the
1562 // ino... which we know! FIXME.
1564 Dentry
*d
= request
->dentry();
1567 ldout(cct
, 10) << "make_request got traceless reply, looking up #"
1568 << d
->dir
->parent_inode
->ino
<< "/" << d
->name
1569 << " got_ino " << got_created_ino
1570 << " ino " << created_ino
1572 r
= _do_lookup(d
->dir
->parent_inode
, d
->name
, request
->regetattr_mask
,
1575 // if the dentry is not linked, just do our best. see #5021.
1576 assert(0 == "how did this happen? i want logs!");
1579 Inode
*in
= request
->inode();
1580 ldout(cct
, 10) << "make_request got traceless reply, forcing getattr on #"
1581 << in
->ino
<< dendl
;
1582 r
= _getattr(in
, request
->regetattr_mask
, perms
, true);
1586 // verify ino returned in reply and trace_dist are the same
1587 if (got_created_ino
&&
1588 created_ino
.val
!= target
->ino
.val
) {
1589 ldout(cct
, 5) << "create got ino " << created_ino
<< " but then failed on lookup; EINTR?" << dendl
;
1593 ptarget
->swap(target
);
1605 * Blocking helper to make an MDS request.
1607 * If the ptarget flag is set, behavior changes slightly: the caller
1608 * expects to get a pointer to the inode we are creating or operating
1609 * on. As a result, we will follow up any traceless mutation reply
1610 * with a getattr or lookup to transparently handle a traceless reply
1611 * from the MDS (as when the MDS restarts and the client has to replay
1614 * @param request the MetaRequest to execute
1615 * @param perms The user uid/gid to execute as (eventually, full group lists?)
1616 * @param ptarget [optional] address to store a pointer to the target inode we want to create or operate on
1617 * @param pcreated [optional; required if ptarget] where to store a bool of whether our create atomically created a file
1618 * @param use_mds [optional] prefer a specific mds (-1 for default)
1619 * @param pdirbl [optional; disallowed if ptarget] where to pass extra reply payload to the caller
1621 int Client::make_request(MetaRequest
*request
,
1622 const UserPerm
& perms
,
1623 InodeRef
*ptarget
, bool *pcreated
,
1629 // assign a unique tid
1630 ceph_tid_t tid
= ++last_tid
;
1631 request
->set_tid(tid
);
1634 request
->op_stamp
= ceph_clock_now();
1637 mds_requests
[tid
] = request
->get();
1638 if (oldest_tid
== 0 && request
->get_op() != CEPH_MDS_OP_SETFILELOCK
)
1641 request
->set_caller_perms(perms
);
1643 if (cct
->_conf
->client_inject_fixed_oldest_tid
) {
1644 ldout(cct
, 20) << __func__
<< " injecting fixed oldest_client_tid(1)" << dendl
;
1645 request
->set_oldest_client_tid(1);
1647 request
->set_oldest_client_tid(oldest_tid
);
1652 request
->resend_mds
= use_mds
;
1655 if (request
->aborted())
1659 request
->abort(-EBLACKLISTED
);
1665 request
->caller_cond
= &caller_cond
;
1668 Inode
*hash_diri
= NULL
;
1669 mds_rank_t mds
= choose_target_mds(request
, &hash_diri
);
1670 int mds_state
= (mds
== MDS_RANK_NONE
) ? MDSMap::STATE_NULL
: mdsmap
->get_state(mds
);
1671 if (mds_state
!= MDSMap::STATE_ACTIVE
&& mds_state
!= MDSMap::STATE_STOPPING
) {
1672 if (mds_state
== MDSMap::STATE_NULL
&& mds
>= mdsmap
->get_max_mds()) {
1674 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, remove it from fragmap" << dendl
;
1675 _fragmap_remove_stopped_mds(hash_diri
, mds
);
1677 ldout(cct
, 10) << " target mds." << mds
<< " has stopped, trying a random mds" << dendl
;
1678 request
->resend_mds
= _get_random_up_mds();
1681 ldout(cct
, 10) << " target mds." << mds
<< " not active, waiting for new mdsmap" << dendl
;
1682 wait_on_list(waiting_for_mdsmap
);
1688 MetaSession
*session
= NULL
;
1689 if (!have_open_session(mds
)) {
1690 session
= _get_or_open_mds_session(mds
);
1693 if (session
->state
== MetaSession::STATE_OPENING
) {
1694 ldout(cct
, 10) << "waiting for session to mds." << mds
<< " to open" << dendl
;
1695 wait_on_context_list(session
->waiting_for_open
);
1696 // Abort requests on REJECT from MDS
1697 if (rejected_by_mds
.count(mds
)) {
1698 request
->abort(-EPERM
);
1704 if (!have_open_session(mds
))
1707 session
= mds_sessions
[mds
];
1711 send_request(request
, session
);
1714 ldout(cct
, 20) << "awaiting reply|forward|kick on " << &caller_cond
<< dendl
;
1715 request
->kick
= false;
1716 while (!request
->reply
&& // reply
1717 request
->resend_mds
< 0 && // forward
1719 caller_cond
.Wait(client_lock
);
1720 request
->caller_cond
= NULL
;
1722 // did we get a reply?
1727 if (!request
->reply
) {
1728 assert(request
->aborted());
1729 assert(!request
->got_unsafe
);
1730 r
= request
->get_abort_code();
1731 request
->item
.remove_myself();
1732 unregister_request(request
);
1733 put_request(request
); // ours
1738 MClientReply
*reply
= request
->reply
;
1739 request
->reply
= NULL
;
1740 r
= reply
->get_result();
1742 request
->success
= true;
1744 // kick dispatcher (we've got it!)
1745 assert(request
->dispatch_cond
);
1746 request
->dispatch_cond
->Signal();
1747 ldout(cct
, 20) << "sendrecv kickback on tid " << tid
<< " " << request
->dispatch_cond
<< dendl
;
1748 request
->dispatch_cond
= 0;
1750 if (r
>= 0 && ptarget
)
1751 r
= verify_reply_trace(r
, request
, reply
, ptarget
, pcreated
, perms
);
1754 pdirbl
->claim(reply
->get_extra_bl());
1757 utime_t lat
= ceph_clock_now();
1758 lat
-= request
->sent_stamp
;
1759 ldout(cct
, 20) << "lat " << lat
<< dendl
;
1760 logger
->tinc(l_c_lat
, lat
);
1761 logger
->tinc(l_c_reply
, lat
);
1763 put_request(request
);
1769 void Client::unregister_request(MetaRequest
*req
)
1771 mds_requests
.erase(req
->tid
);
1772 if (req
->tid
== oldest_tid
) {
1773 map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.upper_bound(oldest_tid
);
1775 if (p
== mds_requests
.end()) {
1779 if (p
->second
->get_op() != CEPH_MDS_OP_SETFILELOCK
) {
1780 oldest_tid
= p
->first
;
1789 void Client::put_request(MetaRequest
*request
)
1791 if (request
->_put()) {
1793 if (request
->success
)
1794 op
= request
->get_op();
1796 request
->take_other_inode(&other_in
);
1800 (op
== CEPH_MDS_OP_RMDIR
||
1801 op
== CEPH_MDS_OP_RENAME
||
1802 op
== CEPH_MDS_OP_RMSNAP
)) {
1803 _try_to_trim_inode(other_in
.get(), false);
1808 int Client::encode_inode_release(Inode
*in
, MetaRequest
*req
,
1809 mds_rank_t mds
, int drop
,
1810 int unless
, int force
)
1812 ldout(cct
, 20) << "encode_inode_release enter(in:" << *in
<< ", req:" << req
1813 << " mds:" << mds
<< ", drop:" << drop
<< ", unless:" << unless
1814 << ", have:" << ", force:" << force
<< ")" << dendl
;
1816 if (in
->caps
.count(mds
)) {
1817 Cap
*caps
= in
->caps
[mds
];
1818 drop
&= ~(in
->dirty_caps
| get_caps_used(in
));
1819 if ((drop
& caps
->issued
) &&
1820 !(unless
& caps
->issued
)) {
1821 ldout(cct
, 25) << "Dropping caps. Initial " << ccap_string(caps
->issued
) << dendl
;
1822 caps
->issued
&= ~drop
;
1823 caps
->implemented
&= ~drop
;
1825 ldout(cct
, 25) << "Now have: " << ccap_string(caps
->issued
) << dendl
;
1830 ceph_mds_request_release rel
;
1832 rel
.cap_id
= caps
->cap_id
;
1833 rel
.seq
= caps
->seq
;
1834 rel
.issue_seq
= caps
->issue_seq
;
1835 rel
.mseq
= caps
->mseq
;
1836 rel
.caps
= caps
->implemented
;
1837 rel
.wanted
= caps
->wanted
;
1840 req
->cap_releases
.push_back(MClientRequest::Release(rel
,""));
1843 ldout(cct
, 25) << "encode_inode_release exit(in:" << *in
<< ") released:"
1844 << released
<< dendl
;
1848 void Client::encode_dentry_release(Dentry
*dn
, MetaRequest
*req
,
1849 mds_rank_t mds
, int drop
, int unless
)
1851 ldout(cct
, 20) << "encode_dentry_release enter(dn:"
1852 << dn
<< ")" << dendl
;
1855 released
= encode_inode_release(dn
->dir
->parent_inode
, req
,
1856 mds
, drop
, unless
, 1);
1857 if (released
&& dn
->lease_mds
== mds
) {
1858 ldout(cct
, 25) << "preemptively releasing dn to mds" << dendl
;
1859 MClientRequest::Release
& rel
= req
->cap_releases
.back();
1860 rel
.item
.dname_len
= dn
->name
.length();
1861 rel
.item
.dname_seq
= dn
->lease_seq
;
1862 rel
.dname
= dn
->name
;
1864 ldout(cct
, 25) << "encode_dentry_release exit(dn:"
1865 << dn
<< ")" << dendl
;
1870 * This requires the MClientRequest *request member to be set.
1871 * It will error out horribly without one.
1872 * Additionally, if you set any *drop member, you'd better have
1873 * set the corresponding dentry!
1875 void Client::encode_cap_releases(MetaRequest
*req
, mds_rank_t mds
)
1877 ldout(cct
, 20) << "encode_cap_releases enter (req: "
1878 << req
<< ", mds: " << mds
<< ")" << dendl
;
1879 if (req
->inode_drop
&& req
->inode())
1880 encode_inode_release(req
->inode(), req
,
1881 mds
, req
->inode_drop
,
1884 if (req
->old_inode_drop
&& req
->old_inode())
1885 encode_inode_release(req
->old_inode(), req
,
1886 mds
, req
->old_inode_drop
,
1887 req
->old_inode_unless
);
1888 if (req
->other_inode_drop
&& req
->other_inode())
1889 encode_inode_release(req
->other_inode(), req
,
1890 mds
, req
->other_inode_drop
,
1891 req
->other_inode_unless
);
1893 if (req
->dentry_drop
&& req
->dentry())
1894 encode_dentry_release(req
->dentry(), req
,
1895 mds
, req
->dentry_drop
,
1896 req
->dentry_unless
);
1898 if (req
->old_dentry_drop
&& req
->old_dentry())
1899 encode_dentry_release(req
->old_dentry(), req
,
1900 mds
, req
->old_dentry_drop
,
1901 req
->old_dentry_unless
);
1902 ldout(cct
, 25) << "encode_cap_releases exit (req: "
1903 << req
<< ", mds " << mds
<<dendl
;
1906 bool Client::have_open_session(mds_rank_t mds
)
1909 mds_sessions
.count(mds
) &&
1910 (mds_sessions
[mds
]->state
== MetaSession::STATE_OPEN
||
1911 mds_sessions
[mds
]->state
== MetaSession::STATE_STALE
);
// Look up the MetaSession for the given MDS rank; 'con' is the
// connection the caller received a message on, used to validate that
// the message belongs to the session we track for that rank.
1914 MetaSession
*Client::_get_mds_session(mds_rank_t mds
, Connection
*con
)
// No session registered for this rank at all.
// NOTE(review): the early-return for this case is elided in this
// extraction -- confirm against the full file.
1916 if (mds_sessions
.count(mds
) == 0)
// Fetch the tracked session for this rank.
// NOTE(review): the follow-up check that s's connection matches 'con'
// (and the corresponding NULL return) appears to be elided here as
// well -- verify before relying on this block.
1918 MetaSession
*s
= mds_sessions
[mds
];
1924 MetaSession
*Client::_get_or_open_mds_session(mds_rank_t mds
)
1926 if (mds_sessions
.count(mds
))
1927 return mds_sessions
[mds
];
1928 return _open_mds_session(mds
);
1932 * Populate a map of strings with client-identifying metadata,
1933 * such as the hostname. Call this once at initialization.
1935 void Client::populate_metadata(const std::string
&mount_root
)
1941 metadata
["hostname"] = u
.nodename
;
1942 ldout(cct
, 20) << __func__
<< " read hostname '" << u
.nodename
<< "'" << dendl
;
1944 ldout(cct
, 1) << __func__
<< " failed to read hostname (" << cpp_strerror(r
) << ")" << dendl
;
1947 metadata
["pid"] = stringify(getpid());
1949 // Ceph entity id (the '0' in "client.0")
1950 metadata
["entity_id"] = cct
->_conf
->name
.get_id();
1952 // Our mount position
1953 if (!mount_root
.empty()) {
1954 metadata
["root"] = mount_root
;
1958 metadata
["ceph_version"] = pretty_version_to_str();
1959 metadata
["ceph_sha1"] = git_version_to_str();
1961 // Apply any metadata from the user's configured overrides
1962 std::vector
<std::string
> tokens
;
1963 get_str_vec(cct
->_conf
->client_metadata
, ",", tokens
);
1964 for (const auto &i
: tokens
) {
1965 auto eqpos
= i
.find("=");
1966 // Throw out anything that isn't of the form "<str>=<str>"
1967 if (eqpos
== 0 || eqpos
== std::string::npos
|| eqpos
== i
.size()) {
1968 lderr(cct
) << "Invalid metadata keyval pair: '" << i
<< "'" << dendl
;
1971 metadata
[i
.substr(0, eqpos
)] = i
.substr(eqpos
+ 1);
1976 * Optionally add or override client metadata fields.
1978 void Client::update_metadata(std::string
const &k
, std::string
const &v
)
1980 Mutex::Locker
l(client_lock
);
1981 assert(initialized
);
1983 if (metadata
.count(k
)) {
1984 ldout(cct
, 1) << __func__
<< " warning, overriding metadata field '" << k
1985 << "' from '" << metadata
[k
] << "' to '" << v
<< "'" << dendl
;
1991 MetaSession
*Client::_open_mds_session(mds_rank_t mds
)
1993 ldout(cct
, 10) << "_open_mds_session mds." << mds
<< dendl
;
1994 assert(mds_sessions
.count(mds
) == 0);
1995 MetaSession
*session
= new MetaSession
;
1996 session
->mds_num
= mds
;
1998 session
->inst
= mdsmap
->get_inst(mds
);
1999 session
->con
= messenger
->get_connection(session
->inst
);
2000 session
->state
= MetaSession::STATE_OPENING
;
2001 session
->mds_state
= MDSMap::STATE_NULL
;
2002 mds_sessions
[mds
] = session
;
2004 // Maybe skip sending a request to open if this MDS daemon
2005 // has previously sent us a REJECT.
2006 if (rejected_by_mds
.count(mds
)) {
2007 if (rejected_by_mds
[mds
] == session
->inst
) {
2008 ldout(cct
, 4) << "_open_mds_session mds." << mds
<< " skipping "
2009 "because we were rejected" << dendl
;
2012 ldout(cct
, 4) << "_open_mds_session mds." << mds
<< " old inst "
2013 "rejected us, trying with new inst" << dendl
;
2014 rejected_by_mds
.erase(mds
);
2018 MClientSession
*m
= new MClientSession(CEPH_SESSION_REQUEST_OPEN
);
2019 m
->client_meta
= metadata
;
2020 session
->con
->send_message(m
);
2024 void Client::_close_mds_session(MetaSession
*s
)
2026 ldout(cct
, 2) << "_close_mds_session mds." << s
->mds_num
<< " seq " << s
->seq
<< dendl
;
2027 s
->state
= MetaSession::STATE_CLOSING
;
2028 s
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE
, s
->seq
));
// Tear down local state for a session that is now closed: drop the
// network connection, wake waiters, release caps, fail/redirect
// outstanding requests, and remove the session from the map.
2031 void Client::_closed_mds_session(MetaSession
*s
)
// Mark the session closed before touching anything else.
2033 s
->state
= MetaSession::STATE_CLOSED
;
// Drop the network connection to this MDS.
2034 s
->con
->mark_down();
// Wake anyone blocked waiting for this session to open.
2035 signal_context_list(s
->waiting_for_open
);
// Wake mount/umount waiters so they can re-evaluate.
2036 mount_cond
.Signal();
// Release all capabilities granted through this session.
2037 remove_session_caps(s
);
// Kick requests that were in flight on this (now closed) session.
2038 kick_requests_closed(s
);
// Forget the session.
// NOTE(review): this extraction ends here; the final teardown line(s)
// after erase() (e.g. freeing the session object) are elided --
// confirm against the full file.
2039 mds_sessions
.erase(s
->mds_num
);
2043 void Client::handle_client_session(MClientSession
*m
)
2045 mds_rank_t from
= mds_rank_t(m
->get_source().num());
2046 ldout(cct
, 10) << "handle_client_session " << *m
<< " from mds." << from
<< dendl
;
2048 MetaSession
*session
= _get_mds_session(from
, m
->get_connection().get());
2050 ldout(cct
, 10) << " discarding session message from sessionless mds " << m
->get_source_inst() << dendl
;
2055 switch (m
->get_op()) {
2056 case CEPH_SESSION_OPEN
:
2057 renew_caps(session
);
2058 session
->state
= MetaSession::STATE_OPEN
;
2060 mount_cond
.Signal();
2062 connect_mds_targets(from
);
2063 signal_context_list(session
->waiting_for_open
);
2066 case CEPH_SESSION_CLOSE
:
2067 _closed_mds_session(session
);
2070 case CEPH_SESSION_RENEWCAPS
:
2071 if (session
->cap_renew_seq
== m
->get_seq()) {
2073 session
->last_cap_renew_request
+ mdsmap
->get_session_timeout();
2074 wake_inode_waiters(session
);
2078 case CEPH_SESSION_STALE
:
2079 renew_caps(session
);
2082 case CEPH_SESSION_RECALL_STATE
:
2083 trim_caps(session
, m
->get_max_caps());
2086 case CEPH_SESSION_FLUSHMSG
:
2087 session
->con
->send_message(new MClientSession(CEPH_SESSION_FLUSHMSG_ACK
, m
->get_seq()));
2090 case CEPH_SESSION_FORCE_RO
:
2091 force_session_readonly(session
);
2094 case CEPH_SESSION_REJECT
:
2095 rejected_by_mds
[session
->mds_num
] = session
->inst
;
2096 _closed_mds_session(session
);
// Scan all MDS sessions for one in STATE_STALE.  Caller must hold
// client_lock (asserted below).
2107 bool Client::_any_stale_sessions() const
// We iterate shared session state, so the lock must be held.
2109 assert(client_lock
.is_locked_by_me());
// Walk every (rank, session) pair we track.
2111 for (const auto &i
: mds_sessions
) {
// Found a stale session.
// NOTE(review): the return statements (true here, false after the
// loop) are elided in this extraction -- confirm against the full file.
2112 if (i
.second
->state
== MetaSession::STATE_STALE
) {
// Close out every session that has gone stale, so it will be
// re-established on next use.
2120 void Client::_kick_stale_sessions()
2122 ldout(cct
, 1) << "kick_stale_sessions" << dendl
;
// Explicit iterator loop with an EMPTY increment clause: the iterator
// must be advanced inside the body, because _closed_mds_session()
// erases the session from mds_sessions and would invalidate it.
// NOTE(review): the in-body advance line is elided in this extraction
// -- confirm against the full file.
2124 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2125 p
!= mds_sessions
.end(); ) {
// Grab the session pointer before any possible erase.
2126 MetaSession
*s
= p
->second
;
// Stale sessions get fully closed (caps released, requests kicked).
2128 if (s
->state
== MetaSession::STATE_STALE
)
2129 _closed_mds_session(s
);
2133 void Client::send_request(MetaRequest
*request
, MetaSession
*session
,
2134 bool drop_cap_releases
)
2137 mds_rank_t mds
= session
->mds_num
;
2138 ldout(cct
, 10) << "send_request rebuilding request " << request
->get_tid()
2139 << " for mds." << mds
<< dendl
;
2140 MClientRequest
*r
= build_client_request(request
);
2141 if (request
->dentry()) {
2142 r
->set_dentry_wanted();
2144 if (request
->got_unsafe
) {
2145 r
->set_replayed_op();
2146 if (request
->target
)
2147 r
->head
.ino
= request
->target
->ino
;
2149 encode_cap_releases(request
, mds
);
2150 if (drop_cap_releases
) // we haven't send cap reconnect yet, drop cap releases
2151 request
->cap_releases
.clear();
2153 r
->releases
.swap(request
->cap_releases
);
2155 r
->set_mdsmap_epoch(mdsmap
->get_epoch());
2156 if (r
->head
.op
== CEPH_MDS_OP_SETXATTR
) {
2157 objecter
->with_osdmap([r
](const OSDMap
& o
) {
2158 r
->set_osdmap_epoch(o
.get_epoch());
2162 if (request
->mds
== -1) {
2163 request
->sent_stamp
= ceph_clock_now();
2164 ldout(cct
, 20) << "send_request set sent_stamp to " << request
->sent_stamp
<< dendl
;
2168 Inode
*in
= request
->inode();
2169 if (in
&& in
->caps
.count(mds
))
2170 request
->sent_on_mseq
= in
->caps
[mds
]->mseq
;
2172 session
->requests
.push_back(&request
->item
);
2174 ldout(cct
, 10) << "send_request " << *r
<< " to mds." << mds
<< dendl
;
2175 session
->con
->send_message(r
);
2178 MClientRequest
* Client::build_client_request(MetaRequest
*request
)
2180 MClientRequest
*req
= new MClientRequest(request
->get_op());
2181 req
->set_tid(request
->tid
);
2182 req
->set_stamp(request
->op_stamp
);
2183 memcpy(&req
->head
, &request
->head
, sizeof(ceph_mds_request_head
));
2185 // if the filepath's haven't been set, set them!
2186 if (request
->path
.empty()) {
2187 Inode
*in
= request
->inode();
2188 Dentry
*de
= request
->dentry();
2190 in
->make_nosnap_relative_path(request
->path
);
2193 de
->inode
->make_nosnap_relative_path(request
->path
);
2195 de
->dir
->parent_inode
->make_nosnap_relative_path(request
->path
);
2196 request
->path
.push_dentry(de
->name
);
2198 else ldout(cct
, 1) << "Warning -- unable to construct a filepath!"
2199 << " No path, inode, or appropriately-endowed dentry given!"
2201 } else ldout(cct
, 1) << "Warning -- unable to construct a filepath!"
2202 << " No path, inode, or dentry given!"
2205 req
->set_filepath(request
->get_filepath());
2206 req
->set_filepath2(request
->get_filepath2());
2207 req
->set_data(request
->data
);
2208 req
->set_retry_attempt(request
->retry_attempt
++);
2209 req
->head
.num_fwd
= request
->num_fwd
;
2211 int gid_count
= request
->perms
.get_gids(&_gids
);
2212 req
->set_gid_list(gid_count
, _gids
);
2218 void Client::handle_client_request_forward(MClientRequestForward
*fwd
)
2220 mds_rank_t mds
= mds_rank_t(fwd
->get_source().num());
2221 MetaSession
*session
= _get_mds_session(mds
, fwd
->get_connection().get());
2226 ceph_tid_t tid
= fwd
->get_tid();
2228 if (mds_requests
.count(tid
) == 0) {
2229 ldout(cct
, 10) << "handle_client_request_forward no pending request on tid " << tid
<< dendl
;
2234 MetaRequest
*request
= mds_requests
[tid
];
2237 // reset retry counter
2238 request
->retry_attempt
= 0;
2240 // request not forwarded, or dest mds has no session.
2242 ldout(cct
, 10) << "handle_client_request tid " << tid
2243 << " fwd " << fwd
->get_num_fwd()
2244 << " to mds." << fwd
->get_dest_mds()
2245 << ", resending to " << fwd
->get_dest_mds()
2249 request
->item
.remove_myself();
2250 request
->num_fwd
= fwd
->get_num_fwd();
2251 request
->resend_mds
= fwd
->get_dest_mds();
2252 request
->caller_cond
->Signal();
2257 bool Client::is_dir_operation(MetaRequest
*req
)
2259 int op
= req
->get_op();
2260 if (op
== CEPH_MDS_OP_MKNOD
|| op
== CEPH_MDS_OP_LINK
||
2261 op
== CEPH_MDS_OP_UNLINK
|| op
== CEPH_MDS_OP_RENAME
||
2262 op
== CEPH_MDS_OP_MKDIR
|| op
== CEPH_MDS_OP_RMDIR
||
2263 op
== CEPH_MDS_OP_SYMLINK
|| op
== CEPH_MDS_OP_CREATE
)
2268 void Client::handle_client_reply(MClientReply
*reply
)
2270 mds_rank_t mds_num
= mds_rank_t(reply
->get_source().num());
2271 MetaSession
*session
= _get_mds_session(mds_num
, reply
->get_connection().get());
2277 ceph_tid_t tid
= reply
->get_tid();
2278 bool is_safe
= reply
->is_safe();
2280 if (mds_requests
.count(tid
) == 0) {
2281 lderr(cct
) << "handle_client_reply no pending request on tid " << tid
2282 << " safe is:" << is_safe
<< dendl
;
2286 MetaRequest
*request
= mds_requests
.at(tid
);
2288 ldout(cct
, 20) << "handle_client_reply got a reply. Safe:" << is_safe
2289 << " tid " << tid
<< dendl
;
2291 if (request
->got_unsafe
&& !is_safe
) {
2292 //duplicate response
2293 ldout(cct
, 0) << "got a duplicate reply on tid " << tid
<< " from mds "
2294 << mds_num
<< " safe:" << is_safe
<< dendl
;
2299 if (-ESTALE
== reply
->get_result()) { // see if we can get to proper MDS
2300 ldout(cct
, 20) << "got ESTALE on tid " << request
->tid
2301 << " from mds." << request
->mds
<< dendl
;
2302 request
->send_to_auth
= true;
2303 request
->resend_mds
= choose_target_mds(request
);
2304 Inode
*in
= request
->inode();
2305 if (request
->resend_mds
>= 0 &&
2306 request
->resend_mds
== request
->mds
&&
2308 in
->caps
.count(request
->resend_mds
) == 0 ||
2309 request
->sent_on_mseq
== in
->caps
[request
->resend_mds
]->mseq
)) {
2310 // have to return ESTALE
2312 request
->caller_cond
->Signal();
2316 ldout(cct
, 20) << "have to return ESTALE" << dendl
;
2319 assert(request
->reply
== NULL
);
2320 request
->reply
= reply
;
2321 insert_trace(request
, session
);
2323 // Handle unsafe reply
2325 request
->got_unsafe
= true;
2326 session
->unsafe_requests
.push_back(&request
->unsafe_item
);
2327 if (is_dir_operation(request
)) {
2328 Inode
*dir
= request
->inode();
2330 dir
->unsafe_ops
.push_back(&request
->unsafe_dir_item
);
2332 if (request
->target
) {
2333 InodeRef
&in
= request
->target
;
2334 in
->unsafe_ops
.push_back(&request
->unsafe_target_item
);
2338 // Only signal the caller once (on the first reply):
2339 // Either its an unsafe reply, or its a safe reply and no unsafe reply was sent.
2340 if (!is_safe
|| !request
->got_unsafe
) {
2342 request
->dispatch_cond
= &cond
;
2345 ldout(cct
, 20) << "handle_client_reply signalling caller " << (void*)request
->caller_cond
<< dendl
;
2346 request
->caller_cond
->Signal();
2348 // wake for kick back
2349 while (request
->dispatch_cond
) {
2350 ldout(cct
, 20) << "handle_client_reply awaiting kickback on tid " << tid
<< " " << &cond
<< dendl
;
2351 cond
.Wait(client_lock
);
2356 // the filesystem change is committed to disk
2357 // we're done, clean up
2358 if (request
->got_unsafe
) {
2359 request
->unsafe_item
.remove_myself();
2360 request
->unsafe_dir_item
.remove_myself();
2361 request
->unsafe_target_item
.remove_myself();
2362 signal_cond_list(request
->waitfor_safe
);
2364 request
->item
.remove_myself();
2365 unregister_request(request
);
2368 mount_cond
.Signal();
2371 void Client::_handle_full_flag(int64_t pool
)
2373 ldout(cct
, 1) << __func__
<< ": FULL: cancelling outstanding operations "
2374 << "on " << pool
<< dendl
;
2375 // Cancel all outstanding ops in this pool with -ENOSPC: it is necessary
2376 // to do this rather than blocking, because otherwise when we fill up we
2377 // potentially lock caps forever on files with dirty pages, and we need
2378 // to be able to release those caps to the MDS so that it can delete files
2379 // and free up space.
2380 epoch_t cancelled_epoch
= objecter
->op_cancel_writes(-ENOSPC
, pool
);
2382 // For all inodes with layouts in this pool and a pending flush write op
2383 // (i.e. one of the ones we will cancel), we've got to purge_set their data
2384 // from ObjectCacher so that it doesn't re-issue the write in response to
2385 // the ENOSPC error.
2386 // Fortunately since we're cancelling everything in a given pool, we don't
2387 // need to know which ops belong to which ObjectSet, we can just blow all
2388 // the un-flushed cached data away and mark any dirty inodes' async_err
2389 // field with -ENOSPC as long as we're sure all the ops we cancelled were
2390 // affecting this pool, and all the objectsets we're purging were also
2392 for (unordered_map
<vinodeno_t
,Inode
*>::iterator i
= inode_map
.begin();
2393 i
!= inode_map
.end(); ++i
)
2395 Inode
*inode
= i
->second
;
2396 if (inode
->oset
.dirty_or_tx
2397 && (pool
== -1 || inode
->layout
.pool_id
== pool
)) {
2398 ldout(cct
, 4) << __func__
<< ": FULL: inode 0x" << std::hex
<< i
->first
<< std::dec
2399 << " has dirty objects, purging and setting ENOSPC" << dendl
;
2400 objectcacher
->purge_set(&inode
->oset
);
2401 inode
->set_async_err(-ENOSPC
);
2405 if (cancelled_epoch
!= (epoch_t
)-1) {
2406 set_cap_epoch_barrier(cancelled_epoch
);
2410 void Client::handle_osd_map(MOSDMap
*m
)
2412 std::set
<entity_addr_t
> new_blacklists
;
2413 objecter
->consume_blacklist_events(&new_blacklists
);
2415 const auto myaddr
= messenger
->get_myaddr();
2416 if (!blacklisted
&& new_blacklists
.count(myaddr
)) {
2417 auto epoch
= objecter
->with_osdmap([](const OSDMap
&o
){
2418 return o
.get_epoch();
2420 lderr(cct
) << "I was blacklisted at osd epoch " << epoch
<< dendl
;
2422 for (std::map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2423 p
!= mds_requests
.end(); ) {
2424 auto req
= p
->second
;
2426 req
->abort(-EBLACKLISTED
);
2427 if (req
->caller_cond
) {
2429 req
->caller_cond
->Signal();
2433 // Progress aborts on any requests that were on this waitlist. Any
2434 // requests that were on a waiting_for_open session waitlist
2435 // will get kicked during close session below.
2436 signal_cond_list(waiting_for_mdsmap
);
2438 // Force-close all sessions: assume this is not abandoning any state
2439 // on the MDS side because the MDS will have seen the blacklist too.
2440 while(!mds_sessions
.empty()) {
2441 auto i
= mds_sessions
.begin();
2442 auto session
= i
->second
;
2443 _closed_mds_session(session
);
2446 // Since we know all our OSD ops will fail, cancel them all preemtively,
2447 // so that on an unhealthy cluster we can umount promptly even if e.g.
2448 // some PGs were inaccessible.
2449 objecter
->op_cancel_writes(-EBLACKLISTED
);
2451 } else if (blacklisted
) {
2452 // Handle case where we were blacklisted but no longer are
2453 blacklisted
= objecter
->with_osdmap([myaddr
](const OSDMap
&o
){
2454 return o
.is_blacklisted(myaddr
);});
2457 if (objecter
->osdmap_full_flag()) {
2458 _handle_full_flag(-1);
2460 // Accumulate local list of full pools so that I can drop
2461 // the objecter lock before re-entering objecter in
2463 std::vector
<int64_t> full_pools
;
2465 objecter
->with_osdmap([&full_pools
](const OSDMap
&o
) {
2466 for (const auto& kv
: o
.get_pools()) {
2467 if (kv
.second
.has_flag(pg_pool_t::FLAG_FULL
)) {
2468 full_pools
.push_back(kv
.first
);
2473 for (auto p
: full_pools
)
2474 _handle_full_flag(p
);
2476 // Subscribe to subsequent maps to watch for the full flag going
2477 // away. For the global full flag objecter does this for us, but
2478 // it pays no attention to the per-pool full flag so in this branch
2479 // we do it ourselves.
2480 if (!full_pools
.empty()) {
2481 objecter
->maybe_request_map();
2489 // ------------------------
2490 // incoming messages
2493 bool Client::ms_dispatch(Message
*m
)
2495 Mutex::Locker
l(client_lock
);
2497 ldout(cct
, 10) << "inactive, discarding " << *m
<< dendl
;
2502 switch (m
->get_type()) {
2503 // mounting and mds sessions
2504 case CEPH_MSG_MDS_MAP
:
2505 handle_mds_map(static_cast<MMDSMap
*>(m
));
2507 case CEPH_MSG_FS_MAP
:
2508 handle_fs_map(static_cast<MFSMap
*>(m
));
2510 case CEPH_MSG_FS_MAP_USER
:
2511 handle_fs_map_user(static_cast<MFSMapUser
*>(m
));
2513 case CEPH_MSG_CLIENT_SESSION
:
2514 handle_client_session(static_cast<MClientSession
*>(m
));
2517 case CEPH_MSG_OSD_MAP
:
2518 handle_osd_map(static_cast<MOSDMap
*>(m
));
2522 case CEPH_MSG_CLIENT_REQUEST_FORWARD
:
2523 handle_client_request_forward(static_cast<MClientRequestForward
*>(m
));
2525 case CEPH_MSG_CLIENT_REPLY
:
2526 handle_client_reply(static_cast<MClientReply
*>(m
));
2529 case CEPH_MSG_CLIENT_SNAP
:
2530 handle_snap(static_cast<MClientSnap
*>(m
));
2532 case CEPH_MSG_CLIENT_CAPS
:
2533 handle_caps(static_cast<MClientCaps
*>(m
));
2535 case CEPH_MSG_CLIENT_LEASE
:
2536 handle_lease(static_cast<MClientLease
*>(m
));
2538 case MSG_COMMAND_REPLY
:
2539 if (m
->get_source().type() == CEPH_ENTITY_TYPE_MDS
) {
2540 handle_command_reply(static_cast<MCommandReply
*>(m
));
2545 case CEPH_MSG_CLIENT_QUOTA
:
2546 handle_quota(static_cast<MClientQuota
*>(m
));
2555 ldout(cct
, 10) << "unmounting: trim pass, size was " << lru
.lru_get_size()
2556 << "+" << inode_map
.size() << dendl
;
2557 long unsigned size
= lru
.lru_get_size() + inode_map
.size();
2559 if (size
< lru
.lru_get_size() + inode_map
.size()) {
2560 ldout(cct
, 10) << "unmounting: trim pass, cache shrank, poking unmount()" << dendl
;
2561 mount_cond
.Signal();
2563 ldout(cct
, 10) << "unmounting: trim pass, size still " << lru
.lru_get_size()
2564 << "+" << inode_map
.size() << dendl
;
2571 void Client::handle_fs_map(MFSMap
*m
)
2573 fsmap
.reset(new FSMap(m
->get_fsmap()));
2576 signal_cond_list(waiting_for_fsmap
);
2578 monclient
->sub_got("fsmap", fsmap
->get_epoch());
2581 void Client::handle_fs_map_user(MFSMapUser
*m
)
2583 fsmap_user
.reset(new FSMapUser
);
2584 *fsmap_user
= m
->get_fsmap();
2587 monclient
->sub_got("fsmap.user", fsmap_user
->get_epoch());
2588 signal_cond_list(waiting_for_fsmap
);
2591 void Client::handle_mds_map(MMDSMap
* m
)
2593 if (m
->get_epoch() <= mdsmap
->get_epoch()) {
2594 ldout(cct
, 1) << "handle_mds_map epoch " << m
->get_epoch()
2595 << " is identical to or older than our "
2596 << mdsmap
->get_epoch() << dendl
;
2601 ldout(cct
, 1) << "handle_mds_map epoch " << m
->get_epoch() << dendl
;
2603 std::unique_ptr
<MDSMap
> oldmap(new MDSMap
);
2604 oldmap
.swap(mdsmap
);
2606 mdsmap
->decode(m
->get_encoded());
2608 // Cancel any commands for missing or laggy GIDs
2609 std::list
<ceph_tid_t
> cancel_ops
;
2610 auto &commands
= command_table
.get_commands();
2611 for (const auto &i
: commands
) {
2612 auto &op
= i
.second
;
2613 const mds_gid_t op_mds_gid
= op
.mds_gid
;
2614 if (mdsmap
->is_dne_gid(op_mds_gid
) || mdsmap
->is_laggy_gid(op_mds_gid
)) {
2615 ldout(cct
, 1) << __func__
<< ": cancelling command op " << i
.first
<< dendl
;
2616 cancel_ops
.push_back(i
.first
);
2618 std::ostringstream ss
;
2619 ss
<< "MDS " << op_mds_gid
<< " went away";
2620 *(op
.outs
) = ss
.str();
2622 op
.con
->mark_down();
2624 op
.on_finish
->complete(-ETIMEDOUT
);
2629 for (std::list
<ceph_tid_t
>::iterator i
= cancel_ops
.begin();
2630 i
!= cancel_ops
.end(); ++i
) {
2631 command_table
.erase(*i
);
2635 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2636 p
!= mds_sessions
.end(); ) {
2637 mds_rank_t mds
= p
->first
;
2638 MetaSession
*session
= p
->second
;
2641 int oldstate
= oldmap
->get_state(mds
);
2642 int newstate
= mdsmap
->get_state(mds
);
2643 if (!mdsmap
->is_up(mds
)) {
2644 session
->con
->mark_down();
2645 } else if (mdsmap
->get_inst(mds
) != session
->inst
) {
2646 session
->con
->mark_down();
2647 session
->inst
= mdsmap
->get_inst(mds
);
2648 // When new MDS starts to take over, notify kernel to trim unused entries
2649 // in its dcache/icache. Hopefully, the kernel will release some unused
2650 // inodes before the new MDS enters reconnect state.
2651 trim_cache_for_reconnect(session
);
2652 } else if (oldstate
== newstate
)
2653 continue; // no change
2655 session
->mds_state
= newstate
;
2656 if (newstate
== MDSMap::STATE_RECONNECT
) {
2657 session
->con
= messenger
->get_connection(session
->inst
);
2658 send_reconnect(session
);
2659 } else if (newstate
>= MDSMap::STATE_ACTIVE
) {
2660 if (oldstate
< MDSMap::STATE_ACTIVE
) {
2661 // kick new requests
2662 kick_requests(session
);
2663 kick_flushing_caps(session
);
2664 signal_context_list(session
->waiting_for_open
);
2665 kick_maxsize_requests(session
);
2666 wake_inode_waiters(session
);
2668 connect_mds_targets(mds
);
2669 } else if (newstate
== MDSMap::STATE_NULL
&&
2670 mds
>= mdsmap
->get_max_mds()) {
2671 _closed_mds_session(session
);
2675 // kick any waiting threads
2676 signal_cond_list(waiting_for_mdsmap
);
2680 monclient
->sub_got("mdsmap", mdsmap
->get_epoch());
2683 void Client::send_reconnect(MetaSession
*session
)
2685 mds_rank_t mds
= session
->mds_num
;
2686 ldout(cct
, 10) << "send_reconnect to mds." << mds
<< dendl
;
2688 // trim unused caps to reduce MDS's cache rejoin time
2689 trim_cache_for_reconnect(session
);
2691 session
->readonly
= false;
2693 if (session
->release
) {
2694 session
->release
->put();
2695 session
->release
= NULL
;
2698 // reset my cap seq number
2700 //connect to the mds' offload targets
2701 connect_mds_targets(mds
);
2702 //make sure unsafe requests get saved
2703 resend_unsafe_requests(session
);
2705 MClientReconnect
*m
= new MClientReconnect
;
2707 // i have an open session.
2708 ceph::unordered_set
<inodeno_t
> did_snaprealm
;
2709 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
= inode_map
.begin();
2710 p
!= inode_map
.end();
2712 Inode
*in
= p
->second
;
2713 if (in
->caps
.count(mds
)) {
2714 ldout(cct
, 10) << " caps on " << p
->first
2715 << " " << ccap_string(in
->caps
[mds
]->issued
)
2716 << " wants " << ccap_string(in
->caps_wanted())
2719 in
->make_long_path(path
);
2720 ldout(cct
, 10) << " path " << path
<< dendl
;
2723 _encode_filelocks(in
, flockbl
);
2725 Cap
*cap
= in
->caps
[mds
];
2726 cap
->seq
= 0; // reset seq.
2727 cap
->issue_seq
= 0; // reset seq.
2728 cap
->mseq
= 0; // reset seq.
2729 cap
->issued
= cap
->implemented
;
2731 snapid_t snap_follows
= 0;
2732 if (!in
->cap_snaps
.empty())
2733 snap_follows
= in
->cap_snaps
.begin()->first
;
2735 m
->add_cap(p
->first
.ino
,
2737 path
.get_ino(), path
.get_path(), // ino
2738 in
->caps_wanted(), // wanted
2739 cap
->issued
, // issued
2744 if (did_snaprealm
.count(in
->snaprealm
->ino
) == 0) {
2745 ldout(cct
, 10) << " snaprealm " << *in
->snaprealm
<< dendl
;
2746 m
->add_snaprealm(in
->snaprealm
->ino
, in
->snaprealm
->seq
, in
->snaprealm
->parent
);
2747 did_snaprealm
.insert(in
->snaprealm
->ino
);
2752 early_kick_flushing_caps(session
);
2754 session
->con
->send_message(m
);
2756 mount_cond
.Signal();
2760 void Client::kick_requests(MetaSession
*session
)
2762 ldout(cct
, 10) << "kick_requests for mds." << session
->mds_num
<< dendl
;
2763 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2764 p
!= mds_requests
.end();
2766 MetaRequest
*req
= p
->second
;
2767 if (req
->got_unsafe
)
2769 if (req
->aborted()) {
2770 if (req
->caller_cond
) {
2772 req
->caller_cond
->Signal();
2776 if (req
->retry_attempt
> 0)
2777 continue; // new requests only
2778 if (req
->mds
== session
->mds_num
) {
2779 send_request(p
->second
, session
);
2784 void Client::resend_unsafe_requests(MetaSession
*session
)
2786 for (xlist
<MetaRequest
*>::iterator iter
= session
->unsafe_requests
.begin();
2789 send_request(*iter
, session
);
2791 // also re-send old requests when MDS enters reconnect stage. So that MDS can
2792 // process completed requests in clientreplay stage.
2793 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2794 p
!= mds_requests
.end();
2796 MetaRequest
*req
= p
->second
;
2797 if (req
->got_unsafe
)
2801 if (req
->retry_attempt
== 0)
2802 continue; // old requests only
2803 if (req
->mds
== session
->mds_num
)
2804 send_request(req
, session
, true);
2808 void Client::wait_unsafe_requests()
2810 list
<MetaRequest
*> last_unsafe_reqs
;
2811 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
2812 p
!= mds_sessions
.end();
2814 MetaSession
*s
= p
->second
;
2815 if (!s
->unsafe_requests
.empty()) {
2816 MetaRequest
*req
= s
->unsafe_requests
.back();
2818 last_unsafe_reqs
.push_back(req
);
2822 for (list
<MetaRequest
*>::iterator p
= last_unsafe_reqs
.begin();
2823 p
!= last_unsafe_reqs
.end();
2825 MetaRequest
*req
= *p
;
2826 if (req
->unsafe_item
.is_on_list())
2827 wait_on_list(req
->waitfor_safe
);
2832 void Client::kick_requests_closed(MetaSession
*session
)
2834 ldout(cct
, 10) << "kick_requests_closed for mds." << session
->mds_num
<< dendl
;
2835 for (map
<ceph_tid_t
, MetaRequest
*>::iterator p
= mds_requests
.begin();
2836 p
!= mds_requests
.end(); ) {
2837 MetaRequest
*req
= p
->second
;
2839 if (req
->mds
== session
->mds_num
) {
2840 if (req
->caller_cond
) {
2842 req
->caller_cond
->Signal();
2844 req
->item
.remove_myself();
2845 if (req
->got_unsafe
) {
2846 lderr(cct
) << "kick_requests_closed removing unsafe request " << req
->get_tid() << dendl
;
2847 req
->unsafe_item
.remove_myself();
2848 req
->unsafe_dir_item
.remove_myself();
2849 req
->unsafe_target_item
.remove_myself();
2850 signal_cond_list(req
->waitfor_safe
);
2851 unregister_request(req
);
2855 assert(session
->requests
.empty());
2856 assert(session
->unsafe_requests
.empty());
2866 void Client::got_mds_push(MetaSession
*s
)
2869 ldout(cct
, 10) << " mds." << s
->mds_num
<< " seq now " << s
->seq
<< dendl
;
2870 if (s
->state
== MetaSession::STATE_CLOSING
) {
2871 s
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_CLOSE
, s
->seq
));
2875 void Client::handle_lease(MClientLease
*m
)
2877 ldout(cct
, 10) << "handle_lease " << *m
<< dendl
;
2879 assert(m
->get_action() == CEPH_MDS_LEASE_REVOKE
);
2881 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
2882 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
2888 got_mds_push(session
);
2890 ceph_seq_t seq
= m
->get_seq();
2893 vinodeno_t
vino(m
->get_ino(), CEPH_NOSNAP
);
2894 if (inode_map
.count(vino
) == 0) {
2895 ldout(cct
, 10) << " don't have vino " << vino
<< dendl
;
2898 in
= inode_map
[vino
];
2900 if (m
->get_mask() & CEPH_LOCK_DN
) {
2901 if (!in
->dir
|| in
->dir
->dentries
.count(m
->dname
) == 0) {
2902 ldout(cct
, 10) << " don't have dir|dentry " << m
->get_ino() << "/" << m
->dname
<<dendl
;
2905 Dentry
*dn
= in
->dir
->dentries
[m
->dname
];
2906 ldout(cct
, 10) << " revoked DN lease on " << dn
<< dendl
;
2911 m
->get_connection()->send_message(
2913 CEPH_MDS_LEASE_RELEASE
, seq
,
2914 m
->get_mask(), m
->get_ino(), m
->get_first(), m
->get_last(), m
->dname
));
2918 void Client::put_inode(Inode
*in
, int n
)
2920 ldout(cct
, 10) << "put_inode on " << *in
<< dendl
;
2921 int left
= in
->_put(n
);
2924 remove_all_caps(in
);
2926 ldout(cct
, 10) << "put_inode deleting " << *in
<< dendl
;
2927 bool unclean
= objectcacher
->release_set(&in
->oset
);
2929 inode_map
.erase(in
->vino());
2930 if (use_faked_inos())
2931 _release_faked_ino(in
);
2936 while (!root_parents
.empty())
2937 root_parents
.erase(root_parents
.begin());
2944 void Client::close_dir(Dir
*dir
)
2946 Inode
*in
= dir
->parent_inode
;
2947 ldout(cct
, 15) << "close_dir dir " << dir
<< " on " << in
<< dendl
;
2948 assert(dir
->is_empty());
2949 assert(in
->dir
== dir
);
2950 assert(in
->dn_set
.size() < 2); // dirs can't be hard-linked
2951 if (!in
->dn_set
.empty())
2952 in
->get_first_parent()->put(); // unpin dentry
2956 put_inode(in
); // unpin inode
2960 * Don't call this with in==NULL, use get_or_create for that
2961 * leave dn set to default NULL unless you're trying to add
2962 * a new inode to a pre-created Dentry
2964 Dentry
* Client::link(Dir
*dir
, const string
& name
, Inode
*in
, Dentry
*dn
)
2967 // create a new Dentry
2973 dir
->dentries
[dn
->name
] = dn
;
2974 lru
.lru_insert_mid(dn
); // mid or top?
2976 ldout(cct
, 15) << "link dir " << dir
->parent_inode
<< " '" << name
<< "' to inode " << in
2977 << " dn " << dn
<< " (new dn)" << dendl
;
2979 ldout(cct
, 15) << "link dir " << dir
->parent_inode
<< " '" << name
<< "' to inode " << in
2980 << " dn " << dn
<< " (old dn)" << dendl
;
2983 if (in
) { // link to inode
2987 dn
->get(); // dir -> dn pin
2989 dn
->get(); // ll_ref -> dn pin
2992 assert(in
->dn_set
.count(dn
) == 0);
2994 // only one parent for directories!
2995 if (in
->is_dir() && !in
->dn_set
.empty()) {
2996 Dentry
*olddn
= in
->get_first_parent();
2997 assert(olddn
->dir
!= dir
|| olddn
->name
!= name
);
2998 Inode
*old_diri
= olddn
->dir
->parent_inode
;
2999 old_diri
->dir_release_count
++;
3000 clear_dir_complete_and_ordered(old_diri
, true);
3001 unlink(olddn
, true, true); // keep dir, dentry
3004 in
->dn_set
.insert(dn
);
3006 ldout(cct
, 20) << "link inode " << in
<< " parents now " << in
->dn_set
<< dendl
;
3012 void Client::unlink(Dentry
*dn
, bool keepdir
, bool keepdentry
)
3016 ldout(cct
, 15) << "unlink dir " << dn
->dir
->parent_inode
<< " '" << dn
->name
<< "' dn " << dn
3017 << " inode " << dn
->inode
<< dendl
;
3019 // unlink from inode
3023 dn
->put(); // dir -> dn pin
3025 dn
->put(); // ll_ref -> dn pin
3028 assert(in
->dn_set
.count(dn
));
3029 in
->dn_set
.erase(dn
);
3030 ldout(cct
, 20) << "unlink inode " << in
<< " parents now " << in
->dn_set
<< dendl
;
3036 ldout(cct
, 15) << "unlink removing '" << dn
->name
<< "' dn " << dn
<< dendl
;
3039 dn
->dir
->dentries
.erase(dn
->name
);
3040 if (dn
->dir
->is_empty() && !keepdir
)
3051 * For asynchronous flushes, check for errors from the IO and
3052 * update the inode if necessary
3054 class C_Client_FlushComplete
: public Context
{
3059 C_Client_FlushComplete(Client
*c
, Inode
*in
) : client(c
), inode(in
) { }
3060 void finish(int r
) override
{
3061 assert(client
->client_lock
.is_locked_by_me());
3063 client_t
const whoami
= client
->whoami
; // For the benefit of ldout prefix
3064 ldout(client
->cct
, 1) << "I/O error from flush on inode " << inode
3065 << " 0x" << std::hex
<< inode
->ino
<< std::dec
3066 << ": " << r
<< "(" << cpp_strerror(r
) << ")" << dendl
;
3067 inode
->set_async_err(r
);
3077 void Client::get_cap_ref(Inode
*in
, int cap
)
3079 if ((cap
& CEPH_CAP_FILE_BUFFER
) &&
3080 in
->cap_refs
[CEPH_CAP_FILE_BUFFER
] == 0) {
3081 ldout(cct
, 5) << "get_cap_ref got first FILE_BUFFER ref on " << *in
<< dendl
;
3084 if ((cap
& CEPH_CAP_FILE_CACHE
) &&
3085 in
->cap_refs
[CEPH_CAP_FILE_CACHE
] == 0) {
3086 ldout(cct
, 5) << "get_cap_ref got first FILE_CACHE ref on " << *in
<< dendl
;
3089 in
->get_cap_ref(cap
);
3092 void Client::put_cap_ref(Inode
*in
, int cap
)
3094 int last
= in
->put_cap_ref(cap
);
3097 int drop
= last
& ~in
->caps_issued();
3098 if (in
->snapid
== CEPH_NOSNAP
) {
3099 if ((last
& CEPH_CAP_FILE_WR
) &&
3100 !in
->cap_snaps
.empty() &&
3101 in
->cap_snaps
.rbegin()->second
.writing
) {
3102 ldout(cct
, 10) << "put_cap_ref finishing pending cap_snap on " << *in
<< dendl
;
3103 in
->cap_snaps
.rbegin()->second
.writing
= 0;
3104 finish_cap_snap(in
, in
->cap_snaps
.rbegin()->second
, get_caps_used(in
));
3105 signal_cond_list(in
->waitfor_caps
); // wake up blocked sync writers
3107 if (last
& CEPH_CAP_FILE_BUFFER
) {
3108 for (auto &p
: in
->cap_snaps
)
3109 p
.second
.dirty_data
= 0;
3110 signal_cond_list(in
->waitfor_commit
);
3111 ldout(cct
, 5) << "put_cap_ref dropped last FILE_BUFFER ref on " << *in
<< dendl
;
3115 if (last
& CEPH_CAP_FILE_CACHE
) {
3116 ldout(cct
, 5) << "put_cap_ref dropped last FILE_CACHE ref on " << *in
<< dendl
;
3122 put_inode(in
, put_nref
);
3126 int Client::get_caps(Inode
*in
, int need
, int want
, int *phave
, loff_t endoff
)
3128 int r
= check_pool_perm(in
, need
);
3133 int file_wanted
= in
->caps_file_wanted();
3134 if ((file_wanted
& need
) != need
) {
3135 ldout(cct
, 10) << "get_caps " << *in
<< " need " << ccap_string(need
)
3136 << " file_wanted " << ccap_string(file_wanted
) << ", EBADF "
3142 int have
= in
->caps_issued(&implemented
);
3144 bool waitfor_caps
= false;
3145 bool waitfor_commit
= false;
3147 if (have
& need
& CEPH_CAP_FILE_WR
) {
3149 (endoff
>= (loff_t
)in
->max_size
||
3150 endoff
> (loff_t
)(in
->size
<< 1)) &&
3151 endoff
> (loff_t
)in
->wanted_max_size
) {
3152 ldout(cct
, 10) << "wanted_max_size " << in
->wanted_max_size
<< " -> " << endoff
<< dendl
;
3153 in
->wanted_max_size
= endoff
;
3157 if (endoff
>= 0 && endoff
> (loff_t
)in
->max_size
) {
3158 ldout(cct
, 10) << "waiting on max_size, endoff " << endoff
<< " max_size " << in
->max_size
<< " on " << *in
<< dendl
;
3159 waitfor_caps
= true;
3161 if (!in
->cap_snaps
.empty()) {
3162 if (in
->cap_snaps
.rbegin()->second
.writing
) {
3163 ldout(cct
, 10) << "waiting on cap_snap write to complete" << dendl
;
3164 waitfor_caps
= true;
3166 for (auto &p
: in
->cap_snaps
) {
3167 if (p
.second
.dirty_data
) {
3168 waitfor_commit
= true;
3172 if (waitfor_commit
) {
3173 _flush(in
, new C_Client_FlushComplete(this, in
));
3174 ldout(cct
, 10) << "waiting for WRBUFFER to get dropped" << dendl
;
3179 if (!waitfor_caps
&& !waitfor_commit
) {
3180 if ((have
& need
) == need
) {
3181 int revoking
= implemented
& ~have
;
3182 ldout(cct
, 10) << "get_caps " << *in
<< " have " << ccap_string(have
)
3183 << " need " << ccap_string(need
) << " want " << ccap_string(want
)
3184 << " revoking " << ccap_string(revoking
)
3186 if ((revoking
& want
) == 0) {
3187 *phave
= need
| (have
& want
);
3188 in
->get_cap_ref(need
);
3192 ldout(cct
, 10) << "waiting for caps " << *in
<< " need " << ccap_string(need
) << " want " << ccap_string(want
) << dendl
;
3193 waitfor_caps
= true;
3196 if ((need
& CEPH_CAP_FILE_WR
) && in
->auth_cap
&&
3197 in
->auth_cap
->session
->readonly
)
3200 if (in
->flags
& I_CAP_DROPPED
) {
3201 int mds_wanted
= in
->caps_mds_wanted();
3202 if ((mds_wanted
& need
) != need
) {
3203 int ret
= _renew_caps(in
);
3208 if ((mds_wanted
& file_wanted
) ==
3209 (file_wanted
& (CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_WR
))) {
3210 in
->flags
&= ~I_CAP_DROPPED
;
3215 wait_on_list(in
->waitfor_caps
);
3216 else if (waitfor_commit
)
3217 wait_on_list(in
->waitfor_commit
);
3221 int Client::get_caps_used(Inode
*in
)
3223 unsigned used
= in
->caps_used();
3224 if (!(used
& CEPH_CAP_FILE_CACHE
) &&
3225 !objectcacher
->set_is_empty(&in
->oset
))
3226 used
|= CEPH_CAP_FILE_CACHE
;
3230 void Client::cap_delay_requeue(Inode
*in
)
3232 ldout(cct
, 10) << "cap_delay_requeue on " << *in
<< dendl
;
3233 in
->hold_caps_until
= ceph_clock_now();
3234 in
->hold_caps_until
+= cct
->_conf
->client_caps_release_delay
;
3235 delayed_caps
.push_back(&in
->cap_item
);
3238 void Client::send_cap(Inode
*in
, MetaSession
*session
, Cap
*cap
,
3239 bool sync
, int used
, int want
, int retain
,
3240 int flush
, ceph_tid_t flush_tid
)
3242 int held
= cap
->issued
| cap
->implemented
;
3243 int revoking
= cap
->implemented
& ~cap
->issued
;
3244 retain
&= ~revoking
;
3245 int dropping
= cap
->issued
& ~retain
;
3246 int op
= CEPH_CAP_OP_UPDATE
;
3248 ldout(cct
, 10) << "send_cap " << *in
3249 << " mds." << session
->mds_num
<< " seq " << cap
->seq
3250 << (sync
? " sync " : " async ")
3251 << " used " << ccap_string(used
)
3252 << " want " << ccap_string(want
)
3253 << " flush " << ccap_string(flush
)
3254 << " retain " << ccap_string(retain
)
3255 << " held "<< ccap_string(held
)
3256 << " revoking " << ccap_string(revoking
)
3257 << " dropping " << ccap_string(dropping
)
3260 if (cct
->_conf
->client_inject_release_failure
&& revoking
) {
3261 const int would_have_issued
= cap
->issued
& retain
;
3262 const int would_have_implemented
= cap
->implemented
& (cap
->issued
| used
);
3264 // - tell the server we think issued is whatever they issued plus whatever we implemented
3265 // - leave what we have implemented in place
3266 ldout(cct
, 20) << __func__
<< " injecting failure to release caps" << dendl
;
3267 cap
->issued
= cap
->issued
| cap
->implemented
;
3269 // Make an exception for revoking xattr caps: we are injecting
3270 // failure to release other caps, but allow xattr because client
3271 // will block on xattr ops if it can't release these to MDS (#9800)
3272 const int xattr_mask
= CEPH_CAP_XATTR_SHARED
| CEPH_CAP_XATTR_EXCL
;
3273 cap
->issued
^= xattr_mask
& revoking
;
3274 cap
->implemented
^= xattr_mask
& revoking
;
3276 ldout(cct
, 20) << __func__
<< " issued " << ccap_string(cap
->issued
) << " vs " << ccap_string(would_have_issued
) << dendl
;
3277 ldout(cct
, 20) << __func__
<< " implemented " << ccap_string(cap
->implemented
) << " vs " << ccap_string(would_have_implemented
) << dendl
;
3280 cap
->issued
&= retain
;
3281 cap
->implemented
&= cap
->issued
| used
;
3284 snapid_t follows
= 0;
3287 follows
= in
->snaprealm
->get_snap_context().seq
;
3289 MClientCaps
*m
= new MClientCaps(op
,
3292 cap
->cap_id
, cap
->seq
,
3298 m
->caller_uid
= in
->cap_dirtier_uid
;
3299 m
->caller_gid
= in
->cap_dirtier_gid
;
3301 m
->head
.issue_seq
= cap
->issue_seq
;
3302 m
->set_tid(flush_tid
);
3304 m
->head
.uid
= in
->uid
;
3305 m
->head
.gid
= in
->gid
;
3306 m
->head
.mode
= in
->mode
;
3308 m
->head
.nlink
= in
->nlink
;
3310 if (flush
& CEPH_CAP_XATTR_EXCL
) {
3311 ::encode(in
->xattrs
, m
->xattrbl
);
3312 m
->head
.xattr_version
= in
->xattr_version
;
3316 m
->max_size
= in
->max_size
;
3317 m
->truncate_seq
= in
->truncate_seq
;
3318 m
->truncate_size
= in
->truncate_size
;
3319 m
->mtime
= in
->mtime
;
3320 m
->atime
= in
->atime
;
3321 m
->ctime
= in
->ctime
;
3322 m
->btime
= in
->btime
;
3323 m
->time_warp_seq
= in
->time_warp_seq
;
3324 m
->change_attr
= in
->change_attr
;
3326 m
->flags
|= CLIENT_CAPS_SYNC
;
3328 if (flush
& CEPH_CAP_FILE_WR
) {
3329 m
->inline_version
= in
->inline_version
;
3330 m
->inline_data
= in
->inline_data
;
3333 in
->reported_size
= in
->size
;
3334 m
->set_snap_follows(follows
);
3336 if (cap
== in
->auth_cap
) {
3337 m
->set_max_size(in
->wanted_max_size
);
3338 in
->requested_max_size
= in
->wanted_max_size
;
3339 ldout(cct
, 15) << "auth cap, setting max_size = " << in
->requested_max_size
<< dendl
;
3342 if (!session
->flushing_caps_tids
.empty())
3343 m
->set_oldest_flush_tid(*session
->flushing_caps_tids
.begin());
3345 session
->con
->send_message(m
);
3348 static bool is_max_size_approaching(Inode
*in
)
3350 /* mds will adjust max size according to the reported size */
3351 if (in
->flushing_caps
& CEPH_CAP_FILE_WR
)
3353 if (in
->size
>= in
->max_size
)
3355 /* half of previous max_size increment has been used */
3356 if (in
->max_size
> in
->reported_size
&&
3357 (in
->size
<< 1) >= in
->max_size
+ in
->reported_size
)
3365 * Examine currently used and wanted versus held caps. Release, flush or ack
3366 * revoked caps to the MDS as appropriate.
3368 * @param in the inode to check
3369 * @param flags flags to apply to cap check
3371 void Client::check_caps(Inode
*in
, unsigned flags
)
3373 unsigned wanted
= in
->caps_wanted();
3374 unsigned used
= get_caps_used(in
);
3377 if (in
->is_dir() && (in
->flags
& I_COMPLETE
)) {
3378 // we do this here because we don't want to drop to Fs (and then
3379 // drop the Fs if we do a create!) if that alone makes us send lookups
3380 // to the MDS. Doing it in in->caps_wanted() has knock-on effects elsewhere
3381 wanted
|= CEPH_CAP_FILE_EXCL
;
3385 int issued
= in
->caps_issued(&implemented
);
3386 int revoking
= implemented
& ~issued
;
3388 int retain
= wanted
| used
| CEPH_CAP_PIN
;
3391 retain
|= CEPH_CAP_ANY
;
3393 retain
|= CEPH_CAP_ANY_SHARED
;
3396 ldout(cct
, 10) << "check_caps on " << *in
3397 << " wanted " << ccap_string(wanted
)
3398 << " used " << ccap_string(used
)
3399 << " issued " << ccap_string(issued
)
3400 << " revoking " << ccap_string(revoking
)
3401 << " flags=" << flags
3404 if (in
->snapid
!= CEPH_NOSNAP
)
3405 return; //snap caps last forever, can't write
3407 if (in
->caps
.empty())
3408 return; // guard if at end of func
3410 if ((revoking
& (CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_LAZYIO
)) &&
3411 (used
& CEPH_CAP_FILE_CACHE
) && !(used
& CEPH_CAP_FILE_BUFFER
))
3414 if (!in
->cap_snaps
.empty())
3417 if (flags
& CHECK_CAPS_NODELAY
)
3418 in
->hold_caps_until
= utime_t();
3420 cap_delay_requeue(in
);
3422 utime_t now
= ceph_clock_now();
3424 map
<mds_rank_t
, Cap
*>::iterator it
= in
->caps
.begin();
3425 while (it
!= in
->caps
.end()) {
3426 mds_rank_t mds
= it
->first
;
3427 Cap
*cap
= it
->second
;
3430 MetaSession
*session
= mds_sessions
[mds
];
3434 if (in
->auth_cap
&& cap
!= in
->auth_cap
)
3435 cap_used
&= ~in
->auth_cap
->issued
;
3437 revoking
= cap
->implemented
& ~cap
->issued
;
3439 ldout(cct
, 10) << " cap mds." << mds
3440 << " issued " << ccap_string(cap
->issued
)
3441 << " implemented " << ccap_string(cap
->implemented
)
3442 << " revoking " << ccap_string(revoking
) << dendl
;
3444 if (in
->wanted_max_size
> in
->max_size
&&
3445 in
->wanted_max_size
> in
->requested_max_size
&&
3446 cap
== in
->auth_cap
)
3449 /* approaching file_max? */
3450 if ((cap
->issued
& CEPH_CAP_FILE_WR
) &&
3451 cap
== in
->auth_cap
&&
3452 is_max_size_approaching(in
)) {
3453 ldout(cct
, 10) << "size " << in
->size
<< " approaching max_size " << in
->max_size
3454 << ", reported " << in
->reported_size
<< dendl
;
3458 /* completed revocation? */
3459 if (revoking
&& (revoking
& cap_used
) == 0) {
3460 ldout(cct
, 10) << "completed revocation of " << ccap_string(cap
->implemented
& ~cap
->issued
) << dendl
;
3464 /* want more caps from mds? */
3465 if (wanted
& ~(cap
->wanted
| cap
->issued
))
3468 if (!revoking
&& unmounting
&& (cap_used
== 0))
3471 if (wanted
== cap
->wanted
&& // mds knows what we want.
3472 ((cap
->issued
& ~retain
) == 0) &&// and we don't have anything we wouldn't like
3473 !in
->dirty_caps
) // and we have no dirty caps
3476 if (now
< in
->hold_caps_until
) {
3477 ldout(cct
, 10) << "delaying cap release" << dendl
;
3482 // re-send old cap/snapcap flushes first.
3483 if (session
->mds_state
>= MDSMap::STATE_RECONNECT
&&
3484 session
->mds_state
< MDSMap::STATE_ACTIVE
&&
3485 session
->early_flushing_caps
.count(in
) == 0) {
3486 ldout(cct
, 20) << " reflushing caps (check_caps) on " << *in
3487 << " to mds." << session
->mds_num
<< dendl
;
3488 session
->early_flushing_caps
.insert(in
);
3489 if (in
->cap_snaps
.size())
3490 flush_snaps(in
, true);
3491 if (in
->flushing_caps
)
3492 flush_caps(in
, session
, flags
& CHECK_CAPS_SYNCHRONOUS
);
3496 ceph_tid_t flush_tid
;
3497 if (in
->auth_cap
== cap
&& in
->dirty_caps
) {
3498 flushing
= mark_caps_flushing(in
, &flush_tid
);
3504 send_cap(in
, session
, cap
, flags
& CHECK_CAPS_SYNCHRONOUS
, cap_used
, wanted
,
3505 retain
, flushing
, flush_tid
);
3510 void Client::queue_cap_snap(Inode
*in
, SnapContext
& old_snapc
)
3512 int used
= get_caps_used(in
);
3513 int dirty
= in
->caps_dirty();
3514 ldout(cct
, 10) << "queue_cap_snap " << *in
<< " snapc " << old_snapc
<< " used " << ccap_string(used
) << dendl
;
3516 if (in
->cap_snaps
.size() &&
3517 in
->cap_snaps
.rbegin()->second
.writing
) {
3518 ldout(cct
, 10) << "queue_cap_snap already have pending cap_snap on " << *in
<< dendl
;
3520 } else if (in
->caps_dirty() ||
3521 (used
& CEPH_CAP_FILE_WR
) ||
3522 (dirty
& CEPH_CAP_ANY_WR
)) {
3523 const auto &capsnapem
= in
->cap_snaps
.emplace(std::piecewise_construct
, std::make_tuple(old_snapc
.seq
), std::make_tuple(in
));
3524 assert(capsnapem
.second
== true); /* element inserted */
3525 CapSnap
&capsnap
= capsnapem
.first
->second
;
3526 capsnap
.context
= old_snapc
;
3527 capsnap
.issued
= in
->caps_issued();
3528 capsnap
.dirty
= in
->caps_dirty();
3530 capsnap
.dirty_data
= (used
& CEPH_CAP_FILE_BUFFER
);
3532 capsnap
.uid
= in
->uid
;
3533 capsnap
.gid
= in
->gid
;
3534 capsnap
.mode
= in
->mode
;
3535 capsnap
.btime
= in
->btime
;
3536 capsnap
.xattrs
= in
->xattrs
;
3537 capsnap
.xattr_version
= in
->xattr_version
;
3539 if (used
& CEPH_CAP_FILE_WR
) {
3540 ldout(cct
, 10) << "queue_cap_snap WR used on " << *in
<< dendl
;
3541 capsnap
.writing
= 1;
3543 finish_cap_snap(in
, capsnap
, used
);
3546 ldout(cct
, 10) << "queue_cap_snap not dirty|writing on " << *in
<< dendl
;
3550 void Client::finish_cap_snap(Inode
*in
, CapSnap
&capsnap
, int used
)
3552 ldout(cct
, 10) << "finish_cap_snap " << *in
<< " capsnap " << (void *)&capsnap
<< " used " << ccap_string(used
) << dendl
;
3553 capsnap
.size
= in
->size
;
3554 capsnap
.mtime
= in
->mtime
;
3555 capsnap
.atime
= in
->atime
;
3556 capsnap
.ctime
= in
->ctime
;
3557 capsnap
.time_warp_seq
= in
->time_warp_seq
;
3558 capsnap
.change_attr
= in
->change_attr
;
3560 capsnap
.dirty
|= in
->caps_dirty();
3562 if (capsnap
.dirty
& CEPH_CAP_FILE_WR
) {
3563 capsnap
.inline_data
= in
->inline_data
;
3564 capsnap
.inline_version
= in
->inline_version
;
3567 if (used
& CEPH_CAP_FILE_BUFFER
) {
3568 ldout(cct
, 10) << "finish_cap_snap " << *in
<< " cap_snap " << &capsnap
<< " used " << used
3569 << " WRBUFFER, delaying" << dendl
;
3571 capsnap
.dirty_data
= 0;
3576 void Client::_flushed_cap_snap(Inode
*in
, snapid_t seq
)
3578 ldout(cct
, 10) << "_flushed_cap_snap seq " << seq
<< " on " << *in
<< dendl
;
3579 in
->cap_snaps
.at(seq
).dirty_data
= 0;
// flush_snaps: send a CEPH_CAP_OP_FLUSHSNAP MClientCaps message to the auth
// MDS for each pending cap_snap on `in`. Requires in->auth_cap and at least
// one cap_snap (asserted). Each capsnap is flushed once per session: a new
// flush_tid is allocated from last_flush_tid, the inode is placed on the
// session's flushing_caps list, and the tid is tracked in flushing_caps_tids
// so handle_cap_flushsnap_ack can retire it.
// (Extraction note: `continue`/brace lines between the visible statements
// were dropped; the skip conditions at 3597 and 3608 presumably `continue`.)
3583 void Client::flush_snaps(Inode
*in
, bool all_again
)
3585 ldout(cct
, 10) << "flush_snaps on " << *in
<< " all_again " << all_again
<< dendl
;
3586 assert(in
->cap_snaps
.size());
3589 assert(in
->auth_cap
);
3590 MetaSession
*session
= in
->auth_cap
->session
;
3591 int mseq
= in
->auth_cap
->mseq
;
3593 for (auto &p
: in
->cap_snaps
) {
3594 CapSnap
&capsnap
= p
.second
;
3596 // only flush once per session
3597 if (capsnap
.flush_tid
> 0)
3601 ldout(cct
, 10) << "flush_snaps mds." << session
->mds_num
3602 << " follows " << p
.first
3603 << " size " << capsnap
.size
3604 << " mtime " << capsnap
.mtime
3605 << " dirty_data=" << capsnap
.dirty_data
3606 << " writing=" << capsnap
.writing
3607 << " on " << *in
<< dendl
;
// Skip capsnaps that still have dirty buffered data or in-flight writes.
3608 if (capsnap
.dirty_data
|| capsnap
.writing
)
// Allocate a flush tid and register this inode/tid with the session.
3611 if (capsnap
.flush_tid
== 0) {
3612 capsnap
.flush_tid
= ++last_flush_tid
;
3613 if (!in
->flushing_cap_item
.is_on_list())
3614 session
->flushing_caps
.push_back(&in
->flushing_cap_item
);
3615 session
->flushing_caps_tids
.insert(capsnap
.flush_tid
);
// Build the FLUSHSNAP message carrying the snapshotted inode metadata.
3618 MClientCaps
*m
= new MClientCaps(CEPH_CAP_OP_FLUSHSNAP
, in
->ino
, in
->snaprealm
->ino
, 0, mseq
,
3621 m
->caller_uid
= user_id
;
3623 m
->caller_gid
= group_id
;
3625 m
->set_client_tid(capsnap
.flush_tid
);
3626 m
->head
.snap_follows
= p
.first
;
3628 m
->head
.caps
= capsnap
.issued
;
3629 m
->head
.dirty
= capsnap
.dirty
;
3631 m
->head
.uid
= capsnap
.uid
;
3632 m
->head
.gid
= capsnap
.gid
;
3633 m
->head
.mode
= capsnap
.mode
;
3634 m
->btime
= capsnap
.btime
;
3636 m
->size
= capsnap
.size
;
3638 m
->head
.xattr_version
= capsnap
.xattr_version
;
3639 ::encode(capsnap
.xattrs
, m
->xattrbl
);
3641 m
->ctime
= capsnap
.ctime
;
3642 m
->btime
= capsnap
.btime
;
3643 m
->mtime
= capsnap
.mtime
;
3644 m
->atime
= capsnap
.atime
;
3645 m
->time_warp_seq
= capsnap
.time_warp_seq
;
3646 m
->change_attr
= capsnap
.change_attr
;
3648 if (capsnap
.dirty
& CEPH_CAP_FILE_WR
) {
3649 m
->inline_version
= in
->inline_version
;
3650 m
->inline_data
= in
->inline_data
;
// Tell the MDS the oldest flush tid still outstanding on this session so it
// can trim its completed-flush tracking.
3653 assert(!session
->flushing_caps_tids
.empty());
3654 m
->set_oldest_flush_tid(*session
->flushing_caps_tids
.begin());
3656 session
->con
->send_message(m
);
// wait_on_list: block the calling thread on client_lock until signalled; the
// local Cond (its declaration was dropped by the extractor) is registered on
// `ls` so signal_cond_list can wake it.
3662 void Client::wait_on_list(list
<Cond
*>& ls
)
3665 ls
.push_back(&cond
);
3666 cond
.Wait(client_lock
);
// signal_cond_list: wake every Cond waiting on `ls` (the per-iteration
// Signal() call is in a dropped line).
3670 void Client::signal_cond_list(list
<Cond
*>& ls
)
3672 for (list
<Cond
*>::iterator it
= ls
.begin(); it
!= ls
.end(); ++it
)
// wait_on_context_list: like wait_on_list but queues a C_Cond completion
// Context, then blocks on client_lock until it fires.
3676 void Client::wait_on_context_list(list
<Context
*>& ls
)
3681 ls
.push_back(new C_Cond(&cond
, &done
, &r
));
3683 cond
.Wait(client_lock
);
// signal_context_list: complete (with result 0) and drain every queued
// Context on `ls`.
3686 void Client::signal_context_list(list
<Context
*>& ls
)
3688 while (!ls
.empty()) {
3689 ls
.front()->complete(0);
// wake_inode_waiters: wake cap waiters on every inode that holds a cap in
// session `s` (iterates the session's xlist of Caps).
3694 void Client::wake_inode_waiters(MetaSession
*s
)
3696 xlist
<Cap
*>::iterator iter
= s
->caps
.begin();
3697 while (!iter
.end()){
3698 signal_cond_list((*iter
)->inode
->waitfor_caps
);
3704 // flush dirty data (from objectcache)
// C_Client_CacheInvalidate: completion that forwards a cache-invalidation
// request (ino + byte range) to Client::_async_invalidate. finish() asserts
// the client_lock is NOT held by the caller, since the upcall must run
// outside the lock.
3706 class C_Client_CacheInvalidate
: public Context
{
3710 int64_t offset
, length
;
3712 C_Client_CacheInvalidate(Client
*c
, Inode
*in
, int64_t off
, int64_t len
) :
3713 client(c
), offset(off
), length(len
) {
// When faked inos are in use, record the faked ino so the upcall matches
// what the application was given.
3714 if (client
->use_faked_inos())
3715 ino
= vinodeno_t(in
->faked_ino
, CEPH_NOSNAP
);
3719 void finish(int r
) override
{
3720 // _async_invalidate takes the lock when it needs to, call this back from outside of lock.
3721 assert(!client
->client_lock
.is_locked_by_me());
3722 client
->_async_invalidate(ino
, offset
, length
);
// _async_invalidate: invoke the registered ino_invalidate_cb upcall for the
// given inode/range (runs on the async_ino_invalidator finisher thread).
3726 void Client::_async_invalidate(vinodeno_t ino
, int64_t off
, int64_t len
)
3730 ldout(cct
, 10) << "_async_invalidate " << ino
<< " " << off
<< "~" << len
<< dendl
;
3731 ino_invalidate_cb(callback_handle
, ino
, off
, len
);
// _schedule_invalidate_callback: if an invalidate callback is registered,
// queue a C_Client_CacheInvalidate on the finisher.
3734 void Client::_schedule_invalidate_callback(Inode
*in
, int64_t off
, int64_t len
) {
3736 if (ino_invalidate_cb
)
3737 // we queue the invalidate, which calls the callback and decrements the ref
3738 async_ino_invalidator
.queue(new C_Client_CacheInvalidate(this, in
, off
, len
));
// _invalidate_inode_cache (whole inode): drop the ObjectCacher set for the
// inode (when client_oc is enabled) and schedule the kernel/app invalidate
// upcall with off=0, len=0 (meaning the whole file).
3741 void Client::_invalidate_inode_cache(Inode
*in
)
3743 ldout(cct
, 10) << "_invalidate_inode_cache " << *in
<< dendl
;
3745 // invalidate our userspace inode cache
3746 if (cct
->_conf
->client_oc
)
3747 objectcacher
->release_set(&in
->oset
);
3749 _schedule_invalidate_callback(in
, 0, 0);
// _invalidate_inode_cache (range): map the byte range to object extents via
// Striper, discard those extents from the ObjectCacher, then schedule the
// invalidate upcall for the same range.
3752 void Client::_invalidate_inode_cache(Inode
*in
, int64_t off
, int64_t len
)
3754 ldout(cct
, 10) << "_invalidate_inode_cache " << *in
<< " " << off
<< "~" << len
<< dendl
;
3756 // invalidate our userspace inode cache
3757 if (cct
->_conf
->client_oc
) {
3758 vector
<ObjectExtent
> ls
;
3759 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, off
, len
, in
->truncate_size
, ls
);
3760 objectcacher
->discard_set(&in
->oset
, ls
);
3763 _schedule_invalidate_callback(in
, off
, len
);
// _release: if no one holds a FILE_CACHE cap reference on the inode, drop its
// cached data via _invalidate_inode_cache. (Return value handling is in
// dropped lines.)
3766 bool Client::_release(Inode
*in
)
3768 ldout(cct
, 20) << "_release " << *in
<< dendl
;
3769 if (in
->cap_refs
[CEPH_CAP_FILE_CACHE
] == 0) {
3770 _invalidate_inode_cache(in
);
// _flush: write back the inode's dirty ObjectCacher data, completing
// `onfinish` when done. Fast paths: nothing dirty -> complete(0)
// immediately; pool full -> purge the cached data and complete(-ENOSPC)
// rather than blocking behind a full pool.
3776 bool Client::_flush(Inode
*in
, Context
*onfinish
)
3778 ldout(cct
, 10) << "_flush " << *in
<< dendl
;
3780 if (!in
->oset
.dirty_or_tx
) {
3781 ldout(cct
, 10) << " nothing to flush" << dendl
;
3782 onfinish
->complete(0);
3786 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
)) {
3787 ldout(cct
, 1) << __func__
<< ": FULL, purging for ENOSPC" << dendl
;
3788 objectcacher
->purge_set(&in
->oset
);
3790 onfinish
->complete(-ENOSPC
);
3795 return objectcacher
->flush_set(&in
->oset
, onfinish
);
// _flush_range: synchronously flush [offset, offset+size) of the inode.
// Uses a private Mutex + C_SafeCond pair and drops client_lock while waiting
// (the wait loop itself is in dropped lines). Caller must hold client_lock.
3798 void Client::_flush_range(Inode
*in
, int64_t offset
, uint64_t size
)
3800 assert(client_lock
.is_locked());
3801 if (!in
->oset
.dirty_or_tx
) {
3802 ldout(cct
, 10) << " nothing to flush" << dendl
;
3806 Mutex
flock("Client::_flush_range flock");
3809 Context
*onflush
= new C_SafeCond(&flock
, &cond
, &safe
);
3810 bool ret
= objectcacher
->file_flush(&in
->oset
, &in
->layout
, in
->snaprealm
->get_snap_context(),
3811 offset
, size
, onflush
);
3814 client_lock
.Unlock();
// flush_set_callback: ObjectCacher flush-completion hook; recovers the Inode
// from the ObjectSet parent pointer. Runs under client_lock via the
// dispatch path (asserted, not re-taken).
3823 void Client::flush_set_callback(ObjectCacher::ObjectSet
*oset
)
3825 // Mutex::Locker l(client_lock);
3826 assert(client_lock
.is_locked()); // will be called via dispatch() -> objecter -> ...
3827 Inode
*in
= static_cast<Inode
*>(oset
->parent
);
// _flushed: called once the inode's buffered data has been written back;
// releases the FILE_CACHE|FILE_BUFFER cap references taken for the flush.
3832 void Client::_flushed(Inode
*in
)
3834 ldout(cct
, 10) << "_flushed " << *in
<< dendl
;
3836 put_cap_ref(in
, CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_BUFFER
);
3841 // checks common to add_update_cap, handle_cap_grant
// check_cap_issue: on a new grant, detect transitions from "not issued" to
// "issued" for FILE_CACHE and FILE_SHARED; a fresh FILE_SHARED grant clears
// the directory's complete/ordered flags.
3842 void Client::check_cap_issue(Inode
*in
, Cap
*cap
, unsigned issued
)
3844 unsigned had
= in
->caps_issued();
3846 if ((issued
& CEPH_CAP_FILE_CACHE
) &&
3847 !(had
& CEPH_CAP_FILE_CACHE
))
3850 if ((issued
& CEPH_CAP_FILE_SHARED
) &&
3851 !(had
& CEPH_CAP_FILE_SHARED
)) {
3855 clear_dir_complete_and_ordered(in
, true);
// add_update_cap: install or update the cap for `in` held via `mds_session`.
// Handles the export/import race (a stale message ordered before the import
// keeps existing caps), opens the inode's snaprealm on its first cap,
// switches the auth cap (moving flushing state between sessions) when
// CEPH_CAP_FLAG_AUTH and a newer mseq arrive, and wakes cap waiters when new
// bits were granted.
3859 void Client::add_update_cap(Inode
*in
, MetaSession
*mds_session
, uint64_t cap_id
,
3860 unsigned issued
, unsigned seq
, unsigned mseq
, inodeno_t realm
,
3861 int flags
, const UserPerm
& cap_perms
)
3864 mds_rank_t mds
= mds_session
->mds_num
;
3865 if (in
->caps
.count(mds
)) {
3866 cap
= in
->caps
[mds
];
3869 * auth mds of the inode changed. we received the cap export
3870 * message, but still haven't received the cap import message.
3871 * handle_cap_export() updated the new auth MDS' cap.
3873 * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
3874 * a message that was send before the cap import message. So
3875 * don't remove caps.
3877 if (ceph_seq_cmp(seq
, cap
->seq
) <= 0) {
3878 assert(cap
== in
->auth_cap
);
3879 assert(cap
->cap_id
== cap_id
);
3882 issued
|= cap
->issued
;
3883 flags
|= CEPH_CAP_FLAG_AUTH
;
// New cap from this MDS: first cap on the inode also opens its snaprealm.
3886 mds_session
->num_caps
++;
3887 if (!in
->is_any_caps()) {
3888 assert(in
->snaprealm
== 0);
3889 in
->snaprealm
= get_snap_realm(realm
);
3890 in
->snaprealm
->inodes_with_caps
.push_back(&in
->snaprealm_item
);
3891 ldout(cct
, 15) << "add_update_cap first one, opened snaprealm " << in
->snaprealm
<< dendl
;
3893 in
->caps
[mds
] = cap
= new Cap
;
3895 mds_session
->caps
.push_back(&cap
->cap_item
);
3896 cap
->session
= mds_session
;
3898 cap
->gen
= mds_session
->cap_gen
;
3899 cap_list
.push_back(&in
->cap_item
);
3902 check_cap_issue(in
, cap
, issued
);
// Auth-cap change: prefer the cap with the newer migration seq; carry any
// in-flight flushing state over to the new auth session.
3904 if (flags
& CEPH_CAP_FLAG_AUTH
) {
3905 if (in
->auth_cap
!= cap
&&
3906 (!in
->auth_cap
|| ceph_seq_cmp(in
->auth_cap
->mseq
, mseq
) < 0)) {
3907 if (in
->auth_cap
&& in
->flushing_cap_item
.is_on_list()) {
3908 ldout(cct
, 10) << "add_update_cap changing auth cap: "
3909 << "add myself to new auth MDS' flushing caps list" << dendl
;
3910 adjust_session_flushing_caps(in
, in
->auth_cap
->session
, mds_session
);
3916 unsigned old_caps
= cap
->issued
;
3917 cap
->cap_id
= cap_id
;
3918 cap
->issued
|= issued
;
3919 cap
->implemented
|= issued
;
3921 cap
->issue_seq
= seq
;
3923 cap
->latest_perms
= cap_perms
;
3924 ldout(cct
, 10) << "add_update_cap issued " << ccap_string(old_caps
) << " -> " << ccap_string(cap
->issued
)
3925 << " from mds." << mds
3929 if ((issued
& ~old_caps
) && in
->auth_cap
== cap
) {
3930 // non-auth MDS is revoking the newly grant caps ?
3931 for (map
<mds_rank_t
,Cap
*>::iterator it
= in
->caps
.begin(); it
!= in
->caps
.end(); ++it
) {
3932 if (it
->second
== cap
)
3934 if (it
->second
->implemented
& ~it
->second
->issued
& issued
) {
3935 check_caps(in
, CHECK_CAPS_NODELAY
);
// Newly granted bits may unblock waiters (e.g. get_caps).
3941 if (issued
& ~old_caps
)
3942 signal_cond_list(in
->waitfor_caps
);
// remove_cap: detach `cap` from its inode and session. Optionally queues a
// cap-release message to the MDS. If it was the auth cap, the inode leaves
// the session's flushing list; dropping the last cap also closes the
// inode's snaprealm.
3945 void Client::remove_cap(Cap
*cap
, bool queue_release
)
3947 Inode
*in
= cap
->inode
;
3948 MetaSession
*session
= cap
->session
;
3949 mds_rank_t mds
= cap
->session
->mds_num
;
3951 ldout(cct
, 10) << "remove_cap mds." << mds
<< " on " << *in
<< dendl
;
3953 if (queue_release
) {
3954 session
->enqueue_cap_release(
3962 if (in
->auth_cap
== cap
) {
3963 if (in
->flushing_cap_item
.is_on_list()) {
3964 ldout(cct
, 10) << " removing myself from flushing_cap list" << dendl
;
3965 in
->flushing_cap_item
.remove_myself();
3967 in
->auth_cap
= NULL
;
3969 assert(in
->caps
.count(mds
));
3970 in
->caps
.erase(mds
);
3972 cap
->cap_item
.remove_myself();
3976 if (!in
->is_any_caps()) {
3977 ldout(cct
, 15) << "remove_cap last one, closing snaprealm " << in
->snaprealm
<< dendl
;
3978 in
->snaprealm_item
.remove_myself();
3979 put_snap_realm(in
->snaprealm
);
// remove_all_caps: drop every cap on the inode, queueing releases.
3984 void Client::remove_all_caps(Inode
*in
)
3986 while (!in
->caps
.empty())
3987 remove_cap(in
->caps
.begin()->second
, true);
// remove_session_caps: tear down every cap held through session `s` (session
// closed/evicted). Auth-cap inodes get max_size state reset and are flagged
// I_CAP_DROPPED; any still-dirty/flushing state is abandoned (logged as an
// error) and waiters are signalled so they can re-evaluate.
3990 void Client::remove_session_caps(MetaSession
*s
)
3992 ldout(cct
, 10) << "remove_session_caps mds." << s
->mds_num
<< dendl
;
3994 while (s
->caps
.size()) {
3995 Cap
*cap
= *s
->caps
.begin();
3996 Inode
*in
= cap
->inode
;
3997 bool dirty_caps
= false, cap_snaps
= false;
3998 if (in
->auth_cap
== cap
) {
3999 cap_snaps
= !in
->cap_snaps
.empty();
4000 dirty_caps
= in
->dirty_caps
| in
->flushing_caps
;
4001 in
->wanted_max_size
= 0;
4002 in
->requested_max_size
= 0;
4003 in
->flags
|= I_CAP_DROPPED
;
4005 remove_cap(cap
, false);
4006 signal_cond_list(in
->waitfor_caps
);
// Keep the inode alive while we discard its cap_snaps and flushing state.
4008 InodeRef
tmp_ref(in
);
4009 in
->cap_snaps
.clear();
4012 lderr(cct
) << "remove_session_caps still has dirty|flushing caps on " << *in
<< dendl
;
4013 if (in
->flushing_caps
) {
4014 num_flushing_caps
--;
4015 in
->flushing_cap_tids
.clear();
4017 in
->flushing_caps
= 0;
4022 s
->flushing_caps_tids
.clear();
// C_Client_Remount: finisher context that invokes the registered remount_cb
// (remounting makes the kernel trim all unused dentries). If the remount
// fails while it is required and we are not unmounting, this is fatal
// (assert).
4026 class C_Client_Remount
: public Context
{
4030 explicit C_Client_Remount(Client
*c
) : client(c
) {}
4031 void finish(int r
) override
{
4033 r
= client
->remount_cb(client
->callback_handle
);
4035 client_t whoami
= client
->get_nodeid();
4036 lderr(client
->cct
) << "tried to remount (to trim kernel dentries) and got error "
4038 if (client
->require_remount
&& !client
->unmounting
) {
4039 assert(0 == "failed to remount for kernel dentry trimming");
// _invalidate_kernel_dcache: ask the kernel to drop cached dentries. If a
// per-dentry invalidate callback is available, schedule it for each dentry
// under root; otherwise fall back to queueing a remount (which trims all
// unused dentries).
4045 void Client::_invalidate_kernel_dcache()
4049 if (can_invalidate_dentries
&& dentry_invalidate_cb
&& root
->dir
) {
4050 for (ceph::unordered_map
<string
, Dentry
*>::iterator p
= root
->dir
->dentries
.begin();
4051 p
!= root
->dir
->dentries
.end();
4053 if (p
->second
->inode
)
4054 _schedule_invalidate_dentry_callback(p
->second
, false);
4056 } else if (remount_cb
) {
4058 // when remounting a file system, linux kernel trims all unused dentries in the fs
4059 remount_finisher
.queue(new C_Client_Remount(this));
// trim_caps: reduce the number of caps held via session `s` toward `max`
// (MDS-requested). Disposable non-auth caps (nothing used beyond what the
// auth MDS issues) are removed outright; otherwise we try to expire the
// inode's dentries (scheduling kernel invalidates for root-level ones). If
// still over the limit afterwards, fall back to a kernel dcache invalidate.
4063 void Client::trim_caps(MetaSession
*s
, int max
)
4065 mds_rank_t mds
= s
->mds_num
;
4066 int caps_size
= s
->caps
.size();
4067 ldout(cct
, 10) << "trim_caps mds." << mds
<< " max " << max
4068 << " caps " << caps_size
<< dendl
;
4071 xlist
<Cap
*>::iterator p
= s
->caps
.begin();
4072 while ((caps_size
- trimmed
) > max
&& !p
.end()) {
4074 Inode
*in
= cap
->inode
;
4076 // Increment p early because it will be invalidated if cap
4077 // is deleted inside remove_cap
4080 if (in
->caps
.size() > 1 && cap
!= in
->auth_cap
) {
4081 int mine
= cap
->issued
| cap
->implemented
;
4082 int oissued
= in
->auth_cap
? in
->auth_cap
->issued
: 0;
4083 // disposable non-auth cap
4084 if (!(get_caps_used(in
) & ~oissued
& mine
)) {
4085 ldout(cct
, 20) << " removing unused, unneeded non-auth cap on " << *in
<< dendl
;
4086 remove_cap(cap
, true);
4090 ldout(cct
, 20) << " trying to trim dentries for " << *in
<< dendl
;
4092 set
<Dentry
*>::iterator q
= in
->dn_set
.begin();
4093 InodeRef
tmp_ref(in
);
4094 while (q
!= in
->dn_set
.end()) {
4096 if (dn
->lru_is_expireable()) {
4097 if (can_invalidate_dentries
&&
4098 dn
->dir
->parent_inode
->ino
== MDS_INO_ROOT
) {
4099 // Only issue one of these per DN for inodes in root: handle
4100 // others more efficiently by calling for root-child DNs at
4101 // the end of this function.
4102 _schedule_invalidate_dentry_callback(dn
, true);
4106 ldout(cct
, 20) << " not expirable: " << dn
->name
<< dendl
;
4110 if (all
&& in
->ino
!= MDS_INO_ROOT
) {
4111 ldout(cct
, 20) << __func__
<< " counting as trimmed: " << *in
<< dendl
;
4117 if (s
->caps
.size() > max
)
4118 _invalidate_kernel_dcache();
// force_session_readonly: signal write-cap waiters on every inode with a cap
// in this session so they can observe the (now read-only) session state.
// (The line that actually sets the session read-only flag was dropped by the
// extractor — TODO confirm against the full source.)
4121 void Client::force_session_readonly(MetaSession
*s
)
4124 for (xlist
<Cap
*>::iterator p
= s
->caps
.begin(); !p
.end(); ++p
) {
4125 Inode
*in
= (*p
)->inode
;
4126 if (in
->caps_wanted() & CEPH_CAP_FILE_WR
)
4127 signal_cond_list(in
->waitfor_caps
);
// mark_caps_dirty: add `caps` bits to the inode's dirty_caps set (with an
// extra step, in a dropped line, on the first transition to dirty).
4131 void Client::mark_caps_dirty(Inode
*in
, int caps
)
4133 ldout(cct
, 10) << "mark_caps_dirty " << *in
<< " " << ccap_string(in
->dirty_caps
) << " -> "
4134 << ccap_string(in
->dirty_caps
| caps
) << dendl
;
4135 if (caps
&& !in
->caps_dirty())
4137 in
->dirty_caps
|= caps
;
// mark_caps_flushing: move the inode's dirty caps into the flushing set.
// Allocates a new flush tid (recorded in *ptid via dropped lines — TODO
// confirm), tracks it in in->flushing_cap_tids and the auth session's
// flushing_caps_tids, and puts the inode on the session's flushing list.
// Returns the cap bits now being flushed.
4140 int Client::mark_caps_flushing(Inode
*in
, ceph_tid_t
* ptid
)
4142 MetaSession
*session
= in
->auth_cap
->session
;
4144 int flushing
= in
->dirty_caps
;
4147 ceph_tid_t flush_tid
= ++last_flush_tid
;
4148 in
->flushing_cap_tids
[flush_tid
] = flushing
;
4150 if (!in
->flushing_caps
) {
4151 ldout(cct
, 10) << "mark_caps_flushing " << ccap_string(flushing
) << " " << *in
<< dendl
;
4152 num_flushing_caps
++;
4154 ldout(cct
, 10) << "mark_caps_flushing (more) " << ccap_string(flushing
) << " " << *in
<< dendl
;
4157 in
->flushing_caps
|= flushing
;
4160 if (!in
->flushing_cap_item
.is_on_list())
4161 session
->flushing_caps
.push_back(&in
->flushing_cap_item
);
4162 session
->flushing_caps_tids
.insert(flush_tid
);
// adjust_session_flushing_caps: when the auth MDS for `in` changes, migrate
// all of the inode's outstanding flush tids (cap_snaps and cap flushes) from
// the old session's tracking set to the new one's, and move the inode onto
// the new session's flushing list.
4168 void Client::adjust_session_flushing_caps(Inode
*in
, MetaSession
*old_s
, MetaSession
*new_s
)
4170 for (auto &p
: in
->cap_snaps
) {
4171 CapSnap
&capsnap
= p
.second
;
4172 if (capsnap
.flush_tid
> 0) {
4173 old_s
->flushing_caps_tids
.erase(capsnap
.flush_tid
);
4174 new_s
->flushing_caps_tids
.insert(capsnap
.flush_tid
);
4177 for (map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4178 it
!= in
->flushing_cap_tids
.end();
4180 old_s
->flushing_caps_tids
.erase(it
->first
);
4181 new_s
->flushing_caps_tids
.insert(it
->first
);
4183 new_s
->flushing_caps
.push_back(&in
->flushing_cap_item
);
4187 * Flush all caps back to the MDS. Because the callers generally wait on the
4188 * result of this function (syncfs and umount cases), we set
4189 * CHECK_CAPS_SYNCHRONOUS on the last check_caps call.
4191 void Client::flush_caps_sync()
4193 ldout(cct
, 10) << __func__
<< dendl
;
// First drain the delayed_caps list, then walk cap_list; only the very last
// check_caps call gets CHECK_CAPS_SYNCHRONOUS (see header comment).
4194 xlist
<Inode
*>::iterator p
= delayed_caps
.begin();
4196 unsigned flags
= CHECK_CAPS_NODELAY
;
4200 delayed_caps
.pop_front();
4201 if (p
.end() && cap_list
.empty())
4202 flags
|= CHECK_CAPS_SYNCHRONOUS
;
4203 check_caps(in
, flags
);
4207 p
= cap_list
.begin();
4209 unsigned flags
= CHECK_CAPS_NODELAY
;
4214 flags
|= CHECK_CAPS_SYNCHRONOUS
;
4215 check_caps(in
, flags
);
// flush_caps: (re)send a cap-flush message to the auth MDS for every
// outstanding flush tid on `in`. When `sync` is set, only the last
// (highest) tid requests a journal flush (req_sync).
4219 void Client::flush_caps(Inode
*in
, MetaSession
*session
, bool sync
)
4221 ldout(cct
, 10) << "flush_caps " << in
<< " mds." << session
->mds_num
<< dendl
;
4222 Cap
*cap
= in
->auth_cap
;
4223 assert(cap
->session
== session
);
4225 for (map
<ceph_tid_t
,int>::iterator p
= in
->flushing_cap_tids
.begin();
4226 p
!= in
->flushing_cap_tids
.end();
4228 bool req_sync
= false;
4230 /* If this is a synchronous request, then flush the journal on last one */
4231 if (sync
&& (p
->first
== in
->flushing_cap_tids
.rbegin()->first
))
4234 send_cap(in
, session
, cap
, req_sync
,
4235 (get_caps_used(in
) | in
->caps_dirty()),
4236 in
->caps_wanted(), (cap
->issued
| cap
->implemented
),
4237 p
->second
, p
->first
);
// wait_sync_caps (per-inode): block until every flush tid on `in` up to
// `want` has been acked (oldest outstanding tid > want).
4241 void Client::wait_sync_caps(Inode
*in
, ceph_tid_t want
)
4243 while (in
->flushing_caps
) {
4244 map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4245 assert(it
!= in
->flushing_cap_tids
.end());
4246 if (it
->first
> want
)
4248 ldout(cct
, 10) << "wait_sync_caps on " << *in
<< " flushing "
4249 << ccap_string(it
->second
) << " want " << want
4250 << " last " << it
->first
<< dendl
;
4251 wait_on_list(in
->waitfor_caps
);
// wait_sync_caps (global): block on sync_cond until every session's oldest
// outstanding flush tid is newer than `want`.
4255 void Client::wait_sync_caps(ceph_tid_t want
)
4258 ldout(cct
, 10) << "wait_sync_caps want " << want
<< " (last is " << last_flush_tid
<< ", "
4259 << num_flushing_caps
<< " total flushing)" << dendl
;
4260 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
4261 p
!= mds_sessions
.end();
4263 MetaSession
*s
= p
->second
;
4264 if (s
->flushing_caps_tids
.empty())
4266 ceph_tid_t oldest_tid
= *s
->flushing_caps_tids
.begin();
4267 if (oldest_tid
<= want
) {
4268 ldout(cct
, 10) << " waiting on mds." << p
->first
<< " tid " << oldest_tid
4269 << " (want " << want
<< ")" << dendl
;
4270 sync_cond
.Wait(client_lock
);
// kick_flushing_caps: after session recovery, re-send pending snap flushes
// and cap flushes for every inode on the session's flushing list, skipping
// inodes already handled by early_kick_flushing_caps; clears that early set
// when done.
4276 void Client::kick_flushing_caps(MetaSession
*session
)
4278 mds_rank_t mds
= session
->mds_num
;
4279 ldout(cct
, 10) << "kick_flushing_caps mds." << mds
<< dendl
;
4281 for (xlist
<Inode
*>::iterator p
= session
->flushing_caps
.begin(); !p
.end(); ++p
) {
4283 if (session
->early_flushing_caps
.count(in
))
4285 ldout(cct
, 20) << " reflushing caps on " << *in
<< " to mds." << mds
<< dendl
;
4286 if (in
->cap_snaps
.size())
4287 flush_snaps(in
, true);
4288 if (in
->flushing_caps
)
4289 flush_caps(in
, session
);
4292 session
->early_flushing_caps
.clear();
// early_kick_flushing_caps: during reconnect, re-send flushes only for
// inodes whose flushing caps were revoked, so the MDS processes the flush
// before issuing those caps to another client; records the inodes kicked in
// early_flushing_caps so kick_flushing_caps won't repeat them.
4295 void Client::early_kick_flushing_caps(MetaSession
*session
)
4297 session
->early_flushing_caps
.clear();
4299 for (xlist
<Inode
*>::iterator p
= session
->flushing_caps
.begin(); !p
.end(); ++p
) {
4301 assert(in
->auth_cap
);
4303 // if flushing caps were revoked, we re-send the cap flush in client reconnect
4304 // stage. This guarantees that MDS processes the cap flush message before issuing
4305 // the flushing caps to other client.
4306 if ((in
->flushing_caps
& in
->auth_cap
->issued
) == in
->flushing_caps
)
4309 ldout(cct
, 20) << " reflushing caps (early_kick) on " << *in
4310 << " to mds." << session
->mds_num
<< dendl
;
4312 session
->early_flushing_caps
.insert(in
);
4314 if (in
->cap_snaps
.size())
4315 flush_snaps(in
, true);
4316 if (in
->flushing_caps
)
4317 flush_caps(in
, session
);
// kick_maxsize_requests: reset wanted/requested max_size on every inode with
// a cap in this session and wake cap waiters so they re-request.
4322 void Client::kick_maxsize_requests(MetaSession
*session
)
4324 xlist
<Cap
*>::iterator iter
= session
->caps
.begin();
4325 while (!iter
.end()){
4326 (*iter
)->inode
->requested_max_size
= 0;
4327 (*iter
)->inode
->wanted_max_size
= 0;
4328 signal_cond_list((*iter
)->inode
->waitfor_caps
);
// SnapRealm::build_snap_context: rebuild cached_snap_context as the union of
// prior-parent snaps, the current parent's snaps at/after parent_since, and
// this realm's own snaps, sorted in descending snapid order; seq is the max
// of our seq and the parent's.
4333 void SnapRealm::build_snap_context()
4335 set
<snapid_t
> snaps
;
4336 snapid_t max_seq
= seq
;
4338 // start with prior_parents?
4339 for (unsigned i
=0; i
<prior_parent_snaps
.size(); i
++)
4340 snaps
.insert(prior_parent_snaps
[i
]);
4342 // current parent's snaps
4344 const SnapContext
& psnapc
= pparent
->get_snap_context();
4345 for (unsigned i
=0; i
<psnapc
.snaps
.size(); i
++)
4346 if (psnapc
.snaps
[i
] >= parent_since
)
4347 snaps
.insert(psnapc
.snaps
[i
]);
4348 if (psnapc
.seq
> max_seq
)
4349 max_seq
= psnapc
.seq
;
4353 for (unsigned i
=0; i
<my_snaps
.size(); i
++)
4354 snaps
.insert(my_snaps
[i
]);
// SnapContext wants snaps in descending order -> reverse-iterate the set.
4357 cached_snap_context
.seq
= max_seq
;
4358 cached_snap_context
.snaps
.resize(0);
4359 cached_snap_context
.snaps
.reserve(snaps
.size());
4360 for (set
<snapid_t
>::reverse_iterator p
= snaps
.rbegin(); p
!= snaps
.rend(); ++p
)
4361 cached_snap_context
.snaps
.push_back(*p
);
// invalidate_snaprealm_and_children: breadth-first walk (queue `q`) over the
// realm and its pchildren, invalidating each realm's cached snap context.
4364 void Client::invalidate_snaprealm_and_children(SnapRealm
*realm
)
4369 while (!q
.empty()) {
4373 ldout(cct
, 10) << "invalidate_snaprealm_and_children " << *realm
<< dendl
;
4374 realm
->invalidate_cache();
4376 for (set
<SnapRealm
*>::iterator p
= realm
->pchildren
.begin();
4377 p
!= realm
->pchildren
.end();
// get_snap_realm: look up (creating on first use) the SnapRealm for ino `r`
// and take a reference (the nref increment is in a dropped line — the log
// message shows nref -> nref+1).
4383 SnapRealm
*Client::get_snap_realm(inodeno_t r
)
4385 SnapRealm
*realm
= snap_realms
[r
];
4387 snap_realms
[r
] = realm
= new SnapRealm(r
);
4388 ldout(cct
, 20) << "get_snap_realm " << r
<< " " << realm
<< " " << realm
->nref
<< " -> " << (realm
->nref
+ 1) << dendl
;
// get_snap_realm_maybe: like get_snap_realm but returns failure (dropped
// return line) instead of creating when the realm does not exist.
4393 SnapRealm
*Client::get_snap_realm_maybe(inodeno_t r
)
4395 if (snap_realms
.count(r
) == 0) {
4396 ldout(cct
, 20) << "get_snap_realm_maybe " << r
<< " fail" << dendl
;
4399 SnapRealm
*realm
= snap_realms
[r
];
4400 ldout(cct
, 20) << "get_snap_realm_maybe " << r
<< " " << realm
<< " " << realm
->nref
<< " -> " << (realm
->nref
+ 1) << dendl
;
// put_snap_realm: drop a reference; on the last ref, remove the realm from
// snap_realms, detach from its parent, and release the parent ref.
4405 void Client::put_snap_realm(SnapRealm
*realm
)
4407 ldout(cct
, 20) << "put_snap_realm " << realm
->ino
<< " " << realm
4408 << " " << realm
->nref
<< " -> " << (realm
->nref
- 1) << dendl
;
4409 if (--realm
->nref
== 0) {
4410 snap_realms
.erase(realm
->ino
);
4411 if (realm
->pparent
) {
4412 realm
->pparent
->pchildren
.erase(realm
);
4413 put_snap_realm(realm
->pparent
);
// adjust_realm_parent: re-point `realm` at a new parent realm ino, fixing up
// both parents' pchildren sets and references. (Return value — presumably
// whether the parent changed — is in dropped lines.)
4419 bool Client::adjust_realm_parent(SnapRealm
*realm
, inodeno_t parent
)
4421 if (realm
->parent
!= parent
) {
4422 ldout(cct
, 10) << "adjust_realm_parent " << *realm
4423 << " " << realm
->parent
<< " -> " << parent
<< dendl
;
4424 realm
->parent
= parent
;
4425 if (realm
->pparent
) {
4426 realm
->pparent
->pchildren
.erase(realm
);
4427 put_snap_realm(realm
->pparent
);
4429 realm
->pparent
= get_snap_realm(parent
);
4430 realm
->pparent
->pchildren
.insert(realm
);
// has_new_snaps: true when the new context's newest snap (snaps[0], since
// SnapContext snaps are descending) is newer than the old context's seq.
4436 static bool has_new_snaps(const SnapContext
& old_snapc
,
4437 const SnapContext
& new_snapc
)
4439 return !new_snapc
.snaps
.empty() && new_snapc
.snaps
[0] > old_snapc
.seq
;
// update_snap_trace: decode a snap-trace bufferlist from the MDS and apply
// each realm's new snap info. For realms whose seq advanced, the old snap
// context of the realm and all descendants is saved in dirty_realms so that,
// after the update, inodes in realms that actually gained snaps get
// queue_cap_snap'd (writeback with the OLD snap info). Optionally returns
// the first decoded realm (with a held ref) via realm_ret.
4443 void Client::update_snap_trace(bufferlist
& bl
, SnapRealm
**realm_ret
, bool flush
)
4445 SnapRealm
*first_realm
= NULL
;
4446 ldout(cct
, 10) << "update_snap_trace len " << bl
.length() << dendl
;
4448 map
<SnapRealm
*, SnapContext
> dirty_realms
;
4450 bufferlist::iterator p
= bl
.begin();
4454 SnapRealm
*realm
= get_snap_realm(info
.ino());
4456 bool invalidate
= false;
4458 if (info
.seq() > realm
->seq
) {
4459 ldout(cct
, 10) << "update_snap_trace " << *realm
<< " seq " << info
.seq() << " > " << realm
->seq
4463 // writeback any dirty caps _before_ updating snap list (i.e. with old snap info)
4464 // flush me + children
4467 while (!q
.empty()) {
4468 SnapRealm
*realm
= q
.front();
4471 for (set
<SnapRealm
*>::iterator p
= realm
->pchildren
.begin();
4472 p
!= realm
->pchildren
.end();
// Record each realm's pre-update snap context exactly once.
4476 if (dirty_realms
.count(realm
) == 0) {
4478 dirty_realms
[realm
] = realm
->get_snap_context();
4484 realm
->seq
= info
.seq();
4485 realm
->created
= info
.created();
4486 realm
->parent_since
= info
.parent_since();
4487 realm
->prior_parent_snaps
= info
.prior_parent_snaps
;
4488 realm
->my_snaps
= info
.my_snaps
;
4492 // _always_ verify parent
4493 if (adjust_realm_parent(realm
, info
.parent()))
4497 invalidate_snaprealm_and_children(realm
);
4498 ldout(cct
, 15) << "update_snap_trace " << *realm
<< " self|parent updated" << dendl
;
4499 ldout(cct
, 15) << " snapc " << realm
->get_snap_context() << dendl
;
4501 ldout(cct
, 10) << "update_snap_trace " << *realm
<< " seq " << info
.seq()
4502 << " <= " << realm
->seq
<< " and same parent, SKIPPING" << dendl
;
4506 first_realm
= realm
;
4508 put_snap_realm(realm
);
// Second pass: queue cap snapshots for inodes in realms that gained snaps.
4511 for (map
<SnapRealm
*, SnapContext
>::iterator q
= dirty_realms
.begin();
4512 q
!= dirty_realms
.end();
4514 SnapRealm
*realm
= q
->first
;
4515 // if there are new snaps ?
4516 if (has_new_snaps(q
->second
, realm
->get_snap_context())) {
4517 ldout(cct
, 10) << " flushing caps on " << *realm
<< dendl
;
4518 xlist
<Inode
*>::iterator r
= realm
->inodes_with_caps
.begin();
4522 queue_cap_snap(in
, q
->second
);
4525 ldout(cct
, 10) << " no new snap on " << *realm
<< dendl
;
4527 put_snap_realm(realm
);
// Hand the first realm's reference to the caller, or drop it.
4531 *realm_ret
= first_realm
;
4533 put_snap_realm(first_realm
);
// handle_snap: process an MClientSnap from an MDS. For CEPH_SNAP_OP_SPLIT,
// move the listed inodes (unless their realm is newer than the split source)
// and child realms into the realm being split off, remembering each moved
// inode's old snap context in to_move; then apply the embedded snap trace
// (skipping the flush pass for DESTROY) and queue cap snapshots for moved
// inodes whose new realm has newer snaps.
4536 void Client::handle_snap(MClientSnap
*m
)
4538 ldout(cct
, 10) << "handle_snap " << *m
<< dendl
;
4539 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4540 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4546 got_mds_push(session
);
4548 map
<Inode
*, SnapContext
> to_move
;
4549 SnapRealm
*realm
= 0;
4551 if (m
->head
.op
== CEPH_SNAP_OP_SPLIT
) {
4552 assert(m
->head
.split
);
4554 bufferlist::iterator p
= m
->bl
.begin();
4556 assert(info
.ino() == m
->head
.split
);
4558 // flush, then move, ino's.
4559 realm
= get_snap_realm(info
.ino());
4560 ldout(cct
, 10) << " splitting off " << *realm
<< dendl
;
4561 for (vector
<inodeno_t
>::iterator p
= m
->split_inos
.begin();
4562 p
!= m
->split_inos
.end();
4564 vinodeno_t
vino(*p
, CEPH_NOSNAP
);
4565 if (inode_map
.count(vino
)) {
4566 Inode
*in
= inode_map
[vino
];
4567 if (!in
->snaprealm
|| in
->snaprealm
== realm
)
4569 if (in
->snaprealm
->created
> info
.created()) {
4570 ldout(cct
, 10) << " NOT moving " << *in
<< " from _newer_ realm "
4571 << *in
->snaprealm
<< dendl
;
4574 ldout(cct
, 10) << " moving " << *in
<< " from " << *in
->snaprealm
<< dendl
;
// Detach from the old realm, saving its snap context for the writeback
// decision below.
4577 in
->snaprealm_item
.remove_myself();
4578 to_move
[in
] = in
->snaprealm
->get_snap_context();
4579 put_snap_realm(in
->snaprealm
);
4583 // move child snaprealms, too
4584 for (vector
<inodeno_t
>::iterator p
= m
->split_realms
.begin();
4585 p
!= m
->split_realms
.end();
4587 ldout(cct
, 10) << "adjusting snaprealm " << *p
<< " parent" << dendl
;
4588 SnapRealm
*child
= get_snap_realm_maybe(*p
);
4591 adjust_realm_parent(child
, realm
->ino
);
4592 put_snap_realm(child
);
4596 update_snap_trace(m
->bl
, NULL
, m
->head
.op
!= CEPH_SNAP_OP_DESTROY
);
// Attach moved inodes to the split-off realm and queue snap writeback where
// the new context has newer snaps than the inode's old one.
4599 for (auto p
= to_move
.begin(); p
!= to_move
.end(); ++p
) {
4600 Inode
*in
= p
->first
;
4601 in
->snaprealm
= realm
;
4602 realm
->inodes_with_caps
.push_back(&in
->snaprealm_item
);
4604 // queue for snap writeback
4605 if (has_new_snaps(p
->second
, realm
->get_snap_context()))
4606 queue_cap_snap(in
, p
->second
);
4608 put_snap_realm(realm
);
// handle_quota: apply quota/rstat updates from an MClientQuota message to the
// referenced inode, if we have it in inode_map.
4614 void Client::handle_quota(MClientQuota
*m
)
4616 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4617 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4623 got_mds_push(session
);
4625 ldout(cct
, 10) << "handle_quota " << *m
<< " from mds." << mds
<< dendl
;
4627 vinodeno_t
vino(m
->ino
, CEPH_NOSNAP
);
4628 if (inode_map
.count(vino
)) {
4630 in
= inode_map
[vino
];
4633 in
->quota
= m
->quota
;
4634 in
->rstat
= m
->rstat
;
// handle_caps: top-level MClientCaps dispatcher. Applies any OSD epoch
// barrier carried by the message, then routes by op: EXPORT/FLUSHSNAP_ACK/
// IMPORT are handled before requiring a local cap; unknown inodes get an
// immediate cap release (for IMPORT) or are dropped, with a cap-release
// flush in case the MDS is waiting on a revocation.
4641 void Client::handle_caps(MClientCaps
*m
)
4643 mds_rank_t mds
= mds_rank_t(m
->get_source().num());
4644 MetaSession
*session
= _get_mds_session(mds
, m
->get_connection().get());
4650 if (m
->osd_epoch_barrier
&& !objecter
->have_map(m
->osd_epoch_barrier
)) {
4651 // Pause RADOS operations until we see the required epoch
4652 objecter
->set_epoch_barrier(m
->osd_epoch_barrier
);
4655 if (m
->osd_epoch_barrier
> cap_epoch_barrier
) {
4656 // Record the barrier so that we will transmit it to MDS when releasing
4657 set_cap_epoch_barrier(m
->osd_epoch_barrier
);
4660 got_mds_push(session
);
4662 m
->clear_payload(); // for if/when we send back to MDS
4665 vinodeno_t
vino(m
->get_ino(), CEPH_NOSNAP
);
4666 if (inode_map
.count(vino
))
4667 in
= inode_map
[vino
];
4669 if (m
->get_op() == CEPH_CAP_OP_IMPORT
) {
4670 ldout(cct
, 5) << "handle_caps don't have vino " << vino
<< " on IMPORT, immediately releasing" << dendl
;
4671 session
->enqueue_cap_release(
4678 ldout(cct
, 5) << "handle_caps don't have vino " << vino
<< ", dropping" << dendl
;
4682 // in case the mds is waiting on e.g. a revocation
4683 flush_cap_releases();
4687 switch (m
->get_op()) {
4688 case CEPH_CAP_OP_EXPORT
:
4689 return handle_cap_export(session
, in
, m
);
4690 case CEPH_CAP_OP_FLUSHSNAP_ACK
:
4691 return handle_cap_flushsnap_ack(session
, in
, m
);
4692 case CEPH_CAP_OP_IMPORT
:
4693 handle_cap_import(session
, in
, m
);
// Remaining ops require that we actually hold a cap from this MDS.
4696 if (in
->caps
.count(mds
) == 0) {
4697 ldout(cct
, 5) << "handle_caps don't have " << *in
<< " cap on mds." << mds
<< dendl
;
4702 Cap
*cap
= in
->caps
[mds
];
4704 switch (m
->get_op()) {
4705 case CEPH_CAP_OP_TRUNC
: return handle_cap_trunc(session
, in
, m
);
4706 case CEPH_CAP_OP_IMPORT
:
4707 case CEPH_CAP_OP_REVOKE
:
4708 case CEPH_CAP_OP_GRANT
: return handle_cap_grant(session
, in
, cap
, m
);
4709 case CEPH_CAP_OP_FLUSH_ACK
: return handle_cap_flush_ack(session
, in
, cap
, m
);
// handle_cap_import: an MDS (the new auth) has imported our cap from
// peer_mds. Carry over the old cap's latest_perms, apply the embedded snap
// trace, install/refresh the auth cap via add_update_cap, then remove the
// old peer cap (queueing a release only if the peer requested it). If we
// became auth, re-kick any pending snap/cap flushes toward the new MDS.
4715 void Client::handle_cap_import(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4717 mds_rank_t mds
= session
->mds_num
;
4719 ldout(cct
, 5) << "handle_cap_import ino " << m
->get_ino() << " mseq " << m
->get_mseq()
4720 << " IMPORT from mds." << mds
<< dendl
;
4722 const mds_rank_t peer_mds
= mds_rank_t(m
->peer
.mds
);
4725 if (m
->peer
.cap_id
&& in
->caps
.count(peer_mds
)) {
4726 cap
= in
->caps
[peer_mds
];
4728 cap_perms
= cap
->latest_perms
;
4733 SnapRealm
*realm
= NULL
;
4734 update_snap_trace(m
->snapbl
, &realm
);
4736 add_update_cap(in
, session
, m
->get_cap_id(),
4737 m
->get_caps(), m
->get_seq(), m
->get_mseq(), m
->get_realm(),
4738 CEPH_CAP_FLAG_AUTH
, cap_perms
);
4740 if (cap
&& cap
->cap_id
== m
->peer
.cap_id
) {
4741 remove_cap(cap
, (m
->peer
.flags
& CEPH_CAP_FLAG_RELEASE
));
4745 put_snap_realm(realm
);
4747 if (in
->auth_cap
&& in
->auth_cap
->session
->mds_num
== mds
) {
4748 // reflush any/all caps (if we are now the auth_cap)
4749 if (in
->cap_snaps
.size())
4750 flush_snaps(in
, true);
4751 if (in
->flushing_caps
)
4752 flush_caps(in
, session
);
// handle_cap_export: the sending MDS is exporting our cap. If a peer target
// is given, pre-create/refresh a cap on the peer session (merging issued
// bits, seq set to peer.seq-1 so the import supersedes it) and migrate
// auth-cap + flushing state to it; with no peer, flag I_CAP_DROPPED when the
// auth cap is lost. Finally remove the exported cap without a release.
4756 void Client::handle_cap_export(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4758 mds_rank_t mds
= session
->mds_num
;
4760 ldout(cct
, 5) << "handle_cap_export ino " << m
->get_ino() << " mseq " << m
->get_mseq()
4761 << " EXPORT from mds." << mds
<< dendl
;
4764 if (in
->caps
.count(mds
))
4765 cap
= in
->caps
[mds
];
4767 const mds_rank_t peer_mds
= mds_rank_t(m
->peer
.mds
);
4769 if (cap
&& cap
->cap_id
== m
->get_cap_id()) {
4770 if (m
->peer
.cap_id
) {
4771 MetaSession
*tsession
= _get_or_open_mds_session(peer_mds
);
4772 if (in
->caps
.count(peer_mds
)) {
4773 Cap
*tcap
= in
->caps
[peer_mds
];
4774 if (tcap
->cap_id
!= m
->peer
.cap_id
||
4775 ceph_seq_cmp(tcap
->seq
, m
->peer
.seq
) < 0) {
4776 tcap
->cap_id
= m
->peer
.cap_id
;
4777 tcap
->seq
= m
->peer
.seq
- 1;
4778 tcap
->issue_seq
= tcap
->seq
;
4779 tcap
->mseq
= m
->peer
.mseq
;
4780 tcap
->issued
|= cap
->issued
;
4781 tcap
->implemented
|= cap
->issued
;
4782 if (cap
== in
->auth_cap
)
4783 in
->auth_cap
= tcap
;
4784 if (in
->auth_cap
== tcap
&& in
->flushing_cap_item
.is_on_list())
4785 adjust_session_flushing_caps(in
, session
, tsession
);
4788 add_update_cap(in
, tsession
, m
->peer
.cap_id
, cap
->issued
,
4789 m
->peer
.seq
- 1, m
->peer
.mseq
, (uint64_t)-1,
4790 cap
== in
->auth_cap
? CEPH_CAP_FLAG_AUTH
: 0,
4794 if (cap
== in
->auth_cap
)
4795 in
->flags
|= I_CAP_DROPPED
;
4798 remove_cap(cap
, false);
// handle_cap_trunc: MDS-initiated size/metadata change (e.g. truncate).
// Recomputes the issued|implemented|dirty cap mask and applies the message's
// truncate/size/time fields via update_inode_file_bits.
4804 void Client::handle_cap_trunc(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4806 mds_rank_t mds
= session
->mds_num
;
4807 assert(in
->caps
[mds
]);
4809 ldout(cct
, 10) << "handle_cap_trunc on ino " << *in
4810 << " size " << in
->size
<< " -> " << m
->get_size()
4813 int implemented
= 0;
4814 int issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
4815 issued
|= implemented
;
4816 update_inode_file_bits(in
, m
->get_truncate_seq(), m
->get_truncate_size(),
4817 m
->get_size(), m
->get_change_attr(), m
->get_time_warp_seq(),
4818 m
->get_ctime(), m
->get_mtime(), m
->get_atime(),
4819 m
->inline_version
, m
->inline_data
, issued
);
// handle_cap_flush_ack: the MDS acknowledged a cap flush (by client tid).
// Retires the acked tid and every older tid from both the inode's and the
// session's tracking (older acks are implied; non-matching newer entries
// have their bits masked out of `cleaned`), clears the matching bits from
// in->flushing_caps, and wakes waiters / resets the cap dirtier identity as
// appropriate.
4823 void Client::handle_cap_flush_ack(MetaSession
*session
, Inode
*in
, Cap
*cap
, MClientCaps
*m
)
4825 ceph_tid_t flush_ack_tid
= m
->get_client_tid();
4826 int dirty
= m
->get_dirty();
4830 for (map
<ceph_tid_t
, int>::iterator it
= in
->flushing_cap_tids
.begin();
4831 it
!= in
->flushing_cap_tids
.end(); ) {
4832 if (it
->first
== flush_ack_tid
)
4833 cleaned
= it
->second
;
4834 if (it
->first
<= flush_ack_tid
) {
4835 session
->flushing_caps_tids
.erase(it
->first
);
4836 in
->flushing_cap_tids
.erase(it
++);
// Bits still being flushed under a newer tid are not clean yet.
4840 cleaned
&= ~it
->second
;
4846 ldout(cct
, 5) << "handle_cap_flush_ack mds." << session
->mds_num
4847 << " cleaned " << ccap_string(cleaned
) << " on " << *in
4848 << " with " << ccap_string(dirty
) << dendl
;
4851 signal_cond_list(in
->waitfor_caps
);
4852 if (session
->flushing_caps_tids
.empty() ||
4853 *session
->flushing_caps_tids
.begin() > flush_ack_tid
)
// Nothing dirty remains: forget who dirtied the caps.
4858 in
->cap_dirtier_uid
= -1;
4859 in
->cap_dirtier_gid
= -1;
4863 ldout(cct
, 10) << " tid " << m
->get_client_tid() << " != any cap bit tids" << dendl
;
4865 if (in
->flushing_caps
) {
4866 ldout(cct
, 5) << " flushing_caps " << ccap_string(in
->flushing_caps
)
4867 << " -> " << ccap_string(in
->flushing_caps
& ~cleaned
) << dendl
;
4868 in
->flushing_caps
&= ~cleaned
;
4869 if (in
->flushing_caps
== 0) {
4870 ldout(cct
, 10) << " " << *in
<< " !flushing" << dendl
;
4871 num_flushing_caps
--;
4872 if (in
->cap_snaps
.empty())
4873 in
->flushing_cap_item
.remove_myself();
4875 if (!in
->caps_dirty())
4884 void Client::handle_cap_flushsnap_ack(MetaSession
*session
, Inode
*in
, MClientCaps
*m
)
4886 mds_rank_t mds
= session
->mds_num
;
4887 assert(in
->caps
[mds
]);
4888 snapid_t follows
= m
->get_snap_follows();
4890 if (in
->cap_snaps
.count(follows
)) {
4891 CapSnap
&capsnap
= in
->cap_snaps
.at(follows
);
4892 if (m
->get_client_tid() != capsnap
.flush_tid
) {
4893 ldout(cct
, 10) << " tid " << m
->get_client_tid() << " != " << capsnap
.flush_tid
<< dendl
;
4895 ldout(cct
, 5) << "handle_cap_flushedsnap mds." << mds
<< " flushed snap follows " << follows
4896 << " on " << *in
<< dendl
;
4898 if (in
->get_num_ref() == 1)
4899 tmp_ref
= in
; // make sure inode not get freed while erasing item from in->cap_snaps
4900 if (in
->flushing_caps
== 0 && in
->cap_snaps
.empty())
4901 in
->flushing_cap_item
.remove_myself();
4902 session
->flushing_caps_tids
.erase(capsnap
.flush_tid
);
4903 in
->cap_snaps
.erase(follows
);
4906 ldout(cct
, 5) << "handle_cap_flushedsnap DUP(?) mds." << mds
<< " flushed snap follows " << follows
4907 << " on " << *in
<< dendl
;
4908 // we may not have it if we send multiple FLUSHSNAP requests and (got multiple FLUSHEDSNAPs back)
4914 class C_Client_DentryInvalidate
: public Context
{
4921 C_Client_DentryInvalidate(Client
*c
, Dentry
*dn
, bool del
) :
4922 client(c
), name(dn
->name
) {
4923 if (client
->use_faked_inos()) {
4924 dirino
.ino
= dn
->dir
->parent_inode
->faked_ino
;
4926 ino
.ino
= dn
->inode
->faked_ino
;
4928 dirino
= dn
->dir
->parent_inode
->vino();
4930 ino
= dn
->inode
->vino();
4933 ino
.ino
= inodeno_t();
4935 void finish(int r
) override
{
4936 // _async_dentry_invalidate is responsible for its own locking
4937 assert(!client
->client_lock
.is_locked_by_me());
4938 client
->_async_dentry_invalidate(dirino
, ino
, name
);
4942 void Client::_async_dentry_invalidate(vinodeno_t dirino
, vinodeno_t ino
, string
& name
)
4946 ldout(cct
, 10) << "_async_dentry_invalidate '" << name
<< "' ino " << ino
4947 << " in dir " << dirino
<< dendl
;
4948 dentry_invalidate_cb(callback_handle
, dirino
, ino
, name
);
4951 void Client::_schedule_invalidate_dentry_callback(Dentry
*dn
, bool del
)
4953 if (dentry_invalidate_cb
&& dn
->inode
->ll_ref
> 0)
4954 async_dentry_invalidator
.queue(new C_Client_DentryInvalidate(this, dn
, del
));
4957 void Client::_try_to_trim_inode(Inode
*in
, bool sched_inval
)
4959 int ref
= in
->get_num_ref();
4961 if (in
->dir
&& !in
->dir
->dentries
.empty()) {
4962 for (auto p
= in
->dir
->dentries
.begin();
4963 p
!= in
->dir
->dentries
.end(); ) {
4964 Dentry
*dn
= p
->second
;
4966 /* rmsnap removes whole subtree, need trim inodes recursively.
4967 * we don't need to invalidate dentries recursively. because
4968 * invalidating a directory dentry effectively invalidate
4970 if (in
->snapid
!= CEPH_NOSNAP
&& dn
->inode
&& dn
->inode
->is_dir())
4971 _try_to_trim_inode(dn
->inode
.get(), false);
4973 if (dn
->lru_is_expireable())
4974 unlink(dn
, true, false); // keep dir, drop dentry
4976 if (in
->dir
->dentries
.empty()) {
4982 if (ref
> 0 && (in
->flags
& I_SNAPDIR_OPEN
)) {
4983 InodeRef snapdir
= open_snapdir(in
);
4984 _try_to_trim_inode(snapdir
.get(), false);
4988 if (ref
> 0 && in
->ll_ref
> 0 && sched_inval
) {
4989 set
<Dentry
*>::iterator q
= in
->dn_set
.begin();
4990 while (q
!= in
->dn_set
.end()) {
4992 // FIXME: we play lots of unlink/link tricks when handling MDS replies,
4993 // so in->dn_set doesn't always reflect the state of kernel's dcache.
4994 _schedule_invalidate_dentry_callback(dn
, true);
4995 unlink(dn
, true, true);
5000 void Client::handle_cap_grant(MetaSession
*session
, Inode
*in
, Cap
*cap
, MClientCaps
*m
)
5002 mds_rank_t mds
= session
->mds_num
;
5003 int used
= get_caps_used(in
);
5004 int wanted
= in
->caps_wanted();
5006 const int old_caps
= cap
->issued
;
5007 const int new_caps
= m
->get_caps();
5008 ldout(cct
, 5) << "handle_cap_grant on in " << m
->get_ino()
5009 << " mds." << mds
<< " seq " << m
->get_seq()
5010 << " caps now " << ccap_string(new_caps
)
5011 << " was " << ccap_string(old_caps
) << dendl
;
5012 cap
->seq
= m
->get_seq();
5014 in
->layout
= m
->get_layout();
5017 int implemented
= 0;
5018 int issued
= in
->caps_issued(&implemented
) | in
->caps_dirty();
5019 issued
|= implemented
;
5021 if ((issued
& CEPH_CAP_AUTH_EXCL
) == 0) {
5022 in
->mode
= m
->head
.mode
;
5023 in
->uid
= m
->head
.uid
;
5024 in
->gid
= m
->head
.gid
;
5025 in
->btime
= m
->btime
;
5027 bool deleted_inode
= false;
5028 if ((issued
& CEPH_CAP_LINK_EXCL
) == 0) {
5029 in
->nlink
= m
->head
.nlink
;
5030 if (in
->nlink
== 0 &&
5031 (new_caps
& (CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
)))
5032 deleted_inode
= true;
5034 if ((issued
& CEPH_CAP_XATTR_EXCL
) == 0 &&
5035 m
->xattrbl
.length() &&
5036 m
->head
.xattr_version
> in
->xattr_version
) {
5037 bufferlist::iterator p
= m
->xattrbl
.begin();
5038 ::decode(in
->xattrs
, p
);
5039 in
->xattr_version
= m
->head
.xattr_version
;
5041 update_inode_file_bits(in
, m
->get_truncate_seq(), m
->get_truncate_size(), m
->get_size(),
5042 m
->get_change_attr(), m
->get_time_warp_seq(), m
->get_ctime(),
5043 m
->get_mtime(), m
->get_atime(),
5044 m
->inline_version
, m
->inline_data
, issued
);
5047 if (cap
== in
->auth_cap
&&
5048 m
->get_max_size() != in
->max_size
) {
5049 ldout(cct
, 10) << "max_size " << in
->max_size
<< " -> " << m
->get_max_size() << dendl
;
5050 in
->max_size
= m
->get_max_size();
5051 if (in
->max_size
> in
->wanted_max_size
) {
5052 in
->wanted_max_size
= 0;
5053 in
->requested_max_size
= 0;
5058 if (m
->get_op() == CEPH_CAP_OP_IMPORT
&& m
->get_wanted() != wanted
)
5061 check_cap_issue(in
, cap
, new_caps
);
5064 if (old_caps
& ~new_caps
) {
5065 ldout(cct
, 10) << " revocation of " << ccap_string(~new_caps
& old_caps
) << dendl
;
5066 cap
->issued
= new_caps
;
5067 cap
->implemented
|= new_caps
;
5069 if (((used
& ~new_caps
) & CEPH_CAP_FILE_BUFFER
)
5070 && !_flush(in
, new C_Client_FlushComplete(this, in
))) {
5071 // waitin' for flush
5072 } else if ((old_caps
& ~new_caps
) & CEPH_CAP_FILE_CACHE
) {
5076 cap
->wanted
= 0; // don't let check_caps skip sending a response to MDS
5080 } else if (old_caps
== new_caps
) {
5081 ldout(cct
, 10) << " caps unchanged at " << ccap_string(old_caps
) << dendl
;
5083 ldout(cct
, 10) << " grant, new caps are " << ccap_string(new_caps
& ~old_caps
) << dendl
;
5084 cap
->issued
= new_caps
;
5085 cap
->implemented
|= new_caps
;
5087 if (cap
== in
->auth_cap
) {
5088 // non-auth MDS is revoking the newly grant caps ?
5089 for (map
<mds_rank_t
, Cap
*>::iterator it
= in
->caps
.begin(); it
!= in
->caps
.end(); ++it
) {
5090 if (it
->second
== cap
)
5092 if (it
->second
->implemented
& ~it
->second
->issued
& new_caps
) {
5105 signal_cond_list(in
->waitfor_caps
);
5107 // may drop inode's last ref
5109 _try_to_trim_inode(in
, true);
5114 int Client::_getgrouplist(gid_t
** sgids
, uid_t uid
, gid_t gid
)
5116 // cppcheck-suppress variableScope
5121 sgid_count
= getgroups_cb(callback_handle
, &sgid_buf
);
5122 if (sgid_count
> 0) {
5128 #if HAVE_GETGROUPLIST
5132 ldout(cct
, 3) << "getting user entry failed" << dendl
;
5135 //use PAM to get the group list
5136 // initial number of group entries, defaults to posix standard of 16
5137 // PAM implementations may provide more than 16 groups....
5139 sgid_buf
= (gid_t
*)malloc(sgid_count
* sizeof(gid_t
));
5140 if (sgid_buf
== NULL
) {
5141 ldout(cct
, 3) << "allocating group memory failed" << dendl
;
5146 #if defined(__APPLE__)
5147 if (getgrouplist(pw
->pw_name
, gid
, (int*)sgid_buf
, &sgid_count
) == -1) {
5149 if (getgrouplist(pw
->pw_name
, gid
, sgid_buf
, &sgid_count
) == -1) {
5151 // we need to resize the group list and try again
5152 void *_realloc
= NULL
;
5153 if ((_realloc
= realloc(sgid_buf
, sgid_count
* sizeof(gid_t
))) == NULL
) {
5154 ldout(cct
, 3) << "allocating group memory failed" << dendl
;
5158 sgid_buf
= (gid_t
*)_realloc
;
5161 // list was successfully retrieved
5171 int Client::inode_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
)
5173 if (perms
.uid() == 0)
5176 if (perms
.uid() != in
->uid
&& (in
->mode
& S_IRWXG
)) {
5177 int ret
= _posix_acl_permission(in
, perms
, want
);
5182 // check permissions before doing anything else
5183 if (!in
->check_mode(perms
, want
))
5188 int Client::xattr_permission(Inode
*in
, const char *name
, unsigned want
,
5189 const UserPerm
& perms
)
5191 int r
= _getattr_for_perm(in
, perms
);
5196 if (strncmp(name
, "system.", 7) == 0) {
5197 if ((want
& MAY_WRITE
) && (perms
.uid() != 0 && perms
.uid() != in
->uid
))
5200 r
= inode_permission(in
, perms
, want
);
5203 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5207 ostream
& operator<<(ostream
&out
, const UserPerm
& perm
) {
5208 out
<< "UserPerm(uid: " << perm
.uid() << ", gid: " << perm
.gid() << ")";
5212 int Client::may_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
5213 const UserPerm
& perms
)
5215 ldout(cct
, 20) << __func__
<< *in
<< "; " << perms
<< dendl
;
5216 int r
= _getattr_for_perm(in
, perms
);
5220 if (mask
& CEPH_SETATTR_SIZE
) {
5221 r
= inode_permission(in
, perms
, MAY_WRITE
);
5227 if (mask
& CEPH_SETATTR_UID
) {
5228 if (perms
.uid() != 0 && (perms
.uid() != in
->uid
|| stx
->stx_uid
!= in
->uid
))
5231 if (mask
& CEPH_SETATTR_GID
) {
5232 if (perms
.uid() != 0 && (perms
.uid() != in
->uid
||
5233 (!perms
.gid_in_groups(stx
->stx_gid
) && stx
->stx_gid
!= in
->gid
)))
5237 if (mask
& CEPH_SETATTR_MODE
) {
5238 if (perms
.uid() != 0 && perms
.uid() != in
->uid
)
5241 gid_t i_gid
= (mask
& CEPH_SETATTR_GID
) ? stx
->stx_gid
: in
->gid
;
5242 if (perms
.uid() != 0 && !perms
.gid_in_groups(i_gid
))
5243 stx
->stx_mode
&= ~S_ISGID
;
5246 if (mask
& (CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
|
5247 CEPH_SETATTR_MTIME
| CEPH_SETATTR_ATIME
)) {
5248 if (perms
.uid() != 0 && perms
.uid() != in
->uid
) {
5249 int check_mask
= CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
;
5250 if (!(mask
& CEPH_SETATTR_MTIME_NOW
))
5251 check_mask
|= CEPH_SETATTR_MTIME
;
5252 if (!(mask
& CEPH_SETATTR_ATIME_NOW
))
5253 check_mask
|= CEPH_SETATTR_ATIME
;
5254 if (check_mask
& mask
) {
5257 r
= inode_permission(in
, perms
, MAY_WRITE
);
5265 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5269 int Client::may_open(Inode
*in
, int flags
, const UserPerm
& perms
)
5271 ldout(cct
, 20) << __func__
<< *in
<< "; " << perms
<< dendl
;
5274 if ((flags
& O_ACCMODE
) == O_WRONLY
)
5276 else if ((flags
& O_ACCMODE
) == O_RDWR
)
5277 want
= MAY_READ
| MAY_WRITE
;
5278 else if ((flags
& O_ACCMODE
) == O_RDONLY
)
5280 if (flags
& O_TRUNC
)
5284 switch (in
->mode
& S_IFMT
) {
5289 if (want
& MAY_WRITE
) {
5296 r
= _getattr_for_perm(in
, perms
);
5300 r
= inode_permission(in
, perms
, want
);
5302 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5306 int Client::may_lookup(Inode
*dir
, const UserPerm
& perms
)
5308 ldout(cct
, 20) << __func__
<< *dir
<< "; " << perms
<< dendl
;
5309 int r
= _getattr_for_perm(dir
, perms
);
5313 r
= inode_permission(dir
, perms
, MAY_EXEC
);
5315 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5319 int Client::may_create(Inode
*dir
, const UserPerm
& perms
)
5321 ldout(cct
, 20) << __func__
<< *dir
<< "; " << perms
<< dendl
;
5322 int r
= _getattr_for_perm(dir
, perms
);
5326 r
= inode_permission(dir
, perms
, MAY_EXEC
| MAY_WRITE
);
5328 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5332 int Client::may_delete(Inode
*dir
, const char *name
, const UserPerm
& perms
)
5334 ldout(cct
, 20) << __func__
<< *dir
<< "; " << "; name " << name
<< "; " << perms
<< dendl
;
5335 int r
= _getattr_for_perm(dir
, perms
);
5339 r
= inode_permission(dir
, perms
, MAY_EXEC
| MAY_WRITE
);
5343 /* 'name == NULL' means rmsnap */
5344 if (perms
.uid() != 0 && name
&& (dir
->mode
& S_ISVTX
)) {
5346 r
= _lookup(dir
, name
, CEPH_CAP_AUTH_SHARED
, &otherin
, perms
);
5349 if (dir
->uid
!= perms
.uid() && otherin
->uid
!= perms
.uid())
5353 ldout(cct
, 3) << __func__
<< " " << dir
<< " = " << r
<< dendl
;
5357 int Client::may_hardlink(Inode
*in
, const UserPerm
& perms
)
5359 ldout(cct
, 20) << __func__
<< *in
<< "; " << perms
<< dendl
;
5360 int r
= _getattr_for_perm(in
, perms
);
5364 if (perms
.uid() == 0 || perms
.uid() == in
->uid
) {
5370 if (!S_ISREG(in
->mode
))
5373 if (in
->mode
& S_ISUID
)
5376 if ((in
->mode
& (S_ISGID
| S_IXGRP
)) == (S_ISGID
| S_IXGRP
))
5379 r
= inode_permission(in
, perms
, MAY_READ
| MAY_WRITE
);
5381 ldout(cct
, 3) << __func__
<< " " << in
<< " = " << r
<< dendl
;
5385 int Client::_getattr_for_perm(Inode
*in
, const UserPerm
& perms
)
5387 int mask
= CEPH_STAT_CAP_MODE
;
5389 if (acl_type
!= NO_ACL
) {
5390 mask
|= CEPH_STAT_CAP_XATTR
;
5391 force
= in
->xattr_version
== 0;
5393 return _getattr(in
, mask
, perms
, force
);
5396 vinodeno_t
Client::_get_vino(Inode
*in
)
5398 /* The caller must hold the client lock */
5399 return vinodeno_t(in
->ino
, in
->snapid
);
5402 inodeno_t
Client::_get_inodeno(Inode
*in
)
5404 /* The caller must hold the client lock */
5410 * Resolve an MDS spec to a list of MDS daemon GIDs.
5412 * The spec is a string representing a GID, rank, filesystem:rank, or name/id.
5413 * It may be '*' in which case it matches all GIDs.
5415 * If no error is returned, the `targets` vector will be populated with at least
5418 int Client::resolve_mds(
5419 const std::string
&mds_spec
,
5420 std::vector
<mds_gid_t
> *targets
)
5423 assert(targets
!= nullptr);
5426 std::stringstream ss
;
5427 int role_r
= fsmap
->parse_role(mds_spec
, &role
, ss
);
5429 // We got a role, resolve it to a GID
5430 ldout(cct
, 10) << __func__
<< ": resolved '" << mds_spec
<< "' to role '"
5431 << role
<< "'" << dendl
;
5433 fsmap
->get_filesystem(role
.fscid
)->mds_map
.get_info(role
.rank
).global_id
);
5437 std::string strtol_err
;
5438 long long rank_or_gid
= strict_strtoll(mds_spec
.c_str(), 10, &strtol_err
);
5439 if (strtol_err
.empty()) {
5440 // It is a possible GID
5441 const mds_gid_t mds_gid
= mds_gid_t(rank_or_gid
);
5442 if (fsmap
->gid_exists(mds_gid
)) {
5443 ldout(cct
, 10) << __func__
<< ": validated GID " << mds_gid
<< dendl
;
5444 targets
->push_back(mds_gid
);
5446 lderr(cct
) << __func__
<< ": GID " << mds_gid
<< " not in MDS map"
5450 } else if (mds_spec
== "*") {
5451 // It is a wildcard: use all MDSs
5452 const auto mds_info
= fsmap
->get_mds_info();
5454 if (mds_info
.empty()) {
5455 lderr(cct
) << __func__
<< ": * passed but no MDS daemons found" << dendl
;
5459 for (const auto i
: mds_info
) {
5460 targets
->push_back(i
.first
);
5463 // It did not parse as an integer, it is not a wildcard, it must be a name
5464 const mds_gid_t mds_gid
= fsmap
->find_mds_gid_by_name(mds_spec
);
5466 lderr(cct
) << "MDS ID '" << mds_spec
<< "' not found" << dendl
;
5468 lderr(cct
) << "FSMap: " << *fsmap
<< dendl
;
5472 ldout(cct
, 10) << __func__
<< ": resolved ID '" << mds_spec
5473 << "' to GID " << mds_gid
<< dendl
;
5474 targets
->push_back(mds_gid
);
5483 * Authenticate with mon and establish global ID
5485 int Client::authenticate()
5487 assert(client_lock
.is_locked_by_me());
5489 if (monclient
->is_authenticated()) {
5493 client_lock
.Unlock();
5494 int r
= monclient
->authenticate(cct
->_conf
->client_mount_timeout
);
5500 whoami
= monclient
->get_global_id();
5501 messenger
->set_myname(entity_name_t::CLIENT(whoami
.v
));
5506 int Client::fetch_fsmap(bool user
)
5509 // Retrieve FSMap to enable looking up daemon addresses. We need FSMap
5510 // rather than MDSMap because no one MDSMap contains all the daemons, and
5511 // a `tell` can address any daemon.
5512 version_t fsmap_latest
;
5515 monclient
->get_version("fsmap", &fsmap_latest
, NULL
, &cond
);
5516 client_lock
.Unlock();
5519 } while (r
== -EAGAIN
);
5522 lderr(cct
) << "Failed to learn FSMap version: " << cpp_strerror(r
) << dendl
;
5526 ldout(cct
, 10) << __func__
<< " learned FSMap version " << fsmap_latest
<< dendl
;
5529 if (!fsmap_user
|| fsmap_user
->get_epoch() < fsmap_latest
) {
5530 monclient
->sub_want("fsmap.user", fsmap_latest
, CEPH_SUBSCRIBE_ONETIME
);
5531 monclient
->renew_subs();
5532 wait_on_list(waiting_for_fsmap
);
5535 assert(fsmap_user
->get_epoch() >= fsmap_latest
);
5537 if (!fsmap
|| fsmap
->get_epoch() < fsmap_latest
) {
5538 monclient
->sub_want("fsmap", fsmap_latest
, CEPH_SUBSCRIBE_ONETIME
);
5539 monclient
->renew_subs();
5540 wait_on_list(waiting_for_fsmap
);
5543 assert(fsmap
->get_epoch() >= fsmap_latest
);
5545 ldout(cct
, 10) << __func__
<< " finished waiting for FSMap version "
5546 << fsmap_latest
<< dendl
;
5552 * @mds_spec one of ID, rank, GID, "*"
5555 int Client::mds_command(
5556 const std::string
&mds_spec
,
5557 const vector
<string
>& cmd
,
5558 const bufferlist
& inbl
,
5563 Mutex::Locker
lock(client_lock
);
5565 assert(initialized
);
5573 r
= fetch_fsmap(false);
5578 // Look up MDS target(s) of the command
5579 std::vector
<mds_gid_t
> targets
;
5580 r
= resolve_mds(mds_spec
, &targets
);
5585 // If daemons are laggy, we won't send them commands. If all
5586 // are laggy then we fail.
5587 std::vector
<mds_gid_t
> non_laggy
;
5588 for (const auto gid
: targets
) {
5589 const auto info
= fsmap
->get_info_gid(gid
);
5590 if (!info
.laggy()) {
5591 non_laggy
.push_back(gid
);
5594 if (non_laggy
.size() == 0) {
5595 *outs
= "All targeted MDS daemons are laggy";
5599 if (metadata
.empty()) {
5600 // We are called on an unmounted client, so metadata
5601 // won't be initialized yet.
5602 populate_metadata("");
5605 // Send commands to targets
5606 C_GatherBuilder
gather(cct
, onfinish
);
5607 for (const auto target_gid
: non_laggy
) {
5608 const auto info
= fsmap
->get_info_gid(target_gid
);
5610 // Open a connection to the target MDS
5611 entity_inst_t inst
= info
.get_inst();
5612 ConnectionRef conn
= messenger
->get_connection(inst
);
5614 // Generate MDSCommandOp state
5615 auto &op
= command_table
.start_command();
5617 op
.on_finish
= gather
.new_sub();
5622 op
.mds_gid
= target_gid
;
5625 ldout(cct
, 4) << __func__
<< ": new command op to " << target_gid
5626 << " tid=" << op
.tid
<< cmd
<< dendl
;
5628 // Construct and send MCommand
5629 MCommand
*m
= op
.get_message(monclient
->get_fsid());
5630 conn
->send_message(m
);
5637 void Client::handle_command_reply(MCommandReply
*m
)
5639 ceph_tid_t
const tid
= m
->get_tid();
5641 ldout(cct
, 10) << __func__
<< ": tid=" << m
->get_tid() << dendl
;
5643 if (!command_table
.exists(tid
)) {
5644 ldout(cct
, 1) << __func__
<< ": unknown tid " << tid
<< ", dropping" << dendl
;
5649 auto &op
= command_table
.get_command(tid
);
5651 op
.outbl
->claim(m
->get_data());
5658 op
.on_finish
->complete(m
->r
);
5661 command_table
.erase(tid
);
5666 // -------------------
5669 int Client::mount(const std::string
&mount_root
, const UserPerm
& perms
,
5672 Mutex::Locker
lock(client_lock
);
5675 ldout(cct
, 5) << "already mounted" << dendl
;
5679 int r
= authenticate();
5681 lderr(cct
) << "authentication failed: " << cpp_strerror(r
) << dendl
;
5685 std::string want
= "mdsmap";
5686 const auto &mds_ns
= cct
->_conf
->client_mds_namespace
;
5687 if (!mds_ns
.empty()) {
5688 r
= fetch_fsmap(true);
5691 fs_cluster_id_t cid
= fsmap_user
->get_fs_cid(mds_ns
);
5692 if (cid
== FS_CLUSTER_ID_NONE
)
5695 std::ostringstream oss
;
5696 oss
<< want
<< "." << cid
;
5699 ldout(cct
, 10) << "Subscribing to map '" << want
<< "'" << dendl
;
5701 monclient
->sub_want(want
, 0, 0);
5702 monclient
->renew_subs();
5704 tick(); // start tick
5708 auto availability
= mdsmap
->is_cluster_available();
5709 if (availability
== MDSMap::STUCK_UNAVAILABLE
) {
5711 ldout(cct
, 10) << "mds cluster unavailable: epoch=" << mdsmap
->get_epoch() << dendl
;
5712 return CEPH_FUSE_NO_MDS_UP
;
5713 } else if (availability
== MDSMap::AVAILABLE
) {
5714 // Continue to mount
5716 } else if (availability
== MDSMap::TRANSIENT_UNAVAILABLE
) {
5717 // Else, wait. MDSMonitor will update the map to bring
5718 // us to a conclusion eventually.
5719 wait_on_list(waiting_for_mdsmap
);
5721 // Unexpected value!
5727 populate_metadata(mount_root
.empty() ? "/" : mount_root
);
5729 filepath
fp(CEPH_INO_ROOT
);
5730 if (!mount_root
.empty()) {
5731 fp
= filepath(mount_root
.c_str());
5734 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_GETATTR
);
5735 req
->set_filepath(fp
);
5736 req
->head
.args
.getattr
.mask
= CEPH_STAT_CAP_INODE_ALL
;
5737 int res
= make_request(req
, perms
);
5739 if (res
== -EACCES
&& root
) {
5740 ldout(cct
, 1) << __func__
<< " EACCES on parent of mount point; quotas may not work" << dendl
;
5758 if (!cct
->_conf
->client_trace
.empty()) {
5759 traceout
.open(cct
->_conf
->client_trace
.c_str());
5760 if (traceout
.is_open()) {
5761 ldout(cct
, 1) << "opened trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5763 ldout(cct
, 1) << "FAILED to open trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5768 ldout(cct, 3) << "op: // client trace data structs" << dendl;
5769 ldout(cct, 3) << "op: struct stat st;" << dendl;
5770 ldout(cct, 3) << "op: struct utimbuf utim;" << dendl;
5771 ldout(cct, 3) << "op: int readlinkbuf_len = 1000;" << dendl;
5772 ldout(cct, 3) << "op: char readlinkbuf[readlinkbuf_len];" << dendl;
5773 ldout(cct, 3) << "op: map<string, inode_t*> dir_contents;" << dendl;
5774 ldout(cct, 3) << "op: map<int, int> open_files;" << dendl;
5775 ldout(cct, 3) << "op: int fd;" << dendl;
5782 void Client::_close_sessions()
5784 while (!mds_sessions
.empty()) {
5785 // send session closes!
5786 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5787 p
!= mds_sessions
.end();
5789 if (p
->second
->state
!= MetaSession::STATE_CLOSING
) {
5790 _close_mds_session(p
->second
);
5794 // wait for sessions to close
5795 ldout(cct
, 2) << "waiting for " << mds_sessions
.size() << " mds sessions to close" << dendl
;
5796 mount_cond
.Wait(client_lock
);
5800 void Client::flush_mdlog_sync()
5802 if (mds_requests
.empty())
5804 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5805 p
!= mds_sessions
.end();
5807 MetaSession
*s
= p
->second
;
5812 void Client::flush_mdlog(MetaSession
*session
)
5814 // Only send this to Luminous or newer MDS daemons, older daemons
5815 // will crash if they see an unknown CEPH_SESSION_* value in this msg.
5816 const uint64_t features
= session
->con
->get_features();
5817 if (HAVE_FEATURE(features
, SERVER_LUMINOUS
)) {
5818 MClientSession
*m
= new MClientSession(CEPH_SESSION_REQUEST_FLUSH_MDLOG
);
5819 session
->con
->send_message(m
);
5824 void Client::unmount()
5826 Mutex::Locker
lock(client_lock
);
5828 assert(mounted
); // caller is confused?
5830 ldout(cct
, 2) << "unmounting" << dendl
;
5833 flush_mdlog_sync(); // flush the mdlog for pending requests, if any
5834 while (!mds_requests
.empty()) {
5835 ldout(cct
, 10) << "waiting on " << mds_requests
.size() << " requests" << dendl
;
5836 mount_cond
.Wait(client_lock
);
5840 timer
.cancel_event(tick_event
);
5845 // clean up any unclosed files
5846 while (!fd_map
.empty()) {
5847 Fh
*fh
= fd_map
.begin()->second
;
5848 fd_map
.erase(fd_map
.begin());
5849 ldout(cct
, 0) << " destroyed lost open file " << fh
<< " on " << *fh
->inode
<< dendl
;
5853 while (!ll_unclosed_fh_set
.empty()) {
5854 set
<Fh
*>::iterator it
= ll_unclosed_fh_set
.begin();
5856 ll_unclosed_fh_set
.erase(fh
);
5857 ldout(cct
, 0) << " destroyed lost open file " << fh
<< " on " << *(fh
->inode
) << dendl
;
5861 while (!opened_dirs
.empty()) {
5862 dir_result_t
*dirp
= *opened_dirs
.begin();
5863 ldout(cct
, 0) << " destroyed lost open dir " << dirp
<< " on " << *dirp
->inode
<< dendl
;
5870 ldout(cct
, 0) << " skipping clean shutdown, we are blacklisted" << dendl
;
5872 if (cct
->_conf
->client_oc
) {
5873 // Purge all cached data so that ObjectCacher doesn't get hung up
5874 // trying to flush it. ObjectCacher's behaviour on EBLACKLISTED
5875 // is to just leave things marked dirty
5876 // (http://tracker.ceph.com/issues/9105)
5877 for (const auto &i
: inode_map
) {
5878 objectcacher
->purge_set(&(i
.second
->oset
));
5886 while (unsafe_sync_write
> 0) {
5887 ldout(cct
, 0) << unsafe_sync_write
<< " unsafe_sync_writes, waiting" << dendl
;
5888 mount_cond
.Wait(client_lock
);
5891 if (cct
->_conf
->client_oc
) {
5892 // flush/release all buffered data
5893 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator next
;
5894 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator p
= inode_map
.begin();
5895 p
!= inode_map
.end();
5899 Inode
*in
= p
->second
;
5901 ldout(cct
, 0) << "null inode_map entry ino " << p
->first
<< dendl
;
5904 if (!in
->caps
.empty()) {
5905 InodeRef
tmp_ref(in
);
5907 _flush(in
, new C_Client_FlushComplete(this, in
));
5913 wait_sync_caps(last_flush_tid
);
5919 while (lru
.lru_get_size() > 0 ||
5920 !inode_map
.empty()) {
5921 ldout(cct
, 2) << "cache still has " << lru
.lru_get_size()
5922 << "+" << inode_map
.size() << " items"
5923 << ", waiting (for caps to release?)"
5925 utime_t until
= ceph_clock_now() + utime_t(5, 0);
5926 int r
= mount_cond
.WaitUntil(client_lock
, until
);
5927 if (r
== ETIMEDOUT
) {
5931 assert(lru
.lru_get_size() == 0);
5932 assert(inode_map
.empty());
5935 if (!cct
->_conf
->client_trace
.empty()) {
5936 ldout(cct
, 1) << "closing trace file '" << cct
->_conf
->client_trace
<< "'" << dendl
;
5944 ldout(cct
, 2) << "unmounted." << dendl
;
5949 class C_C_Tick
: public Context
{
5952 explicit C_C_Tick(Client
*c
) : client(c
) {}
5953 void finish(int r
) override
{
5954 // Called back via Timer, which takes client_lock for us
5955 assert(client
->client_lock
.is_locked_by_me());
5960 void Client::flush_cap_releases()
5962 // send any cap releases
5963 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
5964 p
!= mds_sessions
.end();
5966 if (p
->second
->release
&& mdsmap
->is_clientreplay_or_active_or_stopping(
5968 if (cct
->_conf
->client_inject_release_failure
) {
5969 ldout(cct
, 20) << __func__
<< " injecting failure to send cap release message" << dendl
;
5970 p
->second
->release
->put();
5972 p
->second
->con
->send_message(p
->second
->release
);
5974 p
->second
->release
= 0;
5981 if (cct
->_conf
->client_debug_inject_tick_delay
> 0) {
5982 sleep(cct
->_conf
->client_debug_inject_tick_delay
);
5983 assert(0 == cct
->_conf
->set_val("client_debug_inject_tick_delay", "0"));
5984 cct
->_conf
->apply_changes(NULL
);
5987 ldout(cct
, 21) << "tick" << dendl
;
5988 tick_event
= new C_C_Tick(this);
5989 timer
.add_event_after(cct
->_conf
->client_tick_interval
, tick_event
);
5991 utime_t now
= ceph_clock_now();
5993 if (!mounted
&& !mds_requests
.empty()) {
5994 MetaRequest
*req
= mds_requests
.begin()->second
;
5995 if (req
->op_stamp
+ cct
->_conf
->client_mount_timeout
< now
) {
5996 req
->abort(-ETIMEDOUT
);
5997 if (req
->caller_cond
) {
5999 req
->caller_cond
->Signal();
6001 signal_cond_list(waiting_for_mdsmap
);
6002 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
6003 p
!= mds_sessions
.end();
6005 signal_context_list(p
->second
->waiting_for_open
);
6009 if (mdsmap
->get_epoch()) {
6011 utime_t el
= now
- last_cap_renew
;
6012 if (el
> mdsmap
->get_session_timeout() / 3.0)
6015 flush_cap_releases();
6019 xlist
<Inode
*>::iterator p
= delayed_caps
.begin();
6023 if (in
->hold_caps_until
> now
)
6025 delayed_caps
.pop_front();
6026 cap_list
.push_back(&in
->cap_item
);
6027 check_caps(in
, CHECK_CAPS_NODELAY
);
6033 void Client::renew_caps()
6035 ldout(cct
, 10) << "renew_caps()" << dendl
;
6036 last_cap_renew
= ceph_clock_now();
6038 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
6039 p
!= mds_sessions
.end();
6041 ldout(cct
, 15) << "renew_caps requesting from mds." << p
->first
<< dendl
;
6042 if (mdsmap
->get_state(p
->first
) >= MDSMap::STATE_REJOIN
)
6043 renew_caps(p
->second
);
6047 void Client::renew_caps(MetaSession
*session
)
6049 ldout(cct
, 10) << "renew_caps mds." << session
->mds_num
<< dendl
;
6050 session
->last_cap_renew_request
= ceph_clock_now();
6051 uint64_t seq
= ++session
->cap_renew_seq
;
6052 session
->con
->send_message(new MClientSession(CEPH_SESSION_REQUEST_RENEWCAPS
, seq
));
6056 // ===============================================================
6057 // high level (POSIXy) interface
6059 int Client::_do_lookup(Inode
*dir
, const string
& name
, int mask
,
6060 InodeRef
*target
, const UserPerm
& perms
)
6062 int op
= dir
->snapid
== CEPH_SNAPDIR
? CEPH_MDS_OP_LOOKUPSNAP
: CEPH_MDS_OP_LOOKUP
;
6063 MetaRequest
*req
= new MetaRequest(op
);
6065 dir
->make_nosnap_relative_path(path
);
6066 path
.push_dentry(name
);
6067 req
->set_filepath(path
);
6068 req
->set_inode(dir
);
6069 if (cct
->_conf
->client_debug_getattr_caps
&& op
== CEPH_MDS_OP_LOOKUP
)
6070 mask
|= DEBUG_GETATTR_CAPS
;
6071 req
->head
.args
.getattr
.mask
= mask
;
6073 ldout(cct
, 10) << "_do_lookup on " << path
<< dendl
;
6075 int r
= make_request(req
, perms
, target
);
6076 ldout(cct
, 10) << "_do_lookup res is " << r
<< dendl
;
6080 int Client::_lookup(Inode
*dir
, const string
& dname
, int mask
, InodeRef
*target
,
6081 const UserPerm
& perms
)
6086 if (!dir
->is_dir()) {
6091 if (dname
== "..") {
6092 if (dir
->dn_set
.empty())
6095 *target
= dir
->get_first_parent()->dir
->parent_inode
; //dirs can't be hard-linked
6104 if (dname
.length() > NAME_MAX
) {
6109 if (dname
== cct
->_conf
->client_snapdir
&&
6110 dir
->snapid
== CEPH_NOSNAP
) {
6111 *target
= open_snapdir(dir
);
6116 dir
->dir
->dentries
.count(dname
)) {
6117 dn
= dir
->dir
->dentries
[dname
];
6119 ldout(cct
, 20) << "_lookup have dn " << dname
<< " mds." << dn
->lease_mds
<< " ttl " << dn
->lease_ttl
6120 << " seq " << dn
->lease_seq
6123 if (!dn
->inode
|| dn
->inode
->caps_issued_mask(mask
)) {
6124 // is dn lease valid?
6125 utime_t now
= ceph_clock_now();
6126 if (dn
->lease_mds
>= 0 &&
6127 dn
->lease_ttl
> now
&&
6128 mds_sessions
.count(dn
->lease_mds
)) {
6129 MetaSession
*s
= mds_sessions
[dn
->lease_mds
];
6130 if (s
->cap_ttl
> now
&&
6131 s
->cap_gen
== dn
->lease_gen
) {
6132 // touch this mds's dir cap too, even though we don't _explicitly_ use it here, to
6133 // make trim_caps() behave.
6134 dir
->try_touch_cap(dn
->lease_mds
);
6137 ldout(cct
, 20) << " bad lease, cap_ttl " << s
->cap_ttl
<< ", cap_gen " << s
->cap_gen
6138 << " vs lease_gen " << dn
->lease_gen
<< dendl
;
6141 if (dir
->caps_issued_mask(CEPH_CAP_FILE_SHARED
)) {
6142 if (dn
->cap_shared_gen
== dir
->shared_gen
&&
6143 (!dn
->inode
|| dn
->inode
->caps_issued_mask(mask
)))
6145 if (!dn
->inode
&& (dir
->flags
& I_COMPLETE
)) {
6146 ldout(cct
, 10) << "_lookup concluded ENOENT locally for "
6147 << *dir
<< " dn '" << dname
<< "'" << dendl
;
6152 ldout(cct
, 20) << " no cap on " << dn
->inode
->vino() << dendl
;
6155 // can we conclude ENOENT locally?
6156 if (dir
->caps_issued_mask(CEPH_CAP_FILE_SHARED
) &&
6157 (dir
->flags
& I_COMPLETE
)) {
6158 ldout(cct
, 10) << "_lookup concluded ENOENT locally for " << *dir
<< " dn '" << dname
<< "'" << dendl
;
6163 r
= _do_lookup(dir
, dname
, mask
, target
, perms
);
6168 *target
= dn
->inode
;
6176 ldout(cct
, 10) << "_lookup " << *dir
<< " " << dname
<< " = " << r
<< dendl
;
6178 ldout(cct
, 10) << "_lookup " << *dir
<< " " << dname
<< " = " << **target
<< dendl
;
6182 int Client::get_or_create(Inode
*dir
, const char* name
,
6183 Dentry
**pdn
, bool expect_null
)
6186 ldout(cct
, 20) << "get_or_create " << *dir
<< " name " << name
<< dendl
;
6188 if (dir
->dir
->dentries
.count(name
)) {
6189 Dentry
*dn
= dir
->dir
->dentries
[name
];
6191 // is dn lease valid?
6192 utime_t now
= ceph_clock_now();
6194 dn
->lease_mds
>= 0 &&
6195 dn
->lease_ttl
> now
&&
6196 mds_sessions
.count(dn
->lease_mds
)) {
6197 MetaSession
*s
= mds_sessions
[dn
->lease_mds
];
6198 if (s
->cap_ttl
> now
&&
6199 s
->cap_gen
== dn
->lease_gen
) {
6206 // otherwise link up a new one
6207 *pdn
= link(dir
->dir
, name
, NULL
, NULL
);
6214 int Client::path_walk(const filepath
& origpath
, InodeRef
*end
,
6215 const UserPerm
& perms
, bool followsym
, int mask
)
6217 filepath path
= origpath
;
6219 if (origpath
.absolute())
6225 ldout(cct
, 10) << "path_walk " << path
<< dendl
;
6230 while (i
< path
.depth() && cur
) {
6232 const string
&dname
= path
[i
];
6233 ldout(cct
, 10) << " " << i
<< " " << *cur
<< " " << dname
<< dendl
;
6234 ldout(cct
, 20) << " (path is " << path
<< ")" << dendl
;
6236 if (cct
->_conf
->client_permissions
) {
6237 int r
= may_lookup(cur
.get(), perms
);
6240 caps
= CEPH_CAP_AUTH_SHARED
;
6243 /* Get extra requested caps on the last component */
6244 if (i
== (path
.depth() - 1))
6246 int r
= _lookup(cur
.get(), dname
, caps
, &next
, perms
);
6249 // only follow trailing symlink if followsym. always follow
6250 // 'directory' symlinks.
6251 if (next
&& next
->is_symlink()) {
6253 ldout(cct
, 20) << " symlink count " << symlinks
<< ", value is '" << next
->symlink
<< "'" << dendl
;
6254 if (symlinks
> MAXSYMLINKS
) {
6258 if (i
< path
.depth() - 1) {
6260 // replace consumed components of path with symlink dir target
6261 filepath
resolved(next
->symlink
.c_str());
6262 resolved
.append(path
.postfixpath(i
+ 1));
6265 if (next
->symlink
[0] == '/') {
6269 } else if (followsym
) {
6270 if (next
->symlink
[0] == '/') {
6271 path
= next
->symlink
.c_str();
6276 filepath
more(next
->symlink
.c_str());
6277 // we need to remove the symlink component from off of the path
6278 // before adding the target that the symlink points to. remain
6279 // at the same position in the path.
6299 int Client::link(const char *relexisting
, const char *relpath
, const UserPerm
& perm
)
6301 Mutex::Locker
lock(client_lock
);
6302 tout(cct
) << "link" << std::endl
;
6303 tout(cct
) << relexisting
<< std::endl
;
6304 tout(cct
) << relpath
<< std::endl
;
6306 filepath
existing(relexisting
);
6309 int r
= path_walk(existing
, &in
, perm
, true);
6312 if (std::string(relpath
) == "/") {
6316 filepath
path(relpath
);
6317 string name
= path
.last_dentry();
6320 r
= path_walk(path
, &dir
, perm
, true);
6323 if (cct
->_conf
->client_permissions
) {
6324 if (S_ISDIR(in
->mode
)) {
6328 r
= may_hardlink(in
.get(), perm
);
6331 r
= may_create(dir
.get(), perm
);
6335 r
= _link(in
.get(), dir
.get(), name
.c_str(), perm
);
6339 int Client::unlink(const char *relpath
, const UserPerm
& perm
)
6341 Mutex::Locker
lock(client_lock
);
6342 tout(cct
) << "unlink" << std::endl
;
6343 tout(cct
) << relpath
<< std::endl
;
6345 if (std::string(relpath
) == "/")
6348 filepath
path(relpath
);
6349 string name
= path
.last_dentry();
6352 int r
= path_walk(path
, &dir
, perm
);
6355 if (cct
->_conf
->client_permissions
) {
6356 r
= may_delete(dir
.get(), name
.c_str(), perm
);
6360 return _unlink(dir
.get(), name
.c_str(), perm
);
6363 int Client::rename(const char *relfrom
, const char *relto
, const UserPerm
& perm
)
6365 Mutex::Locker
lock(client_lock
);
6366 tout(cct
) << "rename" << std::endl
;
6367 tout(cct
) << relfrom
<< std::endl
;
6368 tout(cct
) << relto
<< std::endl
;
6370 if (std::string(relfrom
) == "/" || std::string(relto
) == "/")
6373 filepath
from(relfrom
);
6375 string fromname
= from
.last_dentry();
6377 string toname
= to
.last_dentry();
6380 InodeRef fromdir
, todir
;
6381 int r
= path_walk(from
, &fromdir
, perm
);
6384 r
= path_walk(to
, &todir
, perm
);
6388 if (cct
->_conf
->client_permissions
) {
6389 int r
= may_delete(fromdir
.get(), fromname
.c_str(), perm
);
6392 r
= may_delete(todir
.get(), toname
.c_str(), perm
);
6393 if (r
< 0 && r
!= -ENOENT
)
6396 r
= _rename(fromdir
.get(), fromname
.c_str(), todir
.get(), toname
.c_str(), perm
);
6403 int Client::mkdir(const char *relpath
, mode_t mode
, const UserPerm
& perm
)
6405 Mutex::Locker
lock(client_lock
);
6406 tout(cct
) << "mkdir" << std::endl
;
6407 tout(cct
) << relpath
<< std::endl
;
6408 tout(cct
) << mode
<< std::endl
;
6409 ldout(cct
, 10) << "mkdir: " << relpath
<< dendl
;
6411 if (std::string(relpath
) == "/")
6414 filepath
path(relpath
);
6415 string name
= path
.last_dentry();
6418 int r
= path_walk(path
, &dir
, perm
);
6421 if (cct
->_conf
->client_permissions
) {
6422 r
= may_create(dir
.get(), perm
);
6426 return _mkdir(dir
.get(), name
.c_str(), mode
, perm
);
6429 int Client::mkdirs(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
6431 Mutex::Locker
lock(client_lock
);
6432 ldout(cct
, 10) << "Client::mkdirs " << relpath
<< dendl
;
6433 tout(cct
) << "mkdirs" << std::endl
;
6434 tout(cct
) << relpath
<< std::endl
;
6435 tout(cct
) << mode
<< std::endl
;
6437 //get through existing parts of path
6438 filepath
path(relpath
);
6440 int r
= 0, caps
= 0;
6443 for (i
=0; i
<path
.depth(); ++i
) {
6444 if (cct
->_conf
->client_permissions
) {
6445 r
= may_lookup(cur
.get(), perms
);
6448 caps
= CEPH_CAP_AUTH_SHARED
;
6450 r
= _lookup(cur
.get(), path
[i
].c_str(), caps
, &next
, perms
);
6455 //check that we have work left to do
6456 if (i
==path
.depth()) return -EEXIST
;
6457 if (r
!=-ENOENT
) return r
;
6458 ldout(cct
, 20) << "mkdirs got through " << i
<< " directories on path " << relpath
<< dendl
;
6459 //make new directory at each level
6460 for (; i
<path
.depth(); ++i
) {
6461 if (cct
->_conf
->client_permissions
) {
6462 r
= may_create(cur
.get(), perms
);
6467 r
= _mkdir(cur
.get(), path
[i
].c_str(), mode
, perms
, &next
);
6469 //check proper creation/existence
6470 if(-EEXIST
== r
&& i
< path
.depth() - 1) {
6471 r
= _lookup(cur
.get(), path
[i
].c_str(), CEPH_CAP_AUTH_SHARED
, &next
, perms
);
6475 //move to new dir and continue
6477 ldout(cct
, 20) << "mkdirs: successfully created directory "
6478 << filepath(cur
->ino
).get_path() << dendl
;
6483 int Client::rmdir(const char *relpath
, const UserPerm
& perms
)
6485 Mutex::Locker
lock(client_lock
);
6486 tout(cct
) << "rmdir" << std::endl
;
6487 tout(cct
) << relpath
<< std::endl
;
6489 if (std::string(relpath
) == "/")
6492 filepath
path(relpath
);
6493 string name
= path
.last_dentry();
6496 int r
= path_walk(path
, &dir
, perms
);
6499 if (cct
->_conf
->client_permissions
) {
6500 int r
= may_delete(dir
.get(), name
.c_str(), perms
);
6504 return _rmdir(dir
.get(), name
.c_str(), perms
);
6507 int Client::mknod(const char *relpath
, mode_t mode
, const UserPerm
& perms
, dev_t rdev
)
6509 Mutex::Locker
lock(client_lock
);
6510 tout(cct
) << "mknod" << std::endl
;
6511 tout(cct
) << relpath
<< std::endl
;
6512 tout(cct
) << mode
<< std::endl
;
6513 tout(cct
) << rdev
<< std::endl
;
6515 if (std::string(relpath
) == "/")
6518 filepath
path(relpath
);
6519 string name
= path
.last_dentry();
6522 int r
= path_walk(path
, &dir
, perms
);
6525 if (cct
->_conf
->client_permissions
) {
6526 int r
= may_create(dir
.get(), perms
);
6530 return _mknod(dir
.get(), name
.c_str(), mode
, rdev
, perms
);
6535 int Client::symlink(const char *target
, const char *relpath
, const UserPerm
& perms
)
6537 Mutex::Locker
lock(client_lock
);
6538 tout(cct
) << "symlink" << std::endl
;
6539 tout(cct
) << target
<< std::endl
;
6540 tout(cct
) << relpath
<< std::endl
;
6542 if (std::string(relpath
) == "/")
6545 filepath
path(relpath
);
6546 string name
= path
.last_dentry();
6549 int r
= path_walk(path
, &dir
, perms
);
6552 if (cct
->_conf
->client_permissions
) {
6553 int r
= may_create(dir
.get(), perms
);
6557 return _symlink(dir
.get(), name
.c_str(), target
, perms
);
6560 int Client::readlink(const char *relpath
, char *buf
, loff_t size
, const UserPerm
& perms
)
6562 Mutex::Locker
lock(client_lock
);
6563 tout(cct
) << "readlink" << std::endl
;
6564 tout(cct
) << relpath
<< std::endl
;
6566 filepath
path(relpath
);
6568 int r
= path_walk(path
, &in
, perms
, false);
6572 return _readlink(in
.get(), buf
, size
);
6575 int Client::_readlink(Inode
*in
, char *buf
, size_t size
)
6577 if (!in
->is_symlink())
6580 // copy into buf (at most size bytes)
6581 int r
= in
->symlink
.length();
6584 memcpy(buf
, in
->symlink
.c_str(), r
);
6591 int Client::_getattr(Inode
*in
, int mask
, const UserPerm
& perms
, bool force
)
6593 bool yes
= in
->caps_issued_mask(mask
);
6595 ldout(cct
, 10) << "_getattr mask " << ccap_string(mask
) << " issued=" << yes
<< dendl
;
6599 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_GETATTR
);
6601 in
->make_nosnap_relative_path(path
);
6602 req
->set_filepath(path
);
6604 req
->head
.args
.getattr
.mask
= mask
;
6606 int res
= make_request(req
, perms
);
6607 ldout(cct
, 10) << "_getattr result=" << res
<< dendl
;
6611 int Client::_do_setattr(Inode
*in
, struct ceph_statx
*stx
, int mask
,
6612 const UserPerm
& perms
, InodeRef
*inp
)
6614 int issued
= in
->caps_issued();
6616 ldout(cct
, 10) << "_setattr mask " << mask
<< " issued " <<
6617 ccap_string(issued
) << dendl
;
6619 if (in
->snapid
!= CEPH_NOSNAP
) {
6622 if ((mask
& CEPH_SETATTR_SIZE
) &&
6623 (unsigned long)stx
->stx_size
> in
->size
&&
6624 is_quota_bytes_exceeded(in
, (unsigned long)stx
->stx_size
- in
->size
,
6629 // make the change locally?
6630 if ((in
->cap_dirtier_uid
>= 0 && perms
.uid() != in
->cap_dirtier_uid
) ||
6631 (in
->cap_dirtier_gid
>= 0 && perms
.gid() != in
->cap_dirtier_gid
)) {
6632 ldout(cct
, 10) << __func__
<< " caller " << perms
.uid() << ":" << perms
.gid()
6633 << " != cap dirtier " << in
->cap_dirtier_uid
<< ":"
6634 << in
->cap_dirtier_gid
<< ", forcing sync setattr"
6637 * This works because we implicitly flush the caps as part of the
6638 * request, so the cap update check will happen with the writeback
6639 * cap context, and then the setattr check will happen with the
6642 * In reality this pattern is likely pretty rare (different users
6643 * setattr'ing the same file). If that turns out not to be the
6644 * case later, we can build a more complex pipelined cap writeback
6648 mask
|= CEPH_SETATTR_CTIME
;
6653 // caller just needs us to bump the ctime
6654 in
->ctime
= ceph_clock_now();
6655 in
->cap_dirtier_uid
= perms
.uid();
6656 in
->cap_dirtier_gid
= perms
.gid();
6657 if (issued
& CEPH_CAP_AUTH_EXCL
)
6658 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6659 else if (issued
& CEPH_CAP_FILE_EXCL
)
6660 mark_caps_dirty(in
, CEPH_CAP_FILE_EXCL
);
6661 else if (issued
& CEPH_CAP_XATTR_EXCL
)
6662 mark_caps_dirty(in
, CEPH_CAP_XATTR_EXCL
);
6664 mask
|= CEPH_SETATTR_CTIME
;
6667 if (in
->caps_issued_mask(CEPH_CAP_AUTH_EXCL
)) {
6668 bool kill_sguid
= mask
& (CEPH_SETATTR_SIZE
|CEPH_SETATTR_KILL_SGUID
);
6670 mask
&= ~CEPH_SETATTR_KILL_SGUID
;
6672 if (mask
& CEPH_SETATTR_UID
) {
6673 in
->ctime
= ceph_clock_now();
6674 in
->cap_dirtier_uid
= perms
.uid();
6675 in
->cap_dirtier_gid
= perms
.gid();
6676 in
->uid
= stx
->stx_uid
;
6677 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6678 mask
&= ~CEPH_SETATTR_UID
;
6680 ldout(cct
,10) << "changing uid to " << stx
->stx_uid
<< dendl
;
6682 if (mask
& CEPH_SETATTR_GID
) {
6683 in
->ctime
= ceph_clock_now();
6684 in
->cap_dirtier_uid
= perms
.uid();
6685 in
->cap_dirtier_gid
= perms
.gid();
6686 in
->gid
= stx
->stx_gid
;
6687 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6688 mask
&= ~CEPH_SETATTR_GID
;
6690 ldout(cct
,10) << "changing gid to " << stx
->stx_gid
<< dendl
;
6693 if (mask
& CEPH_SETATTR_MODE
) {
6694 in
->ctime
= ceph_clock_now();
6695 in
->cap_dirtier_uid
= perms
.uid();
6696 in
->cap_dirtier_gid
= perms
.gid();
6697 in
->mode
= (in
->mode
& ~07777) | (stx
->stx_mode
& 07777);
6698 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6699 mask
&= ~CEPH_SETATTR_MODE
;
6700 ldout(cct
,10) << "changing mode to " << stx
->stx_mode
<< dendl
;
6701 } else if (kill_sguid
&& S_ISREG(in
->mode
)) {
6702 /* Must squash the any setuid/setgid bits with an ownership change */
6703 in
->mode
&= ~S_ISUID
;
6704 if ((in
->mode
& (S_ISGID
|S_IXGRP
)) == (S_ISGID
|S_IXGRP
))
6705 in
->mode
&= ~S_ISGID
;
6706 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6709 if (mask
& CEPH_SETATTR_BTIME
) {
6710 in
->ctime
= ceph_clock_now();
6711 in
->cap_dirtier_uid
= perms
.uid();
6712 in
->cap_dirtier_gid
= perms
.gid();
6713 in
->btime
= utime_t(stx
->stx_btime
);
6714 mark_caps_dirty(in
, CEPH_CAP_AUTH_EXCL
);
6715 mask
&= ~CEPH_SETATTR_BTIME
;
6716 ldout(cct
,10) << "changing btime to " << in
->btime
<< dendl
;
6718 } else if (mask
& CEPH_SETATTR_SIZE
) {
6719 /* If we don't have Ax, then we must ask the server to clear them on truncate */
6720 mask
|= CEPH_SETATTR_KILL_SGUID
;
6723 if (in
->caps_issued_mask(CEPH_CAP_FILE_EXCL
)) {
6724 if (mask
& (CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
)) {
6725 if (mask
& CEPH_SETATTR_MTIME
)
6726 in
->mtime
= utime_t(stx
->stx_mtime
);
6727 if (mask
& CEPH_SETATTR_ATIME
)
6728 in
->atime
= utime_t(stx
->stx_atime
);
6729 in
->ctime
= ceph_clock_now();
6730 in
->cap_dirtier_uid
= perms
.uid();
6731 in
->cap_dirtier_gid
= perms
.gid();
6732 in
->time_warp_seq
++;
6733 mark_caps_dirty(in
, CEPH_CAP_FILE_EXCL
);
6734 mask
&= ~(CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
);
6743 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SETATTR
);
6747 in
->make_nosnap_relative_path(path
);
6748 req
->set_filepath(path
);
6751 if (mask
& CEPH_SETATTR_KILL_SGUID
) {
6752 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6754 if (mask
& CEPH_SETATTR_MODE
) {
6755 req
->head
.args
.setattr
.mode
= stx
->stx_mode
;
6756 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6757 ldout(cct
,10) << "changing mode to " << stx
->stx_mode
<< dendl
;
6759 if (mask
& CEPH_SETATTR_UID
) {
6760 req
->head
.args
.setattr
.uid
= stx
->stx_uid
;
6761 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6762 ldout(cct
,10) << "changing uid to " << stx
->stx_uid
<< dendl
;
6764 if (mask
& CEPH_SETATTR_GID
) {
6765 req
->head
.args
.setattr
.gid
= stx
->stx_gid
;
6766 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6767 ldout(cct
,10) << "changing gid to " << stx
->stx_gid
<< dendl
;
6769 if (mask
& CEPH_SETATTR_BTIME
) {
6770 req
->head
.args
.setattr
.btime
= utime_t(stx
->stx_btime
);
6771 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
;
6773 if (mask
& CEPH_SETATTR_MTIME
) {
6774 req
->head
.args
.setattr
.mtime
= utime_t(stx
->stx_mtime
);
6775 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
| CEPH_CAP_FILE_RD
|
6778 if (mask
& CEPH_SETATTR_ATIME
) {
6779 req
->head
.args
.setattr
.atime
= utime_t(stx
->stx_atime
);
6780 req
->inode_drop
|= CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_RD
|
6783 if (mask
& CEPH_SETATTR_SIZE
) {
6784 if ((unsigned long)stx
->stx_size
< mdsmap
->get_max_filesize()) {
6785 req
->head
.args
.setattr
.size
= stx
->stx_size
;
6786 ldout(cct
,10) << "changing size to " << stx
->stx_size
<< dendl
;
6789 ldout(cct
,10) << "unable to set size to " << stx
->stx_size
<< ". Too large!" << dendl
;
6792 req
->inode_drop
|= CEPH_CAP_AUTH_SHARED
| CEPH_CAP_FILE_RD
|
6795 req
->head
.args
.setattr
.mask
= mask
;
6797 req
->regetattr_mask
= mask
;
6799 int res
= make_request(req
, perms
, inp
);
6800 ldout(cct
, 10) << "_setattr result=" << res
<< dendl
;
6804 /* Note that we only care about attrs that setattr cares about */
6805 void Client::stat_to_statx(struct stat
*st
, struct ceph_statx
*stx
)
6807 stx
->stx_size
= st
->st_size
;
6808 stx
->stx_mode
= st
->st_mode
;
6809 stx
->stx_uid
= st
->st_uid
;
6810 stx
->stx_gid
= st
->st_gid
;
6811 stx
->stx_mtime
= st
->st_mtim
;
6812 stx
->stx_atime
= st
->st_atim
;
6815 int Client::__setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
6816 const UserPerm
& perms
, InodeRef
*inp
)
6818 int ret
= _do_setattr(in
, stx
, mask
, perms
, inp
);
6821 if (mask
& CEPH_SETATTR_MODE
)
6822 ret
= _posix_acl_chmod(in
, stx
->stx_mode
, perms
);
6826 int Client::_setattrx(InodeRef
&in
, struct ceph_statx
*stx
, int mask
,
6827 const UserPerm
& perms
)
6829 mask
&= (CEPH_SETATTR_MODE
| CEPH_SETATTR_UID
|
6830 CEPH_SETATTR_GID
| CEPH_SETATTR_MTIME
|
6831 CEPH_SETATTR_ATIME
| CEPH_SETATTR_SIZE
|
6832 CEPH_SETATTR_CTIME
| CEPH_SETATTR_BTIME
);
6833 if (cct
->_conf
->client_permissions
) {
6834 int r
= may_setattr(in
.get(), stx
, mask
, perms
);
6838 return __setattrx(in
.get(), stx
, mask
, perms
);
6841 int Client::_setattr(InodeRef
&in
, struct stat
*attr
, int mask
,
6842 const UserPerm
& perms
)
6844 struct ceph_statx stx
;
6846 stat_to_statx(attr
, &stx
);
6847 mask
&= ~CEPH_SETATTR_BTIME
;
6848 return _setattrx(in
, &stx
, mask
, perms
);
6851 int Client::setattr(const char *relpath
, struct stat
*attr
, int mask
,
6852 const UserPerm
& perms
)
6854 Mutex::Locker
lock(client_lock
);
6855 tout(cct
) << "setattr" << std::endl
;
6856 tout(cct
) << relpath
<< std::endl
;
6857 tout(cct
) << mask
<< std::endl
;
6859 filepath
path(relpath
);
6861 int r
= path_walk(path
, &in
, perms
);
6864 return _setattr(in
, attr
, mask
, perms
);
6867 int Client::setattrx(const char *relpath
, struct ceph_statx
*stx
, int mask
,
6868 const UserPerm
& perms
, int flags
)
6870 Mutex::Locker
lock(client_lock
);
6871 tout(cct
) << "setattrx" << std::endl
;
6872 tout(cct
) << relpath
<< std::endl
;
6873 tout(cct
) << mask
<< std::endl
;
6875 filepath
path(relpath
);
6877 int r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
));
6880 return _setattrx(in
, stx
, mask
, perms
);
6883 int Client::fsetattr(int fd
, struct stat
*attr
, int mask
, const UserPerm
& perms
)
6885 Mutex::Locker
lock(client_lock
);
6886 tout(cct
) << "fsetattr" << std::endl
;
6887 tout(cct
) << fd
<< std::endl
;
6888 tout(cct
) << mask
<< std::endl
;
6890 Fh
*f
= get_filehandle(fd
);
6893 #if defined(__linux__) && defined(O_PATH)
6894 if (f
->flags
& O_PATH
)
6897 return _setattr(f
->inode
, attr
, mask
, perms
);
6900 int Client::fsetattrx(int fd
, struct ceph_statx
*stx
, int mask
, const UserPerm
& perms
)
6902 Mutex::Locker
lock(client_lock
);
6903 tout(cct
) << "fsetattr" << std::endl
;
6904 tout(cct
) << fd
<< std::endl
;
6905 tout(cct
) << mask
<< std::endl
;
6907 Fh
*f
= get_filehandle(fd
);
6910 #if defined(__linux__) && defined(O_PATH)
6911 if (f
->flags
& O_PATH
)
6914 return _setattrx(f
->inode
, stx
, mask
, perms
);
6917 int Client::stat(const char *relpath
, struct stat
*stbuf
, const UserPerm
& perms
,
6918 frag_info_t
*dirstat
, int mask
)
6920 ldout(cct
, 3) << "stat enter (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
6921 Mutex::Locker
lock(client_lock
);
6922 tout(cct
) << "stat" << std::endl
;
6923 tout(cct
) << relpath
<< std::endl
;
6924 filepath
path(relpath
);
6926 int r
= path_walk(path
, &in
, perms
, true, mask
);
6929 r
= _getattr(in
, mask
, perms
);
6931 ldout(cct
, 3) << "stat exit on error!" << dendl
;
6934 fill_stat(in
, stbuf
, dirstat
);
6935 ldout(cct
, 3) << "stat exit (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
6939 unsigned Client::statx_to_mask(unsigned int flags
, unsigned int want
)
6943 /* if NO_ATTR_SYNC is set, then we don't need any -- just use what's in cache */
6944 if (flags
& AT_NO_ATTR_SYNC
)
6947 /* Always set PIN to distinguish from AT_NO_ATTR_SYNC case */
6948 mask
|= CEPH_CAP_PIN
;
6949 if (want
& (CEPH_STATX_MODE
|CEPH_STATX_UID
|CEPH_STATX_GID
|CEPH_STATX_BTIME
|CEPH_STATX_CTIME
|CEPH_STATX_VERSION
))
6950 mask
|= CEPH_CAP_AUTH_SHARED
;
6951 if (want
& (CEPH_STATX_NLINK
|CEPH_STATX_CTIME
|CEPH_STATX_VERSION
))
6952 mask
|= CEPH_CAP_LINK_SHARED
;
6953 if (want
& (CEPH_STATX_ATIME
|CEPH_STATX_MTIME
|CEPH_STATX_CTIME
|CEPH_STATX_SIZE
|CEPH_STATX_BLOCKS
|CEPH_STATX_VERSION
))
6954 mask
|= CEPH_CAP_FILE_SHARED
;
6955 if (want
& (CEPH_STATX_VERSION
|CEPH_STATX_CTIME
))
6956 mask
|= CEPH_CAP_XATTR_SHARED
;
6961 int Client::statx(const char *relpath
, struct ceph_statx
*stx
,
6962 const UserPerm
& perms
,
6963 unsigned int want
, unsigned int flags
)
6965 ldout(cct
, 3) << "statx enter (relpath " << relpath
<< " want " << want
<< ")" << dendl
;
6966 Mutex::Locker
lock(client_lock
);
6967 tout(cct
) << "statx" << std::endl
;
6968 tout(cct
) << relpath
<< std::endl
;
6969 filepath
path(relpath
);
6972 unsigned mask
= statx_to_mask(flags
, want
);
6974 int r
= path_walk(path
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), mask
);
6978 r
= _getattr(in
, mask
, perms
);
6980 ldout(cct
, 3) << "statx exit on error!" << dendl
;
6984 fill_statx(in
, mask
, stx
);
6985 ldout(cct
, 3) << "statx exit (relpath " << relpath
<< " mask " << stx
->stx_mask
<< ")" << dendl
;
6989 int Client::lstat(const char *relpath
, struct stat
*stbuf
,
6990 const UserPerm
& perms
, frag_info_t
*dirstat
, int mask
)
6992 ldout(cct
, 3) << "lstat enter (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
6993 Mutex::Locker
lock(client_lock
);
6994 tout(cct
) << "lstat" << std::endl
;
6995 tout(cct
) << relpath
<< std::endl
;
6996 filepath
path(relpath
);
6998 // don't follow symlinks
6999 int r
= path_walk(path
, &in
, perms
, false, mask
);
7002 r
= _getattr(in
, mask
, perms
);
7004 ldout(cct
, 3) << "lstat exit on error!" << dendl
;
7007 fill_stat(in
, stbuf
, dirstat
);
7008 ldout(cct
, 3) << "lstat exit (relpath " << relpath
<< " mask " << mask
<< ")" << dendl
;
7012 int Client::fill_stat(Inode
*in
, struct stat
*st
, frag_info_t
*dirstat
, nest_info_t
*rstat
)
7014 ldout(cct
, 10) << "fill_stat on " << in
->ino
<< " snap/dev" << in
->snapid
7015 << " mode 0" << oct
<< in
->mode
<< dec
7016 << " mtime " << in
->mtime
<< " ctime " << in
->ctime
<< dendl
;
7017 memset(st
, 0, sizeof(struct stat
));
7018 if (use_faked_inos())
7019 st
->st_ino
= in
->faked_ino
;
7021 st
->st_ino
= in
->ino
;
7022 st
->st_dev
= in
->snapid
;
7023 st
->st_mode
= in
->mode
;
7024 st
->st_rdev
= in
->rdev
;
7025 st
->st_nlink
= in
->nlink
;
7026 st
->st_uid
= in
->uid
;
7027 st
->st_gid
= in
->gid
;
7028 if (in
->ctime
> in
->mtime
) {
7029 stat_set_ctime_sec(st
, in
->ctime
.sec());
7030 stat_set_ctime_nsec(st
, in
->ctime
.nsec());
7032 stat_set_ctime_sec(st
, in
->mtime
.sec());
7033 stat_set_ctime_nsec(st
, in
->mtime
.nsec());
7035 stat_set_atime_sec(st
, in
->atime
.sec());
7036 stat_set_atime_nsec(st
, in
->atime
.nsec());
7037 stat_set_mtime_sec(st
, in
->mtime
.sec());
7038 stat_set_mtime_nsec(st
, in
->mtime
.nsec());
7040 if (cct
->_conf
->client_dirsize_rbytes
)
7041 st
->st_size
= in
->rstat
.rbytes
;
7043 st
->st_size
= in
->dirstat
.size();
7046 st
->st_size
= in
->size
;
7047 st
->st_blocks
= (in
->size
+ 511) >> 9;
7049 st
->st_blksize
= MAX(in
->layout
.stripe_unit
, 4096);
7052 *dirstat
= in
->dirstat
;
7056 return in
->caps_issued();
7059 void Client::fill_statx(Inode
*in
, unsigned int mask
, struct ceph_statx
*stx
)
7061 ldout(cct
, 10) << "fill_statx on " << in
->ino
<< " snap/dev" << in
->snapid
7062 << " mode 0" << oct
<< in
->mode
<< dec
7063 << " mtime " << in
->mtime
<< " ctime " << in
->ctime
<< dendl
;
7064 memset(stx
, 0, sizeof(struct ceph_statx
));
7067 * If mask is 0, then the caller set AT_NO_ATTR_SYNC. Reset the mask
7068 * so that all bits are set.
7073 /* These are always considered to be available */
7074 stx
->stx_dev
= in
->snapid
;
7075 stx
->stx_blksize
= MAX(in
->layout
.stripe_unit
, 4096);
7077 /* Type bits are always set, even when CEPH_STATX_MODE is not */
7078 stx
->stx_mode
= S_IFMT
& in
->mode
;
7079 stx
->stx_ino
= use_faked_inos() ? in
->faked_ino
: (ino_t
)in
->ino
;
7080 stx
->stx_rdev
= in
->rdev
;
7081 stx
->stx_mask
|= (CEPH_STATX_INO
|CEPH_STATX_RDEV
);
7083 if (mask
& CEPH_CAP_AUTH_SHARED
) {
7084 stx
->stx_uid
= in
->uid
;
7085 stx
->stx_gid
= in
->gid
;
7086 stx
->stx_mode
= in
->mode
;
7087 in
->btime
.to_timespec(&stx
->stx_btime
);
7088 stx
->stx_mask
|= (CEPH_STATX_MODE
|CEPH_STATX_UID
|CEPH_STATX_GID
|CEPH_STATX_BTIME
);
7091 if (mask
& CEPH_CAP_LINK_SHARED
) {
7092 stx
->stx_nlink
= in
->nlink
;
7093 stx
->stx_mask
|= CEPH_STATX_NLINK
;
7096 if (mask
& CEPH_CAP_FILE_SHARED
) {
7098 in
->atime
.to_timespec(&stx
->stx_atime
);
7099 in
->mtime
.to_timespec(&stx
->stx_mtime
);
7102 if (cct
->_conf
->client_dirsize_rbytes
)
7103 stx
->stx_size
= in
->rstat
.rbytes
;
7105 stx
->stx_size
= in
->dirstat
.size();
7106 stx
->stx_blocks
= 1;
7108 stx
->stx_size
= in
->size
;
7109 stx
->stx_blocks
= (in
->size
+ 511) >> 9;
7111 stx
->stx_mask
|= (CEPH_STATX_ATIME
|CEPH_STATX_MTIME
|
7112 CEPH_STATX_SIZE
|CEPH_STATX_BLOCKS
);
7115 /* Change time and change_attr both require all shared caps to view */
7116 if ((mask
& CEPH_STAT_CAP_INODE_ALL
) == CEPH_STAT_CAP_INODE_ALL
) {
7117 stx
->stx_version
= in
->change_attr
;
7118 if (in
->ctime
> in
->mtime
)
7119 in
->ctime
.to_timespec(&stx
->stx_ctime
);
7121 in
->mtime
.to_timespec(&stx
->stx_ctime
);
7122 stx
->stx_mask
|= (CEPH_STATX_CTIME
|CEPH_STATX_VERSION
);
7127 void Client::touch_dn(Dentry
*dn
)
7132 int Client::chmod(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
7134 Mutex::Locker
lock(client_lock
);
7135 tout(cct
) << "chmod" << std::endl
;
7136 tout(cct
) << relpath
<< std::endl
;
7137 tout(cct
) << mode
<< std::endl
;
7138 filepath
path(relpath
);
7140 int r
= path_walk(path
, &in
, perms
);
7144 attr
.st_mode
= mode
;
7145 return _setattr(in
, &attr
, CEPH_SETATTR_MODE
, perms
);
7148 int Client::fchmod(int fd
, mode_t mode
, const UserPerm
& perms
)
7150 Mutex::Locker
lock(client_lock
);
7151 tout(cct
) << "fchmod" << std::endl
;
7152 tout(cct
) << fd
<< std::endl
;
7153 tout(cct
) << mode
<< std::endl
;
7154 Fh
*f
= get_filehandle(fd
);
7157 #if defined(__linux__) && defined(O_PATH)
7158 if (f
->flags
& O_PATH
)
7162 attr
.st_mode
= mode
;
7163 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_MODE
, perms
);
7166 int Client::lchmod(const char *relpath
, mode_t mode
, const UserPerm
& perms
)
7168 Mutex::Locker
lock(client_lock
);
7169 tout(cct
) << "lchmod" << std::endl
;
7170 tout(cct
) << relpath
<< std::endl
;
7171 tout(cct
) << mode
<< std::endl
;
7172 filepath
path(relpath
);
7174 // don't follow symlinks
7175 int r
= path_walk(path
, &in
, perms
, false);
7179 attr
.st_mode
= mode
;
7180 return _setattr(in
, &attr
, CEPH_SETATTR_MODE
, perms
);
7183 int Client::chown(const char *relpath
, uid_t new_uid
, gid_t new_gid
,
7184 const UserPerm
& perms
)
7186 Mutex::Locker
lock(client_lock
);
7187 tout(cct
) << "chown" << std::endl
;
7188 tout(cct
) << relpath
<< std::endl
;
7189 tout(cct
) << new_uid
<< std::endl
;
7190 tout(cct
) << new_gid
<< std::endl
;
7191 filepath
path(relpath
);
7193 int r
= path_walk(path
, &in
, perms
);
7197 attr
.st_uid
= new_uid
;
7198 attr
.st_gid
= new_gid
;
7200 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
7201 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
7202 return _setattr(in
, &attr
, mask
, perms
);
7205 int Client::fchown(int fd
, uid_t new_uid
, gid_t new_gid
, const UserPerm
& perms
)
7207 Mutex::Locker
lock(client_lock
);
7208 tout(cct
) << "fchown" << std::endl
;
7209 tout(cct
) << fd
<< std::endl
;
7210 tout(cct
) << new_uid
<< std::endl
;
7211 tout(cct
) << new_gid
<< std::endl
;
7212 Fh
*f
= get_filehandle(fd
);
7215 #if defined(__linux__) && defined(O_PATH)
7216 if (f
->flags
& O_PATH
)
7220 attr
.st_uid
= new_uid
;
7221 attr
.st_gid
= new_gid
;
7223 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
7224 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
7225 return _setattr(f
->inode
, &attr
, mask
, perms
);
7228 int Client::lchown(const char *relpath
, uid_t new_uid
, gid_t new_gid
,
7229 const UserPerm
& perms
)
7231 Mutex::Locker
lock(client_lock
);
7232 tout(cct
) << "lchown" << std::endl
;
7233 tout(cct
) << relpath
<< std::endl
;
7234 tout(cct
) << new_uid
<< std::endl
;
7235 tout(cct
) << new_gid
<< std::endl
;
7236 filepath
path(relpath
);
7238 // don't follow symlinks
7239 int r
= path_walk(path
, &in
, perms
, false);
7243 attr
.st_uid
= new_uid
;
7244 attr
.st_gid
= new_gid
;
7246 if (new_uid
!= static_cast<uid_t
>(-1)) mask
|= CEPH_SETATTR_UID
;
7247 if (new_gid
!= static_cast<gid_t
>(-1)) mask
|= CEPH_SETATTR_GID
;
7248 return _setattr(in
, &attr
, mask
, perms
);
7251 int Client::utime(const char *relpath
, struct utimbuf
*buf
,
7252 const UserPerm
& perms
)
7254 Mutex::Locker
lock(client_lock
);
7255 tout(cct
) << "utime" << std::endl
;
7256 tout(cct
) << relpath
<< std::endl
;
7257 tout(cct
) << buf
->modtime
<< std::endl
;
7258 tout(cct
) << buf
->actime
<< std::endl
;
7259 filepath
path(relpath
);
7261 int r
= path_walk(path
, &in
, perms
);
7265 stat_set_mtime_sec(&attr
, buf
->modtime
);
7266 stat_set_mtime_nsec(&attr
, 0);
7267 stat_set_atime_sec(&attr
, buf
->actime
);
7268 stat_set_atime_nsec(&attr
, 0);
7269 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
7272 int Client::lutime(const char *relpath
, struct utimbuf
*buf
,
7273 const UserPerm
& perms
)
7275 Mutex::Locker
lock(client_lock
);
7276 tout(cct
) << "lutime" << std::endl
;
7277 tout(cct
) << relpath
<< std::endl
;
7278 tout(cct
) << buf
->modtime
<< std::endl
;
7279 tout(cct
) << buf
->actime
<< std::endl
;
7280 filepath
path(relpath
);
7282 // don't follow symlinks
7283 int r
= path_walk(path
, &in
, perms
, false);
7287 stat_set_mtime_sec(&attr
, buf
->modtime
);
7288 stat_set_mtime_nsec(&attr
, 0);
7289 stat_set_atime_sec(&attr
, buf
->actime
);
7290 stat_set_atime_nsec(&attr
, 0);
7291 return _setattr(in
, &attr
, CEPH_SETATTR_MTIME
|CEPH_SETATTR_ATIME
, perms
);
7294 int Client::flock(int fd
, int operation
, uint64_t owner
)
7296 Mutex::Locker
lock(client_lock
);
7297 tout(cct
) << "flock" << std::endl
;
7298 tout(cct
) << fd
<< std::endl
;
7299 tout(cct
) << operation
<< std::endl
;
7300 tout(cct
) << owner
<< std::endl
;
7301 Fh
*f
= get_filehandle(fd
);
7305 return _flock(f
, operation
, owner
);
7308 int Client::opendir(const char *relpath
, dir_result_t
**dirpp
, const UserPerm
& perms
)
7310 Mutex::Locker
lock(client_lock
);
7311 tout(cct
) << "opendir" << std::endl
;
7312 tout(cct
) << relpath
<< std::endl
;
7313 filepath
path(relpath
);
7315 int r
= path_walk(path
, &in
, perms
, true);
7318 if (cct
->_conf
->client_permissions
) {
7319 int r
= may_open(in
.get(), O_RDONLY
, perms
);
7323 r
= _opendir(in
.get(), dirpp
, perms
);
7324 /* if ENOTDIR, dirpp will be an uninitialized point and it's very dangerous to access its value */
7326 tout(cct
) << (unsigned long)*dirpp
<< std::endl
;
7330 int Client::_opendir(Inode
*in
, dir_result_t
**dirpp
, const UserPerm
& perms
)
7334 *dirpp
= new dir_result_t(in
, perms
);
7335 opened_dirs
.insert(*dirpp
);
7336 ldout(cct
, 3) << "_opendir(" << in
->ino
<< ") = " << 0 << " (" << *dirpp
<< ")" << dendl
;
// Public close of a directory handle previously returned by opendir().
// NOTE(review): the call into _closedir is elided from this extract —
// confirm against the full source.
7341 int Client::closedir(dir_result_t
*dir
)
// Serialize with all other client operations.
7343 Mutex::Locker
lock(client_lock
);
// Trace the call and the handle being closed.
7344 tout(cct
) << "closedir" << std::endl
;
7345 tout(cct
) << (unsigned long)dir
<< std::endl
;
7347 ldout(cct
, 3) << "closedir(" << dir
<< ") = 0" << dendl
;
// Internal teardown of a dir_result_t: drop its inode reference, discard any
// buffered readdir entries, and unregister it from opened_dirs.
// Caller must hold client_lock.
7352 void Client::_closedir(dir_result_t
*dirp
)
7354 ldout(cct
, 10) << "_closedir(" << dirp
<< ")" << dendl
;
7356 ldout(cct
, 10) << "_closedir detaching inode " << dirp
->inode
<< dendl
;
// Release the InodeRef so the inode can be trimmed.
7357 dirp
->inode
.reset();
// Free any cached dentry buffer held by this iteration handle.
7359 _readdir_drop_dirp_buffer(dirp
);
7360 opened_dirs
.erase(dirp
);
// Reset a directory iteration back to the beginning (POSIX rewinddir
// semantics). NOTE(review): the offset reset itself is elided from this
// extract — only the buffer drop is visible; confirm against the full source.
7364 void Client::rewinddir(dir_result_t
*dirp
)
// Serialize with all other client operations.
7366 Mutex::Locker
lock(client_lock
);
7368 ldout(cct
, 3) << "rewinddir(" << dirp
<< ")" << dendl
;
7369 dir_result_t
*d
= static_cast<dir_result_t
*>(dirp
);
// Any buffered entries are for the old position; throw them away.
7370 _readdir_drop_dirp_buffer(d
);
// Report the current position within a directory stream (POSIX telldir
// semantics). NOTE(review): the return statement is elided from this
// extract; the log line shows the value reported is d->offset.
7374 loff_t
Client::telldir(dir_result_t
*dirp
)
7376 dir_result_t
*d
= static_cast<dir_result_t
*>(dirp
);
7377 ldout(cct
, 3) << "telldir(" << dirp
<< ") = " << d
->offset
<< dendl
;
// Reposition a directory stream to `offset` (POSIX seekdir semantics),
// invalidating cached state that is no longer valid for the new position.
// NOTE(review): several lines (early-return, the non-hash_order branch
// header) are elided from this extract — confirm against the full source.
7381 void Client::seekdir(dir_result_t
*dirp
, loff_t offset
)
// Serialize with all other client operations.
7383 Mutex::Locker
lock(client_lock
);
7385 ldout(cct
, 3) << "seekdir(" << dirp
<< ", " << offset
<< ")" << dendl
;
// No-op when we are already at the requested position.
7387 if (offset
== dirp
->offset
)
// Forward seeks invalidate the release count used for cache completeness.
7390 if (offset
> dirp
->offset
)
7391 dirp
->release_count
= 0; // bump if we do a forward seek
7393 dirp
->ordered_count
= 0; // disable filling readdir cache
// In hash-order mode offsets encode (frag hash, position); a backward seek
// means the buffered entries no longer match and must be dropped.
7395 if (dirp
->hash_order()) {
7396 if (dirp
->offset
> offset
) {
7397 _readdir_drop_dirp_buffer(dirp
);
// Otherwise: drop the buffer when the target frag or low bits no longer
// match what is buffered.
7402 dirp
->buffer_frag
!= frag_t(dir_result_t::fpos_high(offset
)) ||
7403 dirp
->offset_low() > dir_result_t::fpos_low(offset
)) {
7404 _readdir_drop_dirp_buffer(dirp
);
// Finally adopt the requested position.
7409 dirp
->offset
= offset
;
7414 // ino_t d_ino; /* inode number */
7415 // off_t d_off; /* offset to the next dirent */
7416 // unsigned short d_reclen; /* length of this record */
7417 // unsigned char d_type; /* type of file */
7418 // char d_name[256]; /* filename */
// Populate a struct dirent for return to the caller: name (truncated to 255
// bytes + NUL), d_off (where supported), and d_type derived from the file
// type bits. NOTE(review): the d_ino assignment is elided from this extract.
7420 void Client::fill_dirent(struct dirent
*de
, const char *name
, int type
, uint64_t ino
, loff_t next_off
)
// Copy at most 255 bytes and force NUL-termination (strncpy does not
// guarantee it when the source is longer than the buffer).
7422 strncpy(de
->d_name
, name
, 255);
7423 de
->d_name
[255] = '\0';
// d_off exists on Linux-like dirents but not on Darwin/FreeBSD.
7426 #if !defined(DARWIN) && !defined(__FreeBSD__)
7427 de
->d_off
= next_off
;
// Convert S_IF* mode bits to the DT_* encoding used by d_type.
7430 de
->d_type
= IFTODT(type
);
7431 ldout(cct
, 10) << "fill_dirent '" << de
->d_name
<< "' -> " << inodeno_t(de
->d_ino
)
7432 << " type " << (int)de
->d_type
<< " w/ next_off " << hex
<< next_off
<< dec
<< dendl
;
// Advance the directory iteration to the next fragment after the one
// currently buffered, updating dirp->offset accordingly.
// NOTE(review): the end-of-directory handling and the computation of the
// successor frag are elided from this extract — confirm against the full
// source.
7436 void Client::_readdir_next_frag(dir_result_t
*dirp
)
7438 frag_t fg
= dirp
->buffer_frag
;
// The rightmost frag has no successor: the directory is exhausted.
7440 if (fg
.is_rightmost()) {
7441 ldout(cct
, 10) << "_readdir_next_frag advance from " << fg
<< " to END" << dendl
;
7448 ldout(cct
, 10) << "_readdir_next_frag advance from " << dirp
->buffer_frag
<< " to " << fg
<< dendl
;
7450 if (dirp
->hash_order()) {
// In hash-order mode offsets encode the frag hash; only move forward.
7452 int64_t new_offset
= dir_result_t::make_fpos(fg
.value(), 2, true);
7453 if (dirp
->offset
< new_offset
) // don't decrease offset
7454 dirp
->offset
= new_offset
;
// Non-hash-order: restart the name cursor at the head of the new frag
// (position 2 skips the synthetic "." and ".." entries).
7456 dirp
->last_name
.clear();
7457 dirp
->offset
= dir_result_t::make_fpos(fg
, 2, false);
// Re-resolve the frag in case our dirfragtree view was stale.
7458 _readdir_rechoose_frag(dirp
);
// Re-map the frag implied by the current offset through the (possibly
// updated) dirfragtree and restart iteration at the head of the mapped frag.
// No-op in hash-order mode, where offsets are hash-based rather than
// frag-based.
7462 void Client::_readdir_rechoose_frag(dir_result_t
*dirp
)
7464 assert(dirp
->inode
);
7466 if (dirp
->hash_order())
// Decode the frag from the high bits of the current offset...
7469 frag_t cur
= frag_t(dirp
->offset_high());
// ...and look up which frag actually covers that value now.
7470 frag_t fg
= dirp
->inode
->dirfragtree
[cur
.value()];
7472 ldout(cct
, 10) << "_readdir_rechoose_frag frag " << cur
<< " maps to " << fg
<< dendl
;
// Restart at the head of the re-chosen frag (2 skips "." and "..").
7473 dirp
->offset
= dir_result_t::make_fpos(fg
, 2, false);
7474 dirp
->last_name
.clear();
7475 dirp
->next_offset
= 2;
// Discard any dentries buffered on this iteration handle; they will be
// re-fetched from the MDS on the next readdir.
7479 void Client::_readdir_drop_dirp_buffer(dir_result_t
*dirp
)
7481 ldout(cct
, 10) << "_readdir_drop_dirp_buffer " << dirp
<< dendl
;
7482 dirp
->buffer
.clear();
7485 int Client::_readdir_get_frag(dir_result_t
*dirp
)
7488 assert(dirp
->inode
);
7490 // get the current frag.
7492 if (dirp
->hash_order())
7493 fg
= dirp
->inode
->dirfragtree
[dirp
->offset_high()];
7495 fg
= frag_t(dirp
->offset_high());
7497 ldout(cct
, 10) << "_readdir_get_frag " << dirp
<< " on " << dirp
->inode
->ino
<< " fg " << fg
7498 << " offset " << hex
<< dirp
->offset
<< dec
<< dendl
;
7500 int op
= CEPH_MDS_OP_READDIR
;
7501 if (dirp
->inode
&& dirp
->inode
->snapid
== CEPH_SNAPDIR
)
7502 op
= CEPH_MDS_OP_LSSNAP
;
7504 InodeRef
& diri
= dirp
->inode
;
7506 MetaRequest
*req
= new MetaRequest(op
);
7508 diri
->make_nosnap_relative_path(path
);
7509 req
->set_filepath(path
);
7510 req
->set_inode(diri
.get());
7511 req
->head
.args
.readdir
.frag
= fg
;
7512 req
->head
.args
.readdir
.flags
= CEPH_READDIR_REPLY_BITFLAGS
;
7513 if (dirp
->last_name
.length()) {
7514 req
->path2
.set_path(dirp
->last_name
.c_str());
7515 } else if (dirp
->hash_order()) {
7516 req
->head
.args
.readdir
.offset_hash
= dirp
->offset_high();
7521 int res
= make_request(req
, dirp
->perms
, NULL
, NULL
, -1, &dirbl
);
7523 if (res
== -EAGAIN
) {
7524 ldout(cct
, 10) << "_readdir_get_frag got EAGAIN, retrying" << dendl
;
7525 _readdir_rechoose_frag(dirp
);
7526 return _readdir_get_frag(dirp
);
7530 ldout(cct
, 10) << "_readdir_get_frag " << dirp
<< " got frag " << dirp
->buffer_frag
7531 << " size " << dirp
->buffer
.size() << dendl
;
7533 ldout(cct
, 10) << "_readdir_get_frag got error " << res
<< ", setting end flag" << dendl
;
// Comparator used with std::lower_bound over Dir::readdir_cache: orders
// cached Dentry pointers by their readdir offset (via fpos_cmp).
7540 struct dentry_off_lt
{
7541 bool operator()(const Dentry
* dn
, int64_t off
) const {
7542 return dir_result_t::fpos_cmp(dn
->offset
, off
) < 0;
7546 int Client::_readdir_cache_cb(dir_result_t
*dirp
, add_dirent_cb_t cb
, void *p
,
7547 int caps
, bool getref
)
7549 assert(client_lock
.is_locked());
7550 ldout(cct
, 10) << "_readdir_cache_cb " << dirp
<< " on " << dirp
->inode
->ino
7551 << " last_name " << dirp
->last_name
<< " offset " << hex
<< dirp
->offset
<< dec
7553 Dir
*dir
= dirp
->inode
->dir
;
7556 ldout(cct
, 10) << " dir is empty" << dendl
;
7561 vector
<Dentry
*>::iterator pd
= std::lower_bound(dir
->readdir_cache
.begin(),
7562 dir
->readdir_cache
.end(),
7563 dirp
->offset
, dentry_off_lt());
7567 if (!dirp
->inode
->is_complete_and_ordered())
7569 if (pd
== dir
->readdir_cache
.end())
7572 if (dn
->inode
== NULL
) {
7573 ldout(cct
, 15) << " skipping null '" << dn
->name
<< "'" << dendl
;
7577 if (dn
->cap_shared_gen
!= dir
->parent_inode
->shared_gen
) {
7578 ldout(cct
, 15) << " skipping mismatch shared gen '" << dn
->name
<< "'" << dendl
;
7583 int r
= _getattr(dn
->inode
, caps
, dirp
->perms
);
7587 struct ceph_statx stx
;
7589 fill_statx(dn
->inode
, caps
, &stx
);
7591 uint64_t next_off
= dn
->offset
+ 1;
7593 if (pd
== dir
->readdir_cache
.end())
7594 next_off
= dir_result_t::END
;
7597 fill_dirent(&de
, dn
->name
.c_str(), stx
.stx_mode
, stx
.stx_ino
, next_off
);
7599 in
= dn
->inode
.get();
7603 dn_name
= dn
->name
; // fill in name while we have lock
7605 client_lock
.Unlock();
7606 r
= cb(p
, &de
, &stx
, next_off
, in
); // _next_ offset
7608 ldout(cct
, 15) << " de " << de
.d_name
<< " off " << hex
<< dn
->offset
<< dec
7609 << " = " << r
<< dendl
;
7614 dirp
->offset
= next_off
;
7616 dirp
->next_offset
= 2;
7618 dirp
->next_offset
= dirp
->offset_low();
7619 dirp
->last_name
= dn_name
; // we successfully returned this one; update!
7624 ldout(cct
, 10) << "_readdir_cache_cb " << dirp
<< " on " << dirp
->inode
->ino
<< " at end" << dendl
;
7629 int Client::readdir_r_cb(dir_result_t
*d
, add_dirent_cb_t cb
, void *p
,
7630 unsigned want
, unsigned flags
, bool getref
)
7632 int caps
= statx_to_mask(flags
, want
);
7634 Mutex::Locker
lock(client_lock
);
7636 dir_result_t
*dirp
= static_cast<dir_result_t
*>(d
);
7638 ldout(cct
, 10) << "readdir_r_cb " << *dirp
->inode
<< " offset " << hex
<< dirp
->offset
7639 << dec
<< " at_end=" << dirp
->at_end()
7640 << " hash_order=" << dirp
->hash_order() << dendl
;
7643 struct ceph_statx stx
;
7644 memset(&de
, 0, sizeof(de
));
7645 memset(&stx
, 0, sizeof(stx
));
7647 InodeRef
& diri
= dirp
->inode
;
7652 if (dirp
->offset
== 0) {
7653 ldout(cct
, 15) << " including ." << dendl
;
7654 assert(diri
->dn_set
.size() < 2); // can't have multiple hard-links to a dir
7655 uint64_t next_off
= 1;
7658 r
= _getattr(diri
, caps
, dirp
->perms
);
7662 fill_statx(diri
, caps
, &stx
);
7663 fill_dirent(&de
, ".", S_IFDIR
, stx
.stx_ino
, next_off
);
7665 Inode
*inode
= NULL
;
7671 client_lock
.Unlock();
7672 r
= cb(p
, &de
, &stx
, next_off
, inode
);
7677 dirp
->offset
= next_off
;
7681 if (dirp
->offset
== 1) {
7682 ldout(cct
, 15) << " including .." << dendl
;
7683 uint64_t next_off
= 2;
7685 if (diri
->dn_set
.empty())
7688 in
= diri
->get_first_parent()->inode
;
7691 r
= _getattr(diri
, caps
, dirp
->perms
);
7695 fill_statx(in
, caps
, &stx
);
7696 fill_dirent(&de
, "..", S_IFDIR
, stx
.stx_ino
, next_off
);
7698 Inode
*inode
= NULL
;
7704 client_lock
.Unlock();
7705 r
= cb(p
, &de
, &stx
, next_off
, inode
);
7710 dirp
->offset
= next_off
;
7715 // can we read from our cache?
7716 ldout(cct
, 10) << "offset " << hex
<< dirp
->offset
<< dec
7717 << " snapid " << dirp
->inode
->snapid
<< " (complete && ordered) "
7718 << dirp
->inode
->is_complete_and_ordered()
7719 << " issued " << ccap_string(dirp
->inode
->caps_issued())
7721 if (dirp
->inode
->snapid
!= CEPH_SNAPDIR
&&
7722 dirp
->inode
->is_complete_and_ordered() &&
7723 dirp
->inode
->caps_issued_mask(CEPH_CAP_FILE_SHARED
)) {
7724 int err
= _readdir_cache_cb(dirp
, cb
, p
, caps
, getref
);
7733 bool check_caps
= true;
7734 if (!dirp
->is_cached()) {
7735 int r
= _readdir_get_frag(dirp
);
7738 // _readdir_get_frag () may updates dirp->offset if the replied dirfrag is
7739 // different than the requested one. (our dirfragtree was outdated)
7742 frag_t fg
= dirp
->buffer_frag
;
7744 ldout(cct
, 10) << "frag " << fg
<< " buffer size " << dirp
->buffer
.size()
7745 << " offset " << hex
<< dirp
->offset
<< dendl
;
7747 for (auto it
= std::lower_bound(dirp
->buffer
.begin(), dirp
->buffer
.end(),
7748 dirp
->offset
, dir_result_t::dentry_off_lt());
7749 it
!= dirp
->buffer
.end();
7751 dir_result_t::dentry
&entry
= *it
;
7753 uint64_t next_off
= entry
.offset
+ 1;
7757 r
= _getattr(entry
.inode
, caps
, dirp
->perms
);
7762 fill_statx(entry
.inode
, caps
, &stx
);
7763 fill_dirent(&de
, entry
.name
.c_str(), stx
.stx_mode
, stx
.stx_ino
, next_off
);
7765 Inode
*inode
= NULL
;
7767 inode
= entry
.inode
.get();
7771 client_lock
.Unlock();
7772 r
= cb(p
, &de
, &stx
, next_off
, inode
); // _next_ offset
7775 ldout(cct
, 15) << " de " << de
.d_name
<< " off " << hex
<< next_off
- 1 << dec
7776 << " = " << r
<< dendl
;
7780 dirp
->offset
= next_off
;
7785 if (dirp
->next_offset
> 2) {
7786 ldout(cct
, 10) << " fetching next chunk of this frag" << dendl
;
7787 _readdir_drop_dirp_buffer(dirp
);
7791 if (!fg
.is_rightmost()) {
7793 _readdir_next_frag(dirp
);
7797 if (diri
->shared_gen
== dirp
->start_shared_gen
&&
7798 diri
->dir_release_count
== dirp
->release_count
) {
7799 if (diri
->dir_ordered_count
== dirp
->ordered_count
) {
7800 ldout(cct
, 10) << " marking (I_COMPLETE|I_DIR_ORDERED) on " << *diri
<< dendl
;
7802 assert(diri
->dir
->readdir_cache
.size() >= dirp
->cache_index
);
7803 diri
->dir
->readdir_cache
.resize(dirp
->cache_index
);
7805 diri
->flags
|= I_COMPLETE
| I_DIR_ORDERED
;
7807 ldout(cct
, 10) << " marking I_COMPLETE on " << *diri
<< dendl
;
7808 diri
->flags
|= I_COMPLETE
;
// Thread-safe readdir: fill `de` with the next entry of `d`.
// Thin wrapper over readdirplus_r with no statx/want/flags and no inode out.
7820 int Client::readdir_r(dir_result_t
*d
, struct dirent
*de
)
7822 return readdirplus_r(d
, de
, 0, 0, 0, NULL
);
7829 * 1 if we got a dirent
7830 * 0 for end of directory
7834 struct single_readdir
{
7836 struct ceph_statx
*stx
;
7841 static int _readdir_single_dirent_cb(void *p
, struct dirent
*de
,
7842 struct ceph_statx
*stx
, off_t off
,
7845 single_readdir
*c
= static_cast<single_readdir
*>(p
);
7848 return -1; // already filled this dirent
7858 struct dirent
*Client::readdir(dir_result_t
*d
)
7861 static struct dirent de
;
7868 // our callback fills the dirent and sets sr.full=true on first
7869 // call, and returns -1 the second time around.
7870 ret
= readdir_r_cb(d
, _readdir_single_dirent_cb
, (void *)&sr
);
7872 errno
= -ret
; // this sucks.
7873 return (dirent
*) NULL
;
7878 return (dirent
*) NULL
;
7881 int Client::readdirplus_r(dir_result_t
*d
, struct dirent
*de
,
7882 struct ceph_statx
*stx
, unsigned want
,
7883 unsigned flags
, Inode
**out
)
7891 // our callback fills the dirent and sets sr.full=true on first
7892 // call, and returns -1 the second time around.
7893 int r
= readdir_r_cb(d
, _readdir_single_dirent_cb
, (void *)&sr
, want
, flags
, out
);
7905 struct getdents_result
{
7912 static int _readdir_getdent_cb(void *p
, struct dirent
*de
,
7913 struct ceph_statx
*stx
, off_t off
, Inode
*in
)
7915 struct getdents_result
*c
= static_cast<getdents_result
*>(p
);
7921 dlen
= strlen(de
->d_name
) + 1;
7923 if (c
->pos
+ dlen
> c
->buflen
)
7924 return -1; // doesn't fit
7927 memcpy(c
->buf
+ c
->pos
, de
, sizeof(*de
));
7929 memcpy(c
->buf
+ c
->pos
, de
->d_name
, dlen
);
7935 int Client::_getdents(dir_result_t
*dir
, char *buf
, int buflen
, bool fullent
)
7940 gr
.fullent
= fullent
;
7943 int r
= readdir_r_cb(dir
, _readdir_getdent_cb
, (void *)&gr
);
7945 if (r
< 0) { // some error
7946 if (r
== -1) { // buffer ran out of space
7947 if (gr
.pos
) { // but we got some entries already!
7949 } // or we need a larger buffer
7951 } else { // actual error, return it
7960 struct getdir_result
{
7961 list
<string
> *contents
;
// readdir_r_cb callback used by Client::getdir(): appends each entry's name
// to the caller-supplied list carried in the getdir_result cookie.
7965 static int _getdir_cb(void *p
, struct dirent
*de
, struct ceph_statx
*stx
, off_t off
, Inode
*in
)
7967 getdir_result
*r
= static_cast<getdir_result
*>(p
);
7969 r
->contents
->push_back(de
->d_name
);
7974 int Client::getdir(const char *relpath
, list
<string
>& contents
,
7975 const UserPerm
& perms
)
7977 ldout(cct
, 3) << "getdir(" << relpath
<< ")" << dendl
;
7979 Mutex::Locker
lock(client_lock
);
7980 tout(cct
) << "getdir" << std::endl
;
7981 tout(cct
) << relpath
<< std::endl
;
7985 int r
= opendir(relpath
, &d
, perms
);
7990 gr
.contents
= &contents
;
7992 r
= readdir_r_cb(d
, _getdir_cb
, (void *)&gr
);
8002 /****** file i/o **********/
8003 int Client::open(const char *relpath
, int flags
, const UserPerm
& perms
,
8004 mode_t mode
, int stripe_unit
, int stripe_count
,
8005 int object_size
, const char *data_pool
)
8007 ldout(cct
, 3) << "open enter(" << relpath
<< ", " << ceph_flags_sys2wire(flags
) << "," << mode
<< ")" << dendl
;
8008 Mutex::Locker
lock(client_lock
);
8009 tout(cct
) << "open" << std::endl
;
8010 tout(cct
) << relpath
<< std::endl
;
8011 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
8015 #if defined(__linux__) && defined(O_PATH)
8016 /* When the O_PATH is being specified, others flags than O_DIRECTORY
8017 * and O_NOFOLLOW are ignored. Please refer do_entry_open() function
8018 * in kernel (fs/open.c). */
8020 flags
&= O_DIRECTORY
| O_NOFOLLOW
| O_PATH
;
8023 filepath
path(relpath
);
8025 bool created
= false;
8026 /* O_CREATE with O_EXCL enforces O_NOFOLLOW. */
8027 bool followsym
= !((flags
& O_NOFOLLOW
) || ((flags
& O_CREAT
) && (flags
& O_EXCL
)));
8028 int r
= path_walk(path
, &in
, perms
, followsym
, ceph_caps_for_mode(mode
));
8030 if (r
== 0 && (flags
& O_CREAT
) && (flags
& O_EXCL
))
8033 #if defined(__linux__) && defined(O_PATH)
8034 if (r
== 0 && in
->is_symlink() && (flags
& O_NOFOLLOW
) && !(flags
& O_PATH
))
8036 if (r
== 0 && in
->is_symlink() && (flags
& O_NOFOLLOW
))
8040 if (r
== -ENOENT
&& (flags
& O_CREAT
)) {
8041 filepath dirpath
= path
;
8042 string dname
= dirpath
.last_dentry();
8043 dirpath
.pop_dentry();
8045 r
= path_walk(dirpath
, &dir
, perms
, true,
8046 cct
->_conf
->client_permissions
? CEPH_CAP_AUTH_SHARED
: 0);
8049 if (cct
->_conf
->client_permissions
) {
8050 r
= may_create(dir
.get(), perms
);
8054 r
= _create(dir
.get(), dname
.c_str(), flags
, mode
, &in
, &fh
, stripe_unit
,
8055 stripe_count
, object_size
, data_pool
, &created
, perms
);
8061 // posix says we can only check permissions of existing files
8062 if (cct
->_conf
->client_permissions
) {
8063 r
= may_open(in
.get(), flags
, perms
);
8070 r
= _open(in
.get(), flags
, mode
, &fh
, perms
);
8072 // allocate a integer file descriptor
8075 assert(fd_map
.count(r
) == 0);
8080 tout(cct
) << r
<< std::endl
;
8081 ldout(cct
, 3) << "open exit(" << path
<< ", " << ceph_flags_sys2wire(flags
) << ") = " << r
<< dendl
;
// Convenience overload of open(): delegates to the full-argument variant
// with default (zero) striping parameters and no explicit data pool.
8085 int Client::open(const char *relpath
, int flags
, const UserPerm
& perms
, mode_t mode
)
8087 /* Use default file striping parameters */
8088 return open(relpath
, flags
, perms
, mode
, 0, 0, 0, NULL
);
8091 int Client::lookup_hash(inodeno_t ino
, inodeno_t dirino
, const char *name
,
8092 const UserPerm
& perms
)
8094 Mutex::Locker
lock(client_lock
);
8095 ldout(cct
, 3) << "lookup_hash enter(" << ino
<< ", #" << dirino
<< "/" << name
<< ")" << dendl
;
8097 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPHASH
);
8099 req
->set_filepath(path
);
8101 uint32_t h
= ceph_str_hash(CEPH_STR_HASH_RJENKINS
, name
, strlen(name
));
8103 sprintf(f
, "%u", h
);
8104 filepath
path2(dirino
);
8105 path2
.push_dentry(string(f
));
8106 req
->set_filepath2(path2
);
8108 int r
= make_request(req
, perms
, NULL
, NULL
,
8109 rand() % mdsmap
->get_num_in_mds());
8110 ldout(cct
, 3) << "lookup_hash exit(" << ino
<< ", #" << dirino
<< "/" << name
<< ") = " << r
<< dendl
;
8116 * Load inode into local cache.
8118 * If inode pointer is non-NULL, and take a reference on
8119 * the resulting Inode object in one operation, so that caller
8120 * can safely assume inode will still be there after return.
8122 int Client::lookup_ino(inodeno_t ino
, const UserPerm
& perms
, Inode
**inode
)
8124 Mutex::Locker
lock(client_lock
);
8125 ldout(cct
, 3) << "lookup_ino enter(" << ino
<< ")" << dendl
;
8127 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPINO
);
8129 req
->set_filepath(path
);
8131 int r
= make_request(req
, perms
, NULL
, NULL
, rand() % mdsmap
->get_num_in_mds());
8132 if (r
== 0 && inode
!= NULL
) {
8133 vinodeno_t
vino(ino
, CEPH_NOSNAP
);
8134 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
8135 assert(p
!= inode_map
.end());
8139 ldout(cct
, 3) << "lookup_ino exit(" << ino
<< ") = " << r
<< dendl
;
8146 * Find the parent inode of `ino` and insert it into
8147 * our cache. Conditionally also set `parent` to a referenced
8148 * Inode* if caller provides non-NULL value.
8150 int Client::lookup_parent(Inode
*ino
, const UserPerm
& perms
, Inode
**parent
)
8152 Mutex::Locker
lock(client_lock
);
8153 ldout(cct
, 3) << "lookup_parent enter(" << ino
->ino
<< ")" << dendl
;
8155 if (!ino
->dn_set
.empty()) {
8156 // if we exposed the parent here, we'd need to check permissions,
8157 // but right now we just rely on the MDS doing so in make_request
8158 ldout(cct
, 3) << "lookup_parent dentry already present" << dendl
;
8162 if (ino
->is_root()) {
8164 ldout(cct
, 3) << "ino is root, no parent" << dendl
;
8168 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPPARENT
);
8169 filepath
path(ino
->ino
);
8170 req
->set_filepath(path
);
8173 int r
= make_request(req
, perms
, &target
, NULL
, rand() % mdsmap
->get_num_in_mds());
8174 // Give caller a reference to the parent ino if they provided a pointer.
8175 if (parent
!= NULL
) {
8177 *parent
= target
.get();
8179 ldout(cct
, 3) << "lookup_parent found parent " << (*parent
)->ino
<< dendl
;
8184 ldout(cct
, 3) << "lookup_parent exit(" << ino
->ino
<< ") = " << r
<< dendl
;
8190 * Populate the parent dentry for `ino`, provided it is
8191 * a child of `parent`.
8193 int Client::lookup_name(Inode
*ino
, Inode
*parent
, const UserPerm
& perms
)
8195 assert(parent
->is_dir());
8197 Mutex::Locker
lock(client_lock
);
8198 ldout(cct
, 3) << "lookup_name enter(" << ino
->ino
<< ")" << dendl
;
8200 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
8201 req
->set_filepath2(filepath(parent
->ino
));
8202 req
->set_filepath(filepath(ino
->ino
));
8203 req
->set_inode(ino
);
8205 int r
= make_request(req
, perms
, NULL
, NULL
, rand() % mdsmap
->get_num_in_mds());
8206 ldout(cct
, 3) << "lookup_name exit(" << ino
->ino
<< ") = " << r
<< dendl
;
8211 Fh
*Client::_create_fh(Inode
*in
, int flags
, int cmode
, const UserPerm
& perms
)
8219 f
->actor_perms
= perms
;
8221 ldout(cct
, 10) << "_create_fh " << in
->ino
<< " mode " << cmode
<< dendl
;
8223 if (in
->snapid
!= CEPH_NOSNAP
) {
8224 in
->snap_cap_refs
++;
8225 ldout(cct
, 5) << "open success, fh is " << f
<< " combined IMMUTABLE SNAP caps "
8226 << ccap_string(in
->caps_issued()) << dendl
;
8229 const md_config_t
*conf
= cct
->_conf
;
8230 f
->readahead
.set_trigger_requests(1);
8231 f
->readahead
.set_min_readahead_size(conf
->client_readahead_min
);
8232 uint64_t max_readahead
= Readahead::NO_LIMIT
;
8233 if (conf
->client_readahead_max_bytes
) {
8234 max_readahead
= MIN(max_readahead
, (uint64_t)conf
->client_readahead_max_bytes
);
8236 if (conf
->client_readahead_max_periods
) {
8237 max_readahead
= MIN(max_readahead
, in
->layout
.get_period()*(uint64_t)conf
->client_readahead_max_periods
);
8239 f
->readahead
.set_max_readahead_size(max_readahead
);
8240 vector
<uint64_t> alignments
;
8241 alignments
.push_back(in
->layout
.get_period());
8242 alignments
.push_back(in
->layout
.stripe_unit
);
8243 f
->readahead
.set_alignments(alignments
);
8248 int Client::_release_fh(Fh
*f
)
8250 //ldout(cct, 3) << "op: client->close(open_files[ " << fh << " ]);" << dendl;
8251 //ldout(cct, 3) << "op: open_files.erase( " << fh << " );" << dendl;
8252 Inode
*in
= f
->inode
.get();
8253 ldout(cct
, 5) << "_release_fh " << f
<< " mode " << f
->mode
<< " on " << *in
<< dendl
;
8255 if (in
->snapid
== CEPH_NOSNAP
) {
8256 if (in
->put_open_ref(f
->mode
)) {
8257 _flush(in
, new C_Client_FlushComplete(this, in
));
8261 assert(in
->snap_cap_refs
> 0);
8262 in
->snap_cap_refs
--;
8265 _release_filelocks(f
);
8267 // Finally, read any async err (i.e. from flushes)
8268 int err
= f
->take_async_err();
8270 ldout(cct
, 1) << "_release_fh " << f
<< " on inode " << *in
<< " caught async_err = "
8271 << cpp_strerror(err
) << dendl
;
8273 ldout(cct
, 10) << "_release_fh " << f
<< " on inode " << *in
<< " no async_err state" << dendl
;
// Drop one reference on a file handle. NOTE(review): the handling of the
// returned refcount (presumably freeing the Fh when it reaches zero) is
// elided from this extract — confirm against the full source.
8281 void Client::_put_fh(Fh
*f
)
8283 int left
= f
->put();
8289 int Client::_open(Inode
*in
, int flags
, mode_t mode
, Fh
**fhp
,
8290 const UserPerm
& perms
)
8292 if (in
->snapid
!= CEPH_NOSNAP
&&
8293 (flags
& (O_WRONLY
| O_RDWR
| O_CREAT
| O_TRUNC
| O_APPEND
))) {
8297 // use normalized flags to generate cmode
8298 int cmode
= ceph_flags_to_mode(ceph_flags_sys2wire(flags
));
8301 int want
= ceph_caps_for_mode(cmode
);
8304 in
->get_open_ref(cmode
); // make note of pending open, since it effects _wanted_ caps.
8306 if ((flags
& O_TRUNC
) == 0 &&
8307 in
->caps_issued_mask(want
)) {
8309 check_caps(in
, CHECK_CAPS_NODELAY
);
8311 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_OPEN
);
8313 in
->make_nosnap_relative_path(path
);
8314 req
->set_filepath(path
);
8315 req
->head
.args
.open
.flags
= ceph_flags_sys2wire(flags
& ~O_CREAT
);
8316 req
->head
.args
.open
.mode
= mode
;
8317 req
->head
.args
.open
.pool
= -1;
8318 if (cct
->_conf
->client_debug_getattr_caps
)
8319 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
8321 req
->head
.args
.open
.mask
= 0;
8322 req
->head
.args
.open
.old_size
= in
->size
; // for O_TRUNC
8324 result
= make_request(req
, perms
);
8330 *fhp
= _create_fh(in
, flags
, cmode
, perms
);
8332 in
->put_open_ref(cmode
);
8340 int Client::_renew_caps(Inode
*in
)
8342 int wanted
= in
->caps_file_wanted();
8343 if (in
->is_any_caps() &&
8344 ((wanted
& CEPH_CAP_ANY_WR
) == 0 || in
->auth_cap
)) {
8345 check_caps(in
, CHECK_CAPS_NODELAY
);
8350 if ((wanted
& CEPH_CAP_FILE_RD
) && (wanted
& CEPH_CAP_FILE_WR
))
8352 else if (wanted
& CEPH_CAP_FILE_RD
)
8354 else if (wanted
& CEPH_CAP_FILE_WR
)
8357 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_OPEN
);
8359 in
->make_nosnap_relative_path(path
);
8360 req
->set_filepath(path
);
8361 req
->head
.args
.open
.flags
= flags
;
8362 req
->head
.args
.open
.pool
= -1;
8363 if (cct
->_conf
->client_debug_getattr_caps
)
8364 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
8366 req
->head
.args
.open
.mask
= 0;
8369 // duplicate in case Cap goes away; not sure if that race is a concern?
8370 const UserPerm
*pperm
= in
->get_best_perms();
8374 int ret
= make_request(req
, perms
);
8378 int Client::close(int fd
)
8380 ldout(cct
, 3) << "close enter(" << fd
<< ")" << dendl
;
8381 Mutex::Locker
lock(client_lock
);
8382 tout(cct
) << "close" << std::endl
;
8383 tout(cct
) << fd
<< std::endl
;
8385 Fh
*fh
= get_filehandle(fd
);
8388 int err
= _release_fh(fh
);
8391 ldout(cct
, 3) << "close exit(" << fd
<< ")" << dendl
;
8399 loff_t
Client::lseek(int fd
, loff_t offset
, int whence
)
8401 Mutex::Locker
lock(client_lock
);
8402 tout(cct
) << "lseek" << std::endl
;
8403 tout(cct
) << fd
<< std::endl
;
8404 tout(cct
) << offset
<< std::endl
;
8405 tout(cct
) << whence
<< std::endl
;
8407 Fh
*f
= get_filehandle(fd
);
8410 #if defined(__linux__) && defined(O_PATH)
8411 if (f
->flags
& O_PATH
)
8414 return _lseek(f
, offset
, whence
);
8417 loff_t
Client::_lseek(Fh
*f
, loff_t offset
, int whence
)
8419 Inode
*in
= f
->inode
.get();
8432 r
= _getattr(in
, CEPH_STAT_CAP_SIZE
, f
->actor_perms
);
8435 f
->pos
= in
->size
+ offset
;
8442 ldout(cct
, 3) << "_lseek(" << f
<< ", " << offset
<< ", " << whence
<< ") = " << f
->pos
<< dendl
;
// Acquire exclusive use of the file handle's position (f->pos) for an
// offset-relative I/O. Waiters queue in FIFO order on pos_waiters and block
// on a per-caller condition variable until they reach the front and the
// position is free. Caller must hold client_lock (Wait() releases it while
// blocked).
8447 void Client::lock_fh_pos(Fh
*f
)
8449 ldout(cct
, 10) << "lock_fh_pos " << f
<< dendl
;
// Fast path is free+no-queue; otherwise join the FIFO of waiters.
8451 if (f
->pos_locked
|| !f
->pos_waiters
.empty()) {
8453 f
->pos_waiters
.push_back(&cond
);
8454 ldout(cct
, 10) << "lock_fh_pos BLOCKING on " << f
<< dendl
;
// Wake-ups are broadcast-style: re-check both that the position is free
// and that we are at the head of the queue (FIFO fairness).
8455 while (f
->pos_locked
|| f
->pos_waiters
.front() != &cond
)
8456 cond
.Wait(client_lock
);
8457 ldout(cct
, 10) << "lock_fh_pos UNBLOCKING on " << f
<< dendl
;
8458 assert(f
->pos_waiters
.front() == &cond
);
// Dequeue ourselves before taking ownership.
8459 f
->pos_waiters
.pop_front();
8462 f
->pos_locked
= true;
// Release exclusive use of the file handle's position taken by
// lock_fh_pos(). Caller must hold client_lock.
8465 void Client::unlock_fh_pos(Fh
*f
)
8467 ldout(cct
, 10) << "unlock_fh_pos " << f
<< dendl
;
8468 f
->pos_locked
= false;
8471 int Client::uninline_data(Inode
*in
, Context
*onfinish
)
8473 if (!in
->inline_data
.length()) {
8474 onfinish
->complete(0);
8479 snprintf(oid_buf
, sizeof(oid_buf
), "%llx.00000000", (long long unsigned)in
->ino
);
8480 object_t oid
= oid_buf
;
8482 ObjectOperation create_ops
;
8483 create_ops
.create(false);
8485 objecter
->mutate(oid
,
8486 OSDMap::file_to_object_locator(in
->layout
),
8488 in
->snaprealm
->get_snap_context(),
8489 ceph::real_clock::now(),
8493 bufferlist inline_version_bl
;
8494 ::encode(in
->inline_version
, inline_version_bl
);
8496 ObjectOperation uninline_ops
;
8497 uninline_ops
.cmpxattr("inline_version",
8498 CEPH_OSD_CMPXATTR_OP_GT
,
8499 CEPH_OSD_CMPXATTR_MODE_U64
,
8501 bufferlist inline_data
= in
->inline_data
;
8502 uninline_ops
.write(0, inline_data
, in
->truncate_size
, in
->truncate_seq
);
8503 uninline_ops
.setxattr("inline_version", stringify(in
->inline_version
));
8505 objecter
->mutate(oid
,
8506 OSDMap::file_to_object_locator(in
->layout
),
8508 in
->snaprealm
->get_snap_context(),
8509 ceph::real_clock::now(),
8518 // blocking osd interface
8520 int Client::read(int fd
, char *buf
, loff_t size
, loff_t offset
)
8522 Mutex::Locker
lock(client_lock
);
8523 tout(cct
) << "read" << std::endl
;
8524 tout(cct
) << fd
<< std::endl
;
8525 tout(cct
) << size
<< std::endl
;
8526 tout(cct
) << offset
<< std::endl
;
8528 Fh
*f
= get_filehandle(fd
);
8531 #if defined(__linux__) && defined(O_PATH)
8532 if (f
->flags
& O_PATH
)
8536 int r
= _read(f
, offset
, size
, &bl
);
8537 ldout(cct
, 3) << "read(" << fd
<< ", " << (void*)buf
<< ", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
8539 bl
.copy(0, bl
.length(), buf
);
// Vectored positional read: delegates to the shared preadv/pwritev helper
// with write=false.
8545 int Client::preadv(int fd
, const struct iovec
*iov
, int iovcnt
, loff_t offset
)
8549 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, false);
8552 int Client::_read(Fh
*f
, int64_t offset
, uint64_t size
, bufferlist
*bl
)
8554 const md_config_t
*conf
= cct
->_conf
;
8555 Inode
*in
= f
->inode
.get();
8557 if ((f
->mode
& CEPH_FILE_MODE_RD
) == 0)
8559 //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
8561 bool movepos
= false;
8567 loff_t start_pos
= offset
;
8569 if (in
->inline_version
== 0) {
8570 int r
= _getattr(in
, CEPH_STAT_CAP_INLINE_DATA
, f
->actor_perms
, true);
8576 assert(in
->inline_version
> 0);
8581 int r
= get_caps(in
, CEPH_CAP_FILE_RD
, CEPH_CAP_FILE_CACHE
, &have
, -1);
8587 if (f
->flags
& O_DIRECT
)
8588 have
&= ~CEPH_CAP_FILE_CACHE
;
8590 Mutex
uninline_flock("Client::_read_uninline_data flock");
8592 bool uninline_done
= false;
8593 int uninline_ret
= 0;
8594 Context
*onuninline
= NULL
;
8596 if (in
->inline_version
< CEPH_INLINE_NONE
) {
8597 if (!(have
& CEPH_CAP_FILE_CACHE
)) {
8598 onuninline
= new C_SafeCond(&uninline_flock
,
8602 uninline_data(in
, onuninline
);
8604 uint32_t len
= in
->inline_data
.length();
8606 uint64_t endoff
= offset
+ size
;
8607 if (endoff
> in
->size
)
8611 if (endoff
<= len
) {
8612 bl
->substr_of(in
->inline_data
, offset
, endoff
- offset
);
8614 bl
->substr_of(in
->inline_data
, offset
, len
- offset
);
8615 bl
->append_zero(endoff
- len
);
8617 } else if ((uint64_t)offset
< endoff
) {
8618 bl
->append_zero(endoff
- offset
);
8625 if (!conf
->client_debug_force_sync_read
&&
8626 (conf
->client_oc
&& (have
& CEPH_CAP_FILE_CACHE
))) {
8628 if (f
->flags
& O_RSYNC
) {
8629 _flush_range(in
, offset
, size
);
8631 r
= _read_async(f
, offset
, size
, bl
);
8635 if (f
->flags
& O_DIRECT
)
8636 _flush_range(in
, offset
, size
);
8638 bool checkeof
= false;
8639 r
= _read_sync(f
, offset
, size
, bl
, &checkeof
);
8646 put_cap_ref(in
, CEPH_CAP_FILE_RD
);
8649 r
= _getattr(in
, CEPH_STAT_CAP_SIZE
, f
->actor_perms
);
8654 if ((uint64_t)offset
< in
->size
)
8662 f
->pos
= start_pos
+ bl
->length();
8670 client_lock
.Unlock();
8671 uninline_flock
.Lock();
8672 while (!uninline_done
)
8673 uninline_cond
.Wait(uninline_flock
);
8674 uninline_flock
.Unlock();
8677 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
8678 in
->inline_data
.clear();
8679 in
->inline_version
= CEPH_INLINE_NONE
;
8680 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
8687 put_cap_ref(in
, CEPH_CAP_FILE_RD
);
8693 return bl
->length();
8696 Client::C_Readahead::C_Readahead(Client
*c
, Fh
*f
) :
8699 f
->readahead
.inc_pending();
8702 Client::C_Readahead::~C_Readahead() {
8703 f
->readahead
.dec_pending();
8707 void Client::C_Readahead::finish(int r
) {
8708 lgeneric_subdout(client
->cct
, client
, 20) << "client." << client
->get_nodeid() << " " << "C_Readahead on " << f
->inode
<< dendl
;
8709 client
->put_cap_ref(f
->inode
.get(), CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_CACHE
);
8712 int Client::_read_async(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
)
8714 const md_config_t
*conf
= cct
->_conf
;
8715 Inode
*in
= f
->inode
.get();
8717 ldout(cct
, 10) << "_read_async " << *in
<< " " << off
<< "~" << len
<< dendl
;
8719 // trim read based on file size?
8720 if (off
>= in
->size
)
8724 if (off
+ len
> in
->size
) {
8725 len
= in
->size
- off
;
8728 ldout(cct
, 10) << " min_bytes=" << f
->readahead
.get_min_readahead_size()
8729 << " max_bytes=" << f
->readahead
.get_max_readahead_size()
8730 << " max_periods=" << conf
->client_readahead_max_periods
<< dendl
;
8732 // read (and possibly block)
8734 Mutex
flock("Client::_read_async flock");
8737 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
, &rvalue
);
8738 r
= objectcacher
->file_read(&in
->oset
, &in
->layout
, in
->snapid
,
8739 off
, len
, bl
, 0, onfinish
);
8741 get_cap_ref(in
, CEPH_CAP_FILE_CACHE
);
8742 client_lock
.Unlock();
8748 put_cap_ref(in
, CEPH_CAP_FILE_CACHE
);
8755 if(f
->readahead
.get_min_readahead_size() > 0) {
8756 pair
<uint64_t, uint64_t> readahead_extent
= f
->readahead
.update(off
, len
, in
->size
);
8757 if (readahead_extent
.second
> 0) {
8758 ldout(cct
, 20) << "readahead " << readahead_extent
.first
<< "~" << readahead_extent
.second
8759 << " (caller wants " << off
<< "~" << len
<< ")" << dendl
;
8760 Context
*onfinish2
= new C_Readahead(this, f
);
8761 int r2
= objectcacher
->file_read(&in
->oset
, &in
->layout
, in
->snapid
,
8762 readahead_extent
.first
, readahead_extent
.second
,
8763 NULL
, 0, onfinish2
);
8765 ldout(cct
, 20) << "readahead initiated, c " << onfinish2
<< dendl
;
8766 get_cap_ref(in
, CEPH_CAP_FILE_RD
| CEPH_CAP_FILE_CACHE
);
8768 ldout(cct
, 20) << "readahead was no-op, already cached" << dendl
;
8777 int Client::_read_sync(Fh
*f
, uint64_t off
, uint64_t len
, bufferlist
*bl
,
8780 Inode
*in
= f
->inode
.get();
8785 ldout(cct
, 10) << "_read_sync " << *in
<< " " << off
<< "~" << len
<< dendl
;
8787 Mutex
flock("Client::_read_sync flock");
8792 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
, &r
);
8796 filer
->read_trunc(in
->ino
, &in
->layout
, in
->snapid
,
8798 in
->truncate_size
, in
->truncate_seq
,
8800 client_lock
.Unlock();
8807 // if we get ENOENT from OSD, assume 0 bytes returned
8818 bl
->claim_append(tbl
);
8821 if (r
>= 0 && r
< wanted
) {
8822 if (pos
< in
->size
) {
8823 // zero up to known EOF
8824 int64_t some
= in
->size
- pos
;
8846 * we keep count of uncommitted sync writes on the inode, so that
8849 void Client::_sync_write_commit(Inode
*in
)
8851 assert(unsafe_sync_write
> 0);
8852 unsafe_sync_write
--;
8854 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
8856 ldout(cct
, 15) << "sync_write_commit unsafe_sync_write = " << unsafe_sync_write
<< dendl
;
8857 if (unsafe_sync_write
== 0 && unmounting
) {
8858 ldout(cct
, 10) << "sync_write_commit -- no more unsafe writes, unmount can proceed" << dendl
;
8859 mount_cond
.Signal();
8863 int Client::write(int fd
, const char *buf
, loff_t size
, loff_t offset
)
8865 Mutex::Locker
lock(client_lock
);
8866 tout(cct
) << "write" << std::endl
;
8867 tout(cct
) << fd
<< std::endl
;
8868 tout(cct
) << size
<< std::endl
;
8869 tout(cct
) << offset
<< std::endl
;
8871 Fh
*fh
= get_filehandle(fd
);
8874 #if defined(__linux__) && defined(O_PATH)
8875 if (fh
->flags
& O_PATH
)
8878 int r
= _write(fh
, offset
, size
, buf
, NULL
, 0);
8879 ldout(cct
, 3) << "write(" << fd
<< ", \"...\", " << size
<< ", " << offset
<< ") = " << r
<< dendl
;
8883 int Client::pwritev(int fd
, const struct iovec
*iov
, int iovcnt
, int64_t offset
)
8887 return _preadv_pwritev(fd
, iov
, iovcnt
, offset
, true);
8890 int Client::_preadv_pwritev(int fd
, const struct iovec
*iov
, unsigned iovcnt
, int64_t offset
, bool write
)
8892 Mutex::Locker
lock(client_lock
);
8893 tout(cct
) << fd
<< std::endl
;
8894 tout(cct
) << offset
<< std::endl
;
8896 Fh
*fh
= get_filehandle(fd
);
8899 #if defined(__linux__) && defined(O_PATH)
8900 if (fh
->flags
& O_PATH
)
8903 loff_t totallen
= 0;
8904 for (unsigned i
= 0; i
< iovcnt
; i
++) {
8905 totallen
+= iov
[i
].iov_len
;
8908 int w
= _write(fh
, offset
, totallen
, NULL
, iov
, iovcnt
);
8909 ldout(cct
, 3) << "pwritev(" << fd
<< ", \"...\", " << totallen
<< ", " << offset
<< ") = " << w
<< dendl
;
8913 int r
= _read(fh
, offset
, totallen
, &bl
);
8914 ldout(cct
, 3) << "preadv(" << fd
<< ", " << offset
<< ") = " << r
<< dendl
;
8919 for (unsigned j
= 0, resid
= r
; j
< iovcnt
&& resid
> 0; j
++) {
8921 * This piece of code aims to handle the case that bufferlist does not have enough data
8922 * to fill in the iov
8924 if (resid
< iov
[j
].iov_len
) {
8925 bl
.copy(bufoff
, resid
, (char *)iov
[j
].iov_base
);
8928 bl
.copy(bufoff
, iov
[j
].iov_len
, (char *)iov
[j
].iov_base
);
8930 resid
-= iov
[j
].iov_len
;
8931 bufoff
+= iov
[j
].iov_len
;
8937 int Client::_write(Fh
*f
, int64_t offset
, uint64_t size
, const char *buf
,
8938 const struct iovec
*iov
, int iovcnt
)
8940 if ((uint64_t)(offset
+size
) > mdsmap
->get_max_filesize()) //too large!
8943 //ldout(cct, 7) << "write fh " << fh << " size " << size << " offset " << offset << dendl;
8944 Inode
*in
= f
->inode
.get();
8946 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
)) {
8950 assert(in
->snapid
== CEPH_NOSNAP
);
8952 // was Fh opened as writeable?
8953 if ((f
->mode
& CEPH_FILE_MODE_WR
) == 0)
8957 uint64_t endoff
= offset
+ size
;
8958 if (endoff
> in
->size
&& is_quota_bytes_exceeded(in
, endoff
- in
->size
,
8963 // use/adjust fd pos?
8967 * FIXME: this is racy in that we may block _after_ this point waiting for caps, and size may
8968 * change out from under us.
8970 if (f
->flags
& O_APPEND
) {
8971 int r
= _lseek(f
, 0, SEEK_END
);
8978 f
->pos
= offset
+size
;
8982 //bool lazy = f->mode == CEPH_FILE_MODE_LAZY;
8984 ldout(cct
, 10) << "cur file size is " << in
->size
<< dendl
;
8987 utime_t start
= ceph_clock_now();
8989 if (in
->inline_version
== 0) {
8990 int r
= _getattr(in
, CEPH_STAT_CAP_INLINE_DATA
, f
->actor_perms
, true);
8993 assert(in
->inline_version
> 0);
8996 // copy into fresh buffer (since our write may be resub, async)
9000 bl
.append(buf
, size
);
9002 for (int i
= 0; i
< iovcnt
; i
++) {
9003 if (iov
[i
].iov_len
> 0) {
9004 bl
.append((const char *)iov
[i
].iov_base
, iov
[i
].iov_len
);
9010 uint64_t totalwritten
;
9012 int r
= get_caps(in
, CEPH_CAP_FILE_WR
|CEPH_CAP_AUTH_SHARED
,
9013 CEPH_CAP_FILE_BUFFER
, &have
, endoff
);
9017 /* clear the setuid/setgid bits, if any */
9018 if (unlikely((in
->mode
& S_ISUID
) ||
9019 (in
->mode
& (S_ISGID
| S_IXGRP
)) == (S_ISGID
| S_IXGRP
))) {
9020 struct ceph_statx stx
= { 0 };
9022 put_cap_ref(in
, CEPH_CAP_AUTH_SHARED
);
9023 r
= __setattrx(in
, &stx
, CEPH_SETATTR_KILL_SGUID
, f
->actor_perms
);
9027 put_cap_ref(in
, CEPH_CAP_AUTH_SHARED
);
9030 if (f
->flags
& O_DIRECT
)
9031 have
&= ~CEPH_CAP_FILE_BUFFER
;
9033 ldout(cct
, 10) << " snaprealm " << *in
->snaprealm
<< dendl
;
9035 Mutex
uninline_flock("Client::_write_uninline_data flock");
9037 bool uninline_done
= false;
9038 int uninline_ret
= 0;
9039 Context
*onuninline
= NULL
;
9041 if (in
->inline_version
< CEPH_INLINE_NONE
) {
9042 if (endoff
> cct
->_conf
->client_max_inline_size
||
9043 endoff
> CEPH_INLINE_MAX_SIZE
||
9044 !(have
& CEPH_CAP_FILE_BUFFER
)) {
9045 onuninline
= new C_SafeCond(&uninline_flock
,
9049 uninline_data(in
, onuninline
);
9051 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9053 uint32_t len
= in
->inline_data
.length();
9056 in
->inline_data
.copy(endoff
, len
- endoff
, bl
);
9059 in
->inline_data
.splice(offset
, len
- offset
);
9060 else if (offset
> len
)
9061 in
->inline_data
.append_zero(offset
- len
);
9063 in
->inline_data
.append(bl
);
9064 in
->inline_version
++;
9066 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9072 if (cct
->_conf
->client_oc
&& (have
& CEPH_CAP_FILE_BUFFER
)) {
9073 // do buffered write
9074 if (!in
->oset
.dirty_or_tx
)
9075 get_cap_ref(in
, CEPH_CAP_FILE_CACHE
| CEPH_CAP_FILE_BUFFER
);
9077 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9079 // async, caching, non-blocking.
9080 r
= objectcacher
->file_write(&in
->oset
, &in
->layout
,
9081 in
->snaprealm
->get_snap_context(),
9082 offset
, size
, bl
, ceph::real_clock::now(),
9084 put_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
9089 // flush cached write if O_SYNC is set on file fh
9090 // O_DSYNC == O_SYNC on linux < 2.6.33
9091 // O_SYNC = __O_SYNC | O_DSYNC on linux >= 2.6.33
9092 if ((f
->flags
& O_SYNC
) || (f
->flags
& O_DSYNC
)) {
9093 _flush_range(in
, offset
, size
);
9096 if (f
->flags
& O_DIRECT
)
9097 _flush_range(in
, offset
, size
);
9099 // simple, non-atomic sync write
9100 Mutex
flock("Client::_write flock");
9103 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
);
9105 unsafe_sync_write
++;
9106 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
); // released by onsafe callback
9108 filer
->write_trunc(in
->ino
, &in
->layout
, in
->snaprealm
->get_snap_context(),
9109 offset
, size
, bl
, ceph::real_clock::now(), 0,
9110 in
->truncate_size
, in
->truncate_seq
,
9112 client_lock
.Unlock();
9119 _sync_write_commit(in
);
9122 // if we get here, write was successful, update client metadata
9125 lat
= ceph_clock_now();
9127 logger
->tinc(l_c_wrlat
, lat
);
9129 totalwritten
= size
;
9130 r
= (int)totalwritten
;
9133 if (totalwritten
+ offset
> in
->size
) {
9134 in
->size
= totalwritten
+ offset
;
9135 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
9137 if (is_quota_bytes_approaching(in
, f
->actor_perms
)) {
9138 check_caps(in
, CHECK_CAPS_NODELAY
);
9139 } else if (is_max_size_approaching(in
)) {
9143 ldout(cct
, 7) << "wrote to " << totalwritten
+offset
<< ", extending file size" << dendl
;
9145 ldout(cct
, 7) << "wrote to " << totalwritten
+offset
<< ", leaving file size at " << in
->size
<< dendl
;
9149 in
->mtime
= ceph_clock_now();
9151 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
9156 client_lock
.Unlock();
9157 uninline_flock
.Lock();
9158 while (!uninline_done
)
9159 uninline_cond
.Wait(uninline_flock
);
9160 uninline_flock
.Unlock();
9163 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
9164 in
->inline_data
.clear();
9165 in
->inline_version
= CEPH_INLINE_NONE
;
9166 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
9172 put_cap_ref(in
, CEPH_CAP_FILE_WR
);
9176 int Client::_flush(Fh
*f
)
9178 Inode
*in
= f
->inode
.get();
9179 int err
= f
->take_async_err();
9181 ldout(cct
, 1) << __func__
<< ": " << f
<< " on inode " << *in
<< " caught async_err = "
9182 << cpp_strerror(err
) << dendl
;
9184 ldout(cct
, 10) << __func__
<< ": " << f
<< " on inode " << *in
<< " no async_err state" << dendl
;
9190 int Client::truncate(const char *relpath
, loff_t length
, const UserPerm
& perms
)
9192 struct ceph_statx stx
;
9193 stx
.stx_size
= length
;
9194 return setattrx(relpath
, &stx
, CEPH_SETATTR_SIZE
, perms
);
9197 int Client::ftruncate(int fd
, loff_t length
, const UserPerm
& perms
)
9199 Mutex::Locker
lock(client_lock
);
9200 tout(cct
) << "ftruncate" << std::endl
;
9201 tout(cct
) << fd
<< std::endl
;
9202 tout(cct
) << length
<< std::endl
;
9204 Fh
*f
= get_filehandle(fd
);
9207 #if defined(__linux__) && defined(O_PATH)
9208 if (f
->flags
& O_PATH
)
9212 attr
.st_size
= length
;
9213 return _setattr(f
->inode
, &attr
, CEPH_SETATTR_SIZE
, perms
);
9216 int Client::fsync(int fd
, bool syncdataonly
)
9218 Mutex::Locker
lock(client_lock
);
9219 tout(cct
) << "fsync" << std::endl
;
9220 tout(cct
) << fd
<< std::endl
;
9221 tout(cct
) << syncdataonly
<< std::endl
;
9223 Fh
*f
= get_filehandle(fd
);
9226 #if defined(__linux__) && defined(O_PATH)
9227 if (f
->flags
& O_PATH
)
9230 int r
= _fsync(f
, syncdataonly
);
9232 // The IOs in this fsync were okay, but maybe something happened
9233 // in the background that we shoudl be reporting?
9234 r
= f
->take_async_err();
9235 ldout(cct
, 3) << "fsync(" << fd
<< ", " << syncdataonly
9236 << ") = 0, async_err = " << r
<< dendl
;
9238 // Assume that an error we encountered during fsync, even reported
9239 // synchronously, would also have applied the error to the Fh, and we
9240 // should clear it here to avoid returning the same error again on next
9242 ldout(cct
, 3) << "fsync(" << fd
<< ", " << syncdataonly
<< ") = "
9244 f
->take_async_err();
9249 int Client::_fsync(Inode
*in
, bool syncdataonly
)
9252 Mutex
lock("Client::_fsync::lock");
9255 C_SafeCond
*object_cacher_completion
= NULL
;
9256 ceph_tid_t flush_tid
= 0;
9259 ldout(cct
, 3) << "_fsync on " << *in
<< " " << (syncdataonly
? "(dataonly)":"(data+metadata)") << dendl
;
9261 if (cct
->_conf
->client_oc
) {
9262 object_cacher_completion
= new C_SafeCond(&lock
, &cond
, &done
, &r
);
9263 tmp_ref
= in
; // take a reference; C_SafeCond doesn't and _flush won't either
9264 _flush(in
, object_cacher_completion
);
9265 ldout(cct
, 15) << "using return-valued form of _fsync" << dendl
;
9268 if (!syncdataonly
&& in
->dirty_caps
) {
9269 check_caps(in
, CHECK_CAPS_NODELAY
|CHECK_CAPS_SYNCHRONOUS
);
9270 if (in
->flushing_caps
)
9271 flush_tid
= last_flush_tid
;
9272 } else ldout(cct
, 10) << "no metadata needs to commit" << dendl
;
9274 if (!syncdataonly
&& !in
->unsafe_ops
.empty()) {
9275 MetaRequest
*req
= in
->unsafe_ops
.back();
9276 ldout(cct
, 15) << "waiting on unsafe requests, last tid " << req
->get_tid() << dendl
;
9279 wait_on_list(req
->waitfor_safe
);
9283 if (object_cacher_completion
) { // wait on a real reply instead of guessing
9284 client_lock
.Unlock();
9286 ldout(cct
, 15) << "waiting on data to flush" << dendl
;
9291 ldout(cct
, 15) << "got " << r
<< " from flush writeback" << dendl
;
9293 // FIXME: this can starve
9294 while (in
->cap_refs
[CEPH_CAP_FILE_BUFFER
] > 0) {
9295 ldout(cct
, 10) << "ino " << in
->ino
<< " has " << in
->cap_refs
[CEPH_CAP_FILE_BUFFER
]
9296 << " uncommitted, waiting" << dendl
;
9297 wait_on_list(in
->waitfor_commit
);
9303 wait_sync_caps(in
, flush_tid
);
9305 ldout(cct
, 10) << "ino " << in
->ino
<< " has no uncommitted writes" << dendl
;
9307 ldout(cct
, 1) << "ino " << in
->ino
<< " failed to commit to disk! "
9308 << cpp_strerror(-r
) << dendl
;
9314 int Client::_fsync(Fh
*f
, bool syncdataonly
)
9316 ldout(cct
, 3) << "_fsync(" << f
<< ", " << (syncdataonly
? "dataonly)":"data+metadata)") << dendl
;
9317 return _fsync(f
->inode
.get(), syncdataonly
);
9320 int Client::fstat(int fd
, struct stat
*stbuf
, const UserPerm
& perms
, int mask
)
9322 Mutex::Locker
lock(client_lock
);
9323 tout(cct
) << "fstat mask " << hex
<< mask
<< dec
<< std::endl
;
9324 tout(cct
) << fd
<< std::endl
;
9326 Fh
*f
= get_filehandle(fd
);
9329 int r
= _getattr(f
->inode
, mask
, perms
);
9332 fill_stat(f
->inode
, stbuf
, NULL
);
9333 ldout(cct
, 3) << "fstat(" << fd
<< ", " << stbuf
<< ") = " << r
<< dendl
;
9337 int Client::fstatx(int fd
, struct ceph_statx
*stx
, const UserPerm
& perms
,
9338 unsigned int want
, unsigned int flags
)
9340 Mutex::Locker
lock(client_lock
);
9341 tout(cct
) << "fstatx flags " << hex
<< flags
<< " want " << want
<< dec
<< std::endl
;
9342 tout(cct
) << fd
<< std::endl
;
9344 Fh
*f
= get_filehandle(fd
);
9348 unsigned mask
= statx_to_mask(flags
, want
);
9351 if (mask
&& !f
->inode
->caps_issued_mask(mask
)) {
9352 r
= _getattr(f
->inode
, mask
, perms
);
9354 ldout(cct
, 3) << "fstatx exit on error!" << dendl
;
9359 fill_statx(f
->inode
, mask
, stx
);
9360 ldout(cct
, 3) << "fstatx(" << fd
<< ", " << stx
<< ") = " << r
<< dendl
;
9364 // not written yet, but i want to link!
9366 int Client::chdir(const char *relpath
, std::string
&new_cwd
,
9367 const UserPerm
& perms
)
9369 Mutex::Locker
lock(client_lock
);
9370 tout(cct
) << "chdir" << std::endl
;
9371 tout(cct
) << relpath
<< std::endl
;
9372 filepath
path(relpath
);
9374 int r
= path_walk(path
, &in
, perms
);
9379 ldout(cct
, 3) << "chdir(" << relpath
<< ") cwd now " << cwd
->ino
<< dendl
;
9381 getcwd(new_cwd
, perms
);
9385 void Client::getcwd(string
& dir
, const UserPerm
& perms
)
9388 ldout(cct
, 10) << "getcwd " << *cwd
<< dendl
;
9390 Inode
*in
= cwd
.get();
9391 while (in
!= root
) {
9392 assert(in
->dn_set
.size() < 2); // dirs can't be hard-linked
9394 // A cwd or ancester is unlinked
9395 if (in
->dn_set
.empty()) {
9399 Dentry
*dn
= in
->get_first_parent();
9404 ldout(cct
, 10) << "getcwd looking up parent for " << *in
<< dendl
;
9405 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
9406 filepath
path(in
->ino
);
9407 req
->set_filepath(path
);
9409 int res
= make_request(req
, perms
);
9418 path
.push_front_dentry(dn
->name
);
9419 in
= dn
->dir
->parent_inode
;
9422 dir
+= path
.get_path();
9425 int Client::statfs(const char *path
, struct statvfs
*stbuf
,
9426 const UserPerm
& perms
)
9428 Mutex::Locker
l(client_lock
);
9429 tout(cct
) << "statfs" << std::endl
;
9433 objecter
->get_fs_stats(stats
, &cond
);
9435 client_lock
.Unlock();
9436 int rval
= cond
.wait();
9440 ldout(cct
, 1) << "underlying call to statfs returned error: "
9441 << cpp_strerror(rval
)
9446 memset(stbuf
, 0, sizeof(*stbuf
));
9449 * we're going to set a block size of 4MB so we can represent larger
9450 * FSes without overflowing. Additionally convert the space
9451 * measurements from KB to bytes while making them in terms of
9452 * blocks. We use 4MB only because it is big enough, and because it
9453 * actually *is* the (ceph) default block size.
9455 const int CEPH_BLOCK_SHIFT
= 22;
9456 stbuf
->f_frsize
= 1 << CEPH_BLOCK_SHIFT
;
9457 stbuf
->f_bsize
= 1 << CEPH_BLOCK_SHIFT
;
9458 stbuf
->f_files
= stats
.num_objects
;
9459 stbuf
->f_ffree
= -1;
9460 stbuf
->f_favail
= -1;
9461 stbuf
->f_fsid
= -1; // ??
9462 stbuf
->f_flag
= 0; // ??
9463 stbuf
->f_namemax
= NAME_MAX
;
9465 // Usually quota_root will == root_ancestor, but if the mount root has no
9466 // quota but we can see a parent of it that does have a quota, we'll
9467 // respect that one instead.
9468 assert(root
!= nullptr);
9469 Inode
*quota_root
= root
->quota
.is_enable() ? root
: get_quota_root(root
, perms
);
9471 // get_quota_root should always give us something
9472 // because client quotas are always enabled
9473 assert(quota_root
!= nullptr);
9475 if (quota_root
&& cct
->_conf
->client_quota_df
&& quota_root
->quota
.max_bytes
) {
9477 // Skip the getattr if any sessions are stale, as we don't want to
9478 // block `df` if this client has e.g. been evicted, or if the MDS cluster
9480 if (!_any_stale_sessions()) {
9481 int r
= _getattr(quota_root
, 0, perms
, true);
9483 // Ignore return value: error getting latest inode metadata is not a good
9484 // reason to break "df".
9485 lderr(cct
) << "Error in getattr on quota root 0x"
9486 << std::hex
<< quota_root
->ino
<< std::dec
9487 << " statfs result may be outdated" << dendl
;
9491 // Special case: if there is a size quota set on the Inode acting
9492 // as the root for this client mount, then report the quota status
9493 // as the filesystem statistics.
9494 const fsblkcnt_t total
= quota_root
->quota
.max_bytes
>> CEPH_BLOCK_SHIFT
;
9495 const fsblkcnt_t used
= quota_root
->rstat
.rbytes
>> CEPH_BLOCK_SHIFT
;
9496 // It is possible for a quota to be exceeded: arithmetic here must
9497 // handle case where used > total.
9498 const fsblkcnt_t free
= total
> used
? total
- used
: 0;
9500 stbuf
->f_blocks
= total
;
9501 stbuf
->f_bfree
= free
;
9502 stbuf
->f_bavail
= free
;
9504 // General case: report the overall RADOS cluster's statistics. Because
9505 // multiple pools may be used without one filesystem namespace via
9506 // layouts, this is the most correct thing we can do.
9507 stbuf
->f_blocks
= stats
.kb
>> (CEPH_BLOCK_SHIFT
- 10);
9508 stbuf
->f_bfree
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
9509 stbuf
->f_bavail
= stats
.kb_avail
>> (CEPH_BLOCK_SHIFT
- 10);
9515 int Client::_do_filelock(Inode
*in
, Fh
*fh
, int lock_type
, int op
, int sleep
,
9516 struct flock
*fl
, uint64_t owner
, bool removing
)
9518 ldout(cct
, 10) << "_do_filelock ino " << in
->ino
9519 << (lock_type
== CEPH_LOCK_FCNTL
? " fcntl" : " flock")
9520 << " type " << fl
->l_type
<< " owner " << owner
9521 << " " << fl
->l_start
<< "~" << fl
->l_len
<< dendl
;
9524 if (F_RDLCK
== fl
->l_type
)
9525 lock_cmd
= CEPH_LOCK_SHARED
;
9526 else if (F_WRLCK
== fl
->l_type
)
9527 lock_cmd
= CEPH_LOCK_EXCL
;
9528 else if (F_UNLCK
== fl
->l_type
)
9529 lock_cmd
= CEPH_LOCK_UNLOCK
;
9533 if (op
!= CEPH_MDS_OP_SETFILELOCK
|| lock_cmd
== CEPH_LOCK_UNLOCK
)
9537 * Set the most significant bit, so that MDS knows the 'owner'
9538 * is sufficient to identify the owner of lock. (old code uses
9539 * both 'owner' and 'pid')
9541 owner
|= (1ULL << 63);
9543 MetaRequest
*req
= new MetaRequest(op
);
9545 in
->make_nosnap_relative_path(path
);
9546 req
->set_filepath(path
);
9549 req
->head
.args
.filelock_change
.rule
= lock_type
;
9550 req
->head
.args
.filelock_change
.type
= lock_cmd
;
9551 req
->head
.args
.filelock_change
.owner
= owner
;
9552 req
->head
.args
.filelock_change
.pid
= fl
->l_pid
;
9553 req
->head
.args
.filelock_change
.start
= fl
->l_start
;
9554 req
->head
.args
.filelock_change
.length
= fl
->l_len
;
9555 req
->head
.args
.filelock_change
.wait
= sleep
;
9560 if (sleep
&& switch_interrupt_cb
) {
9562 switch_interrupt_cb(callback_handle
, req
->get());
9563 ret
= make_request(req
, fh
->actor_perms
, NULL
, NULL
, -1, &bl
);
9564 // disable interrupt
9565 switch_interrupt_cb(callback_handle
, NULL
);
9566 if (ret
== 0 && req
->aborted()) {
9567 // effect of this lock request has been revoked by the 'lock intr' request
9568 ret
= req
->get_abort_code();
9572 ret
= make_request(req
, fh
->actor_perms
, NULL
, NULL
, -1, &bl
);
9576 if (op
== CEPH_MDS_OP_GETFILELOCK
) {
9577 ceph_filelock filelock
;
9578 bufferlist::iterator p
= bl
.begin();
9579 ::decode(filelock
, p
);
9581 if (CEPH_LOCK_SHARED
== filelock
.type
)
9582 fl
->l_type
= F_RDLCK
;
9583 else if (CEPH_LOCK_EXCL
== filelock
.type
)
9584 fl
->l_type
= F_WRLCK
;
9586 fl
->l_type
= F_UNLCK
;
9588 fl
->l_whence
= SEEK_SET
;
9589 fl
->l_start
= filelock
.start
;
9590 fl
->l_len
= filelock
.length
;
9591 fl
->l_pid
= filelock
.pid
;
9592 } else if (op
== CEPH_MDS_OP_SETFILELOCK
) {
9593 ceph_lock_state_t
*lock_state
;
9594 if (lock_type
== CEPH_LOCK_FCNTL
) {
9595 if (!in
->fcntl_locks
)
9596 in
->fcntl_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FCNTL
);
9597 lock_state
= in
->fcntl_locks
;
9598 } else if (lock_type
== CEPH_LOCK_FLOCK
) {
9599 if (!in
->flock_locks
)
9600 in
->flock_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FLOCK
);
9601 lock_state
= in
->flock_locks
;
9606 _update_lock_state(fl
, owner
, lock_state
);
9609 if (lock_type
== CEPH_LOCK_FCNTL
) {
9610 if (!fh
->fcntl_locks
)
9611 fh
->fcntl_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FCNTL
);
9612 lock_state
= fh
->fcntl_locks
;
9614 if (!fh
->flock_locks
)
9615 fh
->flock_locks
= new ceph_lock_state_t(cct
, CEPH_LOCK_FLOCK
);
9616 lock_state
= fh
->flock_locks
;
9618 _update_lock_state(fl
, owner
, lock_state
);
9626 int Client::_interrupt_filelock(MetaRequest
*req
)
9628 // Set abort code, but do not kick. The abort code prevents the request
9629 // from being re-sent.
9632 return 0; // haven't sent the request
9634 Inode
*in
= req
->inode();
9637 if (req
->head
.args
.filelock_change
.rule
== CEPH_LOCK_FLOCK
)
9638 lock_type
= CEPH_LOCK_FLOCK_INTR
;
9639 else if (req
->head
.args
.filelock_change
.rule
== CEPH_LOCK_FCNTL
)
9640 lock_type
= CEPH_LOCK_FCNTL_INTR
;
9646 MetaRequest
*intr_req
= new MetaRequest(CEPH_MDS_OP_SETFILELOCK
);
9648 in
->make_nosnap_relative_path(path
);
9649 intr_req
->set_filepath(path
);
9650 intr_req
->set_inode(in
);
9651 intr_req
->head
.args
.filelock_change
= req
->head
.args
.filelock_change
;
9652 intr_req
->head
.args
.filelock_change
.rule
= lock_type
;
9653 intr_req
->head
.args
.filelock_change
.type
= CEPH_LOCK_UNLOCK
;
9655 UserPerm
perms(req
->get_uid(), req
->get_gid());
9656 return make_request(intr_req
, perms
, NULL
, NULL
, -1);
9659 void Client::_encode_filelocks(Inode
*in
, bufferlist
& bl
)
9661 if (!in
->fcntl_locks
&& !in
->flock_locks
)
9664 unsigned nr_fcntl_locks
= in
->fcntl_locks
? in
->fcntl_locks
->held_locks
.size() : 0;
9665 ::encode(nr_fcntl_locks
, bl
);
9666 if (nr_fcntl_locks
) {
9667 ceph_lock_state_t
* lock_state
= in
->fcntl_locks
;
9668 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9669 p
!= lock_state
->held_locks
.end();
9671 ::encode(p
->second
, bl
);
9674 unsigned nr_flock_locks
= in
->flock_locks
? in
->flock_locks
->held_locks
.size() : 0;
9675 ::encode(nr_flock_locks
, bl
);
9676 if (nr_flock_locks
) {
9677 ceph_lock_state_t
* lock_state
= in
->flock_locks
;
9678 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9679 p
!= lock_state
->held_locks
.end();
9681 ::encode(p
->second
, bl
);
9684 ldout(cct
, 10) << "_encode_filelocks ino " << in
->ino
<< ", " << nr_fcntl_locks
9685 << " fcntl locks, " << nr_flock_locks
<< " flock locks" << dendl
;
9688 void Client::_release_filelocks(Fh
*fh
)
9690 if (!fh
->fcntl_locks
&& !fh
->flock_locks
)
9693 Inode
*in
= fh
->inode
.get();
9694 ldout(cct
, 10) << "_release_filelocks " << fh
<< " ino " << in
->ino
<< dendl
;
9696 list
<pair
<int, ceph_filelock
> > to_release
;
9698 if (fh
->fcntl_locks
) {
9699 ceph_lock_state_t
* lock_state
= fh
->fcntl_locks
;
9700 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9701 p
!= lock_state
->held_locks
.end();
9703 to_release
.push_back(pair
<int, ceph_filelock
>(CEPH_LOCK_FCNTL
, p
->second
));
9704 delete fh
->fcntl_locks
;
9706 if (fh
->flock_locks
) {
9707 ceph_lock_state_t
* lock_state
= fh
->flock_locks
;
9708 for(multimap
<uint64_t, ceph_filelock
>::iterator p
= lock_state
->held_locks
.begin();
9709 p
!= lock_state
->held_locks
.end();
9711 to_release
.push_back(pair
<int, ceph_filelock
>(CEPH_LOCK_FLOCK
, p
->second
));
9712 delete fh
->flock_locks
;
9715 if (to_release
.empty())
9719 memset(&fl
, 0, sizeof(fl
));
9720 fl
.l_whence
= SEEK_SET
;
9721 fl
.l_type
= F_UNLCK
;
9723 for (list
<pair
<int, ceph_filelock
> >::iterator p
= to_release
.begin();
9724 p
!= to_release
.end();
9726 fl
.l_start
= p
->second
.start
;
9727 fl
.l_len
= p
->second
.length
;
9728 fl
.l_pid
= p
->second
.pid
;
9729 _do_filelock(in
, fh
, p
->first
, CEPH_MDS_OP_SETFILELOCK
, 0, &fl
,
9730 p
->second
.owner
, true);
9734 void Client::_update_lock_state(struct flock
*fl
, uint64_t owner
,
9735 ceph_lock_state_t
*lock_state
)
9738 if (F_RDLCK
== fl
->l_type
)
9739 lock_cmd
= CEPH_LOCK_SHARED
;
9740 else if (F_WRLCK
== fl
->l_type
)
9741 lock_cmd
= CEPH_LOCK_EXCL
;
9743 lock_cmd
= CEPH_LOCK_UNLOCK
;;
9745 ceph_filelock filelock
;
9746 filelock
.start
= fl
->l_start
;
9747 filelock
.length
= fl
->l_len
;
9748 filelock
.client
= 0;
9749 // see comment in _do_filelock()
9750 filelock
.owner
= owner
| (1ULL << 63);
9751 filelock
.pid
= fl
->l_pid
;
9752 filelock
.type
= lock_cmd
;
9754 if (filelock
.type
== CEPH_LOCK_UNLOCK
) {
9755 list
<ceph_filelock
> activated_locks
;
9756 lock_state
->remove_lock(filelock
, activated_locks
);
9758 bool r
= lock_state
->add_lock(filelock
, false, false, NULL
);
9763 int Client::_getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
)
9765 Inode
*in
= fh
->inode
.get();
9766 ldout(cct
, 10) << "_getlk " << fh
<< " ino " << in
->ino
<< dendl
;
9767 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FCNTL
, CEPH_MDS_OP_GETFILELOCK
, 0, fl
, owner
);
9771 int Client::_setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
)
9773 Inode
*in
= fh
->inode
.get();
9774 ldout(cct
, 10) << "_setlk " << fh
<< " ino " << in
->ino
<< dendl
;
9775 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FCNTL
, CEPH_MDS_OP_SETFILELOCK
, sleep
, fl
, owner
);
9776 ldout(cct
, 10) << "_setlk " << fh
<< " ino " << in
->ino
<< " result=" << ret
<< dendl
;
9780 int Client::_flock(Fh
*fh
, int cmd
, uint64_t owner
)
9782 Inode
*in
= fh
->inode
.get();
9783 ldout(cct
, 10) << "_flock " << fh
<< " ino " << in
->ino
<< dendl
;
9785 int sleep
= !(cmd
& LOCK_NB
);
9804 memset(&fl
, 0, sizeof(fl
));
9806 fl
.l_whence
= SEEK_SET
;
9808 int ret
= _do_filelock(in
, fh
, CEPH_LOCK_FLOCK
, CEPH_MDS_OP_SETFILELOCK
, sleep
, &fl
, owner
);
9809 ldout(cct
, 10) << "_flock " << fh
<< " ino " << in
->ino
<< " result=" << ret
<< dendl
;
9813 int Client::ll_statfs(Inode
*in
, struct statvfs
*stbuf
, const UserPerm
& perms
)
9815 /* Since the only thing this does is wrap a call to statfs, and
9816 statfs takes a lock, it doesn't seem we have a need to split it
9818 return statfs(0, stbuf
, perms
);
9821 void Client::ll_register_callbacks(struct client_callback_args
*args
)
9825 Mutex::Locker
l(client_lock
);
9826 ldout(cct
, 10) << "ll_register_callbacks cb " << args
->handle
9827 << " invalidate_ino_cb " << args
->ino_cb
9828 << " invalidate_dentry_cb " << args
->dentry_cb
9829 << " getgroups_cb" << args
->getgroups_cb
9830 << " switch_interrupt_cb " << args
->switch_intr_cb
9831 << " remount_cb " << args
->remount_cb
9833 callback_handle
= args
->handle
;
9835 ino_invalidate_cb
= args
->ino_cb
;
9836 async_ino_invalidator
.start();
9838 if (args
->dentry_cb
) {
9839 dentry_invalidate_cb
= args
->dentry_cb
;
9840 async_dentry_invalidator
.start();
9842 if (args
->switch_intr_cb
) {
9843 switch_interrupt_cb
= args
->switch_intr_cb
;
9844 interrupt_finisher
.start();
9846 if (args
->remount_cb
) {
9847 remount_cb
= args
->remount_cb
;
9848 remount_finisher
.start();
9850 getgroups_cb
= args
->getgroups_cb
;
9851 umask_cb
= args
->umask_cb
;
9854 int Client::test_dentry_handling(bool can_invalidate
)
9858 can_invalidate_dentries
= can_invalidate
;
9860 if (can_invalidate_dentries
) {
9861 assert(dentry_invalidate_cb
);
9862 ldout(cct
, 1) << "using dentry_invalidate_cb" << dendl
;
9863 } else if (remount_cb
) {
9864 ldout(cct
, 1) << "using remount_cb" << dendl
;
9865 int s
= remount_cb(callback_handle
);
9867 lderr(cct
) << "Failed to invoke remount, needed to ensure kernel dcache consistency"
9870 if (cct
->_conf
->client_die_on_failed_remount
) {
9871 require_remount
= true;
9875 lderr(cct
) << "no method to invalidate kernel dentry cache; expect issues!" << dendl
;
9876 if (cct
->_conf
->client_die_on_failed_remount
)
9882 int Client::_sync_fs()
9884 ldout(cct
, 10) << "_sync_fs" << dendl
;
9887 Mutex
lock("Client::_fsync::lock");
9889 bool flush_done
= false;
9890 if (cct
->_conf
->client_oc
)
9891 objectcacher
->flush_all(new C_SafeCond(&lock
, &cond
, &flush_done
));
9897 ceph_tid_t flush_tid
= last_flush_tid
;
9899 // wait for unsafe mds requests
9900 wait_unsafe_requests();
9902 wait_sync_caps(flush_tid
);
9905 client_lock
.Unlock();
9907 ldout(cct
, 15) << "waiting on data to flush" << dendl
;
9917 int Client::sync_fs()
9919 Mutex::Locker
l(client_lock
);
9923 int64_t Client::drop_caches()
9925 Mutex::Locker
l(client_lock
);
9926 return objectcacher
->release_all();
9930 int Client::lazyio_propogate(int fd
, loff_t offset
, size_t count
)
9932 Mutex::Locker
l(client_lock
);
9933 ldout(cct
, 3) << "op: client->lazyio_propogate(" << fd
9934 << ", " << offset
<< ", " << count
<< ")" << dendl
;
9936 Fh
*f
= get_filehandle(fd
);
9946 int Client::lazyio_synchronize(int fd
, loff_t offset
, size_t count
)
9948 Mutex::Locker
l(client_lock
);
9949 ldout(cct
, 3) << "op: client->lazyio_synchronize(" << fd
9950 << ", " << offset
<< ", " << count
<< ")" << dendl
;
9952 Fh
*f
= get_filehandle(fd
);
9955 Inode
*in
= f
->inode
.get();
9964 // =============================
9967 int Client::mksnap(const char *relpath
, const char *name
, const UserPerm
& perm
)
9969 Mutex::Locker
l(client_lock
);
9970 filepath
path(relpath
);
9972 int r
= path_walk(path
, &in
, perm
);
9975 if (cct
->_conf
->client_permissions
) {
9976 r
= may_create(in
.get(), perm
);
9980 Inode
*snapdir
= open_snapdir(in
.get());
9981 return _mkdir(snapdir
, name
, 0, perm
);
9983 int Client::rmsnap(const char *relpath
, const char *name
, const UserPerm
& perms
)
9985 Mutex::Locker
l(client_lock
);
9986 filepath
path(relpath
);
9988 int r
= path_walk(path
, &in
, perms
);
9991 if (cct
->_conf
->client_permissions
) {
9992 r
= may_delete(in
.get(), NULL
, perms
);
9996 Inode
*snapdir
= open_snapdir(in
.get());
9997 return _rmdir(snapdir
, name
, perms
);
10000 // =============================
10003 int Client::get_caps_issued(int fd
) {
10005 Mutex::Locker
lock(client_lock
);
10007 Fh
*f
= get_filehandle(fd
);
10011 return f
->inode
->caps_issued();
10014 int Client::get_caps_issued(const char *path
, const UserPerm
& perms
)
10016 Mutex::Locker
lock(client_lock
);
10019 int r
= path_walk(p
, &in
, perms
, true);
10022 return in
->caps_issued();
10025 // =========================================
10028 Inode
*Client::open_snapdir(Inode
*diri
)
10031 vinodeno_t
vino(diri
->ino
, CEPH_SNAPDIR
);
10032 if (!inode_map
.count(vino
)) {
10033 in
= new Inode(this, vino
, &diri
->layout
);
10035 in
->ino
= diri
->ino
;
10036 in
->snapid
= CEPH_SNAPDIR
;
10037 in
->mode
= diri
->mode
;
10038 in
->uid
= diri
->uid
;
10039 in
->gid
= diri
->gid
;
10040 in
->mtime
= diri
->mtime
;
10041 in
->ctime
= diri
->ctime
;
10042 in
->btime
= diri
->btime
;
10043 in
->size
= diri
->size
;
10044 in
->change_attr
= diri
->change_attr
;
10046 in
->dirfragtree
.clear();
10047 in
->snapdir_parent
= diri
;
10048 diri
->flags
|= I_SNAPDIR_OPEN
;
10049 inode_map
[vino
] = in
;
10050 if (use_faked_inos())
10051 _assign_faked_ino(in
);
10052 ldout(cct
, 10) << "open_snapdir created snapshot inode " << *in
<< dendl
;
10054 in
= inode_map
[vino
];
10055 ldout(cct
, 10) << "open_snapdir had snapshot inode " << *in
<< dendl
;
10060 int Client::ll_lookup(Inode
*parent
, const char *name
, struct stat
*attr
,
10061 Inode
**out
, const UserPerm
& perms
)
10063 Mutex::Locker
lock(client_lock
);
10064 vinodeno_t vparent
= _get_vino(parent
);
10065 ldout(cct
, 3) << "ll_lookup " << vparent
<< " " << name
<< dendl
;
10066 tout(cct
) << "ll_lookup" << std::endl
;
10067 tout(cct
) << name
<< std::endl
;
10070 if (!cct
->_conf
->fuse_default_permissions
) {
10071 r
= may_lookup(parent
, perms
);
10076 string
dname(name
);
10079 r
= _lookup(parent
, dname
, CEPH_STAT_CAP_INODE_ALL
, &in
, perms
);
10086 fill_stat(in
, attr
);
10090 ldout(cct
, 3) << "ll_lookup " << vparent
<< " " << name
10091 << " -> " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
10092 tout(cct
) << attr
->st_ino
<< std::endl
;
10097 int Client::ll_lookupx(Inode
*parent
, const char *name
, Inode
**out
,
10098 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
10099 const UserPerm
& perms
)
10101 Mutex::Locker
lock(client_lock
);
10102 vinodeno_t vparent
= _get_vino(parent
);
10103 ldout(cct
, 3) << "ll_lookupx " << vparent
<< " " << name
<< dendl
;
10104 tout(cct
) << "ll_lookupx" << std::endl
;
10105 tout(cct
) << name
<< std::endl
;
10108 if (!cct
->_conf
->fuse_default_permissions
) {
10109 r
= may_lookup(parent
, perms
);
10114 string
dname(name
);
10117 unsigned mask
= statx_to_mask(flags
, want
);
10118 r
= _lookup(parent
, dname
, mask
, &in
, perms
);
10124 fill_statx(in
, mask
, stx
);
10128 ldout(cct
, 3) << "ll_lookupx " << vparent
<< " " << name
10129 << " -> " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
10130 tout(cct
) << stx
->stx_ino
<< std::endl
;
10135 int Client::ll_walk(const char* name
, Inode
**out
, struct ceph_statx
*stx
,
10136 unsigned int want
, unsigned int flags
, const UserPerm
& perms
)
10138 Mutex::Locker
lock(client_lock
);
10139 filepath
fp(name
, 0);
10142 unsigned mask
= statx_to_mask(flags
, want
);
10144 ldout(cct
, 3) << "ll_walk" << name
<< dendl
;
10145 tout(cct
) << "ll_walk" << std::endl
;
10146 tout(cct
) << name
<< std::endl
;
10148 rc
= path_walk(fp
, &in
, perms
, !(flags
& AT_SYMLINK_NOFOLLOW
), mask
);
10150 /* zero out mask, just in case... */
10157 fill_statx(in
, mask
, stx
);
10164 void Client::_ll_get(Inode
*in
)
10166 if (in
->ll_ref
== 0) {
10168 if (in
->is_dir() && !in
->dn_set
.empty()) {
10169 assert(in
->dn_set
.size() == 1); // dirs can't be hard-linked
10170 in
->get_first_parent()->get(); // pin dentry
10174 ldout(cct
, 20) << "_ll_get " << in
<< " " << in
->ino
<< " -> " << in
->ll_ref
<< dendl
;
10177 int Client::_ll_put(Inode
*in
, int num
)
10180 ldout(cct
, 20) << "_ll_put " << in
<< " " << in
->ino
<< " " << num
<< " -> " << in
->ll_ref
<< dendl
;
10181 if (in
->ll_ref
== 0) {
10182 if (in
->is_dir() && !in
->dn_set
.empty()) {
10183 assert(in
->dn_set
.size() == 1); // dirs can't be hard-linked
10184 in
->get_first_parent()->put(); // unpin dentry
10193 void Client::_ll_drop_pins()
10195 ldout(cct
, 10) << "_ll_drop_pins" << dendl
;
10196 ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator next
;
10197 for (ceph::unordered_map
<vinodeno_t
, Inode
*>::iterator it
= inode_map
.begin();
10198 it
!= inode_map
.end();
10200 Inode
*in
= it
->second
;
10204 _ll_put(in
, in
->ll_ref
);
10208 bool Client::ll_forget(Inode
*in
, int count
)
10210 Mutex::Locker
lock(client_lock
);
10211 inodeno_t ino
= _get_inodeno(in
);
10213 ldout(cct
, 3) << "ll_forget " << ino
<< " " << count
<< dendl
;
10214 tout(cct
) << "ll_forget" << std::endl
;
10215 tout(cct
) << ino
.val
<< std::endl
;
10216 tout(cct
) << count
<< std::endl
;
10218 if (ino
== 1) return true; // ignore forget on root.
10221 if (in
->ll_ref
< count
) {
10222 ldout(cct
, 1) << "WARNING: ll_forget on " << ino
<< " " << count
10223 << ", which only has ll_ref=" << in
->ll_ref
<< dendl
;
10224 _ll_put(in
, in
->ll_ref
);
10227 if (_ll_put(in
, count
) == 0)
10234 bool Client::ll_put(Inode
*in
)
10236 /* ll_forget already takes the lock */
10237 return ll_forget(in
, 1);
10240 snapid_t
Client::ll_get_snapid(Inode
*in
)
10242 Mutex::Locker
lock(client_lock
);
10246 Inode
*Client::ll_get_inode(ino_t ino
)
10248 Mutex::Locker
lock(client_lock
);
10249 vinodeno_t vino
= _map_faked_ino(ino
);
10250 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
10251 if (p
== inode_map
.end())
10253 Inode
*in
= p
->second
;
10258 Inode
*Client::ll_get_inode(vinodeno_t vino
)
10260 Mutex::Locker
lock(client_lock
);
10261 unordered_map
<vinodeno_t
,Inode
*>::iterator p
= inode_map
.find(vino
);
10262 if (p
== inode_map
.end())
10264 Inode
*in
= p
->second
;
10269 int Client::_ll_getattr(Inode
*in
, int caps
, const UserPerm
& perms
)
10271 vinodeno_t vino
= _get_vino(in
);
10273 ldout(cct
, 3) << "ll_getattr " << vino
<< dendl
;
10274 tout(cct
) << "ll_getattr" << std::endl
;
10275 tout(cct
) << vino
.ino
.val
<< std::endl
;
10277 if (vino
.snapid
< CEPH_NOSNAP
)
10280 return _getattr(in
, caps
, perms
);
10283 int Client::ll_getattr(Inode
*in
, struct stat
*attr
, const UserPerm
& perms
)
10285 Mutex::Locker
lock(client_lock
);
10287 int res
= _ll_getattr(in
, CEPH_STAT_CAP_INODE_ALL
, perms
);
10290 fill_stat(in
, attr
);
10291 ldout(cct
, 3) << "ll_getattr " << _get_vino(in
) << " = " << res
<< dendl
;
10295 int Client::ll_getattrx(Inode
*in
, struct ceph_statx
*stx
, unsigned int want
,
10296 unsigned int flags
, const UserPerm
& perms
)
10298 Mutex::Locker
lock(client_lock
);
10301 unsigned mask
= statx_to_mask(flags
, want
);
10303 if (mask
&& !in
->caps_issued_mask(mask
))
10304 res
= _ll_getattr(in
, mask
, perms
);
10307 fill_statx(in
, mask
, stx
);
10308 ldout(cct
, 3) << "ll_getattrx " << _get_vino(in
) << " = " << res
<< dendl
;
10312 int Client::_ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
10313 const UserPerm
& perms
, InodeRef
*inp
)
10315 vinodeno_t vino
= _get_vino(in
);
10317 ldout(cct
, 3) << "ll_setattrx " << vino
<< " mask " << hex
<< mask
<< dec
10319 tout(cct
) << "ll_setattrx" << std::endl
;
10320 tout(cct
) << vino
.ino
.val
<< std::endl
;
10321 tout(cct
) << stx
->stx_mode
<< std::endl
;
10322 tout(cct
) << stx
->stx_uid
<< std::endl
;
10323 tout(cct
) << stx
->stx_gid
<< std::endl
;
10324 tout(cct
) << stx
->stx_size
<< std::endl
;
10325 tout(cct
) << stx
->stx_mtime
<< std::endl
;
10326 tout(cct
) << stx
->stx_atime
<< std::endl
;
10327 tout(cct
) << stx
->stx_btime
<< std::endl
;
10328 tout(cct
) << mask
<< std::endl
;
10330 if (!cct
->_conf
->fuse_default_permissions
) {
10331 int res
= may_setattr(in
, stx
, mask
, perms
);
10336 mask
&= ~(CEPH_SETATTR_MTIME_NOW
| CEPH_SETATTR_ATIME_NOW
);
10338 return __setattrx(in
, stx
, mask
, perms
, inp
);
10341 int Client::ll_setattrx(Inode
*in
, struct ceph_statx
*stx
, int mask
,
10342 const UserPerm
& perms
)
10344 Mutex::Locker
lock(client_lock
);
10345 InodeRef
target(in
);
10346 int res
= _ll_setattrx(in
, stx
, mask
, perms
, &target
);
10348 assert(in
== target
.get());
10349 fill_statx(in
, in
->caps_issued(), stx
);
10352 ldout(cct
, 3) << "ll_setattrx " << _get_vino(in
) << " = " << res
<< dendl
;
10356 int Client::ll_setattr(Inode
*in
, struct stat
*attr
, int mask
,
10357 const UserPerm
& perms
)
10359 struct ceph_statx stx
;
10360 stat_to_statx(attr
, &stx
);
10362 Mutex::Locker
lock(client_lock
);
10363 InodeRef
target(in
);
10364 int res
= _ll_setattrx(in
, &stx
, mask
, perms
, &target
);
10366 assert(in
== target
.get());
10367 fill_stat(in
, attr
);
10370 ldout(cct
, 3) << "ll_setattr " << _get_vino(in
) << " = " << res
<< dendl
;
10378 int Client::getxattr(const char *path
, const char *name
, void *value
, size_t size
,
10379 const UserPerm
& perms
)
10381 Mutex::Locker
lock(client_lock
);
10383 int r
= Client::path_walk(path
, &in
, perms
, true, CEPH_STAT_CAP_XATTR
);
10386 return _getxattr(in
, name
, value
, size
, perms
);
10389 int Client::lgetxattr(const char *path
, const char *name
, void *value
, size_t size
,
10390 const UserPerm
& perms
)
10392 Mutex::Locker
lock(client_lock
);
10394 int r
= Client::path_walk(path
, &in
, perms
, false, CEPH_STAT_CAP_XATTR
);
10397 return _getxattr(in
, name
, value
, size
, perms
);
10400 int Client::fgetxattr(int fd
, const char *name
, void *value
, size_t size
,
10401 const UserPerm
& perms
)
10403 Mutex::Locker
lock(client_lock
);
10404 Fh
*f
= get_filehandle(fd
);
10407 return _getxattr(f
->inode
, name
, value
, size
, perms
);
10410 int Client::listxattr(const char *path
, char *list
, size_t size
,
10411 const UserPerm
& perms
)
10413 Mutex::Locker
lock(client_lock
);
10415 int r
= Client::path_walk(path
, &in
, perms
, true, CEPH_STAT_CAP_XATTR
);
10418 return Client::_listxattr(in
.get(), list
, size
, perms
);
10421 int Client::llistxattr(const char *path
, char *list
, size_t size
,
10422 const UserPerm
& perms
)
10424 Mutex::Locker
lock(client_lock
);
10426 int r
= Client::path_walk(path
, &in
, perms
, false, CEPH_STAT_CAP_XATTR
);
10429 return Client::_listxattr(in
.get(), list
, size
, perms
);
10432 int Client::flistxattr(int fd
, char *list
, size_t size
, const UserPerm
& perms
)
10434 Mutex::Locker
lock(client_lock
);
10435 Fh
*f
= get_filehandle(fd
);
10438 return Client::_listxattr(f
->inode
.get(), list
, size
, perms
);
10441 int Client::removexattr(const char *path
, const char *name
,
10442 const UserPerm
& perms
)
10444 Mutex::Locker
lock(client_lock
);
10446 int r
= Client::path_walk(path
, &in
, perms
, true);
10449 return _removexattr(in
, name
, perms
);
10452 int Client::lremovexattr(const char *path
, const char *name
,
10453 const UserPerm
& perms
)
10455 Mutex::Locker
lock(client_lock
);
10457 int r
= Client::path_walk(path
, &in
, perms
, false);
10460 return _removexattr(in
, name
, perms
);
10463 int Client::fremovexattr(int fd
, const char *name
, const UserPerm
& perms
)
10465 Mutex::Locker
lock(client_lock
);
10466 Fh
*f
= get_filehandle(fd
);
10469 return _removexattr(f
->inode
, name
, perms
);
10472 int Client::setxattr(const char *path
, const char *name
, const void *value
,
10473 size_t size
, int flags
, const UserPerm
& perms
)
10475 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10477 Mutex::Locker
lock(client_lock
);
10479 int r
= Client::path_walk(path
, &in
, perms
, true);
10482 return _setxattr(in
, name
, value
, size
, flags
, perms
);
10485 int Client::lsetxattr(const char *path
, const char *name
, const void *value
,
10486 size_t size
, int flags
, const UserPerm
& perms
)
10488 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10490 Mutex::Locker
lock(client_lock
);
10492 int r
= Client::path_walk(path
, &in
, perms
, false);
10495 return _setxattr(in
, name
, value
, size
, flags
, perms
);
10498 int Client::fsetxattr(int fd
, const char *name
, const void *value
, size_t size
,
10499 int flags
, const UserPerm
& perms
)
10501 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10503 Mutex::Locker
lock(client_lock
);
10504 Fh
*f
= get_filehandle(fd
);
10507 return _setxattr(f
->inode
, name
, value
, size
, flags
, perms
);
10510 int Client::_getxattr(Inode
*in
, const char *name
, void *value
, size_t size
,
10511 const UserPerm
& perms
)
10515 const VXattr
*vxattr
= _match_vxattr(in
, name
);
10519 // Do a force getattr to get the latest quota before returning
10520 // a value to userspace.
10521 r
= _getattr(in
, 0, perms
, true);
10523 // Error from getattr!
10527 // call pointer-to-member function
10529 if (!(vxattr
->exists_cb
&& !(this->*(vxattr
->exists_cb
))(in
))) {
10530 r
= (this->*(vxattr
->getxattr_cb
))(in
, buf
, sizeof(buf
));
10536 if (r
> (int)size
) {
10538 } else if (r
> 0) {
10539 memcpy(value
, buf
, r
);
10545 if (acl_type
== NO_ACL
&& !strncmp(name
, "system.", 7)) {
10550 r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
10554 if (in
->xattrs
.count(n
)) {
10555 r
= in
->xattrs
[n
].length();
10556 if (r
> 0 && size
!= 0) {
10557 if (size
>= (unsigned)r
)
10558 memcpy(value
, in
->xattrs
[n
].c_str(), r
);
10565 ldout(cct
, 3) << "_getxattr(" << in
->ino
<< ", \"" << name
<< "\", " << size
<< ") = " << r
<< dendl
;
10569 int Client::_getxattr(InodeRef
&in
, const char *name
, void *value
, size_t size
,
10570 const UserPerm
& perms
)
10572 if (cct
->_conf
->client_permissions
) {
10573 int r
= xattr_permission(in
.get(), name
, MAY_READ
, perms
);
10577 return _getxattr(in
.get(), name
, value
, size
, perms
);
10580 int Client::ll_getxattr(Inode
*in
, const char *name
, void *value
,
10581 size_t size
, const UserPerm
& perms
)
10583 Mutex::Locker
lock(client_lock
);
10585 vinodeno_t vino
= _get_vino(in
);
10587 ldout(cct
, 3) << "ll_getxattr " << vino
<< " " << name
<< " size " << size
<< dendl
;
10588 tout(cct
) << "ll_getxattr" << std::endl
;
10589 tout(cct
) << vino
.ino
.val
<< std::endl
;
10590 tout(cct
) << name
<< std::endl
;
10592 if (!cct
->_conf
->fuse_default_permissions
) {
10593 int r
= xattr_permission(in
, name
, MAY_READ
, perms
);
10598 return _getxattr(in
, name
, value
, size
, perms
);
10601 int Client::_listxattr(Inode
*in
, char *name
, size_t size
,
10602 const UserPerm
& perms
)
10604 int r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
10606 for (map
<string
,bufferptr
>::iterator p
= in
->xattrs
.begin();
10607 p
!= in
->xattrs
.end();
10609 r
+= p
->first
.length() + 1;
10611 const VXattr
*vxattrs
= _get_vxattrs(in
);
10612 r
+= _vxattrs_name_size(vxattrs
);
10615 if (size
>= (unsigned)r
) {
10616 for (map
<string
,bufferptr
>::iterator p
= in
->xattrs
.begin();
10617 p
!= in
->xattrs
.end();
10619 memcpy(name
, p
->first
.c_str(), p
->first
.length());
10620 name
+= p
->first
.length();
10625 for (int i
= 0; !vxattrs
[i
].name
.empty(); i
++) {
10626 const VXattr
& vxattr
= vxattrs
[i
];
10629 // call pointer-to-member function
10630 if(vxattr
.exists_cb
&& !(this->*(vxattr
.exists_cb
))(in
))
10632 memcpy(name
, vxattr
.name
.c_str(), vxattr
.name
.length());
10633 name
+= vxattr
.name
.length();
10642 ldout(cct
, 3) << "_listxattr(" << in
->ino
<< ", " << size
<< ") = " << r
<< dendl
;
10646 int Client::ll_listxattr(Inode
*in
, char *names
, size_t size
,
10647 const UserPerm
& perms
)
10649 Mutex::Locker
lock(client_lock
);
10651 vinodeno_t vino
= _get_vino(in
);
10653 ldout(cct
, 3) << "ll_listxattr " << vino
<< " size " << size
<< dendl
;
10654 tout(cct
) << "ll_listxattr" << std::endl
;
10655 tout(cct
) << vino
.ino
.val
<< std::endl
;
10656 tout(cct
) << size
<< std::endl
;
10658 return _listxattr(in
, names
, size
, perms
);
10661 int Client::_do_setxattr(Inode
*in
, const char *name
, const void *value
,
10662 size_t size
, int flags
, const UserPerm
& perms
)
10665 int xattr_flags
= 0;
10667 xattr_flags
|= CEPH_XATTR_REMOVE
;
10668 if (flags
& XATTR_CREATE
)
10669 xattr_flags
|= CEPH_XATTR_CREATE
;
10670 if (flags
& XATTR_REPLACE
)
10671 xattr_flags
|= CEPH_XATTR_REPLACE
;
10673 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SETXATTR
);
10675 in
->make_nosnap_relative_path(path
);
10676 req
->set_filepath(path
);
10677 req
->set_string2(name
);
10678 req
->set_inode(in
);
10679 req
->head
.args
.setxattr
.flags
= xattr_flags
;
10682 bl
.append((const char*)value
, size
);
10685 int res
= make_request(req
, perms
);
10688 ldout(cct
, 3) << "_setxattr(" << in
->ino
<< ", \"" << name
<< "\") = " <<
10693 int Client::_setxattr(Inode
*in
, const char *name
, const void *value
,
10694 size_t size
, int flags
, const UserPerm
& perms
)
10696 if (in
->snapid
!= CEPH_NOSNAP
) {
10700 bool posix_acl_xattr
= false;
10701 if (acl_type
== POSIX_ACL
)
10702 posix_acl_xattr
= !strncmp(name
, "system.", 7);
10704 if (strncmp(name
, "user.", 5) &&
10705 strncmp(name
, "security.", 9) &&
10706 strncmp(name
, "trusted.", 8) &&
10707 strncmp(name
, "ceph.", 5) &&
10709 return -EOPNOTSUPP
;
10711 if (posix_acl_xattr
) {
10712 if (!strcmp(name
, ACL_EA_ACCESS
)) {
10713 mode_t new_mode
= in
->mode
;
10715 int ret
= posix_acl_equiv_mode(value
, size
, &new_mode
);
10722 if (new_mode
!= in
->mode
) {
10723 struct ceph_statx stx
;
10724 stx
.stx_mode
= new_mode
;
10725 ret
= _do_setattr(in
, &stx
, CEPH_SETATTR_MODE
, perms
, NULL
);
10730 } else if (!strcmp(name
, ACL_EA_DEFAULT
)) {
10732 if (!S_ISDIR(in
->mode
))
10734 int ret
= posix_acl_check(value
, size
);
10743 return -EOPNOTSUPP
;
10746 const VXattr
*vxattr
= _match_vxattr(in
, name
);
10747 if (vxattr
&& vxattr
->readonly
)
10748 return -EOPNOTSUPP
;
10751 return _do_setxattr(in
, name
, value
, size
, flags
, perms
);
10754 int Client::_setxattr(InodeRef
&in
, const char *name
, const void *value
,
10755 size_t size
, int flags
, const UserPerm
& perms
)
10757 if (cct
->_conf
->client_permissions
) {
10758 int r
= xattr_permission(in
.get(), name
, MAY_WRITE
, perms
);
10762 return _setxattr(in
.get(), name
, value
, size
, flags
, perms
);
10765 int Client::_setxattr_check_data_pool(string
& name
, string
& value
, const OSDMap
*osdmap
)
10768 if (name
== "layout") {
10769 string::iterator begin
= value
.begin();
10770 string::iterator end
= value
.end();
10771 keys_and_values
<string::iterator
> p
; // create instance of parser
10772 std::map
<string
, string
> m
; // map to receive results
10773 if (!qi::parse(begin
, end
, p
, m
)) { // returns true if successful
10778 for (map
<string
,string
>::iterator q
= m
.begin(); q
!= m
.end(); ++q
) {
10779 if (q
->first
== "pool") {
10784 } else if (name
== "layout.pool") {
10788 if (tmp
.length()) {
10791 pool
= boost::lexical_cast
<unsigned>(tmp
);
10792 if (!osdmap
->have_pg_pool(pool
))
10794 } catch (boost::bad_lexical_cast
const&) {
10795 pool
= osdmap
->lookup_pg_pool_name(tmp
);
10805 void Client::_setxattr_maybe_wait_for_osdmap(const char *name
, const void *value
, size_t size
)
10807 // For setting pool of layout, MetaRequest need osdmap epoch.
10808 // There is a race which create a new data pool but client and mds both don't have.
10809 // Make client got the latest osdmap which make mds quickly judge whether get newer osdmap.
10810 if (strcmp(name
, "ceph.file.layout.pool") == 0 || strcmp(name
, "ceph.dir.layout.pool") == 0 ||
10811 strcmp(name
, "ceph.file.layout") == 0 || strcmp(name
, "ceph.dir.layout") == 0) {
10812 string
rest(strstr(name
, "layout"));
10813 string
v((const char*)value
, size
);
10814 int r
= objecter
->with_osdmap([&](const OSDMap
& o
) {
10815 return _setxattr_check_data_pool(rest
, v
, &o
);
10818 if (r
== -ENOENT
) {
10820 objecter
->wait_for_latest_osdmap(&ctx
);
10826 int Client::ll_setxattr(Inode
*in
, const char *name
, const void *value
,
10827 size_t size
, int flags
, const UserPerm
& perms
)
10829 _setxattr_maybe_wait_for_osdmap(name
, value
, size
);
10831 Mutex::Locker
lock(client_lock
);
10833 vinodeno_t vino
= _get_vino(in
);
10835 ldout(cct
, 3) << "ll_setxattr " << vino
<< " " << name
<< " size " << size
<< dendl
;
10836 tout(cct
) << "ll_setxattr" << std::endl
;
10837 tout(cct
) << vino
.ino
.val
<< std::endl
;
10838 tout(cct
) << name
<< std::endl
;
10840 if (!cct
->_conf
->fuse_default_permissions
) {
10841 int r
= xattr_permission(in
, name
, MAY_WRITE
, perms
);
10845 return _setxattr(in
, name
, value
, size
, flags
, perms
);
10848 int Client::_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
)
10850 if (in
->snapid
!= CEPH_NOSNAP
) {
10854 // same xattrs supported by kernel client
10855 if (strncmp(name
, "user.", 5) &&
10856 strncmp(name
, "system.", 7) &&
10857 strncmp(name
, "security.", 9) &&
10858 strncmp(name
, "trusted.", 8) &&
10859 strncmp(name
, "ceph.", 5))
10860 return -EOPNOTSUPP
;
10862 const VXattr
*vxattr
= _match_vxattr(in
, name
);
10863 if (vxattr
&& vxattr
->readonly
)
10864 return -EOPNOTSUPP
;
10866 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_RMXATTR
);
10868 in
->make_nosnap_relative_path(path
);
10869 req
->set_filepath(path
);
10870 req
->set_filepath2(name
);
10871 req
->set_inode(in
);
10873 int res
= make_request(req
, perms
);
10876 ldout(cct
, 3) << "_removexattr(" << in
->ino
<< ", \"" << name
<< "\") = " << res
<< dendl
;
10880 int Client::_removexattr(InodeRef
&in
, const char *name
, const UserPerm
& perms
)
10882 if (cct
->_conf
->client_permissions
) {
10883 int r
= xattr_permission(in
.get(), name
, MAY_WRITE
, perms
);
10887 return _removexattr(in
.get(), name
, perms
);
10890 int Client::ll_removexattr(Inode
*in
, const char *name
, const UserPerm
& perms
)
10892 Mutex::Locker
lock(client_lock
);
10894 vinodeno_t vino
= _get_vino(in
);
10896 ldout(cct
, 3) << "ll_removexattr " << vino
<< " " << name
<< dendl
;
10897 tout(cct
) << "ll_removexattr" << std::endl
;
10898 tout(cct
) << vino
.ino
.val
<< std::endl
;
10899 tout(cct
) << name
<< std::endl
;
10901 if (!cct
->_conf
->fuse_default_permissions
) {
10902 int r
= xattr_permission(in
, name
, MAY_WRITE
, perms
);
10907 return _removexattr(in
, name
, perms
);
10910 bool Client::_vxattrcb_quota_exists(Inode
*in
)
10912 return in
->quota
.is_enable();
10914 size_t Client::_vxattrcb_quota(Inode
*in
, char *val
, size_t size
)
10916 return snprintf(val
, size
,
10917 "max_bytes=%lld max_files=%lld",
10918 (long long int)in
->quota
.max_bytes
,
10919 (long long int)in
->quota
.max_files
);
10921 size_t Client::_vxattrcb_quota_max_bytes(Inode
*in
, char *val
, size_t size
)
10923 return snprintf(val
, size
, "%lld", (long long int)in
->quota
.max_bytes
);
10925 size_t Client::_vxattrcb_quota_max_files(Inode
*in
, char *val
, size_t size
)
10927 return snprintf(val
, size
, "%lld", (long long int)in
->quota
.max_files
);
10930 bool Client::_vxattrcb_layout_exists(Inode
*in
)
10932 return in
->layout
!= file_layout_t();
10934 size_t Client::_vxattrcb_layout(Inode
*in
, char *val
, size_t size
)
10936 int r
= snprintf(val
, size
,
10937 "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=",
10938 (unsigned long long)in
->layout
.stripe_unit
,
10939 (unsigned long long)in
->layout
.stripe_count
,
10940 (unsigned long long)in
->layout
.object_size
);
10941 objecter
->with_osdmap([&](const OSDMap
& o
) {
10942 if (o
.have_pg_pool(in
->layout
.pool_id
))
10943 r
+= snprintf(val
+ r
, size
- r
, "%s",
10944 o
.get_pool_name(in
->layout
.pool_id
).c_str());
10946 r
+= snprintf(val
+ r
, size
- r
, "%" PRIu64
,
10947 (uint64_t)in
->layout
.pool_id
);
10949 if (in
->layout
.pool_ns
.length())
10950 r
+= snprintf(val
+ r
, size
- r
, " pool_namespace=%s",
10951 in
->layout
.pool_ns
.c_str());
10954 size_t Client::_vxattrcb_layout_stripe_unit(Inode
*in
, char *val
, size_t size
)
10956 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.stripe_unit
);
10958 size_t Client::_vxattrcb_layout_stripe_count(Inode
*in
, char *val
, size_t size
)
10960 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.stripe_count
);
10962 size_t Client::_vxattrcb_layout_object_size(Inode
*in
, char *val
, size_t size
)
10964 return snprintf(val
, size
, "%lld", (unsigned long long)in
->layout
.object_size
);
10966 size_t Client::_vxattrcb_layout_pool(Inode
*in
, char *val
, size_t size
)
10969 objecter
->with_osdmap([&](const OSDMap
& o
) {
10970 if (o
.have_pg_pool(in
->layout
.pool_id
))
10971 r
= snprintf(val
, size
, "%s", o
.get_pool_name(
10972 in
->layout
.pool_id
).c_str());
10974 r
= snprintf(val
, size
, "%" PRIu64
, (uint64_t)in
->layout
.pool_id
);
10978 size_t Client::_vxattrcb_layout_pool_namespace(Inode
*in
, char *val
, size_t size
)
10980 return snprintf(val
, size
, "%s", in
->layout
.pool_ns
.c_str());
10982 size_t Client::_vxattrcb_dir_entries(Inode
*in
, char *val
, size_t size
)
10984 return snprintf(val
, size
, "%lld", (unsigned long long)(in
->dirstat
.nfiles
+ in
->dirstat
.nsubdirs
));
10986 size_t Client::_vxattrcb_dir_files(Inode
*in
, char *val
, size_t size
)
10988 return snprintf(val
, size
, "%lld", (unsigned long long)in
->dirstat
.nfiles
);
10990 size_t Client::_vxattrcb_dir_subdirs(Inode
*in
, char *val
, size_t size
)
10992 return snprintf(val
, size
, "%lld", (unsigned long long)in
->dirstat
.nsubdirs
);
10994 size_t Client::_vxattrcb_dir_rentries(Inode
*in
, char *val
, size_t size
)
10996 return snprintf(val
, size
, "%lld", (unsigned long long)(in
->rstat
.rfiles
+ in
->rstat
.rsubdirs
));
10998 size_t Client::_vxattrcb_dir_rfiles(Inode
*in
, char *val
, size_t size
)
11000 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rfiles
);
11002 size_t Client::_vxattrcb_dir_rsubdirs(Inode
*in
, char *val
, size_t size
)
11004 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rsubdirs
);
11006 size_t Client::_vxattrcb_dir_rbytes(Inode
*in
, char *val
, size_t size
)
11008 return snprintf(val
, size
, "%lld", (unsigned long long)in
->rstat
.rbytes
);
11010 size_t Client::_vxattrcb_dir_rctime(Inode
*in
, char *val
, size_t size
)
11012 return snprintf(val
, size
, "%ld.09%ld", (long)in
->rstat
.rctime
.sec(),
11013 (long)in
->rstat
.rctime
.nsec());
11016 #define CEPH_XATTR_NAME(_type, _name) "ceph." #_type "." #_name
11017 #define CEPH_XATTR_NAME2(_type, _name, _name2) "ceph." #_type "." #_name "." #_name2
11019 #define XATTR_NAME_CEPH(_type, _name) \
11021 name: CEPH_XATTR_NAME(_type, _name), \
11022 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
11027 #define XATTR_LAYOUT_FIELD(_type, _name, _field) \
11029 name: CEPH_XATTR_NAME2(_type, _name, _field), \
11030 getxattr_cb: &Client::_vxattrcb_ ## _name ## _ ## _field, \
11033 exists_cb: &Client::_vxattrcb_layout_exists, \
11035 #define XATTR_QUOTA_FIELD(_type, _name) \
11037 name: CEPH_XATTR_NAME(_type, _name), \
11038 getxattr_cb: &Client::_vxattrcb_ ## _type ## _ ## _name, \
11041 exists_cb: &Client::_vxattrcb_quota_exists, \
11044 const Client::VXattr
Client::_dir_vxattrs
[] = {
11046 name
: "ceph.dir.layout",
11047 getxattr_cb
: &Client::_vxattrcb_layout
,
11050 exists_cb
: &Client::_vxattrcb_layout_exists
,
11052 XATTR_LAYOUT_FIELD(dir
, layout
, stripe_unit
),
11053 XATTR_LAYOUT_FIELD(dir
, layout
, stripe_count
),
11054 XATTR_LAYOUT_FIELD(dir
, layout
, object_size
),
11055 XATTR_LAYOUT_FIELD(dir
, layout
, pool
),
11056 XATTR_LAYOUT_FIELD(dir
, layout
, pool_namespace
),
11057 XATTR_NAME_CEPH(dir
, entries
),
11058 XATTR_NAME_CEPH(dir
, files
),
11059 XATTR_NAME_CEPH(dir
, subdirs
),
11060 XATTR_NAME_CEPH(dir
, rentries
),
11061 XATTR_NAME_CEPH(dir
, rfiles
),
11062 XATTR_NAME_CEPH(dir
, rsubdirs
),
11063 XATTR_NAME_CEPH(dir
, rbytes
),
11064 XATTR_NAME_CEPH(dir
, rctime
),
11066 name
: "ceph.quota",
11067 getxattr_cb
: &Client::_vxattrcb_quota
,
11070 exists_cb
: &Client::_vxattrcb_quota_exists
,
11072 XATTR_QUOTA_FIELD(quota
, max_bytes
),
11073 XATTR_QUOTA_FIELD(quota
, max_files
),
11074 { name
: "" } /* Required table terminator */
11077 const Client::VXattr
Client::_file_vxattrs
[] = {
11079 name
: "ceph.file.layout",
11080 getxattr_cb
: &Client::_vxattrcb_layout
,
11083 exists_cb
: &Client::_vxattrcb_layout_exists
,
11085 XATTR_LAYOUT_FIELD(file
, layout
, stripe_unit
),
11086 XATTR_LAYOUT_FIELD(file
, layout
, stripe_count
),
11087 XATTR_LAYOUT_FIELD(file
, layout
, object_size
),
11088 XATTR_LAYOUT_FIELD(file
, layout
, pool
),
11089 XATTR_LAYOUT_FIELD(file
, layout
, pool_namespace
),
11090 { name
: "" } /* Required table terminator */
11093 const Client::VXattr
*Client::_get_vxattrs(Inode
*in
)
11096 return _dir_vxattrs
;
11097 else if (in
->is_file())
11098 return _file_vxattrs
;
11102 const Client::VXattr
*Client::_match_vxattr(Inode
*in
, const char *name
)
11104 if (strncmp(name
, "ceph.", 5) == 0) {
11105 const VXattr
*vxattr
= _get_vxattrs(in
);
11107 while (!vxattr
->name
.empty()) {
11108 if (vxattr
->name
== name
)
11117 size_t Client::_vxattrs_calcu_name_size(const VXattr
*vxattr
)
11120 while (!vxattr
->name
.empty()) {
11121 if (!vxattr
->hidden
)
11122 len
+= vxattr
->name
.length() + 1;
11128 int Client::ll_readlink(Inode
*in
, char *buf
, size_t buflen
, const UserPerm
& perms
)
11130 Mutex::Locker
lock(client_lock
);
11132 vinodeno_t vino
= _get_vino(in
);
11134 ldout(cct
, 3) << "ll_readlink " << vino
<< dendl
;
11135 tout(cct
) << "ll_readlink" << std::endl
;
11136 tout(cct
) << vino
.ino
.val
<< std::endl
;
11138 set
<Dentry
*>::iterator dn
= in
->dn_set
.begin();
11139 while (dn
!= in
->dn_set
.end()) {
11144 int r
= _readlink(in
, buf
, buflen
); // FIXME: no permission checking!
11145 ldout(cct
, 3) << "ll_readlink " << vino
<< " = " << r
<< dendl
;
11149 int Client::_mknod(Inode
*dir
, const char *name
, mode_t mode
, dev_t rdev
,
11150 const UserPerm
& perms
, InodeRef
*inp
)
11152 ldout(cct
, 3) << "_mknod(" << dir
->ino
<< " " << name
<< ", 0" << oct
11153 << mode
<< dec
<< ", " << rdev
<< ", uid " << perms
.uid()
11154 << ", gid " << perms
.gid() << ")" << dendl
;
11156 if (strlen(name
) > NAME_MAX
)
11157 return -ENAMETOOLONG
;
11159 if (dir
->snapid
!= CEPH_NOSNAP
) {
11162 if (is_quota_files_exceeded(dir
, perms
)) {
11166 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_MKNOD
);
11169 dir
->make_nosnap_relative_path(path
);
11170 path
.push_dentry(name
);
11171 req
->set_filepath(path
);
11172 req
->set_inode(dir
);
11173 req
->head
.args
.mknod
.rdev
= rdev
;
11174 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11175 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11177 bufferlist xattrs_bl
;
11178 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perms
);
11181 req
->head
.args
.mknod
.mode
= mode
;
11182 if (xattrs_bl
.length() > 0)
11183 req
->set_data(xattrs_bl
);
11186 res
= get_or_create(dir
, name
, &de
);
11189 req
->set_dentry(de
);
11191 res
= make_request(req
, perms
, inp
);
11195 ldout(cct
, 3) << "mknod(" << path
<< ", 0" << oct
<< mode
<< dec
<< ") = " << res
<< dendl
;
11203 int Client::ll_mknod(Inode
*parent
, const char *name
, mode_t mode
,
11204 dev_t rdev
, struct stat
*attr
, Inode
**out
,
11205 const UserPerm
& perms
)
11207 Mutex::Locker
lock(client_lock
);
11209 vinodeno_t vparent
= _get_vino(parent
);
11211 ldout(cct
, 3) << "ll_mknod " << vparent
<< " " << name
<< dendl
;
11212 tout(cct
) << "ll_mknod" << std::endl
;
11213 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11214 tout(cct
) << name
<< std::endl
;
11215 tout(cct
) << mode
<< std::endl
;
11216 tout(cct
) << rdev
<< std::endl
;
11218 if (!cct
->_conf
->fuse_default_permissions
) {
11219 int r
= may_create(parent
, perms
);
11225 int r
= _mknod(parent
, name
, mode
, rdev
, perms
, &in
);
11227 fill_stat(in
, attr
);
11230 tout(cct
) << attr
->st_ino
<< std::endl
;
11231 ldout(cct
, 3) << "ll_mknod " << vparent
<< " " << name
11232 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11237 int Client::ll_mknodx(Inode
*parent
, const char *name
, mode_t mode
,
11238 dev_t rdev
, Inode
**out
,
11239 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
11240 const UserPerm
& perms
)
11242 unsigned caps
= statx_to_mask(flags
, want
);
11243 Mutex::Locker
lock(client_lock
);
11245 vinodeno_t vparent
= _get_vino(parent
);
11247 ldout(cct
, 3) << "ll_mknodx " << vparent
<< " " << name
<< dendl
;
11248 tout(cct
) << "ll_mknodx" << std::endl
;
11249 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11250 tout(cct
) << name
<< std::endl
;
11251 tout(cct
) << mode
<< std::endl
;
11252 tout(cct
) << rdev
<< std::endl
;
11254 if (!cct
->_conf
->fuse_default_permissions
) {
11255 int r
= may_create(parent
, perms
);
11261 int r
= _mknod(parent
, name
, mode
, rdev
, perms
, &in
);
11263 fill_statx(in
, caps
, stx
);
11266 tout(cct
) << stx
->stx_ino
<< std::endl
;
11267 ldout(cct
, 3) << "ll_mknodx " << vparent
<< " " << name
11268 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11273 int Client::_create(Inode
*dir
, const char *name
, int flags
, mode_t mode
,
11274 InodeRef
*inp
, Fh
**fhp
, int stripe_unit
, int stripe_count
,
11275 int object_size
, const char *data_pool
, bool *created
,
11276 const UserPerm
& perms
)
11278 ldout(cct
, 3) << "_create(" << dir
->ino
<< " " << name
<< ", 0" << oct
<<
11279 mode
<< dec
<< ")" << dendl
;
11281 if (strlen(name
) > NAME_MAX
)
11282 return -ENAMETOOLONG
;
11283 if (dir
->snapid
!= CEPH_NOSNAP
) {
11286 if (is_quota_files_exceeded(dir
, perms
)) {
11290 // use normalized flags to generate cmode
11291 int cmode
= ceph_flags_to_mode(ceph_flags_sys2wire(flags
));
11295 int64_t pool_id
= -1;
11296 if (data_pool
&& *data_pool
) {
11297 pool_id
= objecter
->with_osdmap(
11298 std::mem_fn(&OSDMap::lookup_pg_pool_name
), data_pool
);
11301 if (pool_id
> 0xffffffffll
)
11302 return -ERANGE
; // bummer!
11305 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_CREATE
);
11308 dir
->make_nosnap_relative_path(path
);
11309 path
.push_dentry(name
);
11310 req
->set_filepath(path
);
11311 req
->set_inode(dir
);
11312 req
->head
.args
.open
.flags
= ceph_flags_sys2wire(flags
| O_CREAT
);
11314 req
->head
.args
.open
.stripe_unit
= stripe_unit
;
11315 req
->head
.args
.open
.stripe_count
= stripe_count
;
11316 req
->head
.args
.open
.object_size
= object_size
;
11317 if (cct
->_conf
->client_debug_getattr_caps
)
11318 req
->head
.args
.open
.mask
= DEBUG_GETATTR_CAPS
;
11320 req
->head
.args
.open
.mask
= 0;
11321 req
->head
.args
.open
.pool
= pool_id
;
11322 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11323 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11326 bufferlist xattrs_bl
;
11327 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perms
);
11330 req
->head
.args
.open
.mode
= mode
;
11331 if (xattrs_bl
.length() > 0)
11332 req
->set_data(xattrs_bl
);
11335 res
= get_or_create(dir
, name
, &de
);
11338 req
->set_dentry(de
);
11340 res
= make_request(req
, perms
, inp
, created
);
11345 /* If the caller passed a value in fhp, do the open */
11347 (*inp
)->get_open_ref(cmode
);
11348 *fhp
= _create_fh(inp
->get(), flags
, cmode
, perms
);
11354 ldout(cct
, 3) << "create(" << path
<< ", 0" << oct
<< mode
<< dec
11355 << " layout " << stripe_unit
11356 << ' ' << stripe_count
11357 << ' ' << object_size
11358 <<") = " << res
<< dendl
;
11367 int Client::_mkdir(Inode
*dir
, const char *name
, mode_t mode
, const UserPerm
& perm
,
11370 ldout(cct
, 3) << "_mkdir(" << dir
->ino
<< " " << name
<< ", 0" << oct
11371 << mode
<< dec
<< ", uid " << perm
.uid()
11372 << ", gid " << perm
.gid() << ")" << dendl
;
11374 if (strlen(name
) > NAME_MAX
)
11375 return -ENAMETOOLONG
;
11377 if (dir
->snapid
!= CEPH_NOSNAP
&& dir
->snapid
!= CEPH_SNAPDIR
) {
11380 if (is_quota_files_exceeded(dir
, perm
)) {
11383 MetaRequest
*req
= new MetaRequest(dir
->snapid
== CEPH_SNAPDIR
?
11384 CEPH_MDS_OP_MKSNAP
: CEPH_MDS_OP_MKDIR
);
11387 dir
->make_nosnap_relative_path(path
);
11388 path
.push_dentry(name
);
11389 req
->set_filepath(path
);
11390 req
->set_inode(dir
);
11391 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11392 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11395 bufferlist xattrs_bl
;
11396 int res
= _posix_acl_create(dir
, &mode
, xattrs_bl
, perm
);
11399 req
->head
.args
.mkdir
.mode
= mode
;
11400 if (xattrs_bl
.length() > 0)
11401 req
->set_data(xattrs_bl
);
11404 res
= get_or_create(dir
, name
, &de
);
11407 req
->set_dentry(de
);
11409 ldout(cct
, 10) << "_mkdir: making request" << dendl
;
11410 res
= make_request(req
, perm
, inp
);
11411 ldout(cct
, 10) << "_mkdir result is " << res
<< dendl
;
11415 ldout(cct
, 3) << "_mkdir(" << path
<< ", 0" << oct
<< mode
<< dec
<< ") = " << res
<< dendl
;
11423 int Client::ll_mkdir(Inode
*parent
, const char *name
, mode_t mode
,
11424 struct stat
*attr
, Inode
**out
, const UserPerm
& perm
)
11426 Mutex::Locker
lock(client_lock
);
11428 vinodeno_t vparent
= _get_vino(parent
);
11430 ldout(cct
, 3) << "ll_mkdir " << vparent
<< " " << name
<< dendl
;
11431 tout(cct
) << "ll_mkdir" << std::endl
;
11432 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11433 tout(cct
) << name
<< std::endl
;
11434 tout(cct
) << mode
<< std::endl
;
11436 if (!cct
->_conf
->fuse_default_permissions
) {
11437 int r
= may_create(parent
, perm
);
11443 int r
= _mkdir(parent
, name
, mode
, perm
, &in
);
11445 fill_stat(in
, attr
);
11448 tout(cct
) << attr
->st_ino
<< std::endl
;
11449 ldout(cct
, 3) << "ll_mkdir " << vparent
<< " " << name
11450 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11455 int Client::ll_mkdirx(Inode
*parent
, const char *name
, mode_t mode
, Inode
**out
,
11456 struct ceph_statx
*stx
, unsigned want
, unsigned flags
,
11457 const UserPerm
& perms
)
11459 Mutex::Locker
lock(client_lock
);
11461 vinodeno_t vparent
= _get_vino(parent
);
11463 ldout(cct
, 3) << "ll_mkdirx " << vparent
<< " " << name
<< dendl
;
11464 tout(cct
) << "ll_mkdirx" << std::endl
;
11465 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11466 tout(cct
) << name
<< std::endl
;
11467 tout(cct
) << mode
<< std::endl
;
11469 if (!cct
->_conf
->fuse_default_permissions
) {
11470 int r
= may_create(parent
, perms
);
11476 int r
= _mkdir(parent
, name
, mode
, perms
, &in
);
11478 fill_statx(in
, statx_to_mask(flags
, want
), stx
);
11484 tout(cct
) << stx
->stx_ino
<< std::endl
;
11485 ldout(cct
, 3) << "ll_mkdirx " << vparent
<< " " << name
11486 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11491 int Client::_symlink(Inode
*dir
, const char *name
, const char *target
,
11492 const UserPerm
& perms
, InodeRef
*inp
)
11494 ldout(cct
, 3) << "_symlink(" << dir
->ino
<< " " << name
<< ", " << target
11495 << ", uid " << perms
.uid() << ", gid " << perms
.gid() << ")"
11498 if (strlen(name
) > NAME_MAX
)
11499 return -ENAMETOOLONG
;
11501 if (dir
->snapid
!= CEPH_NOSNAP
) {
11504 if (is_quota_files_exceeded(dir
, perms
)) {
11508 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_SYMLINK
);
11511 dir
->make_nosnap_relative_path(path
);
11512 path
.push_dentry(name
);
11513 req
->set_filepath(path
);
11514 req
->set_inode(dir
);
11515 req
->set_string2(target
);
11516 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11517 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11520 int res
= get_or_create(dir
, name
, &de
);
11523 req
->set_dentry(de
);
11525 res
= make_request(req
, perms
, inp
);
11528 ldout(cct
, 3) << "_symlink(\"" << path
<< "\", \"" << target
<< "\") = " <<
11537 int Client::ll_symlink(Inode
*parent
, const char *name
, const char *value
,
11538 struct stat
*attr
, Inode
**out
, const UserPerm
& perms
)
11540 Mutex::Locker
lock(client_lock
);
11542 vinodeno_t vparent
= _get_vino(parent
);
11544 ldout(cct
, 3) << "ll_symlink " << vparent
<< " " << name
<< " -> " << value
11546 tout(cct
) << "ll_symlink" << std::endl
;
11547 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11548 tout(cct
) << name
<< std::endl
;
11549 tout(cct
) << value
<< std::endl
;
11551 if (!cct
->_conf
->fuse_default_permissions
) {
11552 int r
= may_create(parent
, perms
);
11558 int r
= _symlink(parent
, name
, value
, perms
, &in
);
11560 fill_stat(in
, attr
);
11563 tout(cct
) << attr
->st_ino
<< std::endl
;
11564 ldout(cct
, 3) << "ll_symlink " << vparent
<< " " << name
11565 << " = " << r
<< " (" << hex
<< attr
->st_ino
<< dec
<< ")" << dendl
;
11570 int Client::ll_symlinkx(Inode
*parent
, const char *name
, const char *value
,
11571 Inode
**out
, struct ceph_statx
*stx
, unsigned want
,
11572 unsigned flags
, const UserPerm
& perms
)
11574 Mutex::Locker
lock(client_lock
);
11576 vinodeno_t vparent
= _get_vino(parent
);
11578 ldout(cct
, 3) << "ll_symlinkx " << vparent
<< " " << name
<< " -> " << value
11580 tout(cct
) << "ll_symlinkx" << std::endl
;
11581 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11582 tout(cct
) << name
<< std::endl
;
11583 tout(cct
) << value
<< std::endl
;
11585 if (!cct
->_conf
->fuse_default_permissions
) {
11586 int r
= may_create(parent
, perms
);
11592 int r
= _symlink(parent
, name
, value
, perms
, &in
);
11594 fill_statx(in
, statx_to_mask(flags
, want
), stx
);
11597 tout(cct
) << stx
->stx_ino
<< std::endl
;
11598 ldout(cct
, 3) << "ll_symlinkx " << vparent
<< " " << name
11599 << " = " << r
<< " (" << hex
<< stx
->stx_ino
<< dec
<< ")" << dendl
;
11604 int Client::_unlink(Inode
*dir
, const char *name
, const UserPerm
& perm
)
11606 ldout(cct
, 3) << "_unlink(" << dir
->ino
<< " " << name
11607 << " uid " << perm
.uid() << " gid " << perm
.gid()
11610 if (dir
->snapid
!= CEPH_NOSNAP
) {
11614 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_UNLINK
);
11617 dir
->make_nosnap_relative_path(path
);
11618 path
.push_dentry(name
);
11619 req
->set_filepath(path
);
11624 int res
= get_or_create(dir
, name
, &de
);
11627 req
->set_dentry(de
);
11628 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11629 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11631 res
= _lookup(dir
, name
, 0, &otherin
, perm
);
11634 req
->set_other_inode(otherin
.get());
11635 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
11637 req
->set_inode(dir
);
11639 res
= make_request(req
, perm
);
11642 ldout(cct
, 3) << "unlink(" << path
<< ") = " << res
<< dendl
;
11650 int Client::ll_unlink(Inode
*in
, const char *name
, const UserPerm
& perm
)
11652 Mutex::Locker
lock(client_lock
);
11654 vinodeno_t vino
= _get_vino(in
);
11656 ldout(cct
, 3) << "ll_unlink " << vino
<< " " << name
<< dendl
;
11657 tout(cct
) << "ll_unlink" << std::endl
;
11658 tout(cct
) << vino
.ino
.val
<< std::endl
;
11659 tout(cct
) << name
<< std::endl
;
11661 if (!cct
->_conf
->fuse_default_permissions
) {
11662 int r
= may_delete(in
, name
, perm
);
11666 return _unlink(in
, name
, perm
);
11669 int Client::_rmdir(Inode
*dir
, const char *name
, const UserPerm
& perms
)
11671 ldout(cct
, 3) << "_rmdir(" << dir
->ino
<< " " << name
<< " uid "
11672 << perms
.uid() << " gid " << perms
.gid() << ")" << dendl
;
11674 if (dir
->snapid
!= CEPH_NOSNAP
&& dir
->snapid
!= CEPH_SNAPDIR
) {
11678 MetaRequest
*req
= new MetaRequest(dir
->snapid
== CEPH_SNAPDIR
? CEPH_MDS_OP_RMSNAP
:CEPH_MDS_OP_RMDIR
);
11680 dir
->make_nosnap_relative_path(path
);
11681 path
.push_dentry(name
);
11682 req
->set_filepath(path
);
11684 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11685 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11686 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
11691 int res
= get_or_create(dir
, name
, &de
);
11694 res
= _lookup(dir
, name
, 0, &in
, perms
);
11697 if (req
->get_op() == CEPH_MDS_OP_RMDIR
) {
11698 req
->set_inode(dir
);
11699 req
->set_dentry(de
);
11700 req
->set_other_inode(in
.get());
11702 unlink(de
, true, true);
11703 req
->set_other_inode(in
.get());
11706 res
= make_request(req
, perms
);
11709 ldout(cct
, 3) << "rmdir(" << path
<< ") = " << res
<< dendl
;
11717 int Client::ll_rmdir(Inode
*in
, const char *name
, const UserPerm
& perms
)
11719 Mutex::Locker
lock(client_lock
);
11721 vinodeno_t vino
= _get_vino(in
);
11723 ldout(cct
, 3) << "ll_rmdir " << vino
<< " " << name
<< dendl
;
11724 tout(cct
) << "ll_rmdir" << std::endl
;
11725 tout(cct
) << vino
.ino
.val
<< std::endl
;
11726 tout(cct
) << name
<< std::endl
;
11728 if (!cct
->_conf
->fuse_default_permissions
) {
11729 int r
= may_delete(in
, name
, perms
);
11734 return _rmdir(in
, name
, perms
);
11737 int Client::_rename(Inode
*fromdir
, const char *fromname
, Inode
*todir
, const char *toname
, const UserPerm
& perm
)
11739 ldout(cct
, 3) << "_rename(" << fromdir
->ino
<< " " << fromname
<< " to "
11740 << todir
->ino
<< " " << toname
11741 << " uid " << perm
.uid() << " gid " << perm
.gid() << ")"
11744 if (fromdir
->snapid
!= todir
->snapid
)
11747 int op
= CEPH_MDS_OP_RENAME
;
11748 if (fromdir
->snapid
!= CEPH_NOSNAP
) {
11749 if (fromdir
== todir
&& fromdir
->snapid
== CEPH_SNAPDIR
)
11750 op
= CEPH_MDS_OP_RENAMESNAP
;
11754 if (fromdir
!= todir
) {
11755 Inode
*fromdir_root
=
11756 fromdir
->quota
.is_enable() ? fromdir
: get_quota_root(fromdir
, perm
);
11757 Inode
*todir_root
=
11758 todir
->quota
.is_enable() ? todir
: get_quota_root(todir
, perm
);
11759 if (fromdir_root
!= todir_root
) {
11765 MetaRequest
*req
= new MetaRequest(op
);
11768 fromdir
->make_nosnap_relative_path(from
);
11769 from
.push_dentry(fromname
);
11771 todir
->make_nosnap_relative_path(to
);
11772 to
.push_dentry(toname
);
11773 req
->set_filepath(to
);
11774 req
->set_filepath2(from
);
11777 int res
= get_or_create(fromdir
, fromname
, &oldde
);
11781 res
= get_or_create(todir
, toname
, &de
);
11785 if (op
== CEPH_MDS_OP_RENAME
) {
11786 req
->set_old_dentry(oldde
);
11787 req
->old_dentry_drop
= CEPH_CAP_FILE_SHARED
;
11788 req
->old_dentry_unless
= CEPH_CAP_FILE_EXCL
;
11790 req
->set_dentry(de
);
11791 req
->dentry_drop
= CEPH_CAP_FILE_SHARED
;
11792 req
->dentry_unless
= CEPH_CAP_FILE_EXCL
;
11794 InodeRef oldin
, otherin
;
11795 res
= _lookup(fromdir
, fromname
, 0, &oldin
, perm
);
11798 req
->set_old_inode(oldin
.get());
11799 req
->old_inode_drop
= CEPH_CAP_LINK_SHARED
;
11801 res
= _lookup(todir
, toname
, 0, &otherin
, perm
);
11802 if (res
!= 0 && res
!= -ENOENT
) {
11804 } else if (res
== 0) {
11805 req
->set_other_inode(otherin
.get());
11806 req
->other_inode_drop
= CEPH_CAP_LINK_SHARED
| CEPH_CAP_LINK_EXCL
;
11809 req
->set_inode(todir
);
11811 // renamesnap reply contains no tracedn, so we need to invalidate
11813 unlink(oldde
, true, true);
11814 unlink(de
, true, true);
11817 res
= make_request(req
, perm
, &target
);
11818 ldout(cct
, 10) << "rename result is " << res
<< dendl
;
11820 // renamed item from our cache
11823 ldout(cct
, 3) << "_rename(" << from
<< ", " << to
<< ") = " << res
<< dendl
;
11831 int Client::ll_rename(Inode
*parent
, const char *name
, Inode
*newparent
,
11832 const char *newname
, const UserPerm
& perm
)
11834 Mutex::Locker
lock(client_lock
);
11836 vinodeno_t vparent
= _get_vino(parent
);
11837 vinodeno_t vnewparent
= _get_vino(newparent
);
11839 ldout(cct
, 3) << "ll_rename " << vparent
<< " " << name
<< " to "
11840 << vnewparent
<< " " << newname
<< dendl
;
11841 tout(cct
) << "ll_rename" << std::endl
;
11842 tout(cct
) << vparent
.ino
.val
<< std::endl
;
11843 tout(cct
) << name
<< std::endl
;
11844 tout(cct
) << vnewparent
.ino
.val
<< std::endl
;
11845 tout(cct
) << newname
<< std::endl
;
11847 if (!cct
->_conf
->fuse_default_permissions
) {
11848 int r
= may_delete(parent
, name
, perm
);
11851 r
= may_delete(newparent
, newname
, perm
);
11852 if (r
< 0 && r
!= -ENOENT
)
11856 return _rename(parent
, name
, newparent
, newname
, perm
);
11859 int Client::_link(Inode
*in
, Inode
*dir
, const char *newname
, const UserPerm
& perm
, InodeRef
*inp
)
11861 ldout(cct
, 3) << "_link(" << in
->ino
<< " to " << dir
->ino
<< " " << newname
11862 << " uid " << perm
.uid() << " gid " << perm
.gid() << ")" << dendl
;
11864 if (strlen(newname
) > NAME_MAX
)
11865 return -ENAMETOOLONG
;
11867 if (in
->snapid
!= CEPH_NOSNAP
|| dir
->snapid
!= CEPH_NOSNAP
) {
11870 if (is_quota_files_exceeded(dir
, perm
)) {
11874 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LINK
);
11876 filepath
path(newname
, dir
->ino
);
11877 req
->set_filepath(path
);
11878 filepath
existing(in
->ino
);
11879 req
->set_filepath2(existing
);
11881 req
->set_inode(dir
);
11882 req
->inode_drop
= CEPH_CAP_FILE_SHARED
;
11883 req
->inode_unless
= CEPH_CAP_FILE_EXCL
;
11886 int res
= get_or_create(dir
, newname
, &de
);
11889 req
->set_dentry(de
);
11891 res
= make_request(req
, perm
, inp
);
11892 ldout(cct
, 10) << "link result is " << res
<< dendl
;
11895 ldout(cct
, 3) << "link(" << existing
<< ", " << path
<< ") = " << res
<< dendl
;
11903 int Client::ll_link(Inode
*in
, Inode
*newparent
, const char *newname
,
11904 const UserPerm
& perm
)
11906 Mutex::Locker
lock(client_lock
);
11908 vinodeno_t vino
= _get_vino(in
);
11909 vinodeno_t vnewparent
= _get_vino(newparent
);
11911 ldout(cct
, 3) << "ll_link " << vino
<< " to " << vnewparent
<< " " <<
11913 tout(cct
) << "ll_link" << std::endl
;
11914 tout(cct
) << vino
.ino
.val
<< std::endl
;
11915 tout(cct
) << vnewparent
<< std::endl
;
11916 tout(cct
) << newname
<< std::endl
;
11921 if (!cct
->_conf
->fuse_default_permissions
) {
11922 if (S_ISDIR(in
->mode
))
11925 r
= may_hardlink(in
, perm
);
11929 r
= may_create(newparent
, perm
);
11934 return _link(in
, newparent
, newname
, perm
, &target
);
11937 int Client::ll_num_osds(void)
11939 Mutex::Locker
lock(client_lock
);
11940 return objecter
->with_osdmap(std::mem_fn(&OSDMap::get_num_osds
));
11943 int Client::ll_osdaddr(int osd
, uint32_t *addr
)
11945 Mutex::Locker
lock(client_lock
);
11947 bool exists
= objecter
->with_osdmap([&](const OSDMap
& o
) {
11948 if (!o
.exists(osd
))
11950 g
= o
.get_addr(osd
);
11955 uint32_t nb_addr
= (g
.in4_addr()).sin_addr
.s_addr
;
11956 *addr
= ntohl(nb_addr
);
11959 uint32_t Client::ll_stripe_unit(Inode
*in
)
11961 Mutex::Locker
lock(client_lock
);
11962 return in
->layout
.stripe_unit
;
11965 uint64_t Client::ll_snap_seq(Inode
*in
)
11967 Mutex::Locker
lock(client_lock
);
11968 return in
->snaprealm
->seq
;
11971 int Client::ll_file_layout(Inode
*in
, file_layout_t
*layout
)
11973 Mutex::Locker
lock(client_lock
);
11974 *layout
= in
->layout
;
11978 int Client::ll_file_layout(Fh
*fh
, file_layout_t
*layout
)
11980 return ll_file_layout(fh
->inode
.get(), layout
);
11983 /* Currently we cannot take advantage of redundancy in reads, since we
11984 would have to go through all possible placement groups (a
11985 potentially quite large number determined by a hash), and use CRUSH
11986 to calculate the appropriate set of OSDs for each placement group,
11987 then index into that. An array with one entry per OSD is much more
11988 tractable and works for demonstration purposes. */
11990 int Client::ll_get_stripe_osd(Inode
*in
, uint64_t blockno
,
11991 file_layout_t
* layout
)
11993 Mutex::Locker
lock(client_lock
);
11994 inodeno_t ino
= ll_get_inodeno(in
);
11995 uint32_t object_size
= layout
->object_size
;
11996 uint32_t su
= layout
->stripe_unit
;
11997 uint32_t stripe_count
= layout
->stripe_count
;
11998 uint64_t stripes_per_object
= object_size
/ su
;
12000 uint64_t stripeno
= blockno
/ stripe_count
; // which horizontal stripe (Y)
12001 uint64_t stripepos
= blockno
% stripe_count
; // which object in the object set (X)
12002 uint64_t objectsetno
= stripeno
/ stripes_per_object
; // which object set
12003 uint64_t objectno
= objectsetno
* stripe_count
+ stripepos
; // object id
12005 object_t oid
= file_object_t(ino
, objectno
);
12006 return objecter
->with_osdmap([&](const OSDMap
& o
) {
12007 ceph_object_layout olayout
=
12008 o
.file_to_object_layout(oid
, *layout
);
12009 pg_t pg
= (pg_t
)olayout
.ol_pgid
;
12012 o
.pg_to_acting_osds(pg
, &osds
, &primary
);
12017 /* Return the offset of the block, internal to the object */
12019 uint64_t Client::ll_get_internal_offset(Inode
*in
, uint64_t blockno
)
12021 Mutex::Locker
lock(client_lock
);
12022 file_layout_t
*layout
=&(in
->layout
);
12023 uint32_t object_size
= layout
->object_size
;
12024 uint32_t su
= layout
->stripe_unit
;
12025 uint64_t stripes_per_object
= object_size
/ su
;
12027 return (blockno
% stripes_per_object
) * su
;
12030 int Client::ll_opendir(Inode
*in
, int flags
, dir_result_t
** dirpp
,
12031 const UserPerm
& perms
)
12033 Mutex::Locker
lock(client_lock
);
12035 vinodeno_t vino
= _get_vino(in
);
12037 ldout(cct
, 3) << "ll_opendir " << vino
<< dendl
;
12038 tout(cct
) << "ll_opendir" << std::endl
;
12039 tout(cct
) << vino
.ino
.val
<< std::endl
;
12041 if (!cct
->_conf
->fuse_default_permissions
) {
12042 int r
= may_open(in
, flags
, perms
);
12047 int r
= _opendir(in
, dirpp
, perms
);
12048 tout(cct
) << (unsigned long)*dirpp
<< std::endl
;
12050 ldout(cct
, 3) << "ll_opendir " << vino
<< " = " << r
<< " (" << *dirpp
<< ")"
12055 int Client::ll_releasedir(dir_result_t
*dirp
)
12057 Mutex::Locker
lock(client_lock
);
12058 ldout(cct
, 3) << "ll_releasedir " << dirp
<< dendl
;
12059 tout(cct
) << "ll_releasedir" << std::endl
;
12060 tout(cct
) << (unsigned long)dirp
<< std::endl
;
12065 int Client::ll_fsyncdir(dir_result_t
*dirp
)
12067 Mutex::Locker
lock(client_lock
);
12068 ldout(cct
, 3) << "ll_fsyncdir " << dirp
<< dendl
;
12069 tout(cct
) << "ll_fsyncdir" << std::endl
;
12070 tout(cct
) << (unsigned long)dirp
<< std::endl
;
12072 return _fsync(dirp
->inode
.get(), false);
12075 int Client::ll_open(Inode
*in
, int flags
, Fh
**fhp
, const UserPerm
& perms
)
12077 assert(!(flags
& O_CREAT
));
12079 Mutex::Locker
lock(client_lock
);
12081 vinodeno_t vino
= _get_vino(in
);
12083 ldout(cct
, 3) << "ll_open " << vino
<< " " << ceph_flags_sys2wire(flags
) << dendl
;
12084 tout(cct
) << "ll_open" << std::endl
;
12085 tout(cct
) << vino
.ino
.val
<< std::endl
;
12086 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
12089 if (!cct
->_conf
->fuse_default_permissions
) {
12090 r
= may_open(in
, flags
, perms
);
12095 r
= _open(in
, flags
, 0, fhp
/* may be NULL */, perms
);
12098 Fh
*fhptr
= fhp
? *fhp
: NULL
;
12100 ll_unclosed_fh_set
.insert(fhptr
);
12102 tout(cct
) << (unsigned long)fhptr
<< std::endl
;
12103 ldout(cct
, 3) << "ll_open " << vino
<< " " << ceph_flags_sys2wire(flags
) <<
12104 " = " << r
<< " (" << fhptr
<< ")" << dendl
;
12108 int Client::_ll_create(Inode
*parent
, const char *name
, mode_t mode
,
12109 int flags
, InodeRef
*in
, int caps
, Fh
**fhp
,
12110 const UserPerm
& perms
)
12114 vinodeno_t vparent
= _get_vino(parent
);
12116 ldout(cct
, 3) << "_ll_create " << vparent
<< " " << name
<< " 0" << oct
<<
12117 mode
<< dec
<< " " << ceph_flags_sys2wire(flags
) << ", uid " << perms
.uid()
12118 << ", gid " << perms
.gid() << dendl
;
12119 tout(cct
) << "ll_create" << std::endl
;
12120 tout(cct
) << vparent
.ino
.val
<< std::endl
;
12121 tout(cct
) << name
<< std::endl
;
12122 tout(cct
) << mode
<< std::endl
;
12123 tout(cct
) << ceph_flags_sys2wire(flags
) << std::endl
;
12125 bool created
= false;
12126 int r
= _lookup(parent
, name
, caps
, in
, perms
);
12128 if (r
== 0 && (flags
& O_CREAT
) && (flags
& O_EXCL
))
12131 if (r
== -ENOENT
&& (flags
& O_CREAT
)) {
12132 if (!cct
->_conf
->fuse_default_permissions
) {
12133 r
= may_create(parent
, perms
);
12137 r
= _create(parent
, name
, flags
, mode
, in
, fhp
, 0, 0, 0, NULL
, &created
,
12148 ldout(cct
, 20) << "_ll_create created = " << created
<< dendl
;
12150 if (!cct
->_conf
->fuse_default_permissions
) {
12151 r
= may_open(in
->get(), flags
, perms
);
12154 int release_r
= _release_fh(*fhp
);
12155 assert(release_r
== 0); // during create, no async data ops should have happened
12160 if (*fhp
== NULL
) {
12161 r
= _open(in
->get(), flags
, mode
, fhp
, perms
);
12169 ll_unclosed_fh_set
.insert(*fhp
);
12174 Inode
*inode
= in
->get();
12175 if (use_faked_inos())
12176 ino
= inode
->faked_ino
;
12181 tout(cct
) << (unsigned long)*fhp
<< std::endl
;
12182 tout(cct
) << ino
<< std::endl
;
12183 ldout(cct
, 3) << "_ll_create " << vparent
<< " " << name
<< " 0" << oct
<<
12184 mode
<< dec
<< " " << ceph_flags_sys2wire(flags
) << " = " << r
<< " (" <<
12185 *fhp
<< " " << hex
<< ino
<< dec
<< ")" << dendl
;
12190 int Client::ll_create(Inode
*parent
, const char *name
, mode_t mode
,
12191 int flags
, struct stat
*attr
, Inode
**outp
, Fh
**fhp
,
12192 const UserPerm
& perms
)
12194 Mutex::Locker
lock(client_lock
);
12197 int r
= _ll_create(parent
, name
, mode
, flags
, &in
, CEPH_STAT_CAP_INODE_ALL
,
12202 // passing an Inode in outp requires an additional ref
12207 fill_stat(in
, attr
);
12215 int Client::ll_createx(Inode
*parent
, const char *name
, mode_t mode
,
12216 int oflags
, Inode
**outp
, Fh
**fhp
,
12217 struct ceph_statx
*stx
, unsigned want
, unsigned lflags
,
12218 const UserPerm
& perms
)
12220 unsigned caps
= statx_to_mask(lflags
, want
);
12221 Mutex::Locker
lock(client_lock
);
12225 int r
= _ll_create(parent
, name
, mode
, oflags
, &in
, caps
, fhp
, perms
);
12229 // passing an Inode in outp requires an additional ref
12234 fill_statx(in
, caps
, stx
);
12243 loff_t
Client::ll_lseek(Fh
*fh
, loff_t offset
, int whence
)
12245 Mutex::Locker
lock(client_lock
);
12246 tout(cct
) << "ll_lseek" << std::endl
;
12247 tout(cct
) << offset
<< std::endl
;
12248 tout(cct
) << whence
<< std::endl
;
12250 return _lseek(fh
, offset
, whence
);
12253 int Client::ll_read(Fh
*fh
, loff_t off
, loff_t len
, bufferlist
*bl
)
12255 Mutex::Locker
lock(client_lock
);
12256 ldout(cct
, 3) << "ll_read " << fh
<< " " << fh
->inode
->ino
<< " " << " " << off
<< "~" << len
<< dendl
;
12257 tout(cct
) << "ll_read" << std::endl
;
12258 tout(cct
) << (unsigned long)fh
<< std::endl
;
12259 tout(cct
) << off
<< std::endl
;
12260 tout(cct
) << len
<< std::endl
;
12262 return _read(fh
, off
, len
, bl
);
12265 int Client::ll_read_block(Inode
*in
, uint64_t blockid
,
12269 file_layout_t
* layout
)
12271 Mutex::Locker
lock(client_lock
);
12272 vinodeno_t vino
= ll_get_vino(in
);
12273 object_t oid
= file_object_t(vino
.ino
, blockid
);
12274 C_SaferCond onfinish
;
12277 objecter
->read(oid
,
12278 object_locator_t(layout
->pool_id
),
12283 CEPH_OSD_FLAG_READ
,
12286 client_lock
.Unlock();
12287 int r
= onfinish
.wait();
12288 client_lock
.Lock();
12291 bl
.copy(0, bl
.length(), buf
);
12298 /* It appears that the OSD doesn't return success unless the entire
12299 buffer was written, return the write length on success. */
12301 int Client::ll_write_block(Inode
*in
, uint64_t blockid
,
12302 char* buf
, uint64_t offset
,
12303 uint64_t length
, file_layout_t
* layout
,
12304 uint64_t snapseq
, uint32_t sync
)
12306 Mutex
flock("Client::ll_write_block flock");
12307 vinodeno_t vino
= ll_get_vino(in
);
12316 if (true || sync
) {
12317 /* if write is stable, the epilogue is waiting on
12319 onsafe
= new C_SafeCond(&flock
, &cond
, &done
, &r
);
12322 /* if write is unstable, we just place a barrier for
12323 * future commits to wait on */
12324 /*onsafe = new C_Block_Sync(this, vino.ino,
12325 barrier_interval(offset, offset + length), &r);
12329 object_t oid
= file_object_t(vino
.ino
, blockid
);
12330 SnapContext fakesnap
;
12332 if (length
> 0) bp
= buffer::copy(buf
, length
);
12336 ldout(cct
, 1) << "ll_block_write for " << vino
.ino
<< "." << blockid
12339 fakesnap
.seq
= snapseq
;
12341 /* lock just in time */
12342 client_lock
.Lock();
12344 objecter
->write(oid
,
12345 object_locator_t(layout
->pool_id
),
12350 ceph::real_clock::now(),
12354 client_lock
.Unlock();
12355 if (!done
/* also !sync */) {
12369 int Client::ll_commit_blocks(Inode
*in
,
12373 Mutex::Locker
lock(client_lock
);
12375 BarrierContext *bctx;
12376 vinodeno_t vino = ll_get_vino(in);
12377 uint64_t ino = vino.ino;
12379 ldout(cct, 1) << "ll_commit_blocks for " << vino.ino << " from "
12380 << offset << " to " << length << dendl;
12386 map<uint64_t, BarrierContext*>::iterator p = barriers.find(ino);
12387 if (p != barriers.end()) {
12388 barrier_interval civ(offset, offset + length);
12389 p->second->commit_barrier(civ);
12395 int Client::ll_write(Fh
*fh
, loff_t off
, loff_t len
, const char *data
)
12397 Mutex::Locker
lock(client_lock
);
12398 ldout(cct
, 3) << "ll_write " << fh
<< " " << fh
->inode
->ino
<< " " << off
<<
12399 "~" << len
<< dendl
;
12400 tout(cct
) << "ll_write" << std::endl
;
12401 tout(cct
) << (unsigned long)fh
<< std::endl
;
12402 tout(cct
) << off
<< std::endl
;
12403 tout(cct
) << len
<< std::endl
;
12405 int r
= _write(fh
, off
, len
, data
, NULL
, 0);
12406 ldout(cct
, 3) << "ll_write " << fh
<< " " << off
<< "~" << len
<< " = " << r
12411 int Client::ll_flush(Fh
*fh
)
12413 Mutex::Locker
lock(client_lock
);
12414 ldout(cct
, 3) << "ll_flush " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
12415 tout(cct
) << "ll_flush" << std::endl
;
12416 tout(cct
) << (unsigned long)fh
<< std::endl
;
12421 int Client::ll_fsync(Fh
*fh
, bool syncdataonly
)
12423 Mutex::Locker
lock(client_lock
);
12424 ldout(cct
, 3) << "ll_fsync " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
12425 tout(cct
) << "ll_fsync" << std::endl
;
12426 tout(cct
) << (unsigned long)fh
<< std::endl
;
12428 int r
= _fsync(fh
, syncdataonly
);
12430 // If we're returning an error, clear it from the FH
12431 fh
->take_async_err();
12436 #ifdef FALLOC_FL_PUNCH_HOLE
12438 int Client::_fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
)
12440 if (offset
< 0 || length
<= 0)
12443 if (mode
& ~(FALLOC_FL_KEEP_SIZE
| FALLOC_FL_PUNCH_HOLE
))
12444 return -EOPNOTSUPP
;
12446 if ((mode
& FALLOC_FL_PUNCH_HOLE
) && !(mode
& FALLOC_FL_KEEP_SIZE
))
12447 return -EOPNOTSUPP
;
12449 Inode
*in
= fh
->inode
.get();
12451 if (objecter
->osdmap_pool_full(in
->layout
.pool_id
) &&
12452 !(mode
& FALLOC_FL_PUNCH_HOLE
)) {
12456 if (in
->snapid
!= CEPH_NOSNAP
)
12459 if ((fh
->mode
& CEPH_FILE_MODE_WR
) == 0)
12462 uint64_t size
= offset
+ length
;
12463 if (!(mode
& (FALLOC_FL_PUNCH_HOLE
| FALLOC_FL_KEEP_SIZE
)) &&
12465 is_quota_bytes_exceeded(in
, size
- in
->size
, fh
->actor_perms
)) {
12470 int r
= get_caps(in
, CEPH_CAP_FILE_WR
, CEPH_CAP_FILE_BUFFER
, &have
, -1);
12474 Mutex
uninline_flock("Client::_fallocate_uninline_data flock");
12475 Cond uninline_cond
;
12476 bool uninline_done
= false;
12477 int uninline_ret
= 0;
12478 Context
*onuninline
= NULL
;
12480 if (mode
& FALLOC_FL_PUNCH_HOLE
) {
12481 if (in
->inline_version
< CEPH_INLINE_NONE
&&
12482 (have
& CEPH_CAP_FILE_BUFFER
)) {
12484 int len
= in
->inline_data
.length();
12485 if (offset
< len
) {
12487 in
->inline_data
.copy(0, offset
, bl
);
12489 if (offset
+ size
> len
)
12490 size
= len
- offset
;
12492 bl
.append_zero(size
);
12493 if (offset
+ size
< len
)
12494 in
->inline_data
.copy(offset
+ size
, len
- offset
- size
, bl
);
12495 in
->inline_data
= bl
;
12496 in
->inline_version
++;
12498 in
->mtime
= ceph_clock_now();
12500 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
12502 if (in
->inline_version
< CEPH_INLINE_NONE
) {
12503 onuninline
= new C_SafeCond(&uninline_flock
,
12507 uninline_data(in
, onuninline
);
12510 Mutex
flock("Client::_punch_hole flock");
12513 Context
*onfinish
= new C_SafeCond(&flock
, &cond
, &done
);
12515 unsafe_sync_write
++;
12516 get_cap_ref(in
, CEPH_CAP_FILE_BUFFER
);
12518 _invalidate_inode_cache(in
, offset
, length
);
12519 filer
->zero(in
->ino
, &in
->layout
,
12520 in
->snaprealm
->get_snap_context(),
12522 ceph::real_clock::now(),
12523 0, true, onfinish
);
12524 in
->mtime
= ceph_clock_now();
12526 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
12528 client_lock
.Unlock();
12533 client_lock
.Lock();
12534 _sync_write_commit(in
);
12536 } else if (!(mode
& FALLOC_FL_KEEP_SIZE
)) {
12537 uint64_t size
= offset
+ length
;
12538 if (size
> in
->size
) {
12540 in
->mtime
= ceph_clock_now();
12542 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
12544 if (is_quota_bytes_approaching(in
, fh
->actor_perms
)) {
12545 check_caps(in
, CHECK_CAPS_NODELAY
);
12546 } else if (is_max_size_approaching(in
)) {
12553 client_lock
.Unlock();
12554 uninline_flock
.Lock();
12555 while (!uninline_done
)
12556 uninline_cond
.Wait(uninline_flock
);
12557 uninline_flock
.Unlock();
12558 client_lock
.Lock();
12560 if (uninline_ret
>= 0 || uninline_ret
== -ECANCELED
) {
12561 in
->inline_data
.clear();
12562 in
->inline_version
= CEPH_INLINE_NONE
;
12563 mark_caps_dirty(in
, CEPH_CAP_FILE_WR
);
12569 put_cap_ref(in
, CEPH_CAP_FILE_WR
);
12574 int Client::_fallocate(Fh
*fh
, int mode
, int64_t offset
, int64_t length
)
12576 return -EOPNOTSUPP
;
12582 int Client::ll_fallocate(Fh
*fh
, int mode
, loff_t offset
, loff_t length
)
12584 Mutex::Locker
lock(client_lock
);
12585 ldout(cct
, 3) << "ll_fallocate " << fh
<< " " << fh
->inode
->ino
<< " " << dendl
;
12586 tout(cct
) << "ll_fallocate " << mode
<< " " << offset
<< " " << length
<< std::endl
;
12587 tout(cct
) << (unsigned long)fh
<< std::endl
;
12589 return _fallocate(fh
, mode
, offset
, length
);
12592 int Client::fallocate(int fd
, int mode
, loff_t offset
, loff_t length
)
12594 Mutex::Locker
lock(client_lock
);
12595 tout(cct
) << "fallocate " << " " << fd
<< mode
<< " " << offset
<< " " << length
<< std::endl
;
12597 Fh
*fh
= get_filehandle(fd
);
12600 #if defined(__linux__) && defined(O_PATH)
12601 if (fh
->flags
& O_PATH
)
12604 return _fallocate(fh
, mode
, offset
, length
);
12607 int Client::ll_release(Fh
*fh
)
12609 Mutex::Locker
lock(client_lock
);
12610 ldout(cct
, 3) << "ll_release (fh)" << fh
<< " " << fh
->inode
->ino
<< " " <<
12612 tout(cct
) << "ll_release (fh)" << std::endl
;
12613 tout(cct
) << (unsigned long)fh
<< std::endl
;
12615 if (ll_unclosed_fh_set
.count(fh
))
12616 ll_unclosed_fh_set
.erase(fh
);
12617 return _release_fh(fh
);
12620 int Client::ll_getlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
)
12622 Mutex::Locker
lock(client_lock
);
12624 ldout(cct
, 3) << "ll_getlk (fh)" << fh
<< " " << fh
->inode
->ino
<< dendl
;
12625 tout(cct
) << "ll_getk (fh)" << (unsigned long)fh
<< std::endl
;
12627 return _getlk(fh
, fl
, owner
);
12630 int Client::ll_setlk(Fh
*fh
, struct flock
*fl
, uint64_t owner
, int sleep
)
12632 Mutex::Locker
lock(client_lock
);
12634 ldout(cct
, 3) << "ll_setlk (fh) " << fh
<< " " << fh
->inode
->ino
<< dendl
;
12635 tout(cct
) << "ll_setk (fh)" << (unsigned long)fh
<< std::endl
;
12637 return _setlk(fh
, fl
, owner
, sleep
);
12640 int Client::ll_flock(Fh
*fh
, int cmd
, uint64_t owner
)
12642 Mutex::Locker
lock(client_lock
);
12644 ldout(cct
, 3) << "ll_flock (fh) " << fh
<< " " << fh
->inode
->ino
<< dendl
;
12645 tout(cct
) << "ll_flock (fh)" << (unsigned long)fh
<< std::endl
;
12647 return _flock(fh
, cmd
, owner
);
12650 class C_Client_RequestInterrupt
: public Context
{
12655 C_Client_RequestInterrupt(Client
*c
, MetaRequest
*r
) : client(c
), req(r
) {
12658 void finish(int r
) override
{
12659 Mutex::Locker
l(client
->client_lock
);
12660 assert(req
->head
.op
== CEPH_MDS_OP_SETFILELOCK
);
12661 client
->_interrupt_filelock(req
);
12662 client
->put_request(req
);
12666 void Client::ll_interrupt(void *d
)
12668 MetaRequest
*req
= static_cast<MetaRequest
*>(d
);
12669 ldout(cct
, 3) << "ll_interrupt tid " << req
->get_tid() << dendl
;
12670 tout(cct
) << "ll_interrupt tid " << req
->get_tid() << std::endl
;
12671 interrupt_finisher
.queue(new C_Client_RequestInterrupt(this, req
));
12674 // =========================================
12677 // expose file layouts
12679 int Client::describe_layout(const char *relpath
, file_layout_t
*lp
,
12680 const UserPerm
& perms
)
12682 Mutex::Locker
lock(client_lock
);
12684 filepath
path(relpath
);
12686 int r
= path_walk(path
, &in
, perms
);
12692 ldout(cct
, 3) << "describe_layout(" << relpath
<< ") = 0" << dendl
;
12696 int Client::fdescribe_layout(int fd
, file_layout_t
*lp
)
12698 Mutex::Locker
lock(client_lock
);
12700 Fh
*f
= get_filehandle(fd
);
12703 Inode
*in
= f
->inode
.get();
12707 ldout(cct
, 3) << "fdescribe_layout(" << fd
<< ") = 0" << dendl
;
12714 int64_t Client::get_pool_id(const char *pool_name
)
12716 Mutex::Locker
lock(client_lock
);
12717 return objecter
->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name
),
12721 string
Client::get_pool_name(int64_t pool
)
12723 Mutex::Locker
lock(client_lock
);
12724 return objecter
->with_osdmap([pool
](const OSDMap
& o
) {
12725 return o
.have_pg_pool(pool
) ? o
.get_pool_name(pool
) : string();
12729 int Client::get_pool_replication(int64_t pool
)
12731 Mutex::Locker
lock(client_lock
);
12732 return objecter
->with_osdmap([pool
](const OSDMap
& o
) {
12733 return o
.have_pg_pool(pool
) ? o
.get_pg_pool(pool
)->get_size() : -ENOENT
;
12737 int Client::get_file_extent_osds(int fd
, loff_t off
, loff_t
*len
, vector
<int>& osds
)
12739 Mutex::Locker
lock(client_lock
);
12741 Fh
*f
= get_filehandle(fd
);
12744 Inode
*in
= f
->inode
.get();
12746 vector
<ObjectExtent
> extents
;
12747 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, off
, 1, in
->truncate_size
, extents
);
12748 assert(extents
.size() == 1);
12750 objecter
->with_osdmap([&](const OSDMap
& o
) {
12751 pg_t pg
= o
.object_locator_to_pg(extents
[0].oid
, extents
[0].oloc
);
12752 o
.pg_to_acting_osds(pg
, osds
);
12759 * Return the remainder of the extent (stripe unit)
12761 * If length = 1 is passed to Striper::file_to_extents we get a single
12762 * extent back, but its length is one so we still need to compute the length
12763 * to the end of the stripe unit.
12765 * If length = su then we may get 1 or 2 objects back in the extents vector
12766 * which would have to be examined. Even then, the offsets are local to the
12767 * object, so matching up to the file offset is extra work.
12769 * It seems simpler to stick with length = 1 and manually compute the
12773 uint64_t su
= in
->layout
.stripe_unit
;
12774 *len
= su
- (off
% su
);
12780 int Client::get_osd_crush_location(int id
, vector
<pair
<string
, string
> >& path
)
12782 Mutex::Locker
lock(client_lock
);
12785 return objecter
->with_osdmap([&](const OSDMap
& o
) {
12786 return o
.crush
->get_full_location_ordered(id
, path
);
12790 int Client::get_file_stripe_address(int fd
, loff_t offset
,
12791 vector
<entity_addr_t
>& address
)
12793 Mutex::Locker
lock(client_lock
);
12795 Fh
*f
= get_filehandle(fd
);
12798 Inode
*in
= f
->inode
.get();
12801 vector
<ObjectExtent
> extents
;
12802 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, offset
, 1,
12803 in
->truncate_size
, extents
);
12804 assert(extents
.size() == 1);
12806 // now we have the object and its 'layout'
12807 return objecter
->with_osdmap([&](const OSDMap
& o
) {
12808 pg_t pg
= o
.object_locator_to_pg(extents
[0].oid
, extents
[0].oloc
);
12810 o
.pg_to_acting_osds(pg
, osds
);
12813 for (unsigned i
= 0; i
< osds
.size(); i
++) {
12814 entity_addr_t addr
= o
.get_addr(osds
[i
]);
12815 address
.push_back(addr
);
12821 int Client::get_osd_addr(int osd
, entity_addr_t
& addr
)
12823 Mutex::Locker
lock(client_lock
);
12824 return objecter
->with_osdmap([&](const OSDMap
& o
) {
12825 if (!o
.exists(osd
))
12828 addr
= o
.get_addr(osd
);
12833 int Client::enumerate_layout(int fd
, vector
<ObjectExtent
>& result
,
12834 loff_t length
, loff_t offset
)
12836 Mutex::Locker
lock(client_lock
);
12838 Fh
*f
= get_filehandle(fd
);
12841 Inode
*in
= f
->inode
.get();
12843 // map to a list of extents
12844 Striper::file_to_extents(cct
, in
->ino
, &in
->layout
, offset
, length
, in
->truncate_size
, result
);
12846 ldout(cct
, 3) << "enumerate_layout(" << fd
<< ", " << length
<< ", " << offset
<< ") = 0" << dendl
;
12852 * find an osd with the same ip. -1 if none.
12854 int Client::get_local_osd()
12856 Mutex::Locker
lock(client_lock
);
12857 objecter
->with_osdmap([this](const OSDMap
& o
) {
12858 if (o
.get_epoch() != local_osd_epoch
) {
12859 local_osd
= o
.find_osd_on_ip(messenger
->get_myaddr());
12860 local_osd_epoch
= o
.get_epoch();
12871 // ===============================
12873 void Client::ms_handle_connect(Connection
*con
)
12875 ldout(cct
, 10) << "ms_handle_connect on " << con
->get_peer_addr() << dendl
;
12878 bool Client::ms_handle_reset(Connection
*con
)
12880 ldout(cct
, 0) << "ms_handle_reset on " << con
->get_peer_addr() << dendl
;
12884 void Client::ms_handle_remote_reset(Connection
*con
)
12886 ldout(cct
, 0) << "ms_handle_remote_reset on " << con
->get_peer_addr() << dendl
;
12887 Mutex::Locker
l(client_lock
);
12888 switch (con
->get_peer_type()) {
12889 case CEPH_ENTITY_TYPE_MDS
:
12891 // kludge to figure out which mds this is; fixme with a Connection* state
12892 mds_rank_t mds
= MDS_RANK_NONE
;
12893 MetaSession
*s
= NULL
;
12894 for (map
<mds_rank_t
,MetaSession
*>::iterator p
= mds_sessions
.begin();
12895 p
!= mds_sessions
.end();
12897 if (mdsmap
->get_addr(p
->first
) == con
->get_peer_addr()) {
12903 switch (s
->state
) {
12904 case MetaSession::STATE_CLOSING
:
12905 ldout(cct
, 1) << "reset from mds we were closing; we'll call that closed" << dendl
;
12906 _closed_mds_session(s
);
12909 case MetaSession::STATE_OPENING
:
12911 ldout(cct
, 1) << "reset from mds we were opening; retrying" << dendl
;
12912 list
<Context
*> waiters
;
12913 waiters
.swap(s
->waiting_for_open
);
12914 _closed_mds_session(s
);
12915 MetaSession
*news
= _get_or_open_mds_session(mds
);
12916 news
->waiting_for_open
.swap(waiters
);
12920 case MetaSession::STATE_OPEN
:
12922 const md_config_t
*conf
= cct
->_conf
;
12923 if (conf
->client_reconnect_stale
) {
12924 ldout(cct
, 1) << "reset from mds we were open; close mds session for reconnect" << dendl
;
12925 _closed_mds_session(s
);
12927 ldout(cct
, 1) << "reset from mds we were open; mark session as stale" << dendl
;
12928 s
->state
= MetaSession::STATE_STALE
;
12933 case MetaSession::STATE_NEW
:
12934 case MetaSession::STATE_CLOSED
:
12944 bool Client::ms_handle_refused(Connection
*con
)
12946 ldout(cct
, 1) << "ms_handle_refused on " << con
->get_peer_addr() << dendl
;
12950 bool Client::ms_get_authorizer(int dest_type
, AuthAuthorizer
**authorizer
, bool force_new
)
12952 if (dest_type
== CEPH_ENTITY_TYPE_MON
)
12954 *authorizer
= monclient
->build_authorizer(dest_type
);
12958 Inode
*Client::get_quota_root(Inode
*in
, const UserPerm
& perms
)
12961 utime_t now
= ceph_clock_now();
12964 if (cur
!= in
&& cur
->quota
.is_enable())
12967 Inode
*parent_in
= NULL
;
12968 if (!cur
->dn_set
.empty()) {
12969 for (auto p
= cur
->dn_set
.begin(); p
!= cur
->dn_set
.end(); ++p
) {
12971 if (dn
->lease_mds
>= 0 &&
12972 dn
->lease_ttl
> now
&&
12973 mds_sessions
.count(dn
->lease_mds
)) {
12974 parent_in
= dn
->dir
->parent_inode
;
12976 Inode
*diri
= dn
->dir
->parent_inode
;
12977 if (diri
->caps_issued_mask(CEPH_CAP_FILE_SHARED
) &&
12978 diri
->shared_gen
== dn
->cap_shared_gen
) {
12979 parent_in
= dn
->dir
->parent_inode
;
12985 } else if (root_parents
.count(cur
)) {
12986 parent_in
= root_parents
[cur
].get();
12994 if (cur
== root_ancestor
)
12997 MetaRequest
*req
= new MetaRequest(CEPH_MDS_OP_LOOKUPNAME
);
12998 filepath
path(cur
->ino
);
12999 req
->set_filepath(path
);
13000 req
->set_inode(cur
);
13002 InodeRef parent_ref
;
13003 int ret
= make_request(req
, perms
, &parent_ref
);
13005 ldout(cct
, 1) << __func__
<< " " << in
->vino()
13006 << " failed to find parent of " << cur
->vino()
13007 << " err " << ret
<< dendl
;
13008 // FIXME: what to do?
13009 cur
= root_ancestor
;
13013 now
= ceph_clock_now();
13015 cur
= parent_ref
.get();
13017 cur
= in
; // start over
13020 ldout(cct
, 10) << __func__
<< " " << in
->vino() << " -> " << cur
->vino() << dendl
;
13025 * Traverse quota ancestors of the Inode, return true
13026 * if any of them passes the passed function
13028 bool Client::check_quota_condition(Inode
*in
, const UserPerm
& perms
,
13029 std::function
<bool (const Inode
&in
)> test
)
13032 assert(in
!= NULL
);
13037 if (in
== root_ancestor
) {
13038 // We're done traversing, drop out
13041 // Continue up the tree
13042 in
= get_quota_root(in
, perms
);
13049 bool Client::is_quota_files_exceeded(Inode
*in
, const UserPerm
& perms
)
13051 return check_quota_condition(in
, perms
,
13052 [](const Inode
&in
) {
13053 return in
.quota
.max_files
&& in
.rstat
.rsize() >= in
.quota
.max_files
;
13057 bool Client::is_quota_bytes_exceeded(Inode
*in
, int64_t new_bytes
,
13058 const UserPerm
& perms
)
13060 return check_quota_condition(in
, perms
,
13061 [&new_bytes
](const Inode
&in
) {
13062 return in
.quota
.max_bytes
&& (in
.rstat
.rbytes
+ new_bytes
)
13063 > in
.quota
.max_bytes
;
13067 bool Client::is_quota_bytes_approaching(Inode
*in
, const UserPerm
& perms
)
13069 return check_quota_condition(in
, perms
,
13070 [](const Inode
&in
) {
13071 if (in
.quota
.max_bytes
) {
13072 if (in
.rstat
.rbytes
>= in
.quota
.max_bytes
) {
13076 assert(in
.size
>= in
.reported_size
);
13077 const uint64_t space
= in
.quota
.max_bytes
- in
.rstat
.rbytes
;
13078 const uint64_t size
= in
.size
- in
.reported_size
;
13079 return (space
>> 4) < size
;
13093 int Client::check_pool_perm(Inode
*in
, int need
)
13095 if (!cct
->_conf
->client_check_pool_perm
)
13098 int64_t pool_id
= in
->layout
.pool_id
;
13099 std::string pool_ns
= in
->layout
.pool_ns
;
13100 std::pair
<int64_t, std::string
> perm_key(pool_id
, pool_ns
);
13103 auto it
= pool_perms
.find(perm_key
);
13104 if (it
== pool_perms
.end())
13106 if (it
->second
== POOL_CHECKING
) {
13107 // avoid concurrent checkings
13108 wait_on_list(waiting_for_pool_perm
);
13111 assert(have
& POOL_CHECKED
);
13117 if (in
->snapid
!= CEPH_NOSNAP
) {
13118 // pool permission check needs to write to the first object. But for snapshot,
13119 // head of the first object may have alread been deleted. To avoid creating
13120 // orphan object, skip the check for now.
13124 pool_perms
[perm_key
] = POOL_CHECKING
;
13127 snprintf(oid_buf
, sizeof(oid_buf
), "%llx.00000000", (unsigned long long)in
->ino
);
13128 object_t oid
= oid_buf
;
13130 SnapContext nullsnapc
;
13132 C_SaferCond rd_cond
;
13133 ObjectOperation rd_op
;
13134 rd_op
.stat(NULL
, (ceph::real_time
*)nullptr, NULL
);
13136 objecter
->mutate(oid
, OSDMap::file_to_object_locator(in
->layout
), rd_op
,
13137 nullsnapc
, ceph::real_clock::now(), 0, &rd_cond
);
13139 C_SaferCond wr_cond
;
13140 ObjectOperation wr_op
;
13141 wr_op
.create(true);
13143 objecter
->mutate(oid
, OSDMap::file_to_object_locator(in
->layout
), wr_op
,
13144 nullsnapc
, ceph::real_clock::now(), 0, &wr_cond
);
13146 client_lock
.Unlock();
13147 int rd_ret
= rd_cond
.wait();
13148 int wr_ret
= wr_cond
.wait();
13149 client_lock
.Lock();
13151 bool errored
= false;
13153 if (rd_ret
== 0 || rd_ret
== -ENOENT
)
13155 else if (rd_ret
!= -EPERM
) {
13156 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13157 << " rd_err = " << rd_ret
<< " wr_err = " << wr_ret
<< dendl
;
13161 if (wr_ret
== 0 || wr_ret
== -EEXIST
)
13162 have
|= POOL_WRITE
;
13163 else if (wr_ret
!= -EPERM
) {
13164 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13165 << " rd_err = " << rd_ret
<< " wr_err = " << wr_ret
<< dendl
;
13170 // Indeterminate: erase CHECKING state so that subsequent calls re-check.
13171 // Raise EIO because actual error code might be misleading for
13172 // userspace filesystem user.
13173 pool_perms
.erase(perm_key
);
13174 signal_cond_list(waiting_for_pool_perm
);
13178 pool_perms
[perm_key
] = have
| POOL_CHECKED
;
13179 signal_cond_list(waiting_for_pool_perm
);
13182 if ((need
& CEPH_CAP_FILE_RD
) && !(have
& POOL_READ
)) {
13183 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13184 << " need " << ccap_string(need
) << ", but no read perm" << dendl
;
13187 if ((need
& CEPH_CAP_FILE_WR
) && !(have
& POOL_WRITE
)) {
13188 ldout(cct
, 10) << "check_pool_perm on pool " << pool_id
<< " ns " << pool_ns
13189 << " need " << ccap_string(need
) << ", but no write perm" << dendl
;
13196 int Client::_posix_acl_permission(Inode
*in
, const UserPerm
& perms
, unsigned want
)
13198 if (acl_type
== POSIX_ACL
) {
13199 if (in
->xattrs
.count(ACL_EA_ACCESS
)) {
13200 const bufferptr
& access_acl
= in
->xattrs
[ACL_EA_ACCESS
];
13202 return posix_acl_permits(access_acl
, in
->uid
, in
->gid
, perms
, want
);
13208 int Client::_posix_acl_chmod(Inode
*in
, mode_t mode
, const UserPerm
& perms
)
13210 if (acl_type
== NO_ACL
)
13213 int r
= _getattr(in
, CEPH_STAT_CAP_XATTR
, perms
, in
->xattr_version
== 0);
13217 if (acl_type
== POSIX_ACL
) {
13218 if (in
->xattrs
.count(ACL_EA_ACCESS
)) {
13219 const bufferptr
& access_acl
= in
->xattrs
[ACL_EA_ACCESS
];
13220 bufferptr
acl(access_acl
.c_str(), access_acl
.length());
13221 r
= posix_acl_access_chmod(acl
, mode
);
13224 r
= _do_setxattr(in
, ACL_EA_ACCESS
, acl
.c_str(), acl
.length(), 0, perms
);
13230 ldout(cct
, 10) << __func__
<< " ino " << in
->ino
<< " result=" << r
<< dendl
;
13234 int Client::_posix_acl_create(Inode
*dir
, mode_t
*mode
, bufferlist
& xattrs_bl
,
13235 const UserPerm
& perms
)
13237 if (acl_type
== NO_ACL
)
13240 if (S_ISLNK(*mode
))
13243 int r
= _getattr(dir
, CEPH_STAT_CAP_XATTR
, perms
, dir
->xattr_version
== 0);
13247 if (acl_type
== POSIX_ACL
) {
13248 if (dir
->xattrs
.count(ACL_EA_DEFAULT
)) {
13249 map
<string
, bufferptr
> xattrs
;
13251 const bufferptr
& default_acl
= dir
->xattrs
[ACL_EA_DEFAULT
];
13252 bufferptr
acl(default_acl
.c_str(), default_acl
.length());
13253 r
= posix_acl_inherit_mode(acl
, mode
);
13258 r
= posix_acl_equiv_mode(acl
.c_str(), acl
.length(), mode
);
13262 xattrs
[ACL_EA_ACCESS
] = acl
;
13265 if (S_ISDIR(*mode
))
13266 xattrs
[ACL_EA_DEFAULT
] = dir
->xattrs
[ACL_EA_DEFAULT
];
13270 ::encode(xattrs
, xattrs_bl
);
13273 *mode
&= ~umask_cb(callback_handle
);
13278 ldout(cct
, 10) << __func__
<< " dir ino " << dir
->ino
<< " result=" << r
<< dendl
;
13282 void Client::set_filer_flags(int flags
)
13284 Mutex::Locker
l(client_lock
);
13285 assert(flags
== 0 ||
13286 flags
== CEPH_OSD_FLAG_LOCALIZE_READS
);
13287 objecter
->add_global_op_flags(flags
);
13290 void Client::clear_filer_flags(int flags
)
13292 Mutex::Locker
l(client_lock
);
13293 assert(flags
== CEPH_OSD_FLAG_LOCALIZE_READS
);
13294 objecter
->clear_global_op_flag(flags
);
13298 * This is included in cap release messages, to cause
13299 * the MDS to wait until this OSD map epoch. It is necessary
13300 * in corner cases where we cancel RADOS ops, so that
13301 * nobody else tries to do IO to the same objects in
13302 * the same epoch as the cancelled ops.
13304 void Client::set_cap_epoch_barrier(epoch_t e
)
13306 ldout(cct
, 5) << __func__
<< " epoch = " << e
<< dendl
;
13307 cap_epoch_barrier
= e
;
13310 const char** Client::get_tracked_conf_keys() const
13312 static const char* keys
[] = {
13313 "client_cache_size",
13314 "client_cache_mid",
13321 void Client::handle_conf_change(const struct md_config_t
*conf
,
13322 const std::set
<std::string
> &changed
)
13324 Mutex::Locker
lock(client_lock
);
13326 if (changed
.count("client_cache_size") ||
13327 changed
.count("client_cache_mid")) {
13328 lru
.lru_set_max(cct
->_conf
->client_cache_size
);
13329 lru
.lru_set_midpoint(cct
->_conf
->client_cache_mid
);
13331 if (changed
.count("client_acl_type")) {
13333 if (cct
->_conf
->client_acl_type
== "posix_acl")
13334 acl_type
= POSIX_ACL
;
13338 void Client::init_groups(UserPerm
*perms
)
13341 int count
= _getgrouplist(&sgids
, perms
->uid(), perms
->gid());
13342 perms
->init_gids(sgids
, count
);
13345 void intrusive_ptr_add_ref(Inode
*in
)
13350 void intrusive_ptr_release(Inode
*in
)
13352 in
->client
->put_inode(in
);
13355 mds_rank_t
Client::_get_random_up_mds() const
13357 assert(client_lock
.is_locked_by_me());
13359 std::set
<mds_rank_t
> up
;
13360 mdsmap
->get_up_mds_set(up
);
13363 return MDS_RANK_NONE
;
13364 std::set
<mds_rank_t
>::const_iterator p
= up
.begin();
13365 for (int n
= rand() % up
.size(); n
; n
--)
13371 StandaloneClient::StandaloneClient(Messenger
*m
, MonClient
*mc
)
13372 : Client(m
, mc
, new Objecter(m
->cct
, m
, mc
, NULL
, 0, 0))
13374 monclient
->set_messenger(m
);
13375 objecter
->set_client_incarnation(0);
13378 StandaloneClient::~StandaloneClient()
13381 objecter
= nullptr;
13384 int StandaloneClient::init()
13387 objectcacher
->start();
13390 client_lock
.Lock();
13391 assert(!initialized
);
13393 messenger
->add_dispatcher_tail(objecter
);
13394 messenger
->add_dispatcher_tail(this);
13396 monclient
->set_want_keys(CEPH_ENTITY_TYPE_MDS
| CEPH_ENTITY_TYPE_OSD
);
13397 int r
= monclient
->init();
13399 // need to do cleanup because we're in an intermediate init state
13401 client_lock
.Unlock();
13402 objecter
->shutdown();
13403 objectcacher
->stop();
13404 monclient
->shutdown();
13409 client_lock
.Unlock();
13415 void StandaloneClient::shutdown()
13417 Client::shutdown();
13418 objecter
->shutdown();
13419 monclient
->shutdown();